diff --git a/LICENSE.txt b/LICENSE.txt index f8de86a1053..393ed0e7f90 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -553,7 +553,7 @@ For: hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js -hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.9.4/ +hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.7/ -------------------------------------------------------------------------------- Copyright (C) 2008-2016, SpryMedia Ltd. diff --git a/README.txt b/README.txt index 148cd31c86b..559099b73a1 100644 --- a/README.txt +++ b/README.txt @@ -1,6 +1,6 @@ For the latest information about Hadoop, please visit our website at: - http://hadoop.apache.org/core/ + http://hadoop.apache.org/ and our wiki, at: diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release index d3d96d902f3..6ec3503fbfb 100755 --- a/dev-support/bin/create-release +++ b/dev-support/bin/create-release @@ -617,8 +617,8 @@ function makearelease #shellcheck disable=SC2038 find . -name rat.txt | xargs -I% cat % > "${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-rat.txt" - # Stage CHANGES and RELEASENOTES files - for i in CHANGES RELEASENOTES; do + # Stage CHANGELOG and RELEASENOTES files + for i in CHANGELOG RELEASENOTES; do run cp -p \ "${BASEDIR}/hadoop-common-project/hadoop-common/src/site/markdown/release/${HADOOP_VERSION}"/${i}*.md \ "${ARTIFACTS_DIR}/${i}.md" diff --git a/dev-support/bin/ozone-dist-layout-stitching b/dev-support/bin/ozone-dist-layout-stitching index c30a37d2e5f..00854b45f2d 100755 --- a/dev-support/bin/ozone-dist-layout-stitching +++ b/dev-support/bin/ozone-dist-layout-stitching @@ -117,18 +117,15 @@ ROOT=$(cd "${BASEDIR}"/../..;pwd) echo echo "Current directory $(pwd)" echo -run rm -rf "ozone" -run mkdir "ozone" -run cd "ozone" +run rm -rf "ozone-${HDDS_VERSION}" +run mkdir "ozone-${HDDS_VERSION}" +run cd "ozone-${HDDS_VERSION}" run cp -p "${ROOT}/LICENSE.txt" . run cp -p "${ROOT}/NOTICE.txt" . -run cp -p "${ROOT}/README.txt" . # Copy hadoop-common first so that it have always have all dependencies. # Remaining projects will copy only libraries which are not present already in 'share' directory. run copy "${ROOT}/hadoop-common-project/hadoop-common/target/hadoop-common-${VERSION}" . -run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${VERSION}" . -run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-${VERSION}" . # HDDS @@ -145,15 +142,40 @@ run copy "${ROOT}/hadoop-ozone/ozone-manager/target/hadoop-ozone-ozone-manager-$ run copy "${ROOT}/hadoop-ozone/objectstore-service/target/hadoop-ozone-objectstore-service-${HDDS_VERSION}" . run copy "${ROOT}/hadoop-ozone/client/target/hadoop-ozone-client-${HDDS_VERSION}" . run copy "${ROOT}/hadoop-ozone/tools/target/hadoop-ozone-tools-${HDDS_VERSION}" . 
+ +#shaded ozonefs mkdir -p "./share/hadoop/ozonefs" -cp "${ROOT}/hadoop-ozone/ozonefs/target/hadoop-ozone-filesystem-${HDDS_VERSION}.jar" "./share/hadoop/ozonefs/hadoop-ozone-filesystem.jar" +cp "${ROOT}/hadoop-ozone/ozonefs/target/hadoop-ozone-filesystem-${HDDS_VERSION}.jar" "./share/hadoop/ozonefs/hadoop-ozone-filesystem-${HDDS_VERSION}.jar" + +#shaded datanode service +mkdir -p "./share/hadoop/ozoneplugin" +cp "${ROOT}/hadoop-ozone/objectstore-service/target/hadoop-ozone-objectstore-service-${HDDS_VERSION}-plugin.jar" "./share/hadoop/ozoneplugin/hadoop-ozone-datanode-plugin-${HDDS_VERSION}.jar" + + # Optional documentation, could be missing cp -r "${ROOT}/hadoop-ozone/docs/target/classes/webapps/docs" ./share/hadoop/ozone/webapps/ozoneManager/ cp -r "${ROOT}/hadoop-ozone/docs/target/classes/webapps/docs" ./share/hadoop/hdds/webapps/scm/ +cp -r "${ROOT}/hadoop-ozone/docs/target/classes/webapps/docs" ./ + + +rm sbin/*all.sh +rm sbin/*all.cmd + +#remove test and java sources +find . -name "*tests.jar" | xargs rm +find . -name "*sources.jar" | xargs rm +find . -name jdiff -type d | xargs rm -rf + +#add ozone specific readme +run cp "${ROOT}/hadoop-dist/src/main/ozone/README.txt" README.txt +#Copy docker compose files and robot tests +run cp -p -r "${ROOT}/hadoop-dist/src/main/compose" . +run cp -p -r "${ROOT}/hadoop-dist/src/main/smoketest" . mkdir -p ./share/hadoop/mapreduce mkdir -p ./share/hadoop/yarn +mkdir -p ./share/hadoop/hdfs echo -echo "Hadoop Ozone dist layout available at: ${BASEDIR}/ozone" +echo "Hadoop Ozone dist layout available at: ${BASEDIR}/ozone-${HDDS_VERSION}" echo diff --git a/dev-support/bin/ozone-dist-tar-stitching b/dev-support/bin/ozone-dist-tar-stitching index d1116e4fe18..93d0525e7ec 100755 --- a/dev-support/bin/ozone-dist-tar-stitching +++ b/dev-support/bin/ozone-dist-tar-stitching @@ -36,13 +36,8 @@ function run() fi } -#To make the final dist directory easily mountable from docker we don't use -#version name in the directory name. -#To include the version name in the root directory of the tar file -# we create a symbolic link and dereference it during the tar creation -ln -s -f ozone ozone-${VERSION} -run tar -c --dereference -f "ozone-${VERSION}.tar" "ozone-${VERSION}" +run tar -c -f "ozone-${VERSION}.tar" "ozone-${VERSION}" run gzip -f "ozone-${VERSION}.tar" echo echo "Ozone dist tar available at: ${BASEDIR}/ozone-${VERSION}.tar.gz" -echo \ No newline at end of file +echo diff --git a/dev-support/bin/yetus-wrapper b/dev-support/bin/yetus-wrapper index 9f6bb331c34..ae05d426b2c 100755 --- a/dev-support/bin/yetus-wrapper +++ b/dev-support/bin/yetus-wrapper @@ -73,7 +73,7 @@ WANTED="$1" shift ARGV=("$@") -HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.4.0} +HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.8.0} BIN=$(yetus_abs "${BASH_SOURCE-$0}") BINDIR=$(dirname "${BIN}") diff --git a/dev-support/byteman/README.md b/dev-support/byteman/README.md new file mode 100644 index 00000000000..9a17fc55be0 --- /dev/null +++ b/dev-support/byteman/README.md @@ -0,0 +1,31 @@ + + +This folder contains example byteman scripts (http://byteman.jboss.org/) to help +Hadoop debuging. 
+ +As the startup script of the hadoop-runner docker image supports byteman +instrumentation it's enough to set the URL of a script to a specific environment +variable to activate it with the docker runs: + + +``` +BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm +``` + +For more info see HADOOP-15656 and HDDS-342 + diff --git a/dev-support/byteman/hadooprpc.btm b/dev-support/byteman/hadooprpc.btm new file mode 100644 index 00000000000..13894fe4ab0 --- /dev/null +++ b/dev-support/byteman/hadooprpc.btm @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This script instruments hadoop rpc layer to print out all the request/response messages to the standard output. +# + +RULE Hadoop RPC request +INTERFACE ^com.google.protobuf.BlockingService +METHOD callBlockingMethod +IF true +DO traceln("--> RPC message request: " + $3.getClass().getSimpleName() + " from " + linked(Thread.currentThread(), "source")); + traceln($3.toString()) +ENDRULE + + +RULE Hadoop RPC response +INTERFACE ^com.google.protobuf.BlockingService +METHOD callBlockingMethod +AT EXIT +IF true +DO traceln("--> RPC message response: " + $3.getClass().getSimpleName() + " to " + unlink(Thread.currentThread(), "source")); + traceln($!.toString()) +ENDRULE + + +RULE Hadoop RPC source IP +CLASS org.apache.hadoop.ipc.Server$RpcCall +METHOD run +IF true +DO link(Thread.currentThread(), "source", $0.connection.toString()) +ENDRULE diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile index 369c6060233..7679500f094 100644 --- a/dev-support/docker/Dockerfile +++ b/dev-support/docker/Dockerfile @@ -22,6 +22,8 @@ FROM ubuntu:xenial WORKDIR /root +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + ##### # Disable suggests/recommends ##### @@ -38,140 +40,159 @@ ENV DEBCONF_TERSE true # WARNING: DO NOT PUT JAVA APPS HERE! Otherwise they will install default # Ubuntu Java. See Java section below! 
###### -RUN apt-get -q update && apt-get -q install -y \ - apt-utils \ - build-essential \ - bzip2 \ - clang \ - curl \ - doxygen \ - fuse \ - g++ \ - gcc \ - git \ - gnupg-agent \ - libbz2-dev \ - libcurl4-openssl-dev \ - libfuse-dev \ - libprotobuf-dev \ - libprotoc-dev \ - libsasl2-dev \ - libsnappy-dev \ - libssl-dev \ - libtool \ - locales \ - make \ - pinentry-curses \ - pkg-config \ - python \ - python2.7 \ - python-pip \ - python-pkg-resources \ - python-setuptools \ - python-wheel \ - rsync \ - software-properties-common \ - snappy \ - sudo \ - valgrind \ - zlib1g-dev +# hadolint ignore=DL3008 +RUN apt-get -q update \ + && apt-get -q install -y --no-install-recommends \ + apt-utils \ + build-essential \ + bzip2 \ + clang \ + curl \ + doxygen \ + fuse \ + g++ \ + gcc \ + git \ + gnupg-agent \ + libbz2-dev \ + libcurl4-openssl-dev \ + libfuse-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libsasl2-dev \ + libsnappy-dev \ + libssl-dev \ + libtool \ + locales \ + make \ + pinentry-curses \ + pkg-config \ + python \ + python2.7 \ + python-pip \ + python-pkg-resources \ + python-setuptools \ + python-wheel \ + rsync \ + software-properties-common \ + snappy \ + sudo \ + valgrind \ + zlib1g-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* -####### -# OpenJDK 8 -####### -RUN apt-get -q install -y openjdk-8-jdk ####### -# OpenJDK 9 -# w/workaround for -# https://bugs.launchpad.net/ubuntu/+source/openjdk-9/+bug/1593191 +# OpenJDK 8 ####### -RUN apt-get -o Dpkg::Options::="--force-overwrite" \ - -q install -y \ - openjdk-9-jdk-headless +# hadolint ignore=DL3008 +RUN apt-get -q update \ + && apt-get -q install -y --no-install-recommends openjdk-8-jdk libbcprov-java \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* -####### -# Set default Java -####### -# -# By default, OpenJDK sets the default Java to the highest version. -# We want the opposite, soooooo.... 
-# -RUN update-java-alternatives --set java-1.8.0-openjdk-amd64 -RUN update-alternatives --get-selections | grep -i jdk | \ - while read line; do \ - alternative=$(echo $line | awk '{print $1}'); \ - path=$(echo $line | awk '{print $3}'); \ - newpath=$(echo $path | sed -e 's/java-9/java-8/'); \ - update-alternatives --set $alternative $newpath; \ - done ###### # Install cmake 3.1.0 (3.5.1 ships with Xenial) ###### -RUN mkdir -p /opt/cmake && \ - curl -L -s -S \ +RUN mkdir -p /opt/cmake \ + && curl -L -s -S \ https://cmake.org/files/v3.1/cmake-3.1.0-Linux-x86_64.tar.gz \ - -o /opt/cmake.tar.gz && \ - tar xzf /opt/cmake.tar.gz --strip-components 1 -C /opt/cmake + -o /opt/cmake.tar.gz \ + && tar xzf /opt/cmake.tar.gz --strip-components 1 -C /opt/cmake ENV CMAKE_HOME /opt/cmake ENV PATH "${PATH}:/opt/cmake/bin" ###### # Install Google Protobuf 2.5.0 (2.6.0 ships with Xenial) ###### -RUN mkdir -p /opt/protobuf-src && \ - curl -L -s -S \ +# hadolint ignore=DL3003 +RUN mkdir -p /opt/protobuf-src \ + && curl -L -s -S \ https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz \ - -o /opt/protobuf.tar.gz && \ - tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src -RUN cd /opt/protobuf-src && ./configure --prefix=/opt/protobuf && make install + -o /opt/protobuf.tar.gz \ + && tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src \ + && cd /opt/protobuf-src \ + && ./configure --prefix=/opt/protobuf \ + && make install \ + && cd /root \ + && rm -rf /opt/protobuf-src ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" ###### # Install Apache Maven 3.3.9 (3.3.9 ships with Xenial) ###### -RUN apt-get -q update && apt-get -q install -y maven +# hadolint ignore=DL3008 +RUN apt-get -q update \ + && apt-get -q install -y --no-install-recommends maven \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* ENV MAVEN_HOME /usr ###### # Install findbugs 3.0.1 (3.0.1 ships with Xenial) # Ant is needed for findbugs ###### -RUN apt-get -q update && apt-get -q install -y findbugs ant +# hadolint ignore=DL3008 +RUN apt-get -q update \ + && apt-get -q install -y --no-install-recommends findbugs ant \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* ENV FINDBUGS_HOME /usr #### # Install shellcheck (0.4.6, the latest as of 2017-09-26) #### -RUN add-apt-repository -y ppa:jonathonf/ghc-8.0.2 -RUN apt-get -q update && apt-get -q install -y shellcheck +# hadolint ignore=DL3008 +RUN add-apt-repository -y ppa:jonathonf/ghc-8.0.2 \ + && apt-get -q update \ + && apt-get -q install -y --no-install-recommends shellcheck \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* #### # Install bats (0.4.0, the latest as of 2017-09-26, ships with Xenial) #### -RUN apt-get -q update && apt-get -q install -y bats +# hadolint ignore=DL3008 +RUN apt-get -q update \ + && apt-get -q install -y --no-install-recommends bats \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* #### -# Install pylint (always want latest) +# Install pylint at fixed version (2.0.0 removed python2 support) +# https://github.com/PyCQA/pylint/issues/2294 #### -RUN pip2 install pylint +RUN pip2 install pylint==1.9.2 #### # Install dateutil.parser #### -RUN pip2 install python-dateutil +RUN pip2 install python-dateutil==2.7.3 ### # Install node.js for web UI framework (4.2.6 ships with Xenial) ### -RUN apt-get -y install nodejs && \ - ln -s /usr/bin/nodejs /usr/bin/node && \ - apt-get -y install npm && \ - npm install npm@latest -g && \ - npm install -g bower && \ - npm install -g 
ember-cli +# hadolint ignore=DL3008, DL3016 +RUN apt-get -q update \ + && apt-get install -y --no-install-recommends nodejs npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && ln -s /usr/bin/nodejs /usr/bin/node \ + && npm install npm@latest -g \ + && npm install -g jshint + +### +# Install hadolint +#### +RUN curl -L -s -S \ + https://github.com/hadolint/hadolint/releases/download/v1.11.1/hadolint-Linux-x86_64 \ + -o /bin/hadolint \ + && chmod a+rx /bin/hadolint \ + && shasum -a 512 /bin/hadolint | \ + awk '$1!="734e37c1f6619cbbd86b9b249e69c9af8ee1ea87a2b1ff71dccda412e9dac35e63425225a95d71572091a3f0a11e9a04c2fc25d9e91b840530c26af32b9891ca" {exit(1)}' ### # Avoid out of memory errors in builds @@ -188,21 +209,27 @@ ENV MAVEN_OPTS -Xms256m -Xmx1536m #### # Install svn & Forrest (for Apache Hadoop website) ### -RUN apt-get -q update && apt-get -q install -y subversion - -RUN mkdir -p /opt/apache-forrest && \ - curl -L -s -S \ +# hadolint ignore=DL3008 +RUN apt-get -q update \ + && apt-get -q install -y --no-install-recommends subversion \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /opt/apache-forrest \ + && curl -L -s -S \ https://archive.apache.org/dist/forrest/0.8/apache-forrest-0.8.tar.gz \ - -o /opt/forrest.tar.gz && \ - tar xzf /opt/forrest.tar.gz --strip-components 1 -C /opt/apache-forrest + -o /opt/forrest.tar.gz \ + && tar xzf /opt/forrest.tar.gz --strip-components 1 -C /opt/apache-forrest RUN echo 'forrest.home=/opt/apache-forrest' > build.properties ENV FORREST_HOME=/opt/apache-forrest # Hugo static website generator (for new hadoop site and Ozone docs) -RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.30.2/hugo_0.30.2_Linux-64bit.deb && dpkg --install hugo.deb && rm hugo.deb +RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.30.2/hugo_0.30.2_Linux-64bit.deb \ + && dpkg --install hugo.deb \ + && rm hugo.deb # Add a welcome message and environment checks. 
-ADD hadoop_env_checks.sh /root/hadoop_env_checks.sh +COPY hadoop_env_checks.sh /root/hadoop_env_checks.sh RUN chmod 755 /root/hadoop_env_checks.sh -RUN echo '~/hadoop_env_checks.sh' >> /root/.bashrc - +# hadolint ignore=SC2016 +RUN echo '${HOME}/hadoop_env_checks.sh' >> /root/.bashrc diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src-with-hdsl.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src-with-hdds.xml similarity index 100% rename from hadoop-assemblies/src/main/resources/assemblies/hadoop-src-with-hdsl.xml rename to hadoop-assemblies/src/main/resources/assemblies/hadoop-src-with-hdds.xml diff --git a/hadoop-build-tools/src/main/resources/checkstyle/suppressions.xml b/hadoop-build-tools/src/main/resources/checkstyle/suppressions.xml index 084384d689c..ccc89c8bf0f 100644 --- a/hadoop-build-tools/src/main/resources/checkstyle/suppressions.xml +++ b/hadoop-build-tools/src/main/resources/checkstyle/suppressions.xml @@ -18,5 +18,4 @@ - diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index 6fa24b49e54..ea8d6800d0d 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -166,6 +166,14 @@ commons-io commons-io + + org.apache.commons + commons-lang3 + + + org.apache.commons + commons-text + commons-logging commons-logging @@ -491,6 +499,10 @@ commons-codec commons-codec + + org.apache.commons + commons-lang3 + commons-logging commons-logging diff --git a/hadoop-common-project/hadoop-annotations/pom.xml b/hadoop-common-project/hadoop-annotations/pom.xml index 64dde7187b4..fd329e29482 100644 --- a/hadoop-common-project/hadoop-annotations/pom.xml +++ b/hadoop-common-project/hadoop-annotations/pom.xml @@ -53,6 +53,30 @@ + + jdk10 + + [10,) + + + + + maven-compiler-plugin + + + org/apache/hadoop/classification/tools/ + + + + + maven-javadoc-plugin + + org.apache.hadoop.classification.tools + + + + + diff --git a/hadoop-common-project/hadoop-common/HadoopJNI.cmake b/hadoop-common-project/hadoop-common/HadoopJNI.cmake index 78d7ffda6fb..bf0d73eb56a 100644 --- a/hadoop-common-project/hadoop-common/HadoopJNI.cmake +++ b/hadoop-common-project/hadoop-common/HadoopJNI.cmake @@ -93,5 +93,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") # Otherwise, use the standard FindJNI module to locate the JNI components. 
# else() + find_package(Java REQUIRED) + include(UseJava) find_package(JNI REQUIRED) endif() diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 67a5a54839f..695dcdee58a 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -171,6 +171,11 @@ commons-lang3 compile + + org.apache.commons + commons-text + compile + org.slf4j slf4j-api @@ -1074,6 +1079,8 @@ --projecttitle Apache Hadoop --usetoday + --fileversions + --dirversions --version ${project.version} diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index cbedd972188..71ba7fffc10 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -600,6 +600,7 @@ function hadoop_bootstrap HDDS_LIB_JARS_DIR=${HDDS_LIB_JARS_DIR:-"share/hadoop/hdds/lib"} OZONE_DIR=${OZONE_DIR:-"share/hadoop/ozone"} OZONE_LIB_JARS_DIR=${OZONE_LIB_JARS_DIR:-"share/hadoop/ozone/lib"} + OZONEFS_DIR=${OZONEFS_DIR:-"share/hadoop/ozonefs"} HADOOP_TOOLS_HOME=${HADOOP_TOOLS_HOME:-${HADOOP_HOME}} HADOOP_TOOLS_DIR=${HADOOP_TOOLS_DIR:-"share/hadoop/tools"} diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index 3826f67a5ea..6db085a3261 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -88,7 +88,7 @@ # Extra Java runtime options for all Hadoop commands. We don't support # IPv6 yet/still, so by default the preference is set to IPv4. # export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true" -# For Kerberos debugging, an extended option set logs more invormation +# For Kerberos debugging, an extended option set logs more information # export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -Dsun.security.krb5.debug=true -Dsun.security.spnego.debug" # Some parts of the shell code may do special things dependent upon @@ -120,9 +120,9 @@ esac # # By default, Apache Hadoop overrides Java's CLASSPATH # environment variable. It is configured such -# that it sarts out blank with new entries added after passing +# that it starts out blank with new entries added after passing # a series of checks (file/dir exists, not already listed aka -# de-deduplication). During de-depulication, wildcards and/or +# de-deduplication). During de-deduplication, wildcards and/or # directories are *NOT* expanded to keep it simple. Therefore, # if the computed classpath has two specific mentions of # awesome-methods-1.0.jar, only the first one added will be seen. diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-metrics2.properties b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-metrics2.properties index 16fdcf05629..f061313cb4d 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-metrics2.properties +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-metrics2.properties @@ -47,7 +47,7 @@ #*.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40 # Tag values to use for the ganglia prefix. If not defined no tags are used. -# If '*' all tags are used. If specifiying multiple tags separate them with +# If '*' all tags are used. If specifying multiple tags separate them with # commas. 
Note that the last segment of the property name is the context name. # # A typical use of tags is separating the metrics by the HDFS rpc port diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-policy.xml b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-policy.xml index cf3dd1f4ec3..bd7c11124f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-policy.xml +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-policy.xml @@ -242,4 +242,24 @@ group list is separated by a blank. For e.g. "alice,bob users,wheel". A special value of "*" means all users are allowed. + + + security.applicationmaster-nodemanager.applicationmaster.protocol.acl + * + ACL for ApplicationMasterProtocol, used by the Nodemanager + and ApplicationMasters to communicate. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + + + + security.distributedscheduling.protocol.acl + * + ACL for DistributedSchedulingAMProtocol, used by the Nodemanager + and Resourcemanager to communicate. + The ACL is a comma-separated list of user and group names. The user and + group list is separated by a blank. For e.g. "alice,bob users,wheel". + A special value of "*" means all users are allowed. + diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties index 5783013040d..aeae2b81d1c 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties @@ -304,29 +304,6 @@ log4j.appender.FSLOGGER.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n log4j.appender.FSLOGGER.MaxFileSize=${hadoop.log.maxfilesize} log4j.appender.FSLOGGER.MaxBackupIndex=${hadoop.log.maxbackupindex} -# -# Add a logger for ozone that is separate from the Datanode. -# -log4j.logger.org.apache.hadoop.ozone=DEBUG,OZONE,FILE - -# Do not log into datanode logs. Remove this line to have single log. -log4j.additivity.org.apache.hadoop.ozone=false - -# For development purposes, log both to console and log file. 
-log4j.appender.OZONE=org.apache.log4j.ConsoleAppender -log4j.appender.OZONE.Threshold=info -log4j.appender.OZONE.layout=org.apache.log4j.PatternLayout -log4j.appender.OZONE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ - %X{component} %X{function} %X{resource} %X{user} %X{request} - %m%n - -# Real ozone logger that writes to ozone.log -log4j.appender.FILE=org.apache.log4j.DailyRollingFileAppender -log4j.appender.FILE.File=${hadoop.log.dir}/ozone.log -log4j.appender.FILE.Threshold=debug -log4j.appender.FILE.layout=org.apache.log4j.PatternLayout -log4j.appender.FILE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ -(%F:%L) %X{function} %X{resource} %X{user} %X{request} - \ -%m%n # # Fair scheduler state dump # diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationServlet.java index c5bdf4e021b..ef4eac69145 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationServlet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationServlet.java @@ -18,7 +18,7 @@ package org.apache.hadoop.conf; -import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import java.util.Collection; import java.util.Enumeration; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java index be85497209b..b55f84226d3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java @@ -27,22 +27,31 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Seekable; +import org.apache.hadoop.util.CleanerUtil; import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @InterfaceAudience.Private public class CryptoStreamUtils { private static final int MIN_BUFFER_SIZE = 512; - + private static final Logger LOG = + LoggerFactory.getLogger(CryptoStreamUtils.class); + /** Forcibly free the direct buffer. 
*/ public static void freeDB(ByteBuffer buffer) { - if (buffer instanceof sun.nio.ch.DirectBuffer) { - final sun.misc.Cleaner bufferCleaner = - ((sun.nio.ch.DirectBuffer) buffer).cleaner(); - bufferCleaner.clean(); + if (CleanerUtil.UNMAP_SUPPORTED) { + try { + CleanerUtil.getCleaner().freeBuffer(buffer); + } catch (IOException e) { + LOG.info("Failed to free the buffer", e); + } + } else { + LOG.trace(CleanerUtil.UNMAP_NOT_SUPPORTED_REASON); } } - + /** Read crypto buffer size */ public static int getBufferSize(Configuration conf) { return conf.getInt(HADOOP_SECURITY_CRYPTO_BUFFER_SIZE_KEY, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java index fa84c47d26c..3f8b337f357 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java @@ -265,8 +265,7 @@ public void execute() throws IOException { } } } catch (IOException e) { - getOut().println("Cannot list keys for KeyProvider: " + provider - + ": " + e.toString()); + getOut().println("Cannot list keys for KeyProvider: " + provider); throw e; } } @@ -318,12 +317,12 @@ public void execute() throws NoSuchAlgorithmException, IOException { printProviderWritten(); } catch (NoSuchAlgorithmException e) { getOut().println("Cannot roll key: " + keyName + - " within KeyProvider: " + provider + ". " + e.toString()); + " within KeyProvider: " + provider + "."); throw e; } } catch (IOException e1) { getOut().println("Cannot roll key: " + keyName + " within KeyProvider: " - + provider + ". " + e1.toString()); + + provider + "."); throw e1; } } @@ -374,8 +373,8 @@ public boolean validate() { } return cont; } catch (IOException e) { - getOut().println(keyName + " will not be deleted."); - e.printStackTrace(getErr()); + getOut().println(keyName + " will not be deleted. " + + prettifyException(e)); } } return true; @@ -392,7 +391,7 @@ public void execute() throws IOException { getOut().println(keyName + " has been successfully deleted."); printProviderWritten(); } catch (IOException e) { - getOut().println(keyName + " has not been deleted. " + e.toString()); + getOut().println(keyName + " has not been deleted."); throw e; } } @@ -463,13 +462,13 @@ public void execute() throws IOException, NoSuchAlgorithmException { "with options " + options.toString() + "."); printProviderWritten(); } catch (InvalidParameterException e) { - getOut().println(keyName + " has not been created. " + e.toString()); + getOut().println(keyName + " has not been created."); throw e; } catch (IOException e) { - getOut().println(keyName + " has not been created. " + e.toString()); + getOut().println(keyName + " has not been created."); throw e; } catch (NoSuchAlgorithmException e) { - getOut().println(keyName + " has not been created. " + e.toString()); + getOut().println(keyName + " has not been created."); throw e; } } @@ -520,7 +519,7 @@ public void execute() throws NoSuchAlgorithmException, IOException { printProviderWritten(); } catch (IOException e) { getOut().println("Cannot invalidate cache for key: " + keyName + - " within KeyProvider: " + provider + ". 
" + e.toString()); + " within KeyProvider: " + provider + "."); throw e; } } @@ -531,6 +530,17 @@ public String getUsage() { } } + @Override + protected void printException(Exception e){ + getErr().println("Executing command failed with " + + "the following exception: " + prettifyException(e)); + } + + private String prettifyException(Exception e) { + return e.getClass().getSimpleName() + ": " + + e.getLocalizedMessage().split("\n")[0]; + } + /** * main() entry point for the KeyShell. While strictly speaking the * return is void, it will System.exit() with a return code: 0 is for diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index 7b4607507b9..8125510a72e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -56,6 +56,7 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.lang.reflect.UndeclaredThrowableException; +import java.net.ConnectException; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.MalformedURLException; @@ -478,10 +479,14 @@ public HttpURLConnection run() throws Exception { return authUrl.openConnection(url, authToken, doAsUser); } }); + } catch (ConnectException ex) { + String msg = "Failed to connect to: " + url.toString(); + LOG.warn(msg); + throw new IOException(msg, ex); + } catch (SocketTimeoutException ex) { + LOG.warn("Failed to connect to {}:{}", url.getHost(), url.getPort()); + throw ex; } catch (IOException ex) { - if (ex instanceof SocketTimeoutException) { - LOG.warn("Failed to connect to {}:{}", url.getHost(), url.getPort()); - } throw ex; } catch (UndeclaredThrowableException ex) { throw new IOException(ex.getUndeclaredThrowable()); @@ -1036,13 +1041,13 @@ private String getDoAsUser() throws IOException { public Token run() throws Exception { // Not using the cached token here.. Creating a new token here // everytime. 
- LOG.debug("Getting new token from {}, renewer:{}", url, renewer); + LOG.info("Getting new token from {}, renewer:{}", url, renewer); return authUrl.getDelegationToken(url, new DelegationTokenAuthenticatedURL.Token(), renewer, doAsUser); } }); if (token != null) { - LOG.debug("New token received: ({})", token); + LOG.info("New token received: ({})", token); credentials.addToken(token.getService(), token); tokens = new Token[] { token }; } else { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java index 42cd47dd7a5..e68e8448aa3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.InterruptedIOException; +import java.net.ConnectException; import java.security.GeneralSecurityException; import java.security.NoSuchAlgorithmException; import java.util.Arrays; @@ -27,6 +28,8 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +import javax.net.ssl.SSLHandshakeException; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension; @@ -110,12 +113,11 @@ public LoadBalancingKMSClientProvider(KMSClientProvider[] providers, return providers; } - private T doOp(ProviderCallable op, int currPos) - throws IOException { + private T doOp(ProviderCallable op, int currPos, + boolean isIdempotent) throws IOException { if (providers.length == 0) { throw new IOException("No providers configured !"); } - IOException ex = null; int numFailovers = 0; for (int i = 0;; i++, numFailovers++) { KMSClientProvider provider = providers[(currPos + i) % providers.length]; @@ -130,11 +132,18 @@ public LoadBalancingKMSClientProvider(KMSClientProvider[] providers, } catch (IOException ioe) { LOG.warn("KMS provider at [{}] threw an IOException: ", provider.getKMSUrl(), ioe); - ex = ioe; - + // SSLHandshakeException can occur here because of lost connection + // with the KMS server, creating a ConnectException from it, + // so that the FailoverOnNetworkExceptionRetry policy will retry + if (ioe instanceof SSLHandshakeException) { + Exception cause = ioe; + ioe = new ConnectException("SSLHandshakeException: " + + cause.getMessage()); + ioe.initCause(cause); + } RetryAction action = null; try { - action = retryPolicy.shouldRetry(ioe, 0, numFailovers, false); + action = retryPolicy.shouldRetry(ioe, 0, numFailovers, isIdempotent); } catch (Exception e) { if (e instanceof IOException) { throw (IOException)e; @@ -145,7 +154,7 @@ public LoadBalancingKMSClientProvider(KMSClientProvider[] providers, // compatible with earlier versions of LBKMSCP if (action.action == RetryAction.RetryDecision.FAIL && numFailovers >= providers.length - 1) { - LOG.warn("Aborting since the Request has failed with all KMS" + LOG.error("Aborting since the Request has failed with all KMS" + " providers(depending on {}={} setting and numProviders={})" + " in the group OR the exception is not recoverable", CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, @@ -153,7 +162,7 @@ public LoadBalancingKMSClientProvider(KMSClientProvider[] providers, 
CommonConfigurationKeysPublic. KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, providers.length), providers.length); - throw ex; + throw ioe; } if (((numFailovers + 1) % providers.length) == 0) { // Sleep only after we try all the providers for every cycle. @@ -192,7 +201,7 @@ private int nextIdx() { public Token[] call(KMSClientProvider provider) throws IOException { return provider.addDelegationTokens(renewer, credentials); } - }, nextIdx()); + }, nextIdx(), false); } @Override @@ -202,7 +211,7 @@ public long renewDelegationToken(final Token token) throws IOException { public Long call(KMSClientProvider provider) throws IOException { return provider.renewDelegationToken(token); } - }, nextIdx()); + }, nextIdx(), false); } @Override @@ -213,7 +222,7 @@ public Void call(KMSClientProvider provider) throws IOException { provider.cancelDelegationToken(token); return null; } - }, nextIdx()); + }, nextIdx(), false); } // This request is sent to all providers in the load-balancing group @@ -266,7 +275,7 @@ public EncryptedKeyVersion call(KMSClientProvider provider) throws IOException, GeneralSecurityException { return provider.generateEncryptedKey(encryptionKeyName); } - }, nextIdx()); + }, nextIdx(), true); } catch (WrapperException we) { if (we.getCause() instanceof GeneralSecurityException) { throw (GeneralSecurityException) we.getCause(); @@ -286,7 +295,7 @@ public KeyVersion call(KMSClientProvider provider) throws IOException, GeneralSecurityException { return provider.decryptEncryptedKey(encryptedKeyVersion); } - }, nextIdx()); + }, nextIdx(), true); } catch (WrapperException we) { if (we.getCause() instanceof GeneralSecurityException) { throw (GeneralSecurityException) we.getCause(); @@ -306,7 +315,7 @@ public EncryptedKeyVersion call(KMSClientProvider provider) throws IOException, GeneralSecurityException { return provider.reencryptEncryptedKey(ekv); } - }, nextIdx()); + }, nextIdx(), true); } catch (WrapperException we) { if (we.getCause() instanceof GeneralSecurityException) { throw (GeneralSecurityException) we.getCause(); @@ -326,7 +335,7 @@ public Void call(KMSClientProvider provider) provider.reencryptEncryptedKeys(ekvs); return null; } - }, nextIdx()); + }, nextIdx(), true); } catch (WrapperException we) { if (we.getCause() instanceof GeneralSecurityException) { throw (GeneralSecurityException) we.getCause(); @@ -342,7 +351,7 @@ public KeyVersion getKeyVersion(final String versionName) throws IOException { public KeyVersion call(KMSClientProvider provider) throws IOException { return provider.getKeyVersion(versionName); } - }, nextIdx()); + }, nextIdx(), true); } @Override @@ -352,7 +361,7 @@ public KeyVersion call(KMSClientProvider provider) throws IOException { public List call(KMSClientProvider provider) throws IOException { return provider.getKeys(); } - }, nextIdx()); + }, nextIdx(), true); } @Override @@ -362,7 +371,7 @@ public KeyVersion call(KMSClientProvider provider) throws IOException { public Metadata[] call(KMSClientProvider provider) throws IOException { return provider.getKeysMetadata(names); } - }, nextIdx()); + }, nextIdx(), true); } @Override @@ -373,7 +382,7 @@ public KeyVersion call(KMSClientProvider provider) throws IOException { throws IOException { return provider.getKeyVersions(name); } - }, nextIdx()); + }, nextIdx(), true); } @Override @@ -383,8 +392,9 @@ public KeyVersion getCurrentKey(final String name) throws IOException { public KeyVersion call(KMSClientProvider provider) throws IOException { return provider.getCurrentKey(name); } - }, nextIdx()); + }, 
nextIdx(), true); } + @Override public Metadata getMetadata(final String name) throws IOException { return doOp(new ProviderCallable() { @@ -392,7 +402,7 @@ public Metadata getMetadata(final String name) throws IOException { public Metadata call(KMSClientProvider provider) throws IOException { return provider.getMetadata(name); } - }, nextIdx()); + }, nextIdx(), true); } @Override @@ -403,7 +413,7 @@ public KeyVersion createKey(final String name, final byte[] material, public KeyVersion call(KMSClientProvider provider) throws IOException { return provider.createKey(name, material, options); } - }, nextIdx()); + }, nextIdx(), false); } @Override @@ -416,7 +426,7 @@ public KeyVersion call(KMSClientProvider provider) throws IOException, NoSuchAlgorithmException { return provider.createKey(name, options); } - }, nextIdx()); + }, nextIdx(), false); } catch (WrapperException e) { if (e.getCause() instanceof GeneralSecurityException) { throw (NoSuchAlgorithmException) e.getCause(); @@ -433,7 +443,7 @@ public Void call(KMSClientProvider provider) throws IOException { provider.deleteKey(name); return null; } - }, nextIdx()); + }, nextIdx(), false); } @Override @@ -444,7 +454,7 @@ public KeyVersion rollNewVersion(final String name, final byte[] material) public KeyVersion call(KMSClientProvider provider) throws IOException { return provider.rollNewVersion(name, material); } - }, nextIdx()); + }, nextIdx(), false); invalidateCache(name); return newVersion; } @@ -459,7 +469,7 @@ public KeyVersion call(KMSClientProvider provider) throws IOException, NoSuchAlgorithmException { return provider.rollNewVersion(name); } - }, nextIdx()); + }, nextIdx(), false); invalidateCache(name); return newVersion; } catch (WrapperException e) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java index 75622ad3742..c56f6e09e2d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java @@ -27,10 +27,12 @@ import java.util.Arrays; import java.util.EnumSet; +import java.util.NoSuchElementException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Options.ChecksumOpt; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; @@ -527,4 +529,39 @@ public boolean reportChecksumFailure(Path f, FSDataInputStream in, } return results.toArray(new FileStatus[results.size()]); } + + @Override + public RemoteIterator listLocatedStatus(final Path f) + throws AccessControlException, FileNotFoundException, + UnresolvedLinkException, IOException { + final RemoteIterator iter = + getMyFs().listLocatedStatus(f); + return new RemoteIterator() { + + private LocatedFileStatus next = null; + + @Override + public boolean hasNext() throws IOException { + while (next == null && iter.hasNext()) { + LocatedFileStatus unfilteredNext = iter.next(); + if (!isChecksumFile(unfilteredNext.getPath())) { + next = unfilteredNext; + } + } + return next != null; + } + + @Override + public LocatedFileStatus next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + LocatedFileStatus tmp = next; + next = 
null; + return tmp; + } + + }; + } + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index c7f32f92a69..b101b3b3096 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -905,5 +905,14 @@ public static final String HADOOP_TAGS_SYSTEM = "hadoop.tags.system"; public static final String HADOOP_TAGS_CUSTOM = "hadoop.tags.custom"; + + /** Configuration option for the shutdown hook manager shutdown time: + * {@value}. */ + public static final String SERVICE_SHUTDOWN_TIMEOUT = + "hadoop.service.shutdown.timeout"; + + /** Default shutdown hook timeout: {@value} seconds. */ + public static final long SERVICE_SHUTDOWN_TIMEOUT_DEFAULT = 30; + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java index 383d65a06a3..c3e088b66d8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java @@ -116,7 +116,14 @@ * Enforce the file to be a replicated file, no matter what its parent * directory's replication or erasure coding policy is. */ - SHOULD_REPLICATE((short) 0x80); + SHOULD_REPLICATE((short) 0x80), + + /** + * Advise that the first block replica NOT take into account DataNode + * locality. The first block replica should be placed randomly within the + * cluster. Subsequent block replicas should follow DataNode locality rules. + */ + IGNORE_CLIENT_LOCALITY((short) 0x100); private final short mode; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemMultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemMultipartUploader.java index b57ff3dc3a6..f13b50bd20c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemMultipartUploader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemMultipartUploader.java @@ -16,12 +16,6 @@ */ package org.apache.hadoop.fs; -import com.google.common.base.Charsets; -import org.apache.commons.compress.utils.IOUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.permission.FsPermission; - import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -29,13 +23,26 @@ import java.util.List; import java.util.stream.Collectors; +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; + +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; + +import static org.apache.hadoop.fs.Path.mergePaths; + /** * A MultipartUploader that uses the basic FileSystem commands. * This is done in three stages: - * Init - create a temp _multipart directory. - * PutPart - copying the individual parts of the file to the temp directory. - * Complete - use {@link FileSystem#concat} to merge the files; and then delete - * the temp directory. + *
+ * <ul>
+ *   <li>Init - create a temp {@code _multipart} directory.</li>
+ *   <li>PutPart - copying the individual parts of the file to the temp
+ *   directory.</li>
+ *   <li>Complete - use {@link FileSystem#concat} to merge the files;
+ *   and then delete the temp directory.</li>
+ * </ul>
*/ public class FileSystemMultipartUploader extends MultipartUploader { @@ -61,31 +68,50 @@ public PartHandle putPart(Path filePath, InputStream inputStream, throws IOException { byte[] uploadIdByteArray = uploadId.toByteArray(); + checkUploadId(uploadIdByteArray); Path collectorPath = new Path(new String(uploadIdByteArray, 0, uploadIdByteArray.length, Charsets.UTF_8)); Path partPath = - Path.mergePaths(collectorPath, Path.mergePaths(new Path(Path.SEPARATOR), + mergePaths(collectorPath, mergePaths(new Path(Path.SEPARATOR), new Path(Integer.toString(partNumber) + ".part"))); - FSDataOutputStreamBuilder outputStream = fs.createFile(partPath); - FSDataOutputStream fsDataOutputStream = outputStream.build(); - IOUtils.copy(inputStream, fsDataOutputStream, 4096); - fsDataOutputStream.close(); + try(FSDataOutputStream fsDataOutputStream = + fs.createFile(partPath).build()) { + IOUtils.copy(inputStream, fsDataOutputStream, 4096); + } finally { + org.apache.hadoop.io.IOUtils.cleanupWithLogger(LOG, inputStream); + } return BBPartHandle.from(ByteBuffer.wrap( partPath.toString().getBytes(Charsets.UTF_8))); } private Path createCollectorPath(Path filePath) { - return Path.mergePaths(filePath.getParent(), - Path.mergePaths(new Path(filePath.getName().split("\\.")[0]), - Path.mergePaths(new Path("_multipart"), + return mergePaths(filePath.getParent(), + mergePaths(new Path(filePath.getName().split("\\.")[0]), + mergePaths(new Path("_multipart"), new Path(Path.SEPARATOR)))); } + private PathHandle getPathHandle(Path filePath) throws IOException { + FileStatus status = fs.getFileStatus(filePath); + return fs.getPathHandle(status); + } + @Override @SuppressWarnings("deprecation") // rename w/ OVERWRITE public PathHandle complete(Path filePath, List> handles, UploadHandle multipartUploadId) throws IOException { + + checkUploadId(multipartUploadId.toByteArray()); + + if (handles.isEmpty()) { + throw new IOException("Empty upload"); + } + // If destination already exists, we believe we already completed it. 
+ if (fs.exists(filePath)) { + return getPathHandle(filePath); + } + handles.sort(Comparator.comparing(Pair::getKey)); List partHandles = handles .stream() @@ -97,22 +123,25 @@ public PathHandle complete(Path filePath, .collect(Collectors.toList()); Path collectorPath = createCollectorPath(filePath); - Path filePathInsideCollector = Path.mergePaths(collectorPath, + Path filePathInsideCollector = mergePaths(collectorPath, new Path(Path.SEPARATOR + filePath.getName())); fs.create(filePathInsideCollector).close(); fs.concat(filePathInsideCollector, partHandles.toArray(new Path[handles.size()])); fs.rename(filePathInsideCollector, filePath, Options.Rename.OVERWRITE); fs.delete(collectorPath, true); - FileStatus status = fs.getFileStatus(filePath); - return fs.getPathHandle(status); + return getPathHandle(filePath); } @Override public void abort(Path filePath, UploadHandle uploadId) throws IOException { byte[] uploadIdByteArray = uploadId.toByteArray(); + checkUploadId(uploadIdByteArray); Path collectorPath = new Path(new String(uploadIdByteArray, 0, uploadIdByteArray.length, Charsets.UTF_8)); + + // force a check for a file existing; raises FNFE if not found + fs.getFileStatus(collectorPath); fs.delete(collectorPath, true); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java index 1c216f430af..a4b158a85ab 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java @@ -24,8 +24,6 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.util.*; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -78,11 +76,25 @@ /** Used when size of file to be allocated is unknown. 
*/ public static final int SIZE_UNKNOWN = -1; + private final DiskValidator diskValidator; + /**Create an allocator object * @param contextCfgItemName */ public LocalDirAllocator(String contextCfgItemName) { this.contextCfgItemName = contextCfgItemName; + try { + this.diskValidator = DiskValidatorFactory.getInstance( + BasicDiskValidator.NAME); + } catch (DiskErrorException e) { + throw new RuntimeException(e); + } + } + + public LocalDirAllocator(String contextCfgItemName, + DiskValidator diskValidator) { + this.contextCfgItemName = contextCfgItemName; + this.diskValidator = diskValidator; } /** This method must be used to obtain the dir allocation context for a @@ -96,7 +108,8 @@ private AllocatorPerContext obtainContext(String contextCfgItemName) { AllocatorPerContext l = contexts.get(contextCfgItemName); if (l == null) { contexts.put(contextCfgItemName, - (l = new AllocatorPerContext(contextCfgItemName))); + (l = new AllocatorPerContext(contextCfgItemName, + diskValidator))); } return l; } @@ -255,6 +268,7 @@ int getCurrentDirectoryIndex() { // NOTE: the context must be accessed via a local reference as it // may be updated at any time to reference a different context private AtomicReference currentContext; + private final DiskValidator diskValidator; private static class Context { private AtomicInteger dirNumLastAccessed = new AtomicInteger(0); @@ -280,9 +294,11 @@ public int getAndIncrDirNumLastAccessed(int delta) { } } - public AllocatorPerContext(String contextCfgItemName) { + public AllocatorPerContext(String contextCfgItemName, + DiskValidator diskValidator) { this.contextCfgItemName = contextCfgItemName; this.currentContext = new AtomicReference(new Context()); + this.diskValidator = diskValidator; } /** This method gets called everytime before any read/write to make sure @@ -312,7 +328,7 @@ private Context confChanged(Configuration conf) ? new File(ctx.localFS.makeQualified(tmpDir).toUri()) : new File(dirStrings[i]); - DiskChecker.checkDir(tmpFile); + diskValidator.checkStatus(tmpFile); dirs.add(new Path(tmpFile.getPath())); dfList.add(new DF(tmpFile, 30000)); } catch (DiskErrorException de) { @@ -348,7 +364,7 @@ private Path createPath(Path dir, String path, //check whether we are able to create a directory here. If the disk //happens to be RDONLY we will fail try { - DiskChecker.checkDir(new File(file.getParent().toUri().getPath())); + diskValidator.checkStatus(new File(file.getParent().toUri().getPath())); return file; } catch (DiskErrorException d) { LOG.warn("Disk Error Exception: ", d); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java index 24a92169a2b..76f58d35978 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java @@ -21,17 +21,21 @@ import java.io.InputStream; import java.util.List; -import org.apache.commons.lang3.tuple.Pair; - +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.tuple.Pair; + /** * MultipartUploader is an interface for copying files multipart and across * multiple nodes. Users should: - * 1. Initialize an upload - * 2. Upload parts in any order - * 3. Complete the upload in order to have it materialize in the destination FS. + *
+ * <ol>
+ *   <li>Initialize an upload</li>
+ *   <li>Upload parts in any order</li>
+ *   <li>Complete the upload in order to have it materialize in the destination
+ *   FS.</li>
+ * </ol>
* * Implementers should make sure that the complete function should make sure * that 'complete' will reorder parts if the destination FS doesn't already @@ -45,7 +49,7 @@ * Initialize a multipart upload. * @param filePath Target path for upload. * @return unique identifier associating part uploads. - * @throws IOException + * @throws IOException IO failure */ public abstract UploadHandle initialize(Path filePath) throws IOException; @@ -53,12 +57,13 @@ * Put part as part of a multipart upload. It should be possible to have * parts uploaded in any order (or in parallel). * @param filePath Target path for upload (same as {@link #initialize(Path)}). - * @param inputStream Data for this part. + * @param inputStream Data for this part. Implementations MUST close this + * stream after reading in the data. * @param partNumber Index of the part relative to others. * @param uploadId Identifier from {@link #initialize(Path)}. * @param lengthInBytes Target length to read from the stream. * @return unique PartHandle identifier for the uploaded part. - * @throws IOException + * @throws IOException IO failure */ public abstract PartHandle putPart(Path filePath, InputStream inputStream, int partNumber, UploadHandle uploadId, long lengthInBytes) @@ -67,12 +72,12 @@ public abstract PartHandle putPart(Path filePath, InputStream inputStream, /** * Complete a multipart upload. * @param filePath Target path for upload (same as {@link #initialize(Path)}. - * @param handles Identifiers with associated part numbers from - * {@link #putPart(Path, InputStream, int, UploadHandle, long)}. + * @param handles non-empty list of identifiers with associated part numbers + * from {@link #putPart(Path, InputStream, int, UploadHandle, long)}. * Depending on the backend, the list order may be significant. * @param multipartUploadId Identifier from {@link #initialize(Path)}. * @return unique PathHandle identifier for the uploaded file. - * @throws IOException + * @throws IOException IO failure or the handle list is empty. */ public abstract PathHandle complete(Path filePath, List> handles, UploadHandle multipartUploadId) @@ -81,10 +86,20 @@ public abstract PathHandle complete(Path filePath, /** * Aborts a multipart upload. * @param filePath Target path for upload (same as {@link #initialize(Path)}. - * @param multipartuploadId Identifier from {@link #initialize(Path)}. - * @throws IOException + * @param multipartUploadId Identifier from {@link #initialize(Path)}. 
+ * @throws IOException IO failure */ - public abstract void abort(Path filePath, UploadHandle multipartuploadId) + public abstract void abort(Path filePath, UploadHandle multipartUploadId) throws IOException; + /** + * Utility method to validate uploadIDs + * @param uploadId + * @throws IllegalArgumentException + */ + protected void checkUploadId(byte[] uploadId) + throws IllegalArgumentException { + Preconditions.checkArgument(uploadId.length > 0, + "Empty UploadId is not valid"); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartHandle.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartHandle.java index df70b746cce..47ce3ab1894 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartHandle.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PartHandle.java @@ -16,14 +16,14 @@ */ package org.apache.hadoop.fs; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - import java.io.Serializable; import java.nio.ByteBuffer; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + /** - * Opaque, serializable reference to an part id for multipart uploads. + * Opaque, serializable reference to a part id for multipart uploads. */ @InterfaceAudience.Public @InterfaceStability.Evolving diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathHandle.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathHandle.java index 60aa6a53bfb..d5304ba5493 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathHandle.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathHandle.java @@ -25,15 +25,16 @@ /** * Opaque, serializable reference to an entity in the FileSystem. May contain - * metadata sufficient to resolve or verify subsequent accesses indepedent of + * metadata sufficient to resolve or verify subsequent accesses independent of * other modifications to the FileSystem. */ @InterfaceAudience.Public @InterfaceStability.Evolving +@FunctionalInterface public interface PathHandle extends Serializable { /** - * @return Serialized from in bytes. + * @return Serialized form in bytes. */ default byte[] toByteArray() { ByteBuffer bb = bytes(); @@ -42,6 +43,10 @@ return ret; } + /** + * Get the bytes of this path handle. + * @return the bytes to get to the process completing the upload. 
+ */ ByteBuffer bytes(); @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java index 265e967b01e..6e101a26e24 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java @@ -148,6 +148,20 @@ public boolean moveToTrash(Path path) throws IOException { LOG.warn("Can't create(mkdir) trash directory: " + baseTrashPath); return false; } + } catch (FileAlreadyExistsException e) { + // find the path which is not a directory, and modify baseTrashPath + // & trashPath, then mkdirs + Path existsFilePath = baseTrashPath; + while (!fs.exists(existsFilePath)) { + existsFilePath = existsFilePath.getParent(); + } + baseTrashPath = new Path(baseTrashPath.toString().replace( + existsFilePath.toString(), existsFilePath.toString() + Time.now()) + ); + trashPath = new Path(baseTrashPath, trashPath.getName()); + // retry, ignore current failure + --i; + continue; } catch (IOException e) { LOG.warn("Can't create trash directory: " + baseTrashPath, e); cause = e; @@ -235,7 +249,7 @@ public Runnable getEmptier() throws IOException { LOG.info("Namenode trash configuration: Deletion interval = " + (deletionInterval / MSECS_PER_MINUTE) + " minutes, Emptier interval = " - + (emptierInterval / MSECS_PER_MINUTE) + " minutes."); + + (this.emptierInterval / MSECS_PER_MINUTE) + " minutes."); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java index 4a134148a09..784bbf33f78 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java @@ -66,7 +66,7 @@ public static void registerCommands(CommandFactory factory) { factory.registerCommands(Tail.class); factory.registerCommands(Head.class); factory.registerCommands(Test.class); - factory.registerCommands(Touch.class); + factory.registerCommands(TouchCommands.class); factory.registerCommands(Truncate.class); factory.registerCommands(SnapshotCommands.class); factory.registerCommands(XAttrCommands.class); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Mkdir.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Mkdir.java index 9f39da29ce2..5828b0bbf4d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Mkdir.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Mkdir.java @@ -68,11 +68,14 @@ protected void processPath(PathData item) throws IOException { @Override protected void processNonexistentPath(PathData item) throws IOException { - // check if parent exists. this is complicated because getParent(a/b/c/) returns a/b/c, but - // we want a/b - if (!createParents && - !item.fs.exists(new Path(item.path.toString()).getParent())) { - throw new PathNotFoundException(item.toString()); + if (!createParents) { + // check if parent exists. 
this is complicated because getParent(a/b/c/) returns a/b/c, but + // we want a/b + final Path itemPath = new Path(item.path.toString()); + final Path itemParentPath = itemPath.getParent(); + if (!item.fs.exists(itemParentPath)) { + throw new PathNotFoundException(itemParentPath.toString()); + } } if (!item.fs.mkdirs(item.path)) { throw new PathIOException(item.toString()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Touch.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Touch.java deleted file mode 100644 index a6c751ea6f0..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Touch.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.shell; - -import java.io.IOException; -import java.util.LinkedList; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.PathIOException; -import org.apache.hadoop.fs.PathIsDirectoryException; -import org.apache.hadoop.fs.PathNotFoundException; - -/** - * Unix touch like commands - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable - -class Touch extends FsCommand { - public static void registerCommands(CommandFactory factory) { - factory.addClass(Touchz.class, "-touchz"); - } - - /** - * (Re)create zero-length file at the specified path. - * This will be replaced by a more UNIX-like touch when files may be - * modified. - */ - public static class Touchz extends Touch { - public static final String NAME = "touchz"; - public static final String USAGE = " ..."; - public static final String DESCRIPTION = - "Creates a file of zero length " + - "at with current time as the timestamp of that . 
" + - "An error is returned if the file exists with non-zero length\n"; - - @Override - protected void processOptions(LinkedList args) { - CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE); - cf.parse(args); - } - - @Override - protected void processPath(PathData item) throws IOException { - if (item.stat.isDirectory()) { - // TODO: handle this - throw new PathIsDirectoryException(item.toString()); - } - if (item.stat.getLen() != 0) { - throw new PathIOException(item.toString(), "Not a zero-length file"); - } - touchz(item); - } - - @Override - protected void processNonexistentPath(PathData item) throws IOException { - if (!item.parentExists()) { - throw new PathNotFoundException(item.toString()) - .withFullyQualifiedPath(item.path.toUri().toString()); - } - touchz(item); - } - - private void touchz(PathData item) throws IOException { - item.fs.create(item.path).close(); - } - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/TouchCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/TouchCommands.java new file mode 100644 index 00000000000..be174b5e9cf --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/TouchCommands.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.shell; + +import java.io.IOException; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.LinkedList; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.PathIsDirectoryException; +import org.apache.hadoop.fs.PathNotFoundException; +import org.apache.hadoop.util.StringUtils; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Unix touch like commands + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable + +public class TouchCommands extends FsCommand { + public static void registerCommands(CommandFactory factory) { + factory.addClass(Touchz.class, "-touchz"); + factory.addClass(Touch.class, "-touch"); + } + + /** + * (Re)create zero-length file at the specified path. + * This will be replaced by a more UNIX-like touch when files may be + * modified. + */ + public static class Touchz extends TouchCommands { + public static final String NAME = "touchz"; + public static final String USAGE = " ..."; + public static final String DESCRIPTION = + "Creates a file of zero length " + + "at with current time as the timestamp of that . 
" + + "An error is returned if the file exists with non-zero length\n"; + + @Override + protected void processOptions(LinkedList args) { + CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE); + cf.parse(args); + } + + @Override + protected void processPath(PathData item) throws IOException { + if (item.stat.isDirectory()) { + // TODO: handle this + throw new PathIsDirectoryException(item.toString()); + } + if (item.stat.getLen() != 0) { + throw new PathIOException(item.toString(), "Not a zero-length file"); + } + touchz(item); + } + + @Override + protected void processNonexistentPath(PathData item) throws IOException { + if (!item.parentExists()) { + throw new PathNotFoundException(item.toString()) + .withFullyQualifiedPath(item.path.toUri().toString()); + } + touchz(item); + } + + private void touchz(PathData item) throws IOException { + item.fs.create(item.path).close(); + } + } + + /** + * A UNIX like touch command. + */ + public static class Touch extends TouchCommands { + private static final String OPTION_CHANGE_ONLY_MODIFICATION_TIME = "m"; + private static final String OPTION_CHANGE_ONLY_ACCESS_TIME = "a"; + private static final String OPTION_USE_TIMESTAMP = "t"; + private static final String OPTION_DO_NOT_CREATE_FILE = "c"; + + public static final String NAME = "touch"; + public static final String USAGE = "[-" + OPTION_CHANGE_ONLY_ACCESS_TIME + + "] [-" + OPTION_CHANGE_ONLY_MODIFICATION_TIME + "] [-" + + OPTION_USE_TIMESTAMP + " TIMESTAMP ] [-" + OPTION_DO_NOT_CREATE_FILE + + "] ..."; + public static final String DESCRIPTION = + "Updates the access and modification times of the file specified by the" + + " to the current time. If the file does not exist, then a zero" + + " length file is created at with current time as the timestamp" + + " of that .\n" + + "-" + OPTION_CHANGE_ONLY_ACCESS_TIME + + " Change only the access time \n" + "-" + + OPTION_CHANGE_ONLY_MODIFICATION_TIME + + " Change only the modification time \n" + "-" + + OPTION_USE_TIMESTAMP + " TIMESTAMP" + + " Use specified timestamp (in format yyyyMMddHHmmss) instead of current time \n" + + "-" + OPTION_DO_NOT_CREATE_FILE + " Do not create any files"; + + private boolean changeModTime = false; + private boolean changeAccessTime = false; + private boolean doNotCreate = false; + private String timestamp; + private final SimpleDateFormat dateFormat = + new SimpleDateFormat("yyyyMMdd:HHmmss"); + + @InterfaceAudience.Private + @VisibleForTesting + public DateFormat getDateFormat() { + return dateFormat; + } + + @Override + protected void processOptions(LinkedList args) { + this.timestamp = + StringUtils.popOptionWithArgument("-" + OPTION_USE_TIMESTAMP, args); + + CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE, + OPTION_USE_TIMESTAMP, OPTION_CHANGE_ONLY_ACCESS_TIME, + OPTION_CHANGE_ONLY_MODIFICATION_TIME); + cf.parse(args); + this.changeModTime = cf.getOpt(OPTION_CHANGE_ONLY_MODIFICATION_TIME); + this.changeAccessTime = cf.getOpt(OPTION_CHANGE_ONLY_ACCESS_TIME); + this.doNotCreate = cf.getOpt(OPTION_DO_NOT_CREATE_FILE); + } + + @Override + protected void processPath(PathData item) throws IOException { + if (item.stat.isDirectory()) { + throw new PathIsDirectoryException(item.toString()); + } + touch(item); + } + + @Override + protected void processNonexistentPath(PathData item) throws IOException { + if (!item.parentExists()) { + throw new PathNotFoundException(item.toString()) + .withFullyQualifiedPath(item.path.toUri().toString()); + } + touch(item); + } + + private void touch(PathData item) throws 
IOException { + if (!item.fs.exists(item.path)) { + if (doNotCreate) { + return; + } + item.fs.create(item.path).close(); + if (timestamp != null) { + // update the time only if user specified a timestamp using -t option. + updateTime(item); + } + } else { + updateTime(item); + } + } + + private void updateTime(PathData item) throws IOException { + long time = System.currentTimeMillis(); + if (timestamp != null) { + try { + time = dateFormat.parse(timestamp).getTime(); + } catch (ParseException e) { + throw new IllegalArgumentException( + "Unable to parse the specified timestamp " + timestamp, e); + } + } + if (changeModTime ^ changeAccessTime) { + long atime = changeModTime ? -1 : time; + long mtime = changeAccessTime ? -1 : time; + item.fs.setTimes(item.path, mtime, atime); + } else { + item.fs.setTimes(item.path, time, time); + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 9b7d7ba5d1a..e955979acde 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -574,8 +574,11 @@ private int help(String[] argv) { } return 0; } - - protected static class UsageInfo { + + /** + * UsageInfo class holds args and help details. + */ + public static class UsageInfo { public final String args; public final String help; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java index 2435671a31a..d2ba469ab43 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java @@ -141,6 +141,10 @@ "hadoop.http.selector.count"; // -1 to use default behavior of setting count based on CPU core count public static final int HTTP_SELECTOR_COUNT_DEFAULT = -1; + // idle timeout in milliseconds + public static final String HTTP_IDLE_TIMEOUT_MS_KEY = + "hadoop.http.idle_timeout.ms"; + public static final int HTTP_IDLE_TIMEOUT_MS_DEFAULT = 10000; public static final String HTTP_TEMP_DIR_KEY = "hadoop.http.temp.dir"; public static final String FILTER_INITIALIZER_PROPERTY @@ -445,6 +449,8 @@ public HttpServer2 build() throws IOException { int responseHeaderSize = conf.getInt( HTTP_MAX_RESPONSE_HEADER_SIZE_KEY, HTTP_MAX_RESPONSE_HEADER_SIZE_DEFAULT); + int idleTimeout = conf.getInt(HTTP_IDLE_TIMEOUT_MS_KEY, + HTTP_IDLE_TIMEOUT_MS_DEFAULT); HttpConfiguration httpConfig = new HttpConfiguration(); httpConfig.setRequestHeaderSize(requestHeaderSize); @@ -470,6 +476,7 @@ public HttpServer2 build() throws IOException { connector.setHost(ep.getHost()); connector.setPort(ep.getPort() == -1 ? 
0 : ep.getPort()); connector.setAcceptQueueSize(backlogSize); + connector.setIdleTimeout(idleTimeout); server.addListener(connector); } server.loadListeners(); @@ -483,7 +490,13 @@ private ServerConnector createHttpChannelConnector( conf.getInt(HTTP_SELECTOR_COUNT_KEY, HTTP_SELECTOR_COUNT_DEFAULT)); ConnectionFactory connFactory = new HttpConnectionFactory(httpConfig); conn.addConnectionFactory(connFactory); - configureChannelConnector(conn); + if(Shell.WINDOWS) { + // result of setting the SO_REUSEADDR flag is different on Windows + // http://msdn.microsoft.com/en-us/library/ms740621(v=vs.85).aspx + // without this 2 NN's can start on the same machine and listen on + // the same port with indeterminate routing of incoming requests to them + conn.setReuseAddress(false); + } return conn; } @@ -659,17 +672,6 @@ private static void addNoCacheFilter(ServletContextHandler ctxt) { Collections. emptyMap(), new String[] { "/*" }); } - private static void configureChannelConnector(ServerConnector c) { - c.setIdleTimeout(10000); - if(Shell.WINDOWS) { - // result of setting the SO_REUSEADDR flag is different on Windows - // http://msdn.microsoft.com/en-us/library/ms740621(v=vs.85).aspx - // without this 2 NN's can start on the same machine and listen on - // the same port with indeterminate routing of incoming requests to them - c.setReuseAddress(false); - } - } - /** Get an array of FilterConfiguration specified in the conf */ private static FilterInitializer[] getFilterInitializers(Configuration conf) { if (conf == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/IsActiveServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/IsActiveServlet.java new file mode 100644 index 00000000000..3838beb1990 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/IsActiveServlet.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.http; + +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; + +/** + * Used by Load Balancers to detect the active NameNode/ResourceManager/Router. + */ +public abstract class IsActiveServlet extends HttpServlet { + + /** Default serial identifier. */ + private static final long serialVersionUID = 1L; + + public static final String SERVLET_NAME = "isActive"; + public static final String PATH_SPEC = "/isActive"; + + public static final String RESPONSE_ACTIVE = + "I am Active!"; + + public static final String RESPONSE_NOT_ACTIVE = + "I am not Active!"; + + /** + * Check whether this instance is the Active one. 
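As a rough illustration of how a service could plug into this servlet, a subclass only has to answer isActive(); the base class supplies the HTTP handling. The flag below is a hypothetical stand-in for the service's real HA state lookup.

    import org.apache.hadoop.http.IsActiveServlet;

    /** Sketch of a service-specific subclass; the HA flag is hypothetical. */
    public class ExampleIsActiveServlet extends IsActiveServlet {
      private static final long serialVersionUID = 1L;

      /** Stand-in for the service's real HA state. */
      private volatile boolean active;

      void setActive(boolean isActive) {
        this.active = isActive;
      }

      @Override
      protected boolean isActive() {
        return active;
      }
    }

A load balancer probing the PATH_SPEC endpoint then sees a 200 response with "I am Active!" from the active instance and a 405 error from standbys, as implemented in doGet() below.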
+ * @param req HTTP request + * @param resp HTTP response to write to + */ + @Override + public void doGet( + final HttpServletRequest req, final HttpServletResponse resp) + throws IOException { + + // By default requests are persistent. We don't want long-lived connections + // on server side. + resp.addHeader("Connection", "close"); + + if (!isActive()) { + // Report not SC_OK + resp.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED, + RESPONSE_NOT_ACTIVE); + return; + } + resp.setStatus(HttpServletResponse.SC_OK); + resp.getWriter().write(RESPONSE_ACTIVE); + resp.getWriter().flush(); + } + + /** + * @return true if this instance is in Active HA state. + */ + protected abstract boolean isActive(); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java index fa85ed77a1f..c4347e0c1af 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java @@ -5,9 +5,9 @@ * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the @@ -24,6 +24,7 @@ import java.io.OutputStream; import java.util.ArrayList; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.CodecPool; import org.apache.hadoop.io.compress.CompressionCodec; @@ -78,25 +79,33 @@ public void flush() throws IOException { public enum Algorithm { LZO(TFile.COMPRESSION_LZO) { private transient boolean checked = false; + private transient ClassNotFoundException cnf; + private transient boolean reinitCodecInTests; private static final String defaultClazz = "org.apache.hadoop.io.compress.LzoCodec"; + private transient String clazz; private transient CompressionCodec codec = null; + private String getLzoCodecClass() { + String extClazzConf = conf.get(CONF_LZO_CLASS); + String extClazz = (extClazzConf != null) ? + extClazzConf : System.getProperty(CONF_LZO_CLASS); + return (extClazz != null) ? extClazz : defaultClazz; + } + @Override public synchronized boolean isSupported() { - if (!checked) { + if (!checked || reinitCodecInTests) { checked = true; - String extClazzConf = conf.get(CONF_LZO_CLASS); - String extClazz = (extClazzConf != null) ? - extClazzConf : System.getProperty(CONF_LZO_CLASS); - String clazz = (extClazz != null) ? extClazz : defaultClazz; + reinitCodecInTests = conf.getBoolean("test.reload.lzo.codec", false); + clazz = getLzoCodecClass(); try { LOG.info("Trying to load Lzo codec class: " + clazz); codec = (CompressionCodec) ReflectionUtils.newInstance(Class .forName(clazz), conf); } catch (ClassNotFoundException e) { - // that is okay + cnf = e; } } return codec != null; @@ -105,9 +114,9 @@ public synchronized boolean isSupported() { @Override CompressionCodec getCodec() throws IOException { if (!isSupported()) { - throw new IOException( - "LZO codec class not specified. 
Did you forget to set property " - + CONF_LZO_CLASS + "?"); + throw new IOException(String.format( + "LZO codec %s=%s could not be loaded", CONF_LZO_CLASS, clazz), + cnf); } return codec; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java index adc74bd52b8..4e0cd8fdd86 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java @@ -38,6 +38,7 @@ import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SecureIOUtils.AlreadyExistsException; +import org.apache.hadoop.util.CleanerUtil; import org.apache.hadoop.util.NativeCodeLoader; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.PerformanceAdvisory; @@ -315,7 +316,7 @@ static void mlock(ByteBuffer buffer, long len) } mlock_native(buffer, len); } - + /** * Unmaps the block from memory. See munmap(2). * @@ -329,10 +330,14 @@ static void mlock(ByteBuffer buffer, long len) * @param buffer The buffer to unmap. */ public static void munmap(MappedByteBuffer buffer) { - if (buffer instanceof sun.nio.ch.DirectBuffer) { - sun.misc.Cleaner cleaner = - ((sun.nio.ch.DirectBuffer)buffer).cleaner(); - cleaner.clean(); + if (CleanerUtil.UNMAP_SUPPORTED) { + try { + CleanerUtil.getCleaner().freeBuffer(buffer); + } catch (IOException e) { + LOG.info("Failed to unmap the buffer", e); + } + } else { + LOG.trace(CleanerUtil.UNMAP_NOT_SUPPORTED_REASON); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java index c73e0837721..f2fa3af7d59 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java @@ -30,27 +30,30 @@ */ @InterfaceStability.Evolving public interface FailoverProxyProvider extends Closeable { - public static final class ProxyInfo { - public final T proxy; + static class ProxyInfo { + public T proxy; /* * The information (e.g., the IP address) of the current proxy object. It * provides information for debugging purposes. */ - public final String proxyInfo; + public String proxyInfo; public ProxyInfo(T proxy, String proxyInfo) { this.proxy = proxy; this.proxyInfo = proxyInfo; } + private String proxyName() { + return proxy != null ? proxy.getClass().getSimpleName() : "UnknownProxy"; + } + public String getString(String methodName) { - return proxy.getClass().getSimpleName() + "." + methodName - + " over " + proxyInfo; + return proxyName() + "." 
+ methodName + " over " + proxyInfo; } @Override public String toString() { - return proxy.getClass().getSimpleName() + " over " + proxyInfo; + return proxyName() + " over " + proxyInfo; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java index d1bd1807b03..29649a6b6ff 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java @@ -81,8 +81,9 @@ public CallQueueManager(Class> backingClass, this.clientBackOffEnabled = clientBackOffEnabled; this.putRef = new AtomicReference>(bq); this.takeRef = new AtomicReference>(bq); - LOG.info("Using callQueue: " + backingClass + " queueCapacity: " + - maxQueueSize + " scheduler: " + schedulerClass); + LOG.info("Using callQueue: {}, queueCapacity: {}, " + + "scheduler: {}, ipcBackoff: {}.", + backingClass, maxQueueSize, schedulerClass, clientBackOffEnabled); } @VisibleForTesting // only! diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index 163e80dfc40..07a2f13a442 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -70,6 +70,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; import static org.apache.hadoop.ipc.RpcConstants.CONNECTION_CONTEXT_CALL_ID; import static org.apache.hadoop.ipc.RpcConstants.PING_CALL_ID; @@ -439,6 +440,8 @@ public synchronized Writable getRpcResponse() { private final Object sendRpcRequestLock = new Object(); + private AtomicReference connectingThread = new AtomicReference<>(); + public Connection(ConnectionId remoteId, int serviceClass) throws IOException { this.remoteId = remoteId; this.server = remoteId.getAddress(); @@ -677,7 +680,8 @@ private synchronized void setupConnection( this.socket.setReuseAddress(true); localAddr = NetUtils.bindToLocalAddress(localAddr, bindToWildCardAddress); - LOG.debug("Binding {} to {}", principal, localAddr); + LOG.debug("Binding {} to {}", principal, + (bindToWildCardAddress) ? "0.0.0.0" : localAddr); this.socket.bind(new InetSocketAddress(localAddr, 0)); } } @@ -776,6 +780,7 @@ private synchronized void setupIOstreams( } } try { + connectingThread.set(Thread.currentThread()); if (LOG.isDebugEnabled()) { LOG.debug("Connecting to "+server); } @@ -861,6 +866,8 @@ public AuthMethod run() markClosed(new IOException("Couldn't set up IO streams: " + t, t)); } close(); + } finally { + connectingThread.set(null); } } @@ -1214,6 +1221,13 @@ private synchronized void markClosed(IOException e) { notifyAll(); } } + + private void interruptConnectingThread() { + Thread connThread = connectingThread.get(); + if (connThread != null) { + connThread.interrupt(); + } + } /** Close the connection. 
*/ private synchronized void close() { @@ -1281,9 +1295,6 @@ public Client(Class valueClass, Configuration conf, this.bindToWildCardAddress = conf .getBoolean(CommonConfigurationKeys.IPC_CLIENT_BIND_WILDCARD_ADDR_KEY, CommonConfigurationKeys.IPC_CLIENT_BIND_WILDCARD_ADDR_DEFAULT); - LOG.debug("{} set to true. Will bind client sockets to wildcard " - + "address.", - CommonConfigurationKeys.IPC_CLIENT_BIND_WILDCARD_ADDR_KEY); this.clientId = ClientId.getClientId(); this.sendParamsExecutor = clientExcecutorFactory.refAndGetInstance(); @@ -1323,6 +1334,7 @@ public void stop() { // wake up all connections for (Connection conn : connections.values()) { conn.interrupt(); + conn.interruptConnectingThread(); } // wait until all connections are closed diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java index f12ecb6462a..8bb0ce4c9d7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java @@ -391,6 +391,7 @@ private static Boolean parseBackOffByResponseTimeEnabled(String ns, * counts current. */ private void decayCurrentCounts() { + LOG.debug("Start to decay current counts."); try { long totalDecayedCount = 0; long totalRawCount = 0; @@ -410,7 +411,12 @@ private void decayCurrentCounts() { totalDecayedCount += nextValue; decayedCount.set(nextValue); + LOG.debug("Decaying counts for the user: {}, " + + "its decayedCount: {}, rawCount: {}", entry.getKey(), + nextValue, rawCount.get()); if (nextValue == 0) { + LOG.debug("The decayed count for the user {} is zero " + + "and being cleaned.", entry.getKey()); // We will clean up unused keys here. An interesting optimization // might be to have an upper bound on keyspace in callCounts and only // clean once we pass it. @@ -422,6 +428,8 @@ private void decayCurrentCounts() { totalDecayedCallCount.set(totalDecayedCount); totalRawCallCount.set(totalRawCount); + LOG.debug("After decaying the stored counts, totalDecayedCount: {}, " + + "totalRawCallCount: {}.", totalDecayedCount, totalRawCount); // Now refresh the cache of scheduling decisions recomputeScheduleCache(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java index d308725c053..096cc1ad43f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java @@ -109,6 +109,9 @@ private void moveToNextQueue() { // Finally, reset requestsLeft. 
This will enable moveToNextQueue to be // called again, for the new currentQueueIndex this.requestsLeft.set(this.queueWeights[nextIdx]); + LOG.debug("Moving to next queue from queue index {} to index {}, " + + "number of requests left for current queue: {}.", + thisIdx, nextIdx, requestsLeft); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/annotation/Metric.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/annotation/Metric.java index b8e6a8ace16..37fa760ee75 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/annotation/Metric.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/annotation/Metric.java @@ -68,4 +68,9 @@ * @return optional type (counter|gauge) of the metric */ Type type() default Type.DEFAULT; + + /** + * @return optional roll over interval in secs for MutableQuantiles + */ + int interval() default 10; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java index a3ca98d0407..c7adaa5d991 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java @@ -83,6 +83,10 @@ MutableMetric newForField(Field field, Metric annotation, return registry.newMutableRollingAverages(info.name(), annotation.valueName()); } + if (cls == MutableQuantiles.class) { + return registry.newQuantiles(info.name(), annotation.about(), + annotation.sampleName(), annotation.valueName(), annotation.interval()); + } throw new MetricsException("Unsupported metric field "+ field.getName() + " of type "+ field.getType().getName()); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java index ad0986511d8..63ec9a5d29e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java @@ -73,7 +73,8 @@ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving public class Groups { - private static final Logger LOG = LoggerFactory.getLogger(Groups.class); + @VisibleForTesting + static final Logger LOG = LoggerFactory.getLogger(Groups.class); private final GroupMappingServiceProvider impl; @@ -308,6 +309,7 @@ public long read() { */ @Override public List load(String user) throws Exception { + LOG.debug("GroupCacheLoader - load."); TraceScope scope = null; Tracer tracer = Tracer.curThreadTracer(); if (tracer != null) { @@ -346,6 +348,7 @@ public long read() { public ListenableFuture> reload(final String key, List oldValue) throws Exception { + LOG.debug("GroupCacheLoader - reload (async)."); if (!reloadGroupsInBackground) { return super.reload(key, oldValue); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index 29b9fea424c..6ce72edb8e2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -40,6 +40,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Date; import java.util.EnumMap; import java.util.HashMap; import java.util.Iterator; @@ -851,81 +852,121 @@ void spawnAutoRenewalThreadForUserCreds(boolean force) { } //spawn thread only if we have kerb credentials - Thread t = new Thread(new Runnable() { + KerberosTicket tgt = getTGT(); + if (tgt == null) { + return; + } + String cmd = conf.get("hadoop.kerberos.kinit.command", "kinit"); + long nextRefresh = getRefreshTime(tgt); + Thread t = + new Thread(new AutoRenewalForUserCredsRunnable(tgt, cmd, nextRefresh)); + t.setDaemon(true); + t.setName("TGT Renewer for " + getUserName()); + t.start(); + } + + @VisibleForTesting + class AutoRenewalForUserCredsRunnable implements Runnable { + private KerberosTicket tgt; + private RetryPolicy rp; + private String kinitCmd; + private long nextRefresh; + private boolean runRenewalLoop = true; + + AutoRenewalForUserCredsRunnable(KerberosTicket tgt, String kinitCmd, + long nextRefresh){ + this.tgt = tgt; + this.kinitCmd = kinitCmd; + this.nextRefresh = nextRefresh; + this.rp = null; + } + + public void setRunRenewalLoop(boolean runRenewalLoop) { + this.runRenewalLoop = runRenewalLoop; + } - @Override - public void run() { - String cmd = conf.get("hadoop.kerberos.kinit.command", "kinit"); - KerberosTicket tgt = getTGT(); - if (tgt == null) { + @Override + public void run() { + do { + try { + long now = Time.now(); + if (LOG.isDebugEnabled()) { + LOG.debug("Current time is " + now); + LOG.debug("Next refresh is " + nextRefresh); + } + if (now < nextRefresh) { + Thread.sleep(nextRefresh - now); + } + String output = Shell.execCommand(kinitCmd, "-R"); + if (LOG.isDebugEnabled()) { + LOG.debug("Renewed ticket. kinit output: {}", output); + } + reloginFromTicketCache(); + tgt = getTGT(); + if (tgt == null) { + LOG.warn("No TGT after renewal. Aborting renew thread for " + + getUserName()); + return; + } + nextRefresh = Math.max(getRefreshTime(tgt), + now + kerberosMinSecondsBeforeRelogin); + metrics.renewalFailures.set(0); + rp = null; + } catch (InterruptedException ie) { + LOG.warn("Terminating renewal thread"); return; - } - long nextRefresh = getRefreshTime(tgt); - RetryPolicy rp = null; - while (true) { + } catch (IOException ie) { + metrics.renewalFailuresTotal.incr(); + final long now = Time.now(); + + if (tgt.isDestroyed()) { + LOG.error("TGT is destroyed. Aborting renew thread for {}.", + getUserName()); + return; + } + + long tgtEndTime; + // As described in HADOOP-15593 we need to handle the case when + // tgt.getEndTime() throws NPE because of JDK issue JDK-8147772 + // NPE is only possible if this issue is not fixed in the JDK + // currently used try { - long now = Time.now(); - if (LOG.isDebugEnabled()) { - LOG.debug("Current time is " + now); - LOG.debug("Next refresh is " + nextRefresh); - } - if (now < nextRefresh) { - Thread.sleep(nextRefresh - now); - } - String output = Shell.execCommand(cmd, "-R"); - if (LOG.isDebugEnabled()) { - LOG.debug("Renewed ticket. kinit output: {}", output); - } - reloginFromTicketCache(); - tgt = getTGT(); - if (tgt == null) { - LOG.warn("No TGT after renewal. 
Aborting renew thread for " + - getUserName()); - return; - } - nextRefresh = Math.max(getRefreshTime(tgt), - now + kerberosMinSecondsBeforeRelogin); - metrics.renewalFailures.set(0); - rp = null; - } catch (InterruptedException ie) { - LOG.warn("Terminating renewal thread"); + tgtEndTime = tgt.getEndTime().getTime(); + } catch (NullPointerException npe) { + LOG.error("NPE thrown while getting KerberosTicket endTime. " + + "Aborting renew thread for {}.", getUserName()); + return; + } + + LOG.warn("Exception encountered while running the renewal " + + "command for {}. (TGT end time:{}, renewalFailures: {}," + + "renewalFailuresTotal: {})", getUserName(), tgtEndTime, + metrics.renewalFailures.value(), + metrics.renewalFailuresTotal.value(), ie); + if (rp == null) { + // Use a dummy maxRetries to create the policy. The policy will + // only be used to get next retry time with exponential back-off. + // The final retry time will be later limited within the + // tgt endTime in getNextTgtRenewalTime. + rp = RetryPolicies.exponentialBackoffRetry(Long.SIZE - 2, + kerberosMinSecondsBeforeRelogin, TimeUnit.MILLISECONDS); + } + try { + nextRefresh = getNextTgtRenewalTime(tgtEndTime, now, rp); + } catch (Exception e) { + LOG.error("Exception when calculating next tgt renewal time", e); + return; + } + metrics.renewalFailures.incr(); + // retry until close enough to tgt endTime. + if (now > nextRefresh) { + LOG.error("TGT is expired. Aborting renew thread for {}.", + getUserName()); return; - } catch (IOException ie) { - metrics.renewalFailuresTotal.incr(); - final long tgtEndTime = tgt.getEndTime().getTime(); - LOG.warn("Exception encountered while running the renewal " - + "command for {}. (TGT end time:{}, renewalFailures: {}," - + "renewalFailuresTotal: {})", getUserName(), tgtEndTime, - metrics.renewalFailures, metrics.renewalFailuresTotal, ie); - final long now = Time.now(); - if (rp == null) { - // Use a dummy maxRetries to create the policy. The policy will - // only be used to get next retry time with exponential back-off. - // The final retry time will be later limited within the - // tgt endTime in getNextTgtRenewalTime. - rp = RetryPolicies.exponentialBackoffRetry(Long.SIZE - 2, - kerberosMinSecondsBeforeRelogin, TimeUnit.MILLISECONDS); - } - try { - nextRefresh = getNextTgtRenewalTime(tgtEndTime, now, rp); - } catch (Exception e) { - LOG.error("Exception when calculating next tgt renewal time", e); - return; - } - metrics.renewalFailures.incr(); - // retry until close enough to tgt endTime. - if (now > nextRefresh) { - LOG.error("TGT is expired. 
Aborting renew thread for {}.", - getUserName()); - return; - } } } - } - }); - t.setDaemon(true); - t.setName("TGT Renewer for " + getUserName()); - t.start(); + } while (runRenewalLoop); + } } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java index 26cd7ab2614..b766d5c37fa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java @@ -75,9 +75,9 @@ public void init(String configurationPrefix) { // $configPrefix.[ANY].hosts // String prefixRegEx = configPrefix.replace(".", "\\."); - String usersGroupsRegEx = prefixRegEx + "[^.]*(" + + String usersGroupsRegEx = prefixRegEx + "[\\S]*(" + Pattern.quote(CONF_USERS) + "|" + Pattern.quote(CONF_GROUPS) + ")"; - String hostsRegEx = prefixRegEx + "[^.]*" + Pattern.quote(CONF_HOSTS); + String hostsRegEx = prefixRegEx + "[\\S]*" + Pattern.quote(CONF_HOSTS); // get list of users and groups per proxyuser Map allMatchKeys = diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java index 33cb9ec98f0..25aac8853ef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/Token.java @@ -23,6 +23,7 @@ import com.google.common.primitives.Bytes; import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -358,6 +359,10 @@ private static String encodeWritable(Writable obj) throws IOException { */ private static void decodeWritable(Writable obj, String newValue) throws IOException { + if (newValue == null) { + throw new HadoopIllegalArgumentException( + "Invalid argument, newValue is null"); + } Base64 decoder = new Base64(0, null, true); DataInputBuffer buf = new DataInputBuffer(); byte[] decoded = decoder.decode(newValue); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java index 70de647ab9d..5b96fbf4725 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/AbstractService.java @@ -254,7 +254,7 @@ public final void close() throws IOException { * @param exception the exception */ protected final void noteFailure(Exception exception) { - LOG.debug("noteFailure {}" + exception); + LOG.debug("noteFailure", exception); if (exception == null) { //make sure failure logic doesn't itself cause problems return; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java index 70c8eaf936f..a53e2259e0e 100644 --- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tools/CommandShell.java @@ -76,7 +76,7 @@ public int run(String[] args) throws Exception { } } catch (Exception e) { printShellUsage(); - e.printStackTrace(err); + printException(e); return 1; } return exitCode; @@ -98,6 +98,10 @@ protected final void printShellUsage() { out.flush(); } + protected void printException(Exception ex){ + ex.printStackTrace(err); + } + /** * Base class for any subcommands of this shell command. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CleanerUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CleanerUtil.java new file mode 100644 index 00000000000..a56602eafab --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CleanerUtil.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.IOException; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.nio.ByteBuffer; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.Objects; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static java.lang.invoke.MethodHandles.constant; +import static java.lang.invoke.MethodHandles.dropArguments; +import static java.lang.invoke.MethodHandles.filterReturnValue; +import static java.lang.invoke.MethodHandles.guardWithTest; +import static java.lang.invoke.MethodType.methodType; + +/** + * sun.misc.Cleaner has moved in OpenJDK 9 and + * sun.misc.Unsafe#invokeCleaner(ByteBuffer) is the replacement. + * This class is a hack to use sun.misc.Cleaner in Java 8 and + * use the replacement in Java 9+. + * This implementation is inspired by LUCENE-6989. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class CleanerUtil { + + // Prevent instantiation + private CleanerUtil(){} + + /** + * true, if this platform supports unmapping mmapped files. + */ + public static final boolean UNMAP_SUPPORTED; + + /** + * if {@link #UNMAP_SUPPORTED} is {@code false}, this contains the reason + * why unmapping is not supported. + */ + public static final String UNMAP_NOT_SUPPORTED_REASON; + + + private static final BufferCleaner CLEANER; + + /** + * Reference to a BufferCleaner that does unmapping. + * @return {@code null} if not supported. 
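A minimal sketch of the intended call pattern for this utility, mirroring the NativeIO#munmap change earlier in the diff; nothing here goes beyond what CleanerUtil itself exposes (UNMAP_SUPPORTED, getCleaner(), UNMAP_NOT_SUPPORTED_REASON).

    import java.io.IOException;
    import java.nio.MappedByteBuffer;

    import org.apache.hadoop.util.CleanerUtil;

    public final class UnmapSketch {
      /** Best-effort unmap of a direct, memory-mapped buffer. */
      static void unmapQuietly(MappedByteBuffer buffer) {
        if (CleanerUtil.UNMAP_SUPPORTED) {
          try {
            CleanerUtil.getCleaner().freeBuffer(buffer);
          } catch (IOException e) {
            // Unmapping is best-effort; the GC reclaims the mapping later.
          }
        } else {
          // The constant explains why unmapping is unavailable here.
          System.err.println(CleanerUtil.UNMAP_NOT_SUPPORTED_REASON);
        }
      }
    }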
+ */ + public static BufferCleaner getCleaner() { + return CLEANER; + } + + static { + final Object hack = AccessController.doPrivileged( + (PrivilegedAction) CleanerUtil::unmapHackImpl); + if (hack instanceof BufferCleaner) { + CLEANER = (BufferCleaner) hack; + UNMAP_SUPPORTED = true; + UNMAP_NOT_SUPPORTED_REASON = null; + } else { + CLEANER = null; + UNMAP_SUPPORTED = false; + UNMAP_NOT_SUPPORTED_REASON = hack.toString(); + } + } + + private static Object unmapHackImpl() { + final MethodHandles.Lookup lookup = MethodHandles.lookup(); + try { + try { + // *** sun.misc.Unsafe unmapping (Java 9+) *** + final Class unsafeClass = Class.forName("sun.misc.Unsafe"); + // first check if Unsafe has the right method, otherwise we can + // give up without doing any security critical stuff: + final MethodHandle unmapper = lookup.findVirtual(unsafeClass, + "invokeCleaner", methodType(void.class, ByteBuffer.class)); + // fetch the unsafe instance and bind it to the virtual MH: + final Field f = unsafeClass.getDeclaredField("theUnsafe"); + f.setAccessible(true); + final Object theUnsafe = f.get(null); + return newBufferCleaner(ByteBuffer.class, unmapper.bindTo(theUnsafe)); + } catch (SecurityException se) { + // rethrow to report errors correctly (we need to catch it here, + // as we also catch RuntimeException below!): + throw se; + } catch (ReflectiveOperationException | RuntimeException e) { + // *** sun.misc.Cleaner unmapping (Java 8) *** + final Class directBufferClass = + Class.forName("java.nio.DirectByteBuffer"); + + final Method m = directBufferClass.getMethod("cleaner"); + m.setAccessible(true); + final MethodHandle directBufferCleanerMethod = lookup.unreflect(m); + final Class cleanerClass = + directBufferCleanerMethod.type().returnType(); + + /* + * "Compile" a MethodHandle that basically is equivalent + * to the following code: + * + * void unmapper(ByteBuffer byteBuffer) { + * sun.misc.Cleaner cleaner = + * ((java.nio.DirectByteBuffer) byteBuffer).cleaner(); + * if (Objects.nonNull(cleaner)) { + * cleaner.clean(); + * } else { + * // the noop is needed because MethodHandles#guardWithTest + * // always needs ELSE + * noop(cleaner); + * } + * } + */ + final MethodHandle cleanMethod = lookup.findVirtual( + cleanerClass, "clean", methodType(void.class)); + final MethodHandle nonNullTest = lookup.findStatic(Objects.class, + "nonNull", methodType(boolean.class, Object.class)) + .asType(methodType(boolean.class, cleanerClass)); + final MethodHandle noop = dropArguments( + constant(Void.class, null).asType(methodType(void.class)), + 0, cleanerClass); + final MethodHandle unmapper = filterReturnValue( + directBufferCleanerMethod, + guardWithTest(nonNullTest, cleanMethod, noop)) + .asType(methodType(void.class, ByteBuffer.class)); + return newBufferCleaner(directBufferClass, unmapper); + } + } catch (SecurityException se) { + return "Unmapping is not supported, because not all required " + + "permissions are given to the Hadoop JAR file: " + se + + " [Please grant at least the following permissions: " + + "RuntimePermission(\"accessClassInPackage.sun.misc\") " + + " and ReflectPermission(\"suppressAccessChecks\")]"; + } catch (ReflectiveOperationException | RuntimeException e) { + return "Unmapping is not supported on this platform, " + + "because internal Java APIs are not compatible with " + + "this Hadoop version: " + e; + } + } + + private static BufferCleaner newBufferCleaner( + final Class unmappableBufferClass, final MethodHandle unmapper) { + assert Objects.equals( + methodType(void.class, 
ByteBuffer.class), unmapper.type()); + return buffer -> { + if (!buffer.isDirect()) { + throw new IllegalArgumentException( + "unmapping only works with direct buffers"); + } + if (!unmappableBufferClass.isInstance(buffer)) { + throw new IllegalArgumentException("buffer is not an instance of " + + unmappableBufferClass.getName()); + } + final Throwable error = AccessController.doPrivileged( + (PrivilegedAction) () -> { + try { + unmapper.invokeExact(buffer); + return null; + } catch (Throwable t) { + return t; + } + }); + if (error != null) { + throw new IOException("Unable to unmap the mapped buffer", error); + } + }; + } + + /** + * Pass in an implementation of this interface to cleanup ByteBuffers. + * CleanerUtil implements this to allow unmapping of bytebuffers + * with private Java APIs. + */ + @FunctionalInterface + public interface BufferCleaner { + void freeBuffer(ByteBuffer b) throws IOException; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java index 0669b0abd24..3142df2da36 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java @@ -28,12 +28,12 @@ * natively. */ class NativeCrc32 { - + private static final boolean isSparc = System.getProperty("os.arch").toLowerCase().startsWith("sparc"); /** * Return true if the JNI-based native CRC extensions are available. */ public static boolean isAvailable() { - if (System.getProperty("os.arch").toLowerCase().startsWith("sparc")) { + if (isSparc) { return false; } else { return NativeCodeLoader.isNativeCodeLoaded(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java index 2fd9b5589dc..9ba9e94dff5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ServletUtil.java @@ -74,7 +74,7 @@ public static long parseLongParam(ServletRequest request, String param) } public static final String HTML_TAIL = "
\n" - + "Hadoop, " + + "Hadoop, " + Calendar.getInstance().get(Calendar.YEAR) + ".\n" + ""; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java index 153f92b825e..2ca8e55f0bd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownHookManager.java @@ -17,11 +17,17 @@ */ package org.apache.hadoop.util; +import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; + import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -34,6 +40,9 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.SERVICE_SHUTDOWN_TIMEOUT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.SERVICE_SHUTDOWN_TIMEOUT_DEFAULT; + /** * The ShutdownHookManager enables running shutdownHook * in a deterministic order, higher priority first. @@ -42,53 +51,55 @@ * This class registers a single JVM shutdownHook and run all the * shutdownHooks registered to it (to this class) in order based on their * priority. + * + * Unless a hook was registered with a shutdown explicitly set through + * {@link #addShutdownHook(Runnable, int, long, TimeUnit)}, + * the shutdown time allocated to it is set by the configuration option + * {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in + * {@code core-site.xml}, with a default value of + * {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT} + * seconds. */ -public class ShutdownHookManager { +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class ShutdownHookManager { private static final ShutdownHookManager MGR = new ShutdownHookManager(); private static final Logger LOG = LoggerFactory.getLogger(ShutdownHookManager.class); - private static final long TIMEOUT_DEFAULT = 10; - private static final TimeUnit TIME_UNIT_DEFAULT = TimeUnit.SECONDS; + + /** Minimum shutdown timeout: {@value} second(s). */ + public static final long TIMEOUT_MINIMUM = 1; + + /** The default time unit used: seconds. */ + public static final TimeUnit TIME_UNIT_DEFAULT = TimeUnit.SECONDS; private static final ExecutorService EXECUTOR = HadoopExecutors.newSingleThreadExecutor(new ThreadFactoryBuilder() - .setDaemon(true).build()); + .setDaemon(true) + .setNameFormat("shutdown-hook-%01d") + .build()); + static { try { Runtime.getRuntime().addShutdownHook( new Thread() { @Override public void run() { - MGR.shutdownInProgress.set(true); - for (HookEntry entry: MGR.getShutdownHooksInOrder()) { - Future future = EXECUTOR.submit(entry.getHook()); - try { - future.get(entry.getTimeout(), entry.getTimeUnit()); - } catch (TimeoutException ex) { - future.cancel(true); - LOG.warn("ShutdownHook '" + entry.getHook().getClass(). 
- getSimpleName() + "' timeout, " + ex.toString(), ex); - } catch (Throwable ex) { - LOG.warn("ShutdownHook '" + entry.getHook().getClass(). - getSimpleName() + "' failed, " + ex.toString(), ex); - } - } - try { - EXECUTOR.shutdown(); - if (!EXECUTOR.awaitTermination(TIMEOUT_DEFAULT, - TIME_UNIT_DEFAULT)) { - LOG.error("ShutdownHookManger shutdown forcefully."); - EXECUTOR.shutdownNow(); - } - LOG.debug("ShutdownHookManger complete shutdown."); - } catch (InterruptedException ex) { - LOG.error("ShutdownHookManger interrupted while waiting for " + - "termination.", ex); - EXECUTOR.shutdownNow(); - Thread.currentThread().interrupt(); + if (MGR.shutdownInProgress.getAndSet(true)) { + LOG.info("Shutdown process invoked a second time: ignoring"); + return; } + long started = System.currentTimeMillis(); + int timeoutCount = executeShutdown(); + long ended = System.currentTimeMillis(); + LOG.debug(String.format( + "Completed shutdown in %.3f seconds; Timeouts: %d", + (ended-started)/1000.0, timeoutCount)); + // each of the hooks has executed; now shut down the + // executor itself. + shutdownExecutor(new Configuration()); } } ); @@ -98,19 +109,93 @@ public void run() { } } + /** + * Execute the shutdown. + * This is exposed purely for testing: do not invoke it. + * @return the number of shutdown hooks which timed out. + */ + @InterfaceAudience.Private + @VisibleForTesting + static int executeShutdown() { + int timeouts = 0; + for (HookEntry entry: MGR.getShutdownHooksInOrder()) { + Future<?> future = EXECUTOR.submit(entry.getHook()); + try { + future.get(entry.getTimeout(), entry.getTimeUnit()); + } catch (TimeoutException ex) { + timeouts++; + future.cancel(true); + LOG.warn("ShutdownHook '" + entry.getHook().getClass(). + getSimpleName() + "' timeout, " + ex.toString(), ex); + } catch (Throwable ex) { + LOG.warn("ShutdownHook '" + entry.getHook().getClass(). + getSimpleName() + "' failed, " + ex.toString(), ex); + } + } + return timeouts; + } + + /** + * Shutdown the executor thread itself. + * @param conf the configuration containing the shutdown timeout setting. + */ + private static void shutdownExecutor(final Configuration conf) { + try { + EXECUTOR.shutdown(); + long shutdownTimeout = getShutdownTimeout(conf); + if (!EXECUTOR.awaitTermination( + shutdownTimeout, + TIME_UNIT_DEFAULT)) { + // timeout waiting for the executor to terminate + LOG.error("ShutdownHookManager shutdown forcefully after" + + " {} seconds.", shutdownTimeout); + EXECUTOR.shutdownNow(); + } + LOG.debug("ShutdownHookManager completed shutdown."); + } catch (InterruptedException ex) { + // interrupted. + LOG.error("ShutdownHookManager interrupted while waiting for " + + "termination.", ex); + EXECUTOR.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + /** + * Return ShutdownHookManager singleton. + * + * @return ShutdownHookManager singleton. + */ + @InterfaceAudience.Public public static ShutdownHookManager get() { return MGR; } + /** + * Get the shutdown timeout in seconds, from the supplied + * configuration. + * @param conf configuration to use. + * @return a timeout, always greater than or equal to {@link #TIMEOUT_MINIMUM} + */ + @InterfaceAudience.Private + @VisibleForTesting + static long getShutdownTimeout(Configuration conf) { + long duration = conf.getTimeDuration( + SERVICE_SHUTDOWN_TIMEOUT, + SERVICE_SHUTDOWN_TIMEOUT_DEFAULT, + TIME_UNIT_DEFAULT); + if (duration < TIMEOUT_MINIMUM) { + duration = TIMEOUT_MINIMUM; + } + return duration; + } + /** + * Private structure to store ShutdownHook, its priority and timeout + * settings. 
*/ + @InterfaceAudience.Private + @VisibleForTesting static class HookEntry { private final Runnable hook; private final int priority; @@ -118,7 +203,9 @@ public static ShutdownHookManager get() { private final TimeUnit unit; HookEntry(Runnable hook, int priority) { - this(hook, priority, TIMEOUT_DEFAULT, TIME_UNIT_DEFAULT); + this(hook, priority, + getShutdownTimeout(new Configuration()), + TIME_UNIT_DEFAULT); } HookEntry(Runnable hook, int priority, long timeout, TimeUnit unit) { @@ -176,10 +263,12 @@ private ShutdownHookManager() { * * @return the list of shutdownHooks in order of execution. */ + @InterfaceAudience.Private + @VisibleForTesting List getShutdownHooksInOrder() { List list; synchronized (MGR.hooks) { - list = new ArrayList(MGR.hooks); + list = new ArrayList<>(MGR.hooks); } Collections.sort(list, new Comparator() { @@ -200,6 +289,8 @@ public int compare(HookEntry o1, HookEntry o2) { * @param shutdownHook shutdownHook Runnable * @param priority priority of the shutdownHook. */ + @InterfaceAudience.Public + @InterfaceStability.Stable public void addShutdownHook(Runnable shutdownHook, int priority) { if (shutdownHook == null) { throw new IllegalArgumentException("shutdownHook cannot be NULL"); @@ -223,6 +314,8 @@ public void addShutdownHook(Runnable shutdownHook, int priority) { * @param timeout timeout of the shutdownHook * @param unit unit of the timeout TimeUnit */ + @InterfaceAudience.Public + @InterfaceStability.Stable public void addShutdownHook(Runnable shutdownHook, int priority, long timeout, TimeUnit unit) { if (shutdownHook == null) { @@ -242,6 +335,8 @@ public void addShutdownHook(Runnable shutdownHook, int priority, long timeout, * @return TRUE if the shutdownHook was registered and removed, * FALSE otherwise. */ + @InterfaceAudience.Public + @InterfaceStability.Stable public boolean removeShutdownHook(Runnable shutdownHook) { if (shutdownInProgress.get()) { throw new IllegalStateException("Shutdown in progress, cannot remove a " + @@ -256,6 +351,8 @@ public boolean removeShutdownHook(Runnable shutdownHook) { * @param shutdownHook shutdownHook to check if registered. * @return TRUE/FALSE depending if the shutdownHook is is registered. */ + @InterfaceAudience.Public + @InterfaceStability.Stable public boolean hasShutdownHook(Runnable shutdownHook) { return hooks.contains(new HookEntry(shutdownHook, 0)); } @@ -265,6 +362,8 @@ public boolean hasShutdownHook(Runnable shutdownHook) { * * @return TRUE if the shutdown is in progress, otherwise FALSE. */ + @InterfaceAudience.Public + @InterfaceStability.Stable public boolean isShutdownInProgress() { return shutdownInProgress.get(); } @@ -272,7 +371,9 @@ public boolean isShutdownInProgress() { /** * clear all registered shutdownHooks. 
*/ + @InterfaceAudience.Public + @InterfaceStability.Stable public void clearShutdownHooks() { hooks.clear(); } -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java index 3db805fa4f7..f49698ca5ac 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java @@ -743,7 +743,7 @@ public static String createStartupShutdownMessage(String classname, return toStartupShutdownString("STARTUP_MSG: ", new String[] { "Starting " + classname, " host = " + hostname, - " args = " + Arrays.asList(args), + " args = " + (args != null ? Arrays.asList(args) : new ArrayList<>()), " version = " + VersionInfo.getVersion(), " classpath = " + System.getProperty("java.class.path"), " build = " + VersionInfo.getUrl() + " -r " diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java index 8276b6e29c6..d164138a39f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java @@ -196,7 +196,10 @@ public void start(List authInfos) throws IOException { */ public String getStringData(final String path) throws Exception { byte[] bytes = getData(path); - return new String(bytes, Charset.forName("UTF-8")); + if (bytes != null) { + return new String(bytes, Charset.forName("UTF-8")); + } + return null; } /** @@ -208,7 +211,10 @@ public String getStringData(final String path) throws Exception { */ public String getStringData(final String path, Stat stat) throws Exception { byte[] bytes = getData(path, stat); - return new String(bytes, Charset.forName("UTF-8")); + if (bytes != null) { + return new String(bytes, Charset.forName("UTF-8")); + } + return null; } /** diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 75acf489e64..81502dc24c6 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -553,6 +553,22 @@ + + hadoop.service.shutdown.timeout + 30s + + Timeout to wait for each shutdown operation to complete. + If a hook takes longer than this time to complete, it will be interrupted, + so the service will shutdown. This allows the service shutdown + to recover from a blocked operation. + Some shutdown hooks may need more time than this, for example when + a large amount of data needs to be uploaded to an object store. + In this situation: increase the timeout. + + The minimum duration of the timeout is 1 second, "1s". + + + hadoop.rpc.protection authentication @@ -961,9 +977,8 @@ If unspecified, then the default list of credential provider classes, queried in sequence, is: - 1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports static - configuration of AWS access key ID and secret access key. See also - fs.s3a.access.key and fs.s3a.secret.key. + 1. org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider: + Uses the values of fs.s3a.access.key and fs.s3a.secret.key. 2. 
com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports configuration of AWS access key ID and secret access key in environment variables named AWS_ACCESS_KEY_ID and @@ -1033,7 +1048,19 @@ fs.s3a.assumed.role.sts.endpoint - AWS Simple Token Service Endpoint. If unset, uses the default endpoint. + AWS Security Token Service Endpoint. + If unset, uses the default endpoint. + Only used if AssumedRoleCredentialProvider is the AWS credential provider. + + + + + fs.s3a.assumed.role.sts.endpoint.region + us-west-1 + + AWS Security Token Service Endpoint's region; + Needed if fs.s3a.assumed.role.sts.endpoint points to an endpoint + other than the default one and the v4 signature is used. Only used if AssumedRoleCredentialProvider is the AWS credential provider. @@ -1058,7 +1085,9 @@ fs.s3a.connection.ssl.enabled true - Enables or disables SSL connections to S3. + Enables or disables SSL connections to AWS services. + Also sets the default port to use for the s3a proxy settings, + when not explicitly set in fs.s3a.proxy.port. @@ -1418,19 +1447,28 @@ fs.s3a.s3guard.ddb.max.retries 9 - Max retries on batched DynamoDB operations before giving up and - throwing an IOException. Each retry is delayed with an exponential + Max retries on throttled/incomplete DynamoDB operations + before giving up and throwing an IOException. + Each retry is delayed with an exponential backoff timer which starts at 100 milliseconds and approximately doubles each time. The minimum wait before throwing an exception is sum(100, 200, 400, 800, .. 100*2^N-1 ) == 100 * ((2^N)-1) - So N = 9 yields at least 51.1 seconds (51,100) milliseconds of blocking - before throwing an IOException. + + + + + fs.s3a.s3guard.ddb.throttle.retry.interval + 100ms + + Initial interval to retry after a request is throttled; + the back-off policy is exponential until the number of retries of + fs.s3a.s3guard.ddb.max.retries is reached. fs.s3a.s3guard.ddb.background.sleep - 25 + 25ms Length (in milliseconds) of pause between each batch of deletes when pruning metadata. Prevents prune operations (which can typically be low diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md b/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md index 6b17c62b00e..03d162a18ac 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md @@ -187,7 +187,7 @@ existing documentation and tests and/or adding new documentation or tests. #### Java Binary compatibility for end-user applications i.e. Apache Hadoop ABI -Apache Hadoop revisions SHOUD retain binary compatability such that end-user +Apache Hadoop revisions SHOULD retain binary compatibility such that end-user applications continue to work without any modifications. Minor Apache Hadoop revisions within the same major revision MUST retain compatibility such that existing MapReduce applications (e.g. end-user applications and projects such diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md index ec9d3c3668c..d9567b9a9da 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md @@ -741,6 +741,38 @@ Usage: `hadoop fs -text <src>` Takes a source file and outputs the file in text format. The allowed formats are zip and TextRecordInputStream. 
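The ShutdownHookManager changes earlier in this patch add a per-hook shutdown budget: hooks registered through the four-argument addShutdownHook(Runnable, int, long, TimeUnit) overload get an explicit timeout, while hooks registered without one fall back to the new hadoop.service.shutdown.timeout property (30s by default, clamped to at least 1s). A minimal sketch of how a caller might use that overload follows; the class name, priority value, and hook body are illustrative assumptions, not part of the patch:

    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.util.ShutdownHookManager;

    public class FlushOnShutdown {
      public static void main(String[] args) {
        // Register a hook with an explicit 60-second budget. Hooks added via the
        // two-argument overload instead pick up hadoop.service.shutdown.timeout
        // from core-site.xml (default 30s, minimum 1s).
        ShutdownHookManager.get().addShutdownHook(
            () -> System.out.println("flushing pending work before JVM exit"),
            10,                    // priority: higher-priority hooks run first
            60, TimeUnit.SECONDS); // per-hook timeout
      }
    }

If a hook overruns its budget, the manager cancels its Future and records a timeout, as implemented in executeShutdown() above.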
+touch +------ + +Usage: `hadoop fs -touch [-a] [-m] [-t TIMESTAMP] [-c] URI [URI ...]` + +Updates the access and modification times of the file specified by the URI to the current time. +If the file does not exist, then a zero length file is created at URI with current time as the +timestamp of that URI. + +* Use -a option to change only the access time +* Use -m option to change only the modification time +* Use -t option to specify timestamp (in format yyyyMMddHHmmss) instead of current time +* Use -c option to not create file if it does not exist + +The timestamp format is as follows +* yyyy Four digit year (e.g. 2018) +* MM Two digit month of the year (e.g. 08 for month of August) +* dd Two digit day of the month (e.g. 01 for first day of the month) +* HH Two digit hour of the day using 24 hour notation (e.g. 23 stands for 11 pm, 11 stands for 11 am) +* mm Two digit minutes of the hour +* ss Two digit seconds of the minute +e.g. 20180809230000 represents August 9th 2018, 11pm + +Example: + +* `hadoop fs -touch pathname` +* `hadoop fs -touch -m -t 20180809230000 pathname` +* `hadoop fs -touch -t 20180809230000 pathname` +* `hadoop fs -touch -a pathname` + +Exit Code: Returns 0 on success and -1 on error. + touchz ------ diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md b/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md index 451f9be3073..73480441ccf 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md @@ -35,10 +35,9 @@ Interface Classification ------------------------ Hadoop adopts the following interface classification, -this classification was derived from the -[OpenSolaris taxonomy](http://www.opensolaris.org/os/community/arc/policies/interface-taxonomy/#Advice) -and, to some extent, from taxonomy used inside Yahoo. -Interfaces have two main attributes: Audience and Stability +this classification was derived from the OpenSolaris taxonomy and, to some extent, +from taxonomy used inside Yahoo. +Interfaces have two main attributes: Audience and Stability. ### Audience @@ -125,7 +124,7 @@ hence serves as a safe development target. A Stable interface may evolve compatibly between minor releases. Incompatible changes allowed: major (X.0.0) -Compatible changes allowed: maintenance (x.Y.0) +Compatible changes allowed: maintenance (x.y.Z) #### Evolving diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 676ab0b508e..83ad40a2484 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -159,14 +159,17 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a | `TransactionsAvgTime` | Average time of Journal transactions in milliseconds | | `SyncsNumOps` | Total number of Journal syncs | | `SyncsAvgTime` | Average time of Journal syncs in milliseconds | +| `SyncsTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of Journal sync time in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. 
| | `TransactionsBatchedInSync` | Total number of Journal transactions batched in sync | +| `TransactionsBatchedInSync`*num*`s(50/75/90/95/99)thPercentileCount` | The 50/75/90/95/99th percentile of number of batched Journal transactions (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `StorageBlockReportNumOps` | Total number of processing block reports from individual storages in DataNode | | `StorageBlockReportAvgTime` | Average time of processing block reports in milliseconds | +| `StorageBlockReport`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of block report processing time in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `CacheReportNumOps` | Total number of processing cache reports from DataNode | | `CacheReportAvgTime` | Average time of processing cache reports in milliseconds | +| `CacheReport`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of cached report processing time in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `SafeModeTime` | The interval between FSNameSystem starts and the last time safemode leaves in milliseconds.  (sometimes not equal to the time in SafeMode, see [HDFS-5156](https://issues.apache.org/jira/browse/HDFS-5156)) | | `FsImageLoadTime` | Time loading FS Image at startup in milliseconds | -| `FsImageLoadTime` | Time loading FS Image at startup in milliseconds | | `GetEditNumOps` | Total number of edits downloads from SecondaryNameNode | | `GetEditAvgTime` | Average edits download time in milliseconds | | `GetImageNumOps` | Total number of fsimage downloads from SecondaryNameNode | @@ -177,22 +180,23 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a | `NNStartedTimeInMillis`| NameNode start time in milliseconds | | `GenerateEDEKTimeNumOps` | Total number of generating EDEK | | `GenerateEDEKTimeAvgTime` | Average time of generating EDEK in milliseconds | +| `GenerateEDEKTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in generating EDEK in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `WarmUpEDEKTimeNumOps` | Total number of warming up EDEK | | `WarmUpEDEKTimeAvgTime` | Average time of warming up EDEK in milliseconds | -| `ResourceCheckTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of NameNode resource check latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | -| `StorageBlockReport`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of storage block report latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `WarmUpEDEKTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in warming up EDEK in milliseconds (*num* seconds granularity). 
Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `ResourceCheckTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of NameNode resource check latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `EditLogTailTimeNumOps` | Total number of times the standby NameNode tailed the edit log | | `EditLogTailTimeAvgTime` | Average time (in milliseconds) spent by standby NameNode in tailing edit log | -| `EditLogTailTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in tailing edit logs by standby NameNode, in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `EditLogTailTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in tailing edit logs by standby NameNode in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `EditLogFetchTimeNumOps` | Total number of times the standby NameNode fetched remote edit streams from journal nodes | | `EditLogFetchTimeAvgTime` | Average time (in milliseconds) spent by standby NameNode in fetching remote edit streams from journal nodes | -| `EditLogFetchTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in fetching edit streams from journal nodes by standby NameNode, in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `EditLogFetchTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in fetching edit streams from journal nodes by standby NameNode in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `NumEditLogLoadedNumOps` | Total number of times edits were loaded by standby NameNode | | `NumEditLogLoadedAvgCount` | Average number of edits loaded by standby NameNode in each edit log tailing | -| `NumEditLogLoaded`*num*`s(50/75/90/95/99)thPercentileCount` | The 50/75/90/95/99th percentile of number of edits loaded by standby NameNode in each edit log tailing. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `NumEditLogLoaded`*num*`s(50/75/90/95/99)thPercentileCount` | The 50/75/90/95/99th percentile of number of edits loaded by standby NameNode in each edit log tailing (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `EditLogTailIntervalNumOps` | Total number of intervals between edit log tailings by standby NameNode | | `EditLogTailIntervalAvgTime` | Average time of intervals between edit log tailings by standby NameNode in milliseconds | -| `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time between edit log tailings by standby NameNode, in milliseconds. 
Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time between edit log tailings by standby NameNode in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | FSNamesystem ------------ @@ -240,6 +244,8 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `StaleDataNodes` | Current number of DataNodes marked stale due to delayed heartbeat | | `NumStaleStorages` | Number of storages marked as content stale (after NameNode restart/failover before first block report is received) | | `MissingReplOneBlocks` | Current number of missing blocks with replication factor 1 | +| `HighestPriorityLowRedundancyReplicatedBlocks` | Current number of non-corrupt, low redundancy replicated blocks with the highest risk of loss (have 0 or 1 replica). Will be recovered with the highest priority. | +| `HighestPriorityLowRedundancyECBlocks` | Current number of non-corrupt, low redundancy EC blocks with the highest risk of loss. Will be recovered with the highest priority. | | `NumFilesUnderConstruction` | Current number of files under construction | | `NumActiveClients` | Current number of active clients holding lease | | `HAState` | (HA-only) Current state of the NameNode: initializing or active or standby or stopping state | @@ -252,8 +258,10 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `NumInMaintenanceLiveDataNodes` | Number of live Datanodes which are in maintenance state | | `NumInMaintenanceDeadDataNodes` | Number of dead Datanodes which are in maintenance state | | `NumEnteringMaintenanceDataNodes` | Number of Datanodes that are entering the maintenance state | -| `FSN(Read/Write)Lock`*OperationName*`NumOps` | Total number of acquiring lock by operations | -| `FSN(Read/Write)Lock`*OperationName*`AvgTime` | Average time of holding the lock by operations in milliseconds | +| `FSN(Read/Write)Lock`*OperationName*`NanosNumOps` | Total number of acquiring lock by operations | +| `FSN(Read/Write)Lock`*OperationName*`NanosAvgTime` | Average time of holding the lock by operations in nanoseconds | +| `FSN(Read/Write)LockOverallNanosNumOps` | Total number of acquiring lock by all operations | +| `FSN(Read/Write)LockOverallNanosAvgTime` | Average time of holding the lock by all operations in nanoseconds | JournalNode ----------- @@ -338,13 +346,13 @@ Each metrics record contains tags such as SessionId and Hostname as additional i | `RamDiskBlocksEvictedWithoutRead` | Total number of blocks evicted in memory without ever being read from memory | | `RamDiskBlocksEvictionWindowMsNumOps` | Number of blocks evicted in memory| | `RamDiskBlocksEvictionWindowMsAvgTime` | Average time of blocks in memory before being evicted in milliseconds | -| `RamDiskBlocksEvictionWindows`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of latency between memory write and eviction in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. 
| +| `RamDiskBlocksEvictionWindows`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of latency between memory write and eviction in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `RamDiskBlocksLazyPersisted` | Total number of blocks written to disk by lazy writer | | `RamDiskBlocksDeletedBeforeLazyPersisted` | Total number of blocks deleted by application before being persisted to disk | | `RamDiskBytesLazyPersisted` | Total number of bytes written to disk by lazy writer | | `RamDiskBlocksLazyPersistWindowMsNumOps` | Number of blocks written to disk by lazy writer | | `RamDiskBlocksLazyPersistWindowMsAvgTime` | Average time of blocks written to disk by lazy writer in milliseconds | -| `RamDiskBlocksLazyPersistWindows`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of latency between memory write and disk persist in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `RamDiskBlocksLazyPersistWindows`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of latency between memory write and disk persist in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `FsyncCount` | Total number of fsync | | `VolumeFailures` | Total number of volume failures occurred | | `ReadBlockOpNumOps` | Total number of read operations | @@ -371,14 +379,19 @@ Each metrics record contains tags such as SessionId and Hostname as additional i | `CacheReportsAvgTime` | Average time of cache report operations in milliseconds | | `PacketAckRoundTripTimeNanosNumOps` | Total number of ack round trip | | `PacketAckRoundTripTimeNanosAvgTime` | Average time from ack send to receive minus the downstream ack time in nanoseconds | +| `PacketAckRoundTripTimeNanos`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile latency from ack send to receive minus the downstream ack time in nanoseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `FlushNanosNumOps` | Total number of flushes | | `FlushNanosAvgTime` | Average flush time in nanoseconds | +| `FlushNanos`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile flush time in nanoseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `FsyncNanosNumOps` | Total number of fsync | | `FsyncNanosAvgTime` | Average fsync time in nanoseconds | +| `FsyncNanos`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile fsync time in nanoseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. 
| | `SendDataPacketBlockedOnNetworkNanosNumOps` | Total number of sending packets | | `SendDataPacketBlockedOnNetworkNanosAvgTime` | Average waiting time of sending packets in nanoseconds | +| `SendDataPacketBlockedOnNetworkNanos`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile waiting time of sending packets in nanoseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `SendDataPacketTransferNanosNumOps` | Total number of sending packets | | `SendDataPacketTransferNanosAvgTime` | Average transfer time of sending packets in nanoseconds | +| `SendDataPacketTransferNanos`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile transfer time of sending packets in nanoseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `TotalWriteTime`| Total number of milliseconds spent on write operation | | `TotalReadTime` | Total number of milliseconds spent on read operation | | `RemoteBytesRead` | Number of bytes read by remote clients | @@ -410,23 +423,23 @@ contains tags such as Hostname as additional information along with metrics. | `TotalMetadataOperations` | Total number (monotonically increasing) of metadata operations. Metadata operations include stat, list, mkdir, delete, move, open and posix_fadvise. | | `MetadataOperationRateNumOps` | The number of metadata operations within an interval time of metric | | `MetadataOperationRateAvgTime` | Mean time of metadata operations in milliseconds | -| `MetadataOperationLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of metadata operations latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `MetadataOperationLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of metadata operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `TotalDataFileIos` | Total number (monotonically increasing) of data file io operations | | `DataFileIoRateNumOps` | The number of data file io operations within an interval time of metric | | `DataFileIoRateAvgTime` | Mean time of data file io operations in milliseconds | -| `DataFileIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of data file io operations latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `DataFileIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of data file io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `FlushIoRateNumOps` | The number of file flush io operations within an interval time of metric | | `FlushIoRateAvgTime` | Mean time of file flush io operations in milliseconds | -| `FlushIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file flush io operations latency in milliseconds. 
Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `FlushIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file flush io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `SyncIoRateNumOps` | The number of file sync io operations within an interval time of metric | | `SyncIoRateAvgTime` | Mean time of file sync io operations in milliseconds | -| `SyncIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file sync io operations latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `SyncIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file sync io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `ReadIoRateNumOps` | The number of file read io operations within an interval time of metric | | `ReadIoRateAvgTime` | Mean time of file read io operations in milliseconds | -| `ReadIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file read io operations latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `ReadIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file read io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `WriteIoRateNumOps` | The number of file write io operations within an interval time of metric | | `WriteIoRateAvgTime` | Mean time of file write io operations in milliseconds | -| `WriteIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file write io operations latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `WriteIoLatency`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of file write io operations latency in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. 
| | `TotalFileIoErrors` | Total number (monotonically increasing) of file io error operations | | `FileIoErrorRateNumOps` | The number of file io error operations within an interval time of metric | | `FileIoErrorRateAvgTime` | It measures the mean time in milliseconds from the start of an operation to hitting a failure | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.0/CHANGES.0.1.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.0/CHANGELOG.0.1.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.0/CHANGES.0.1.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.0/CHANGELOG.0.1.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.1/CHANGES.0.1.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.1/CHANGELOG.0.1.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.1/CHANGES.0.1.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.1.1/CHANGELOG.0.1.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.0/CHANGES.0.10.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.0/CHANGELOG.0.10.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.0/CHANGES.0.10.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.0/CHANGELOG.0.10.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.1/CHANGES.0.10.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.1/CHANGELOG.0.10.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.1/CHANGES.0.10.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.10.1/CHANGELOG.0.10.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.0/CHANGES.0.11.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.0/CHANGELOG.0.11.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.0/CHANGES.0.11.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.0/CHANGELOG.0.11.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.1/CHANGES.0.11.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.1/CHANGELOG.0.11.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.1/CHANGES.0.11.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.1/CHANGELOG.0.11.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.2/CHANGES.0.11.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.2/CHANGELOG.0.11.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.2/CHANGES.0.11.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.11.2/CHANGELOG.0.11.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.0/CHANGES.0.12.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.0/CHANGELOG.0.12.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.0/CHANGES.0.12.0.md rename to 
hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.0/CHANGELOG.0.12.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.1/CHANGES.0.12.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.1/CHANGELOG.0.12.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.1/CHANGES.0.12.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.1/CHANGELOG.0.12.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.2/CHANGES.0.12.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.2/CHANGELOG.0.12.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.2/CHANGES.0.12.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.2/CHANGELOG.0.12.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.3/CHANGES.0.12.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.3/CHANGELOG.0.12.3.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.3/CHANGES.0.12.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.12.3/CHANGELOG.0.12.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.13.0/CHANGES.0.13.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.13.0/CHANGELOG.0.13.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.13.0/CHANGES.0.13.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.13.0/CHANGELOG.0.13.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.0/CHANGES.0.14.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.0/CHANGELOG.0.14.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.0/CHANGES.0.14.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.0/CHANGELOG.0.14.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.1/CHANGES.0.14.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.1/CHANGELOG.0.14.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.1/CHANGES.0.14.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.1/CHANGELOG.0.14.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.2/CHANGES.0.14.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.2/CHANGELOG.0.14.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.2/CHANGES.0.14.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.2/CHANGELOG.0.14.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.3/CHANGES.0.14.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.3/CHANGELOG.0.14.3.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.3/CHANGES.0.14.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.3/CHANGELOG.0.14.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.4/CHANGES.0.14.4.md 
b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.4/CHANGELOG.0.14.4.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.4/CHANGES.0.14.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.14.4/CHANGELOG.0.14.4.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.0/CHANGES.0.15.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.0/CHANGELOG.0.15.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.0/CHANGES.0.15.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.0/CHANGELOG.0.15.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.1/CHANGES.0.15.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.1/CHANGELOG.0.15.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.1/CHANGES.0.15.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.1/CHANGELOG.0.15.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.2/CHANGES.0.15.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.2/CHANGELOG.0.15.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.2/CHANGES.0.15.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.2/CHANGELOG.0.15.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.3/CHANGES.0.15.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.3/CHANGELOG.0.15.3.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.3/CHANGES.0.15.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.3/CHANGELOG.0.15.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.4/CHANGES.0.15.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.4/CHANGELOG.0.15.4.md similarity index 95% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.4/CHANGES.0.15.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.4/CHANGELOG.0.15.4.md index f059f17c69b..8a6cae2e8e2 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.4/CHANGES.0.15.4.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.15.4/CHANGELOG.0.15.4.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.15.4 - Unreleased (as of 2017-08-28) +## Release 0.15.4 - Unreleased (as of 2018-09-01) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.0/CHANGES.0.16.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.0/CHANGELOG.0.16.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.0/CHANGES.0.16.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.0/CHANGELOG.0.16.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.1/CHANGES.0.16.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.1/CHANGELOG.0.16.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.1/CHANGES.0.16.1.md rename to 
hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.1/CHANGELOG.0.16.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.2/CHANGES.0.16.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.2/CHANGELOG.0.16.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.2/CHANGES.0.16.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.2/CHANGELOG.0.16.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.3/CHANGES.0.16.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.3/CHANGELOG.0.16.3.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.3/CHANGES.0.16.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.3/CHANGELOG.0.16.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.4/CHANGES.0.16.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.4/CHANGELOG.0.16.4.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.4/CHANGES.0.16.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.16.4/CHANGELOG.0.16.4.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.0/CHANGES.0.17.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.0/CHANGELOG.0.17.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.0/CHANGES.0.17.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.0/CHANGELOG.0.17.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.1/CHANGES.0.17.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.1/CHANGELOG.0.17.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.1/CHANGES.0.17.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.1/CHANGELOG.0.17.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.2/CHANGES.0.17.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.2/CHANGELOG.0.17.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.2/CHANGES.0.17.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.2/CHANGELOG.0.17.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.3/CHANGES.0.17.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.3/CHANGELOG.0.17.3.md similarity index 97% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.3/CHANGES.0.17.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.3/CHANGELOG.0.17.3.md index 5a97d7c4695..14442a9ed19 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.3/CHANGES.0.17.3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.17.3/CHANGELOG.0.17.3.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.17.3 - Unreleased (as of 2017-08-28) +## Release 0.17.3 - Unreleased (as of 2018-09-01) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.0/CHANGES.0.18.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.0/CHANGELOG.0.18.0.md similarity index 100% 
rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.0/CHANGES.0.18.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.0/CHANGELOG.0.18.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.1/CHANGES.0.18.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.1/CHANGELOG.0.18.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.1/CHANGES.0.18.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.1/CHANGELOG.0.18.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.2/CHANGES.0.18.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.2/CHANGELOG.0.18.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.2/CHANGES.0.18.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.2/CHANGELOG.0.18.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.3/CHANGES.0.18.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.3/CHANGELOG.0.18.3.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.3/CHANGES.0.18.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.3/CHANGELOG.0.18.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.4/CHANGES.0.18.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.4/CHANGELOG.0.18.4.md similarity index 98% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.4/CHANGES.0.18.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.4/CHANGELOG.0.18.4.md index 6139716296d..85b9f4b53a2 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.4/CHANGES.0.18.4.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.18.4/CHANGELOG.0.18.4.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.18.4 - Unreleased (as of 2017-08-28) +## Release 0.18.4 - Unreleased (as of 2018-09-01) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.0/CHANGES.0.19.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.0/CHANGELOG.0.19.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.0/CHANGES.0.19.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.0/CHANGELOG.0.19.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.1/CHANGES.0.19.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.1/CHANGELOG.0.19.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.1/CHANGES.0.19.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.1/CHANGELOG.0.19.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.2/CHANGES.0.19.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.2/CHANGELOG.0.19.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.2/CHANGES.0.19.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.19.2/CHANGELOG.0.19.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.0/CHANGES.0.2.0.md 
b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.0/CHANGELOG.0.2.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.0/CHANGES.0.2.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.0/CHANGELOG.0.2.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.1/CHANGES.0.2.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.1/CHANGELOG.0.2.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.1/CHANGES.0.2.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.2.1/CHANGELOG.0.2.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.0/CHANGES.0.20.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.0/CHANGELOG.0.20.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.0/CHANGES.0.20.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.0/CHANGELOG.0.20.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.1/CHANGES.0.20.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.1/CHANGELOG.0.20.1.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.1/CHANGES.0.20.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.1/CHANGELOG.0.20.1.md index ceccdf5d010..1f915e7447a 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.1/CHANGES.0.20.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.1/CHANGELOG.0.20.1.md @@ -84,7 +84,7 @@ | [HADOOP-5921](https://issues.apache.org/jira/browse/HADOOP-5921) | JobTracker does not come up because of NotReplicatedYetException | Major | . | Amareshwari Sriramadasu | Amar Kamat | | [HADOOP-6017](https://issues.apache.org/jira/browse/HADOOP-6017) | NameNode and SecondaryNameNode fail to restart because of abnormal filenames. | Blocker | . | Raghu Angadi | Tsz Wo Nicholas Sze | | [HADOOP-5920](https://issues.apache.org/jira/browse/HADOOP-5920) | TestJobHistory fails some times. | Major | . | Amareshwari Sriramadasu | Amar Kamat | -| [HDFS-26](https://issues.apache.org/jira/browse/HDFS-26) | HADOOP-5862 for version .20 (Namespace quota exceeded message unclear) | Major | . | Boris Shkolnik | Boris Shkolnik | +| [HDFS-26](https://issues.apache.org/jira/browse/HDFS-26) | HADOOP-5862 for version .20 (Namespace quota exceeded message unclear) | Major | . | Boris Shkolnik | Boris Shkolnik | | [HDFS-438](https://issues.apache.org/jira/browse/HDFS-438) | Improve help message for quotas | Minor | . | Raghu Angadi | Raghu Angadi | | [MAPREDUCE-2](https://issues.apache.org/jira/browse/MAPREDUCE-2) | ArrayOutOfIndex error in KeyFieldBasedPartitioner on empty key | Major | . | Amar Kamat | Amar Kamat | | [MAPREDUCE-130](https://issues.apache.org/jira/browse/MAPREDUCE-130) | Delete the jobconf copy from the log directory of the JobTracker when the job is retired | Major | . 
| Devaraj Das | Amar Kamat | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.2/CHANGES.0.20.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.2/CHANGELOG.0.20.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.2/CHANGES.0.20.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.2/CHANGELOG.0.20.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.0/CHANGES.0.20.203.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.0/CHANGELOG.0.20.203.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.0/CHANGES.0.20.203.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.0/CHANGELOG.0.20.203.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.1/CHANGES.0.20.203.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.1/CHANGELOG.0.20.203.1.md similarity index 96% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.1/CHANGES.0.20.203.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.1/CHANGELOG.0.20.203.1.md index 66bce3fcdc8..036b16eb246 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.1/CHANGES.0.20.203.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.203.1/CHANGELOG.0.20.203.1.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.20.203.1 - Unreleased (as of 2017-08-28) +## Release 0.20.203.1 - Unreleased (as of 2018-09-01) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.204.0/CHANGES.0.20.204.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.204.0/CHANGELOG.0.20.204.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.204.0/CHANGES.0.20.204.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.204.0/CHANGELOG.0.20.204.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.205.0/CHANGES.0.20.205.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.205.0/CHANGELOG.0.20.205.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.205.0/CHANGES.0.20.205.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.205.0/CHANGELOG.0.20.205.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.3/CHANGES.0.20.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.3/CHANGELOG.0.20.3.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.3/CHANGES.0.20.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.3/CHANGELOG.0.20.3.md index 82644f4faa8..20e5aadeda9 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.3/CHANGES.0.20.3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.20.3/CHANGELOG.0.20.3.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.20.3 - Unreleased (as of 2017-08-28) +## Release 0.20.3 - Unreleased (as of 2018-09-01) ### INCOMPATIBLE CHANGES: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.0/CHANGES.0.21.0.md 
b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.0/CHANGELOG.0.21.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.0/CHANGES.0.21.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.0/CHANGELOG.0.21.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.1/CHANGES.0.21.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.1/CHANGELOG.0.21.1.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.1/CHANGES.0.21.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.1/CHANGELOG.0.21.1.md index dcb5f6f50b6..45ca6594db8 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.1/CHANGES.0.21.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.21.1/CHANGELOG.0.21.1.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.21.1 - Unreleased (as of 2017-08-28) +## Release 0.21.1 - Unreleased (as of 2018-09-01) ### INCOMPATIBLE CHANGES: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.0/CHANGES.0.22.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.0/CHANGELOG.0.22.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.0/CHANGES.0.22.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.0/CHANGELOG.0.22.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.1/CHANGES.0.22.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.1/CHANGELOG.0.22.1.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.1/CHANGES.0.22.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.1/CHANGELOG.0.22.1.md index 2346cad4693..00446cf79db 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.1/CHANGES.0.22.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.22.1/CHANGELOG.0.22.1.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.22.1 - Unreleased (as of 2017-08-28) +## Release 0.22.1 - Unreleased (as of 2018-09-01) ### INCOMPATIBLE CHANGES: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.0/CHANGES.0.23.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.0/CHANGELOG.0.23.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.0/CHANGES.0.23.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.0/CHANGELOG.0.23.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.1/CHANGES.0.23.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.1/CHANGELOG.0.23.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.1/CHANGES.0.23.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.1/CHANGELOG.0.23.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.10/CHANGES.0.23.10.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.10/CHANGELOG.0.23.10.md similarity index 98% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.10/CHANGES.0.23.10.md rename to 
hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.10/CHANGELOG.0.23.10.md index c94968f58da..f88c6f95cbb 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.10/CHANGES.0.23.10.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.10/CHANGELOG.0.23.10.md @@ -97,6 +97,7 @@ | [HDFS-5557](https://issues.apache.org/jira/browse/HDFS-5557) | Write pipeline recovery for the last packet in the block may cause rejection of valid replicas | Critical | . | Kihwal Lee | Kihwal Lee | | [HDFS-5526](https://issues.apache.org/jira/browse/HDFS-5526) | Datanode cannot roll back to previous layout version | Blocker | datanode | Tsz Wo Nicholas Sze | Kihwal Lee | | [HDFS-5558](https://issues.apache.org/jira/browse/HDFS-5558) | LeaseManager monitor thread can crash if the last block is complete but another block is not. | Major | . | Kihwal Lee | Kihwal Lee | +| [HDFS-5357](https://issues.apache.org/jira/browse/HDFS-5357) | TestFileSystemAccessService failures in JDK7 | Major | . | Robert Parker | Robert Parker | ### TESTS: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.11/CHANGES.0.23.11.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.11/CHANGELOG.0.23.11.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.11/CHANGES.0.23.11.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.11/CHANGELOG.0.23.11.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.2/CHANGES.0.23.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.2/CHANGELOG.0.23.2.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.2/CHANGES.0.23.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.2/CHANGELOG.0.23.2.md index 5f1ac095fc9..ca3fbb9f06b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.2/CHANGES.0.23.2.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.2/CHANGELOG.0.23.2.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.23.2 - Unreleased (as of 2017-08-28) +## Release 0.23.2 - Unreleased (as of 2018-09-01) ### INCOMPATIBLE CHANGES: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.3/CHANGES.0.23.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.3/CHANGELOG.0.23.3.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.3/CHANGES.0.23.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.3/CHANGELOG.0.23.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.4/CHANGES.0.23.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.4/CHANGELOG.0.23.4.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.4/CHANGES.0.23.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.4/CHANGELOG.0.23.4.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.5/CHANGES.0.23.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.5/CHANGELOG.0.23.5.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.5/CHANGES.0.23.5.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.5/CHANGELOG.0.23.5.md diff 
--git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.6/CHANGES.0.23.6.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.6/CHANGELOG.0.23.6.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.6/CHANGES.0.23.6.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.6/CHANGELOG.0.23.6.md index c6fe9ae8e33..eadf9617e8f 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.6/CHANGES.0.23.6.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.6/CHANGELOG.0.23.6.md @@ -35,7 +35,7 @@ | [MAPREDUCE-4845](https://issues.apache.org/jira/browse/MAPREDUCE-4845) | ClusterStatus.getMaxMemory() and getUsedMemory() exist in MR1 but not MR2 | Major | client | Sandy Ryza | Sandy Ryza | | [YARN-285](https://issues.apache.org/jira/browse/YARN-285) | RM should be able to provide a tracking link for apps that have already been purged | Major | . | Derek Dagit | Derek Dagit | | [MAPREDUCE-4899](https://issues.apache.org/jira/browse/MAPREDUCE-4899) | Provide a plugin to the Yarn Web App Proxy to generate tracking links for M/R appllications given the ID | Major | . | Derek Dagit | Derek Dagit | -| [MAPREDUCE-4810](https://issues.apache.org/jira/browse/MAPREDUCE-4810) | Add admin command options for ApplicationMaster | Minor | applicationmaster | Jason Lowe | Jerry Chen | +| [MAPREDUCE-4810](https://issues.apache.org/jira/browse/MAPREDUCE-4810) | Add admin command options for ApplicationMaster | Minor | applicationmaster | Jason Lowe | Haifeng Chen | ### BUG FIXES: @@ -84,7 +84,7 @@ | [HADOOP-9181](https://issues.apache.org/jira/browse/HADOOP-9181) | Set daemon flag for HttpServer's QueuedThreadPool | Major | . 
| Liang Xie | Liang Xie | | [YARN-320](https://issues.apache.org/jira/browse/YARN-320) | RM should always be able to renew its own tokens | Blocker | resourcemanager | Daryn Sharp | Daryn Sharp | | [YARN-325](https://issues.apache.org/jira/browse/YARN-325) | RM CapacityScheduler can deadlock when getQueueInfo() is called and a container is completing | Blocker | capacityscheduler | Jason Lowe | Arun C Murthy | -| [MAPREDUCE-4848](https://issues.apache.org/jira/browse/MAPREDUCE-4848) | TaskAttemptContext cast error during AM recovery | Major | mr-am | Jason Lowe | Jerry Chen | +| [MAPREDUCE-4848](https://issues.apache.org/jira/browse/MAPREDUCE-4848) | TaskAttemptContext cast error during AM recovery | Major | mr-am | Jason Lowe | Haifeng Chen | | [HADOOP-9097](https://issues.apache.org/jira/browse/HADOOP-9097) | Maven RAT plugin is not checking all source files | Critical | build | Tom White | Thomas Graves | | [HDFS-4385](https://issues.apache.org/jira/browse/HDFS-4385) | Maven RAT plugin is not checking all source files | Critical | build | Thomas Graves | Thomas Graves | | [MAPREDUCE-4934](https://issues.apache.org/jira/browse/MAPREDUCE-4934) | Maven RAT plugin is not checking all source files | Critical | build | Thomas Graves | Thomas Graves | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.7/CHANGES.0.23.7.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.7/CHANGELOG.0.23.7.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.7/CHANGES.0.23.7.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.7/CHANGELOG.0.23.7.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.8/CHANGES.0.23.8.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.8/CHANGELOG.0.23.8.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.8/CHANGES.0.23.8.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.8/CHANGELOG.0.23.8.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.9/CHANGES.0.23.9.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.9/CHANGELOG.0.23.9.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.9/CHANGES.0.23.9.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.23.9/CHANGELOG.0.23.9.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.24.0/CHANGES.0.24.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.24.0/CHANGELOG.0.24.0.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.24.0/CHANGES.0.24.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.24.0/CHANGELOG.0.24.0.md index a419c45055c..805ec19ede6 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.24.0/CHANGES.0.24.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.24.0/CHANGELOG.0.24.0.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 0.24.0 - Unreleased (as of 2017-08-28) +## Release 0.24.0 - Unreleased (as of 2018-09-02) ### INCOMPATIBLE CHANGES: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.0/CHANGES.0.3.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.0/CHANGELOG.0.3.0.md similarity index 100% rename from 
hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.0/CHANGES.0.3.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.0/CHANGELOG.0.3.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.1/CHANGES.0.3.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.1/CHANGELOG.0.3.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.1/CHANGES.0.3.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.1/CHANGELOG.0.3.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.2/CHANGES.0.3.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.2/CHANGELOG.0.3.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.2/CHANGES.0.3.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.3.2/CHANGELOG.0.3.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.4.0/CHANGES.0.4.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.4.0/CHANGELOG.0.4.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.4.0/CHANGES.0.4.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.4.0/CHANGELOG.0.4.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.5.0/CHANGES.0.5.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.5.0/CHANGELOG.0.5.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.5.0/CHANGES.0.5.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.5.0/CHANGELOG.0.5.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.0/CHANGES.0.6.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.0/CHANGELOG.0.6.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.0/CHANGES.0.6.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.0/CHANGELOG.0.6.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.1/CHANGES.0.6.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.1/CHANGELOG.0.6.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.1/CHANGES.0.6.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.1/CHANGELOG.0.6.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.2/CHANGES.0.6.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.2/CHANGELOG.0.6.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.2/CHANGES.0.6.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.6.2/CHANGELOG.0.6.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.0/CHANGES.0.7.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.0/CHANGELOG.0.7.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.0/CHANGES.0.7.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.0/CHANGELOG.0.7.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.1/CHANGES.0.7.1.md 
b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.1/CHANGELOG.0.7.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.1/CHANGES.0.7.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.1/CHANGELOG.0.7.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.2/CHANGES.0.7.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.2/CHANGELOG.0.7.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.2/CHANGES.0.7.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.7.2/CHANGELOG.0.7.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.8.0/CHANGES.0.8.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.8.0/CHANGELOG.0.8.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.8.0/CHANGES.0.8.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.8.0/CHANGELOG.0.8.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.0/CHANGES.0.9.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.0/CHANGELOG.0.9.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.0/CHANGES.0.9.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.0/CHANGELOG.0.9.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.1/CHANGES.0.9.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.1/CHANGELOG.0.9.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.1/CHANGES.0.9.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.1/CHANGELOG.0.9.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.2/CHANGES.0.9.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.2/CHANGELOG.0.9.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.2/CHANGES.0.9.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/0.9.2/CHANGELOG.0.9.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.0/CHANGES.1.0.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.0/CHANGELOG.1.0.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.0/CHANGES.1.0.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.0/CHANGELOG.1.0.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.1/CHANGES.1.0.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.1/CHANGELOG.1.0.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.1/CHANGES.1.0.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.1/CHANGELOG.1.0.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.2/CHANGES.1.0.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.2/CHANGELOG.1.0.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.2/CHANGES.1.0.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.2/CHANGELOG.1.0.2.md diff --git 
a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.3/CHANGES.1.0.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.3/CHANGELOG.1.0.3.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.3/CHANGES.1.0.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.3/CHANGELOG.1.0.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.4/CHANGES.1.0.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.4/CHANGELOG.1.0.4.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.4/CHANGES.1.0.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.0.4/CHANGELOG.1.0.4.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.0/CHANGES.1.1.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.0/CHANGELOG.1.1.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.0/CHANGES.1.1.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.0/CHANGELOG.1.1.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.1/CHANGES.1.1.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.1/CHANGELOG.1.1.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.1/CHANGES.1.1.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.1/CHANGELOG.1.1.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.2/CHANGES.1.1.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.2/CHANGELOG.1.1.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.2/CHANGES.1.1.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.2/CHANGELOG.1.1.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.3/CHANGES.1.1.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.3/CHANGELOG.1.1.3.md similarity index 95% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.3/CHANGES.1.1.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.3/CHANGELOG.1.1.3.md index 0944cef0a6e..35918b7931e 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.3/CHANGES.1.1.3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.1.3/CHANGELOG.1.1.3.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 1.1.3 - Unreleased (as of 2017-08-28) +## Release 1.1.3 - Unreleased (as of 2018-09-02) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.0/CHANGES.1.2.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.0/CHANGELOG.1.2.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.0/CHANGES.1.2.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.0/CHANGELOG.1.2.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.1/CHANGES.1.2.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.1/CHANGELOG.1.2.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.1/CHANGES.1.2.1.md rename to 
hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.1/CHANGELOG.1.2.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.2/CHANGES.1.2.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.2/CHANGELOG.1.2.2.md similarity index 96% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.2/CHANGES.1.2.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.2/CHANGELOG.1.2.2.md index cd67c45e242..ba945471d52 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.2/CHANGES.1.2.2.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.2.2/CHANGELOG.1.2.2.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 1.2.2 - Unreleased (as of 2017-08-28) +## Release 1.2.2 - Unreleased (as of 2018-09-02) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.3.0/CHANGES.1.3.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.3.0/CHANGELOG.1.3.0.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/1.3.0/CHANGES.1.3.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/1.3.0/CHANGELOG.1.3.0.md index f63021f63da..7ab7dcff3c4 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/1.3.0/CHANGES.1.3.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/1.3.0/CHANGELOG.1.3.0.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 1.3.0 - Unreleased (as of 2017-08-28) +## Release 1.3.0 - Unreleased (as of 2018-09-02) ### INCOMPATIBLE CHANGES: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.0-alpha/CHANGES.2.0.0-alpha.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.0-alpha/CHANGELOG.2.0.0-alpha.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.0-alpha/CHANGES.2.0.0-alpha.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.0-alpha/CHANGELOG.2.0.0-alpha.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.1-alpha/CHANGES.2.0.1-alpha.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.1-alpha/CHANGELOG.2.0.1-alpha.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.1-alpha/CHANGES.2.0.1-alpha.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.1-alpha/CHANGELOG.2.0.1-alpha.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.2-alpha/CHANGES.2.0.2-alpha.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.2-alpha/CHANGELOG.2.0.2-alpha.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.2-alpha/CHANGES.2.0.2-alpha.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.2-alpha/CHANGELOG.2.0.2-alpha.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.3-alpha/CHANGES.2.0.3-alpha.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.3-alpha/CHANGELOG.2.0.3-alpha.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.3-alpha/CHANGES.2.0.3-alpha.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.3-alpha/CHANGELOG.2.0.3-alpha.md index 85084850f91..31e0ff54e84 100644 --- 
a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.3-alpha/CHANGES.2.0.3-alpha.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.3-alpha/CHANGELOG.2.0.3-alpha.md @@ -45,7 +45,6 @@ |:---- |:---- | :--- |:---- |:---- |:---- | | [HADOOP-8597](https://issues.apache.org/jira/browse/HADOOP-8597) | FsShell's Text command should be able to read avro data files | Major | fs | Harsh J | Ivan Vladimirov Ivanov | | [MAPREDUCE-3678](https://issues.apache.org/jira/browse/MAPREDUCE-3678) | The Map tasks logs should have the value of input split it processed | Major | mrv1, mrv2 | Bejoy KS | Harsh J | -| [YARN-146](https://issues.apache.org/jira/browse/YARN-146) | Add unit tests for computing fair share in the fair scheduler | Major | resourcemanager | Sandy Ryza | Sandy Ryza | | [YARN-145](https://issues.apache.org/jira/browse/YARN-145) | Add a Web UI to the fair share scheduler | Major | resourcemanager | Sandy Ryza | Sandy Ryza | | [YARN-187](https://issues.apache.org/jira/browse/YARN-187) | Add hierarchical queues to the fair scheduler | Major | scheduler | Sandy Ryza | Sandy Ryza | | [HDFS-4213](https://issues.apache.org/jira/browse/HDFS-4213) | When the client calls hsync, allows the client to update the file length in the NameNode | Major | hdfs-client, namenode | Jing Zhao | Jing Zhao | @@ -154,7 +153,7 @@ | [HADOOP-9162](https://issues.apache.org/jira/browse/HADOOP-9162) | Add utility to check native library availability | Minor | native | Binglin Chang | Binglin Chang | | [YARN-315](https://issues.apache.org/jira/browse/YARN-315) | Use security token protobuf definition from hadoop common | Major | . | Suresh Srinivas | Suresh Srinivas | | [MAPREDUCE-4899](https://issues.apache.org/jira/browse/MAPREDUCE-4899) | Provide a plugin to the Yarn Web App Proxy to generate tracking links for M/R appllications given the ID | Major | . | Derek Dagit | Derek Dagit | -| [MAPREDUCE-4810](https://issues.apache.org/jira/browse/MAPREDUCE-4810) | Add admin command options for ApplicationMaster | Minor | applicationmaster | Jason Lowe | Jerry Chen | +| [MAPREDUCE-4810](https://issues.apache.org/jira/browse/MAPREDUCE-4810) | Add admin command options for ApplicationMaster | Minor | applicationmaster | Jason Lowe | Haifeng Chen | | [HADOOP-9118](https://issues.apache.org/jira/browse/HADOOP-9118) | FileSystemContractBaseTest test data for read/write isn't rigorous enough | Trivial | test | Steve Loughran | | | [MAPREDUCE-4907](https://issues.apache.org/jira/browse/MAPREDUCE-4907) | TrackerDistributedCacheManager issues too many getFileStatus calls | Major | mrv1, tasktracker | Sandy Ryza | Sandy Ryza | | [HDFS-4381](https://issues.apache.org/jira/browse/HDFS-4381) | Document fsimage format details in FSImageFormat class javadoc | Major | namenode | Jing Zhao | Jing Zhao | @@ -432,7 +431,7 @@ | [YARN-325](https://issues.apache.org/jira/browse/YARN-325) | RM CapacityScheduler can deadlock when getQueueInfo() is called and a container is completing | Blocker | capacityscheduler | Jason Lowe | Arun C Murthy | | [HDFS-4363](https://issues.apache.org/jira/browse/HDFS-4363) | Combine PBHelper and HdfsProtoUtil and remove redundant methods | Major | . | Suresh Srinivas | Suresh Srinivas | | [HDFS-4306](https://issues.apache.org/jira/browse/HDFS-4306) | PBHelper.convertLocatedBlock miss convert BlockToken | Major | . 
| Binglin Chang | Binglin Chang | -| [MAPREDUCE-4848](https://issues.apache.org/jira/browse/MAPREDUCE-4848) | TaskAttemptContext cast error during AM recovery | Major | mr-am | Jason Lowe | Jerry Chen | +| [MAPREDUCE-4848](https://issues.apache.org/jira/browse/MAPREDUCE-4848) | TaskAttemptContext cast error during AM recovery | Major | mr-am | Jason Lowe | Haifeng Chen | | [HADOOP-9155](https://issues.apache.org/jira/browse/HADOOP-9155) | FsPermission should have different default value, 777 for directory and 666 for file | Minor | . | Binglin Chang | Binglin Chang | | [HADOOP-9183](https://issues.apache.org/jira/browse/HADOOP-9183) | Potential deadlock in ActiveStandbyElector | Major | ha | Tom White | Tom White | | [HDFS-4377](https://issues.apache.org/jira/browse/HDFS-4377) | Some trivial DN comment cleanup | Trivial | . | Eli Collins | Eli Collins | @@ -486,7 +485,7 @@ | [HDFS-4404](https://issues.apache.org/jira/browse/HDFS-4404) | Create file failure when the machine of first attempted NameNode is down | Critical | ha, hdfs-client | liaowenrui | Todd Lipcon | | [HDFS-4344](https://issues.apache.org/jira/browse/HDFS-4344) | dfshealth.jsp throws NumberFormatException when dfs.hosts/dfs.hosts.exclude includes port number | Major | namenode | tamtam180 | Andy Isaacson | | [MAPREDUCE-4953](https://issues.apache.org/jira/browse/MAPREDUCE-4953) | HadoopPipes misuses fprintf | Major | pipes | Andy Isaacson | Andy Isaacson | -| [HADOOP-9260](https://issues.apache.org/jira/browse/HADOOP-9260) | Hadoop version may be not correct when starting name node or data node | Critical | . | Jerry Chen | Chris Nauroth | +| [HADOOP-9260](https://issues.apache.org/jira/browse/HADOOP-9260) | Hadoop version may be not correct when starting name node or data node | Critical | . | Haifeng Chen | Chris Nauroth | | [HADOOP-9278](https://issues.apache.org/jira/browse/HADOOP-9278) | HarFileSystem may leak file handle | Major | fs | Chris Nauroth | Chris Nauroth | | [HDFS-4468](https://issues.apache.org/jira/browse/HDFS-4468) | Fix TestHDFSCLI and TestQuota for HADOOP-9252 | Minor | . | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [YARN-357](https://issues.apache.org/jira/browse/YARN-357) | App submission should not be synchronized | Major | resourcemanager | Daryn Sharp | Daryn Sharp | @@ -502,6 +501,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | | [HDFS-4007](https://issues.apache.org/jira/browse/HDFS-4007) | Rehabilitate bit-rotted unit tests under hadoop-hdfs-project/hadoop-hdfs/src/test/unit/ | Minor | test | Colin P. McCabe | Colin P. McCabe | +| [YARN-146](https://issues.apache.org/jira/browse/YARN-146) | Add unit tests for computing fair share in the fair scheduler | Major | resourcemanager | Sandy Ryza | Sandy Ryza | | [HADOOP-9042](https://issues.apache.org/jira/browse/HADOOP-9042) | Add a test for umask in FileSystemContractBaseTest | Minor | . | Colin P. McCabe | Colin P. McCabe | | [HADOOP-9038](https://issues.apache.org/jira/browse/HADOOP-9038) | provide unit-test coverage of class org.apache.hadoop.fs.LocalDirAllocator.AllocatorPerContext.PathIterator | Minor | . | Ivan A. Veselovsky | Ivan A. Veselovsky | | [HDFS-4199](https://issues.apache.org/jira/browse/HDFS-4199) | Provide test for HdfsVolumeId | Minor | . | Ivan A. Veselovsky | Ivan A. 
Veselovsky | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.4-alpha/CHANGES.2.0.4-alpha.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.4-alpha/CHANGELOG.2.0.4-alpha.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.4-alpha/CHANGES.2.0.4-alpha.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.4-alpha/CHANGELOG.2.0.4-alpha.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.5-alpha/CHANGES.2.0.5-alpha.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.5-alpha/CHANGELOG.2.0.5-alpha.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.5-alpha/CHANGES.2.0.5-alpha.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.5-alpha/CHANGELOG.2.0.5-alpha.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.6-alpha/CHANGES.2.0.6-alpha.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.6-alpha/CHANGELOG.2.0.6-alpha.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.6-alpha/CHANGES.2.0.6-alpha.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.0.6-alpha/CHANGELOG.2.0.6-alpha.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.0-beta/CHANGES.2.1.0-beta.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.0-beta/CHANGELOG.2.1.0-beta.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.0-beta/CHANGES.2.1.0-beta.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.0-beta/CHANGELOG.2.1.0-beta.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.1-beta/CHANGES.2.1.1-beta.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.1-beta/CHANGELOG.2.1.1-beta.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.1-beta/CHANGES.2.1.1-beta.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.1.1-beta/CHANGELOG.2.1.1-beta.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.10.0/CHANGELOG.2.10.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.10.0/CHANGELOG.2.10.0.md new file mode 100644 index 00000000000..03af8e5716e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.10.0/CHANGELOG.2.10.0.md @@ -0,0 +1,484 @@ + + +# Apache Hadoop Changelog + +## Release 2.10.0 - Unreleased (as of 2018-09-02) + +### INCOMPATIBLE CHANGES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-12883](https://issues.apache.org/jira/browse/HDFS-12883) | RBF: Document Router and State Store metrics | Major | documentation | Yiqun Lin | Yiqun Lin | +| [HDFS-12895](https://issues.apache.org/jira/browse/HDFS-12895) | RBF: Add ACL support for mount table | Major | . 
| Yiqun Lin | Yiqun Lin | +| [HDFS-13099](https://issues.apache.org/jira/browse/HDFS-13099) | RBF: Use the ZooKeeper as the default State Store | Minor | documentation | Yiqun Lin | Yiqun Lin | + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13083](https://issues.apache.org/jira/browse/HDFS-13083) | RBF: Fix doc error setting up client | Major | federation | tartarus | tartarus | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13283](https://issues.apache.org/jira/browse/HDFS-13283) | Percentage based Reserved Space Calculation for DataNode | Major | datanode, hdfs | Lukas Majercak | Lukas Majercak | +| [HDFS-13553](https://issues.apache.org/jira/browse/HDFS-13553) | RBF: Support global quota | Major | . | Íñigo Goiri | Yiqun Lin | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-14987](https://issues.apache.org/jira/browse/HADOOP-14987) | Improve KMSClientProvider log around delegation token checking | Major | . | Xiaoyu Yao | Xiaoyu Yao | +| [HADOOP-14872](https://issues.apache.org/jira/browse/HADOOP-14872) | CryptoInputStream should implement unbuffer | Major | fs, security | John Zhuge | John Zhuge | +| [HADOOP-14960](https://issues.apache.org/jira/browse/HADOOP-14960) | Add GC time percentage monitor/alerter | Major | . | Misha Dmitriev | Misha Dmitriev | +| [HADOOP-15023](https://issues.apache.org/jira/browse/HADOOP-15023) | ValueQueue should also validate (lowWatermark \* numValues) \> 0 on construction | Minor | . | Xiao Chen | Xiao Chen | +| [YARN-6851](https://issues.apache.org/jira/browse/YARN-6851) | Capacity Scheduler: document configs for controlling # containers allowed to be allocated per node heartbeat | Minor | . 
| Wei Yan | Wei Yan | +| [YARN-7495](https://issues.apache.org/jira/browse/YARN-7495) | Improve robustness of the AggregatedLogDeletionService | Major | log-aggregation | Jonathan Eagles | Jonathan Eagles | +| [HADOOP-15056](https://issues.apache.org/jira/browse/HADOOP-15056) | Fix TestUnbuffer#testUnbufferException failure | Minor | test | Jack Bearden | Jack Bearden | +| [HADOOP-15012](https://issues.apache.org/jira/browse/HADOOP-15012) | Add readahead, dropbehind, and unbuffer to StreamCapabilities | Major | fs | John Zhuge | John Zhuge | +| [HADOOP-15104](https://issues.apache.org/jira/browse/HADOOP-15104) | AliyunOSS: change the default value of max error retry | Major | fs/oss | wujinhu | wujinhu | +| [YARN-7642](https://issues.apache.org/jira/browse/YARN-7642) | Add test case to verify context update after container promotion or demotion with or without auto update | Minor | nodemanager | Weiwei Yang | Weiwei Yang | +| [HADOOP-15111](https://issues.apache.org/jira/browse/HADOOP-15111) | AliyunOSS: backport HADOOP-14993 to branch-2 | Major | fs/oss | Genmao Yu | Genmao Yu | +| [HDFS-12818](https://issues.apache.org/jira/browse/HDFS-12818) | Support multiple storages in DataNodeCluster / SimulatedFSDataset | Minor | datanode, test | Erik Krogen | Erik Krogen | +| [HDFS-9023](https://issues.apache.org/jira/browse/HDFS-9023) | When NN is not able to identify DN for replication, reason behind it can be logged | Critical | hdfs-client, namenode | Surendra Singh Lilhore | Xiao Chen | +| [YARN-7678](https://issues.apache.org/jira/browse/YARN-7678) | Ability to enable logging of container memory stats | Major | nodemanager | Jim Brennan | Jim Brennan | +| [HDFS-12945](https://issues.apache.org/jira/browse/HDFS-12945) | Switch to ClientProtocol instead of NamenodeProtocols in NamenodeWebHdfsMethods | Minor | . | Wei Yan | Wei Yan | +| [YARN-7622](https://issues.apache.org/jira/browse/YARN-7622) | Allow fair-scheduler configuration on HDFS | Minor | fairscheduler, resourcemanager | Greg Phillips | Greg Phillips | +| [YARN-7590](https://issues.apache.org/jira/browse/YARN-7590) | Improve container-executor validation check | Major | security, yarn | Eric Yang | Eric Yang | +| [MAPREDUCE-7029](https://issues.apache.org/jira/browse/MAPREDUCE-7029) | FileOutputCommitter is slow on filesystems lacking recursive delete | Minor | . | Karthik Palaniappan | Karthik Palaniappan | +| [MAPREDUCE-6984](https://issues.apache.org/jira/browse/MAPREDUCE-6984) | MR AM to clean up temporary files from previous attempt in case of no recovery | Major | applicationmaster | Gergo Repas | Gergo Repas | +| [HADOOP-15189](https://issues.apache.org/jira/browse/HADOOP-15189) | backport HADOOP-15039 to branch-2 and branch-3 | Blocker | . | Genmao Yu | Genmao Yu | +| [HADOOP-15212](https://issues.apache.org/jira/browse/HADOOP-15212) | Add independent secret manager method for logging expired tokens | Major | security | Daryn Sharp | Daryn Sharp | +| [YARN-7728](https://issues.apache.org/jira/browse/YARN-7728) | Expose container preemptions related information in Capacity Scheduler queue metrics | Major | . 
| Eric Payne | Eric Payne | +| [MAPREDUCE-7048](https://issues.apache.org/jira/browse/MAPREDUCE-7048) | Uber AM can crash due to unknown task in statusUpdate | Major | mr-am | Peter Bacsko | Peter Bacsko | +| [HADOOP-13972](https://issues.apache.org/jira/browse/HADOOP-13972) | ADLS to support per-store configuration | Major | fs/adl | John Zhuge | Sharad Sonker | +| [YARN-7813](https://issues.apache.org/jira/browse/YARN-7813) | Capacity Scheduler Intra-queue Preemption should be configurable for each queue | Major | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne | +| [HADOOP-15235](https://issues.apache.org/jira/browse/HADOOP-15235) | Authentication Tokens should use HMAC instead of MAC | Major | security | Robert Kanter | Robert Kanter | +| [HDFS-11187](https://issues.apache.org/jira/browse/HDFS-11187) | Optimize disk access for last partial chunk checksum of Finalized replica | Major | datanode | Wei-Chiu Chuang | Gabor Bota | +| [HADOOP-15266](https://issues.apache.org/jira/browse/HADOOP-15266) | [branch-2] Upper/Lower case conversion support for group names in LdapGroupsMapping | Major | . | Nanda kumar | Nanda kumar | +| [HADOOP-15279](https://issues.apache.org/jira/browse/HADOOP-15279) | increase maven heap size recommendations | Minor | build, documentation, test | Allen Wittenauer | Allen Wittenauer | +| [HDFS-12884](https://issues.apache.org/jira/browse/HDFS-12884) | BlockUnderConstructionFeature.truncateBlock should be of type BlockInfo | Major | namenode | Konstantin Shvachko | chencan | +| [HADOOP-15334](https://issues.apache.org/jira/browse/HADOOP-15334) | Upgrade Maven surefire plugin | Major | build | Arpit Agarwal | Arpit Agarwal | +| [HADOOP-15312](https://issues.apache.org/jira/browse/HADOOP-15312) | Undocumented KeyProvider configuration keys | Major | . | Wei-Chiu Chuang | LiXin Ge | +| [YARN-7623](https://issues.apache.org/jira/browse/YARN-7623) | Fix the CapacityScheduler Queue configuration documentation | Major | . | Arun Suresh | Jonathan Hung | +| [HDFS-13314](https://issues.apache.org/jira/browse/HDFS-13314) | NameNode should optionally exit if it detects FsImage corruption | Major | namenode | Arpit Agarwal | Arpit Agarwal | +| [HDFS-13418](https://issues.apache.org/jira/browse/HDFS-13418) | NetworkTopology should be configurable when enable DFSNetworkTopology | Major | . | Tao Jie | Tao Jie | +| [HADOOP-15394](https://issues.apache.org/jira/browse/HADOOP-15394) | Backport PowerShell NodeFencer HADOOP-14309 to branch-2 | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13462](https://issues.apache.org/jira/browse/HDFS-13462) | Add BIND\_HOST configuration for JournalNode's HTTP and RPC Servers | Major | hdfs, journal-node | Lukas Majercak | Lukas Majercak | +| [HDFS-13492](https://issues.apache.org/jira/browse/HDFS-13492) | Limit httpfs binds to certain IP addresses in branch-2 | Major | httpfs | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-12981](https://issues.apache.org/jira/browse/HDFS-12981) | renameSnapshot a Non-Existent snapshot to itself should throw error | Minor | hdfs | Sailesh Patel | Kitti Nanasi | +| [HADOOP-15441](https://issues.apache.org/jira/browse/HADOOP-15441) | Log kms url and token service at debug level. | Minor | . 
| Wei-Chiu Chuang | Gabor Bota | +| [YARN-8249](https://issues.apache.org/jira/browse/YARN-8249) | Few REST api's in RMWebServices are missing static user check | Critical | webapp, yarn | Sunil Govindan | Sunil Govindan | +| [HADOOP-15486](https://issues.apache.org/jira/browse/HADOOP-15486) | Make NetworkTopology#netLock fair | Major | net | Nanda kumar | Nanda kumar | +| [HADOOP-15449](https://issues.apache.org/jira/browse/HADOOP-15449) | Increase default timeout of ZK session to avoid frequent NameNode failover | Critical | common | Karthik Palanisamy | Karthik Palanisamy | +| [HDFS-13602](https://issues.apache.org/jira/browse/HDFS-13602) | Add checkOperation(WRITE) checks in FSNamesystem | Major | ha, namenode | Erik Krogen | Chao Sun | +| [HDFS-13644](https://issues.apache.org/jira/browse/HDFS-13644) | Backport HDFS-10376 to branch-2 | Major | . | Yiqun Lin | Zsolt Venczel | +| [HDFS-13653](https://issues.apache.org/jira/browse/HDFS-13653) | Make dfs.client.failover.random.order a per nameservice configuration | Major | federation | Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [HDFS-13714](https://issues.apache.org/jira/browse/HDFS-13714) | Fix TestNameNodePrunesMissingStorages test failures on Windows | Major | hdfs, namenode, test | Lukas Majercak | Lukas Majercak | +| [HDFS-13719](https://issues.apache.org/jira/browse/HDFS-13719) | Docs around dfs.image.transfer.timeout are misleading | Major | . | Kitti Nanasi | Kitti Nanasi | +| [HDFS-11060](https://issues.apache.org/jira/browse/HDFS-11060) | make DEFAULT\_MAX\_CORRUPT\_FILEBLOCKS\_RETURNED configurable | Minor | hdfs | Lantao Jin | Lantao Jin | +| [YARN-8155](https://issues.apache.org/jira/browse/YARN-8155) | Improve ATSv2 client logging in RM and NM publisher | Major | . | Rohith Sharma K S | Abhishek Modi | +| [HDFS-13814](https://issues.apache.org/jira/browse/HDFS-13814) | Remove super user privilege requirement for NameNode.getServiceStatus | Minor | namenode | Chao Sun | Chao Sun | +| [YARN-8559](https://issues.apache.org/jira/browse/YARN-8559) | Expose mutable-conf scheduler's configuration in RM /scheduler-conf endpoint | Major | resourcemanager | Anna Savarin | Weiwei Yang | +| [HDFS-13813](https://issues.apache.org/jira/browse/HDFS-13813) | Exit NameNode if dangling child inode is detected when saving FsImage | Major | hdfs, namenode | Siyao Meng | Siyao Meng | +| [HDFS-13821](https://issues.apache.org/jira/browse/HDFS-13821) | RBF: Add dfs.federation.router.mount-table.cache.enable so that users can disable cache | Major | hdfs | Fei Hui | Fei Hui | +| [HADOOP-15689](https://issues.apache.org/jira/browse/HADOOP-15689) | Add "\*.patch" into .gitignore file of branch-2 | Major | . | Rui Gao | Rui Gao | +| [HDFS-13831](https://issues.apache.org/jira/browse/HDFS-13831) | Make block increment deletion number configurable | Major | . | Yiqun Lin | Ryan Wu | +| [YARN-8051](https://issues.apache.org/jira/browse/YARN-8051) | TestRMEmbeddedElector#testCallbackSynchronization is flakey | Major | test | Robert Kanter | Robert Kanter | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-12052](https://issues.apache.org/jira/browse/HDFS-12052) | Set SWEBHDFS delegation token kind when ssl is enabled in HttpFS | Major | httpfs, webhdfs | Zoran Dimitrijevic | Zoran Dimitrijevic | +| [HDFS-12318](https://issues.apache.org/jira/browse/HDFS-12318) | Fix IOException condition for openInfo in DFSInputStream | Major | . 
| legend | legend | +| [HDFS-12614](https://issues.apache.org/jira/browse/HDFS-12614) | FSPermissionChecker#getINodeAttrs() throws NPE when INodeAttributesProvider configured | Major | . | Manoj Govindassamy | Manoj Govindassamy | +| [YARN-7370](https://issues.apache.org/jira/browse/YARN-7370) | Preemption properties should be refreshable | Major | capacity scheduler, scheduler preemption | Eric Payne | Gergely Novák | +| [YARN-7428](https://issues.apache.org/jira/browse/YARN-7428) | Add containerId to Localizer failed logs | Minor | nodemanager | Prabhu Joseph | Prabhu Joseph | +| [HDFS-12783](https://issues.apache.org/jira/browse/HDFS-12783) | [branch-2] "dfsrouter" should use hdfsScript | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-12788](https://issues.apache.org/jira/browse/HDFS-12788) | Reset the upload button when file upload fails | Critical | ui, webhdfs | Brahma Reddy Battula | Brahma Reddy Battula | +| [YARN-7469](https://issues.apache.org/jira/browse/YARN-7469) | Capacity Scheduler Intra-queue preemption: User can starve if newest app is exactly at user limit | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | +| [HADOOP-14982](https://issues.apache.org/jira/browse/HADOOP-14982) | Clients using FailoverOnNetworkExceptionRetry can go into a loop if they're used without authenticating with kerberos in HA env | Major | common | Peter Bacsko | Peter Bacsko | +| [YARN-7489](https://issues.apache.org/jira/browse/YARN-7489) | ConcurrentModificationException in RMAppImpl#getRMAppMetrics | Major | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-7525](https://issues.apache.org/jira/browse/YARN-7525) | Incorrect query parameters in cluster nodes REST API document | Minor | documentation | Tao Yang | Tao Yang | +| [HDFS-12813](https://issues.apache.org/jira/browse/HDFS-12813) | RequestHedgingProxyProvider can hide Exception thrown from the Namenode for proxy size of 1 | Major | ha | Mukul Kumar Singh | Mukul Kumar Singh | +| [HADOOP-15045](https://issues.apache.org/jira/browse/HADOOP-15045) | ISA-L build options are documented in branch-2 | Major | build, documentation | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-15067](https://issues.apache.org/jira/browse/HADOOP-15067) | GC time percentage reported in JvmMetrics should be a gauge, not counter | Major | . | Misha Dmitriev | Misha Dmitriev | +| [YARN-7363](https://issues.apache.org/jira/browse/YARN-7363) | ContainerLocalizer doesn't have a valid log4j config when using LinuxContainerExecutor | Major | nodemanager | Yufei Gu | Yufei Gu | +| [HDFS-12754](https://issues.apache.org/jira/browse/HDFS-12754) | Lease renewal can hit a deadlock | Major | . | Kuhu Shukla | Kuhu Shukla | +| [HDFS-12832](https://issues.apache.org/jira/browse/HDFS-12832) | INode.getFullPathName may throw ArrayIndexOutOfBoundsException lead to NameNode exit | Critical | namenode | DENG FEI | Konstantin Shvachko | +| [HDFS-11754](https://issues.apache.org/jira/browse/HDFS-11754) | Make FsServerDefaults cache configurable. | Minor | . 
| Rushabh S Shah | Mikhail Erofeev | +| [HADOOP-15042](https://issues.apache.org/jira/browse/HADOOP-15042) | Azure PageBlobInputStream.skip() can return negative value when numberOfPagesRemaining is 0 | Minor | fs/azure | Rajesh Balamohan | Rajesh Balamohan | +| [HDFS-12638](https://issues.apache.org/jira/browse/HDFS-12638) | Delete copy-on-truncate block along with the original block, when deleting a file being truncated | Blocker | hdfs | Jiandan Yang | Konstantin Shvachko | +| [YARN-4813](https://issues.apache.org/jira/browse/YARN-4813) | TestRMWebServicesDelegationTokenAuthentication.testDoAs fails intermittently | Major | resourcemanager | Daniel Templeton | Gergo Repas | +| [MAPREDUCE-5124](https://issues.apache.org/jira/browse/MAPREDUCE-5124) | AM lacks flow control for task events | Major | mr-am | Jason Lowe | Peter Bacsko | +| [YARN-7455](https://issues.apache.org/jira/browse/YARN-7455) | quote\_and\_append\_arg can overflow buffer | Major | nodemanager | Jason Lowe | Jim Brennan | +| [YARN-5594](https://issues.apache.org/jira/browse/YARN-5594) | Handle old RMDelegationToken format when recovering RM | Major | resourcemanager | Tatyana But | Robert Kanter | +| [HADOOP-14985](https://issues.apache.org/jira/browse/HADOOP-14985) | Remove subversion related code from VersionInfoMojo.java | Minor | build | Akira Ajisaka | Ajay Kumar | +| [HDFS-11751](https://issues.apache.org/jira/browse/HDFS-11751) | DFSZKFailoverController daemon exits with wrong status code | Major | auto-failover | Doris Gu | Bharat Viswanadham | +| [HDFS-12889](https://issues.apache.org/jira/browse/HDFS-12889) | Router UI is missing robots.txt file | Major | . | Bharat Viswanadham | Bharat Viswanadham | +| [YARN-7607](https://issues.apache.org/jira/browse/YARN-7607) | Remove the trailing duplicated timestamp in container diagnostics message | Minor | nodemanager | Weiwei Yang | Weiwei Yang | +| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" | Blocker | fs/oss | Chris Douglas | Sammi Chen | +| [YARN-7608](https://issues.apache.org/jira/browse/YARN-7608) | Incorrect sTarget column causing DataTable warning on RM application and scheduler web page | Major | resourcemanager, webapp | Weiwei Yang | Gergely Novák | +| [HDFS-12833](https://issues.apache.org/jira/browse/HDFS-12833) | Distcp : Update the usage of delete option for dependency with update and overwrite option | Minor | distcp, hdfs | Harshakiran Reddy | usharani | +| [YARN-7647](https://issues.apache.org/jira/browse/YARN-7647) | NM print inappropriate error log when node-labels is enabled | Minor | . | Yang Wang | Yang Wang | +| [HDFS-12907](https://issues.apache.org/jira/browse/HDFS-12907) | Allow read-only access to reserved raw for non-superusers | Major | namenode | Daryn Sharp | Rushabh S Shah | +| [HDFS-12881](https://issues.apache.org/jira/browse/HDFS-12881) | Output streams closed with IOUtils suppressing write errors | Major | . | Jason Lowe | Ajay Kumar | +| [YARN-7595](https://issues.apache.org/jira/browse/YARN-7595) | Container launching code suppresses close exceptions after writes | Major | nodemanager | Jason Lowe | Jim Brennan | +| [HADOOP-15085](https://issues.apache.org/jira/browse/HADOOP-15085) | Output streams closed with IOUtils suppressing write errors | Major | . 
| Jason Lowe | Jim Brennan | +| [HADOOP-15123](https://issues.apache.org/jira/browse/HADOOP-15123) | KDiag tries to load krb5.conf from KRB5CCNAME instead of KRB5\_CONFIG | Minor | security | Vipin Rathor | Vipin Rathor | +| [YARN-7661](https://issues.apache.org/jira/browse/YARN-7661) | NodeManager metrics return wrong value after update node resource | Major | . | Yang Wang | Yang Wang | +| [HDFS-12347](https://issues.apache.org/jira/browse/HDFS-12347) | TestBalancerRPCDelay#testBalancerRPCDelay fails very frequently | Critical | test | Xiao Chen | Bharat Viswanadham | +| [YARN-7662](https://issues.apache.org/jira/browse/YARN-7662) | [Atsv2] Define new set of configurations for reader and collectors to bind. | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-7674](https://issues.apache.org/jira/browse/YARN-7674) | Update Timeline Reader web app address in UI2 | Major | . | Rohith Sharma K S | Sunil Govindan | +| [YARN-7542](https://issues.apache.org/jira/browse/YARN-7542) | Fix issue that causes some Running Opportunistic Containers to be recovered as PAUSED | Major | . | Arun Suresh | Sampada Dehankar | +| [HADOOP-15143](https://issues.apache.org/jira/browse/HADOOP-15143) | NPE due to Invalid KerberosTicket in UGI | Major | . | Jitendra Nath Pandey | Mukul Kumar Singh | +| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications | Blocker | resourcemanager | Charan Hebri | Sunil Govindan | +| [MAPREDUCE-7028](https://issues.apache.org/jira/browse/MAPREDUCE-7028) | Concurrent task progress updates causing NPE in Application Master | Blocker | mr-am | Gergo Repas | Gergo Repas | +| [YARN-7619](https://issues.apache.org/jira/browse/YARN-7619) | Max AM Resource value in Capacity Scheduler UI has to be refreshed for every user | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | +| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call | Major | webapp | Sunil Govindan | Sunil Govindan | +| [HDFS-12985](https://issues.apache.org/jira/browse/HDFS-12985) | NameNode crashes during restart after an OpenForWrite file present in the Snapshot got deleted | Major | hdfs | Manoj Govindassamy | Manoj Govindassamy | +| [YARN-4227](https://issues.apache.org/jira/browse/YARN-4227) | Ignore expired containers from removed nodes in FairScheduler | Critical | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [YARN-7508](https://issues.apache.org/jira/browse/YARN-7508) | NPE in FiCaSchedulerApp when debug log enabled in async-scheduling mode | Major | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Major | resourcemanager | lujie | lujie | +| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Major | yarn | lujie | lujie | +| [HADOOP-15060](https://issues.apache.org/jira/browse/HADOOP-15060) | TestShellBasedUnixGroupsMapping.testFiniteGroupResolutionTime flaky | Major | . 
| Miklos Szegedi | Miklos Szegedi | +| [YARN-7735](https://issues.apache.org/jira/browse/YARN-7735) | Fix typo in YARN documentation | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-7727](https://issues.apache.org/jira/browse/YARN-7727) | Incorrect log levels in few logs with QueuePriorityContainerCandidateSelector | Minor | yarn | Prabhu Joseph | Prabhu Joseph | +| [HDFS-11915](https://issues.apache.org/jira/browse/HDFS-11915) | Sync rbw dir on the first hsync() to avoid file lost on power failure | Critical | . | Kanaka Kumar Avvaru | Vinayakumar B | +| [YARN-7705](https://issues.apache.org/jira/browse/YARN-7705) | Create the container log directory with correct sticky bit in C code | Major | nodemanager | Yufei Gu | Yufei Gu | +| [HDFS-9049](https://issues.apache.org/jira/browse/HDFS-9049) | Make Datanode Netty reverse proxy port to be configurable | Major | datanode | Vinayakumar B | Vinayakumar B | +| [YARN-7758](https://issues.apache.org/jira/browse/YARN-7758) | Add an additional check to the validity of container and application ids passed to container-executor | Major | nodemanager | Miklos Szegedi | Yufei Gu | +| [HADOOP-15150](https://issues.apache.org/jira/browse/HADOOP-15150) | in FsShell, UGI params should be overidden through env vars(-D arg) | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HADOOP-15181](https://issues.apache.org/jira/browse/HADOOP-15181) | Typo in SecureMode.md | Trivial | documentation | Masahiro Tanaka | Masahiro Tanaka | +| [YARN-7806](https://issues.apache.org/jira/browse/YARN-7806) | Distributed Shell should use timeline async api's | Major | distributed-shell | Sumana Sathish | Rohith Sharma K S | +| [HADOOP-15121](https://issues.apache.org/jira/browse/HADOOP-15121) | Encounter NullPointerException when using DecayRpcScheduler | Major | . | Tao Jie | Tao Jie | +| [MAPREDUCE-7015](https://issues.apache.org/jira/browse/MAPREDUCE-7015) | Possible race condition in JHS if the job is not loaded | Major | jobhistoryserver | Peter Bacsko | Peter Bacsko | +| [YARN-7737](https://issues.apache.org/jira/browse/YARN-7737) | prelaunch.err file not found exception on container failure | Major | . | Jonathan Hung | Keqiu Hu | +| [HDFS-13063](https://issues.apache.org/jira/browse/HDFS-13063) | Fix the incorrect spelling in HDFSHighAvailabilityWithQJM.md | Trivial | documentation | Jianfei Jiang | Jianfei Jiang | +| [YARN-7102](https://issues.apache.org/jira/browse/YARN-7102) | NM heartbeat stuck when responseId overflows MAX\_INT | Critical | . | Botong Huang | Botong Huang | +| [MAPREDUCE-7041](https://issues.apache.org/jira/browse/MAPREDUCE-7041) | MR should not try to clean up at first job attempt | Major | . | Takanobu Asanuma | Gergo Repas | +| [MAPREDUCE-7020](https://issues.apache.org/jira/browse/MAPREDUCE-7020) | Task timeout in uber mode can crash AM | Major | mr-am | Akira Ajisaka | Peter Bacsko | +| [YARN-7765](https://issues.apache.org/jira/browse/YARN-7765) | [Atsv2] GSSException: No valid credentials provided - Failed to find any Kerberos tgt thrown by Timelinev2Client & HBaseClient in NM | Blocker | . 
| Sumana Sathish | Rohith Sharma K S | +| [HDFS-12974](https://issues.apache.org/jira/browse/HDFS-12974) | Exception message is not printed when creating an encryption zone fails with AuthorizationException | Minor | encryption | fang zhenyi | fang zhenyi | +| [YARN-7698](https://issues.apache.org/jira/browse/YARN-7698) | A misleading variable's name in ApplicationAttemptEventDispatcher | Minor | resourcemanager | Jinjiang Ling | Jinjiang Ling | +| [HDFS-12528](https://issues.apache.org/jira/browse/HDFS-12528) | Add an option to not disable short-circuit reads on failures | Major | hdfs-client, performance | Andre Araujo | Xiao Chen | +| [HDFS-13100](https://issues.apache.org/jira/browse/HDFS-13100) | Handle IllegalArgumentException when GETSERVERDEFAULTS is not implemented in webhdfs. | Critical | hdfs, webhdfs | Yongjun Zhang | Yongjun Zhang | +| [YARN-7849](https://issues.apache.org/jira/browse/YARN-7849) | TestMiniYarnClusterNodeUtilization#testUpdateNodeUtilization fails due to heartbeat sync error | Major | test | Jason Lowe | Botong Huang | +| [YARN-7801](https://issues.apache.org/jira/browse/YARN-7801) | AmFilterInitializer should addFilter after fill all parameters | Critical | . | Sumana Sathish | Wangda Tan | +| [YARN-7890](https://issues.apache.org/jira/browse/YARN-7890) | NPE during container relaunch | Major | . | Billie Rinaldi | Jason Lowe | +| [HDFS-13115](https://issues.apache.org/jira/browse/HDFS-13115) | In getNumUnderConstructionBlocks(), ignore the inodeIds for which the inodes have been deleted | Major | . | Yongjun Zhang | Yongjun Zhang | +| [HDFS-12935](https://issues.apache.org/jira/browse/HDFS-12935) | Get ambiguous result for DFSAdmin command in HA mode when only one namenode is up | Major | tools | Jianfei Jiang | Jianfei Jiang | +| [HDFS-13120](https://issues.apache.org/jira/browse/HDFS-13120) | Snapshot diff could be corrupted after concat | Major | namenode, snapshots | Xiaoyu Yao | Xiaoyu Yao | +| [HDFS-10453](https://issues.apache.org/jira/browse/HDFS-10453) | ReplicationMonitor thread could stuck for long time due to the race between replication and delete of same file in a large cluster. | Major | namenode | He Xiaoqiao | He Xiaoqiao | +| [HDFS-8693](https://issues.apache.org/jira/browse/HDFS-8693) | refreshNamenodes does not support adding a new standby to a running DN | Critical | datanode, ha | Jian Fang | Ajith S | +| [MAPREDUCE-7052](https://issues.apache.org/jira/browse/MAPREDUCE-7052) | TestFixedLengthInputFormat#testFormatCompressedIn is flaky | Major | client, test | Peter Bacsko | Peter Bacsko | +| [HDFS-13112](https://issues.apache.org/jira/browse/HDFS-13112) | Token expiration edits may cause log corruption or deadlock | Critical | namenode | Daryn Sharp | Daryn Sharp | +| [MAPREDUCE-7053](https://issues.apache.org/jira/browse/MAPREDUCE-7053) | Timed out tasks can fail to produce thread dump | Major | . | Jason Lowe | Jason Lowe | +| [HADOOP-15206](https://issues.apache.org/jira/browse/HADOOP-15206) | BZip2 drops and duplicates records when input split size is small | Major | . 
| Aki Tanaka | Aki Tanaka | +| [YARN-7937](https://issues.apache.org/jira/browse/YARN-7937) | Fix http method name in Cluster Application Timeout Update API example request | Minor | docs, documentation | Charan Hebri | Charan Hebri | +| [YARN-7947](https://issues.apache.org/jira/browse/YARN-7947) | Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps | Major | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne | +| [YARN-7945](https://issues.apache.org/jira/browse/YARN-7945) | Java Doc error in UnmanagedAMPoolManager for branch-2 | Major | . | Rohith Sharma K S | Botong Huang | +| [HADOOP-14903](https://issues.apache.org/jira/browse/HADOOP-14903) | Add json-smart explicitly to pom.xml | Major | common | Ray Chiang | Ray Chiang | +| [HADOOP-15236](https://issues.apache.org/jira/browse/HADOOP-15236) | Fix typo in RequestHedgingProxyProvider and RequestHedgingRMFailoverProxyProvider | Trivial | documentation | Akira Ajisaka | Gabor Bota | +| [MAPREDUCE-7027](https://issues.apache.org/jira/browse/MAPREDUCE-7027) | HadoopArchiveLogs shouldn't delete the original logs if the HAR creation fails | Critical | mrv2 | Gergely Novák | Gergely Novák | +| [HDFS-12781](https://issues.apache.org/jira/browse/HDFS-12781) | After Datanode down, In Namenode UI Datanode tab is throwing warning message. | Major | datanode | Harshakiran Reddy | Brahma Reddy Battula | +| [HDFS-12070](https://issues.apache.org/jira/browse/HDFS-12070) | Failed block recovery leaves files open indefinitely and at risk for data loss | Major | . | Daryn Sharp | Kihwal Lee | +| [HADOOP-15251](https://issues.apache.org/jira/browse/HADOOP-15251) | Backport HADOOP-13514 (surefire upgrade) to branch-2 | Major | test | Chris Douglas | Chris Douglas | +| [HDFS-13194](https://issues.apache.org/jira/browse/HDFS-13194) | CachePool permissions incorrectly checked | Major | . | Yiqun Lin | Jianfei Jiang | +| [HADOOP-15276](https://issues.apache.org/jira/browse/HADOOP-15276) | branch-2 site not building after ADL troubleshooting doc added | Major | documentation | Steve Loughran | Steve Loughran | +| [YARN-7835](https://issues.apache.org/jira/browse/YARN-7835) | [Atsv2] Race condition in NM while publishing events if second attempt is launched on the same node | Critical | . | Rohith Sharma K S | Rohith Sharma K S | +| [HADOOP-15275](https://issues.apache.org/jira/browse/HADOOP-15275) | Incorrect javadoc for return type of RetryPolicy#shouldRetry | Minor | documentation | Nanda kumar | Nanda kumar | +| [YARN-7511](https://issues.apache.org/jira/browse/YARN-7511) | NPE in ContainerLocalizer when localization failed for running container | Major | nodemanager | Tao Yang | Tao Yang | +| [MAPREDUCE-7023](https://issues.apache.org/jira/browse/MAPREDUCE-7023) | TestHadoopArchiveLogs.testCheckFilesAndSeedApps fails on rerun | Minor | test | Gergely Novák | Gergely Novák | +| [HADOOP-15283](https://issues.apache.org/jira/browse/HADOOP-15283) | Upgrade from findbugs 3.0.1 to spotbugs 3.1.2 in branch-2 to fix docker image build | Major | . 
| Xiao Chen | Akira Ajisaka | +| [HADOOP-15286](https://issues.apache.org/jira/browse/HADOOP-15286) | Remove unused imports from TestKMSWithZK.java | Minor | test | Akira Ajisaka | Ajay Kumar | +| [HDFS-13040](https://issues.apache.org/jira/browse/HDFS-13040) | Kerberized inotify client fails despite kinit properly | Major | namenode | Wei-Chiu Chuang | Xiao Chen | +| [YARN-7736](https://issues.apache.org/jira/browse/YARN-7736) | Fix itemization in YARN federation document | Minor | documentation | Akira Ajisaka | Sen Zhao | +| [HDFS-13164](https://issues.apache.org/jira/browse/HDFS-13164) | File not closed if streamer fail with DSQuotaExceededException | Major | hdfs-client | Xiao Chen | Xiao Chen | +| [HDFS-13109](https://issues.apache.org/jira/browse/HDFS-13109) | Support fully qualified hdfs path in EZ commands | Major | hdfs | Hanisha Koneru | Hanisha Koneru | +| [MAPREDUCE-6930](https://issues.apache.org/jira/browse/MAPREDUCE-6930) | mapreduce.map.cpu.vcores and mapreduce.reduce.cpu.vcores are both present twice in mapred-default.xml | Major | mrv2 | Daniel Templeton | Sen Zhao | +| [HDFS-10618](https://issues.apache.org/jira/browse/HDFS-10618) | TestPendingReconstruction#testPendingAndInvalidate is flaky due to race condition | Major | . | Eric Badger | Eric Badger | +| [HDFS-10803](https://issues.apache.org/jira/browse/HDFS-10803) | TestBalancerWithMultipleNameNodes#testBalancing2OutOf3Blockpools fails intermittently due to no free space available | Major | . | Yiqun Lin | Yiqun Lin | +| [HDFS-12156](https://issues.apache.org/jira/browse/HDFS-12156) | TestFSImage fails without -Pnative | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HDFS-13261](https://issues.apache.org/jira/browse/HDFS-13261) | Fix incorrect null value check | Minor | hdfs | Jianfei Jiang | Jianfei Jiang | +| [HDFS-12886](https://issues.apache.org/jira/browse/HDFS-12886) | Ignore minReplication for block recovery | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [YARN-8039](https://issues.apache.org/jira/browse/YARN-8039) | Clean up log dir configuration in TestLinuxContainerExecutorWithMocks.testStartLocalizer | Minor | . | Miklos Szegedi | Miklos Szegedi | +| [HDFS-13296](https://issues.apache.org/jira/browse/HDFS-13296) | GenericTestUtils generates paths with drive letter in Windows and fail webhdfs related test cases | Major | . | Xiao Liang | Xiao Liang | +| [HDFS-13268](https://issues.apache.org/jira/browse/HDFS-13268) | TestWebHdfsFileContextMainOperations fails on Windows | Major | . | Íñigo Goiri | Xiao Liang | +| [YARN-8054](https://issues.apache.org/jira/browse/YARN-8054) | Improve robustness of the LocalDirsHandlerService MonitoringTimerTask thread | Major | . | Jonathan Eagles | Jonathan Eagles | +| [YARN-7873](https://issues.apache.org/jira/browse/YARN-7873) | Revert YARN-6078 | Blocker | . | Billie Rinaldi | Billie Rinaldi | +| [HDFS-13195](https://issues.apache.org/jira/browse/HDFS-13195) | DataNode conf page cannot display the current value after reconfig | Minor | datanode | maobaolong | maobaolong | +| [YARN-8063](https://issues.apache.org/jira/browse/YARN-8063) | DistributedShellTimelinePlugin wrongly check for entityId instead of entityType | Major | . 
| Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8068](https://issues.apache.org/jira/browse/YARN-8068) | Application Priority field causes NPE in app timeline publish when Hadoop 2.7 based clients to 2.8+ | Blocker | yarn | Sunil Govindan | Sunil Govindan | +| [HADOOP-12862](https://issues.apache.org/jira/browse/HADOOP-12862) | LDAP Group Mapping over SSL can not specify trust store | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-15317](https://issues.apache.org/jira/browse/HADOOP-15317) | Improve NetworkTopology chooseRandom's loop | Major | . | Xiao Chen | Xiao Chen | +| [HADOOP-15355](https://issues.apache.org/jira/browse/HADOOP-15355) | TestCommonConfigurationFields is broken by HADOOP-15312 | Major | test | Konstantin Shvachko | LiXin Ge | +| [HDFS-13176](https://issues.apache.org/jira/browse/HDFS-13176) | WebHdfs file path gets truncated when having semicolon (;) inside | Major | webhdfs | Zsolt Venczel | Zsolt Venczel | +| [HADOOP-15375](https://issues.apache.org/jira/browse/HADOOP-15375) | Branch-2 pre-commit failed to build docker image | Major | . | Xiao Chen | Xiao Chen | +| [HADOOP-15357](https://issues.apache.org/jira/browse/HADOOP-15357) | Configuration.getPropsWithPrefix no longer does variable substitution | Major | . | Jim Brennan | Jim Brennan | +| [MAPREDUCE-7062](https://issues.apache.org/jira/browse/MAPREDUCE-7062) | Update mapreduce.job.tags description for making use for ATSv2 purpose. | Major | . | Charan Hebri | Charan Hebri | +| [YARN-8073](https://issues.apache.org/jira/browse/YARN-8073) | TimelineClientImpl doesn't honor yarn.timeline-service.versions configuration | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-6629](https://issues.apache.org/jira/browse/YARN-6629) | NPE occurred when container allocation proposal is applied but its resource requests are removed before | Critical | . | Tao Yang | Tao Yang | +| [HDFS-13427](https://issues.apache.org/jira/browse/HDFS-13427) | Fix the section titles of transparent encryption document | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [YARN-7527](https://issues.apache.org/jira/browse/YARN-7527) | Over-allocate node resource in async-scheduling mode of CapacityScheduler | Major | capacityscheduler | Tao Yang | Tao Yang | +| [HDFS-7101](https://issues.apache.org/jira/browse/HDFS-7101) | Potential null dereference in DFSck#doWork() | Minor | . | Ted Yu | skrho | +| [YARN-8120](https://issues.apache.org/jira/browse/YARN-8120) | JVM can crash with SIGSEGV when exiting due to custom leveldb logger | Major | nodemanager, resourcemanager | Jason Lowe | Jason Lowe | +| [YARN-8147](https://issues.apache.org/jira/browse/YARN-8147) | TestClientRMService#testGetApplications sporadically fails | Major | test | Jason Lowe | Jason Lowe | +| [HADOOP-14970](https://issues.apache.org/jira/browse/HADOOP-14970) | MiniHadoopClusterManager doesn't respect lack of format option | Minor | . | Erik Krogen | Erik Krogen | +| [YARN-8156](https://issues.apache.org/jira/browse/YARN-8156) | Increase the default value of yarn.timeline-service.app-collector.linger-period.ms | Major | . | Rohith Sharma K S | Charan Hebri | +| [YARN-8165](https://issues.apache.org/jira/browse/YARN-8165) | Incorrect queue name logging in AbstractContainerAllocator | Trivial | capacityscheduler | Weiwei Yang | Weiwei Yang | +| [YARN-8164](https://issues.apache.org/jira/browse/YARN-8164) | Fix a potential NPE in AbstractSchedulerPlanFollower | Major | . 
| lujie | lujie | +| [HDFS-12828](https://issues.apache.org/jira/browse/HDFS-12828) | OIV ReverseXML Processor fails with escaped characters | Critical | hdfs | Erik Krogen | Erik Krogen | +| [HADOOP-15180](https://issues.apache.org/jira/browse/HADOOP-15180) | branch-2 : daemon processes' sysout overwrites 'ulimit -a' in daemon's out file | Minor | scripts | Ranith Sardar | Ranith Sardar | +| [HADOOP-15396](https://issues.apache.org/jira/browse/HADOOP-15396) | Some java source files are executable | Minor | . | Akira Ajisaka | Shashikant Banerjee | +| [YARN-6827](https://issues.apache.org/jira/browse/YARN-6827) | [ATS1/1.5] NPE exception while publishing recovering applications into ATS during RM restart. | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-7786](https://issues.apache.org/jira/browse/YARN-7786) | NullPointerException while launching ApplicationMaster | Major | . | lujie | lujie | +| [HDFS-13408](https://issues.apache.org/jira/browse/HDFS-13408) | MiniDFSCluster to support being built on randomized base directory | Major | test | Xiao Liang | Xiao Liang | +| [HADOOP-15390](https://issues.apache.org/jira/browse/HADOOP-15390) | Yarn RM logs flooded by DelegationTokenRenewer trying to renew KMS tokens | Critical | . | Xiao Chen | Xiao Chen | +| [HDFS-13336](https://issues.apache.org/jira/browse/HDFS-13336) | Test cases of TestWriteToReplica failed in windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-7598](https://issues.apache.org/jira/browse/YARN-7598) | Document how to use classpath isolation for aux-services in YARN | Major | . | Xuan Gong | Xuan Gong | +| [HADOOP-15385](https://issues.apache.org/jira/browse/HADOOP-15385) | Many tests are failing in hadoop-distcp project in branch-2 | Critical | tools/distcp | Rushabh S Shah | Jason Lowe | +| [MAPREDUCE-7042](https://issues.apache.org/jira/browse/MAPREDUCE-7042) | Killed MR job data does not move to mapreduce.jobhistory.done-dir when ATS v2 is enabled | Major | . | Yesha Vora | Xuan Gong | +| [YARN-8205](https://issues.apache.org/jira/browse/YARN-8205) | Application State is not updated to ATS if AM launching is delayed. | Critical | . | Sumana Sathish | Rohith Sharma K S | +| [MAPREDUCE-7072](https://issues.apache.org/jira/browse/MAPREDUCE-7072) | mapred job -history prints duplicate counter in human output | Major | client | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [YARN-8221](https://issues.apache.org/jira/browse/YARN-8221) | RMWebServices also need to honor yarn.resourcemanager.display.per-user-apps | Major | webapp | Sunil Govindan | Sunil Govindan | +| [HDFS-13509](https://issues.apache.org/jira/browse/HDFS-13509) | Bug fix for breakHardlinks() of ReplicaInfo/LocalReplica, and fix TestFileAppend failures on Windows | Major | . | Xiao Liang | Xiao Liang | +| [MAPREDUCE-7073](https://issues.apache.org/jira/browse/MAPREDUCE-7073) | Optimize TokenCache#obtainTokensForNamenodesInternal | Major | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8232](https://issues.apache.org/jira/browse/YARN-8232) | RMContainer lost queue name when RM HA happens | Major | resourcemanager | Hu Ziqian | Hu Ziqian | +| [HDFS-13537](https://issues.apache.org/jira/browse/HDFS-13537) | TestHdfsHelper does not generate jceks path properly for relative path in Windows | Major | . 
| Xiao Liang | Xiao Liang | +| [HADOOP-15446](https://issues.apache.org/jira/browse/HADOOP-15446) | WASB: PageBlobInputStream.skip breaks HBASE replication | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [YARN-8244](https://issues.apache.org/jira/browse/YARN-8244) | TestContainerSchedulerQueuing.testStartMultipleContainers failed | Major | . | Miklos Szegedi | Jim Brennan | +| [HDFS-13586](https://issues.apache.org/jira/browse/HDFS-13586) | Fsync fails on directories on Windows | Critical | datanode, hdfs | Lukas Majercak | Lukas Majercak | +| [HDFS-13590](https://issues.apache.org/jira/browse/HDFS-13590) | Backport HDFS-12378 to branch-2 | Major | datanode, hdfs, test | Lukas Majercak | Lukas Majercak | +| [HADOOP-15478](https://issues.apache.org/jira/browse/HADOOP-15478) | WASB: hflush() and hsync() regression | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [HADOOP-15450](https://issues.apache.org/jira/browse/HADOOP-15450) | Avoid fsync storm triggered by DiskChecker and handle disk full situation | Blocker | . | Kihwal Lee | Arpit Agarwal | +| [HDFS-13601](https://issues.apache.org/jira/browse/HDFS-13601) | Optimize ByteString conversions in PBHelper | Major | . | Andrew Wang | Andrew Wang | +| [HDFS-13588](https://issues.apache.org/jira/browse/HDFS-13588) | Fix TestFsDatasetImpl test failures on Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8310](https://issues.apache.org/jira/browse/YARN-8310) | Handle old NMTokenIdentifier, AMRMTokenIdentifier, and ContainerTokenIdentifier formats | Major | . | Robert Kanter | Robert Kanter | +| [YARN-8344](https://issues.apache.org/jira/browse/YARN-8344) | Missing nm.stop() in TestNodeManagerResync to fix testKillContainersOnResync | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8327](https://issues.apache.org/jira/browse/YARN-8327) | Fix TestAggregatedLogFormat#testReadAcontainerLogs1 on Windows | Major | log-aggregation | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8346](https://issues.apache.org/jira/browse/YARN-8346) | Upgrading to 3.1 kills running containers with error "Opportunistic container queue is full" | Blocker | . | Rohith Sharma K S | Jason Lowe | +| [HDFS-13611](https://issues.apache.org/jira/browse/HDFS-13611) | Unsafe use of Text as a ConcurrentHashMap key in PBHelperClient | Major | . | Andrew Wang | Andrew Wang | +| [HDFS-13618](https://issues.apache.org/jira/browse/HDFS-13618) | Fix TestDataNodeFaultInjector test failures on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HADOOP-15473](https://issues.apache.org/jira/browse/HADOOP-15473) | Configure serialFilter in KeyProvider to avoid UnrecoverableKeyException caused by JDK-8189997 | Critical | kms | Gabor Bota | Gabor Bota | +| [HDFS-13626](https://issues.apache.org/jira/browse/HDFS-13626) | Fix incorrect username when deny the setOwner operation | Minor | namenode | luhuachao | Zsolt Venczel | +| [MAPREDUCE-7103](https://issues.apache.org/jira/browse/MAPREDUCE-7103) | Fix TestHistoryViewerPrinter on windows due to a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8359](https://issues.apache.org/jira/browse/YARN-8359) | Exclude containermanager.linux test classes on Windows | Major | . 
| Giovanni Matteo Fumarola | Jason Lowe | +| [HDFS-13664](https://issues.apache.org/jira/browse/HDFS-13664) | Refactor ConfiguredFailoverProxyProvider to make inheritance easier | Minor | hdfs-client | Chao Sun | Chao Sun | +| [YARN-8405](https://issues.apache.org/jira/browse/YARN-8405) | RM zk-state-store.parent-path ACLs has been changed since HADOOP-14773 | Major | . | Rohith Sharma K S | Íñigo Goiri | +| [MAPREDUCE-7108](https://issues.apache.org/jira/browse/MAPREDUCE-7108) | TestFileOutputCommitter fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [MAPREDUCE-7101](https://issues.apache.org/jira/browse/MAPREDUCE-7101) | Add config parameter to allow JHS to alway scan user dir irrespective of modTime | Critical | . | Wangda Tan | Thomas Marquardt | +| [YARN-8404](https://issues.apache.org/jira/browse/YARN-8404) | Timeline event publish need to be async to avoid Dispatcher thread leak in case ATS is down | Blocker | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13675](https://issues.apache.org/jira/browse/HDFS-13675) | Speed up TestDFSAdminWithHA | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [HDFS-13673](https://issues.apache.org/jira/browse/HDFS-13673) | TestNameNodeMetrics fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13676](https://issues.apache.org/jira/browse/HDFS-13676) | TestEditLogRace fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HADOOP-15523](https://issues.apache.org/jira/browse/HADOOP-15523) | Shell command timeout given is in seconds whereas it is taken as millisec while scheduling | Major | . | Bilwa S T | Bilwa S T | +| [YARN-8444](https://issues.apache.org/jira/browse/YARN-8444) | NodeResourceMonitor crashes on bad swapFree value | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8457](https://issues.apache.org/jira/browse/YARN-8457) | Compilation is broken with -Pyarn-ui | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-8401](https://issues.apache.org/jira/browse/YARN-8401) | [UI2] new ui is not accessible with out internet connection | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8451](https://issues.apache.org/jira/browse/YARN-8451) | Multiple NM heartbeat thread created when a slow NM resync with RM | Major | nodemanager | Botong Huang | Botong Huang | +| [HADOOP-15548](https://issues.apache.org/jira/browse/HADOOP-15548) | Randomize local dirs | Minor | . | Jim Brennan | Jim Brennan | +| [YARN-8473](https://issues.apache.org/jira/browse/YARN-8473) | Containers being launched as app tears down can leave containers in NEW state | Major | nodemanager | Jason Lowe | Jason Lowe | +| [HDFS-13729](https://issues.apache.org/jira/browse/HDFS-13729) | Fix broken links to RBF documentation | Minor | documentation | jwhitter | Gabor Bota | +| [YARN-8515](https://issues.apache.org/jira/browse/YARN-8515) | container-executor can crash with SIGPIPE after nodemanager restart | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8421](https://issues.apache.org/jira/browse/YARN-8421) | when moving app, activeUsers is increased, even though app does not have outstanding request | Major | . | kyungwan nam | | +| [HADOOP-15614](https://issues.apache.org/jira/browse/HADOOP-15614) | TestGroupsCaching.testExceptionOnBackgroundRefreshHandled reliably fails | Major | . 
| Kihwal Lee | Weiwei Yang | +| [YARN-4606](https://issues.apache.org/jira/browse/YARN-4606) | CapacityScheduler: applications could get starved because computation of #activeUsers considers pending apps | Critical | capacity scheduler, capacityscheduler | Karam Singh | Manikandan R | +| [HADOOP-15637](https://issues.apache.org/jira/browse/HADOOP-15637) | LocalFs#listLocatedStatus does not filter out hidden .crc files | Minor | fs | Erik Krogen | Erik Krogen | +| [HADOOP-15644](https://issues.apache.org/jira/browse/HADOOP-15644) | Hadoop Docker Image Pip Install Fails on branch-2 | Critical | build | Haibo Chen | Haibo Chen | +| [YARN-6966](https://issues.apache.org/jira/browse/YARN-6966) | NodeManager metrics may return wrong negative values when NM restart | Major | . | Yang Wang | Szilard Nemeth | +| [YARN-8331](https://issues.apache.org/jira/browse/YARN-8331) | Race condition in NM container launched after done | Major | . | Yang Wang | Pradeep Ambati | +| [HDFS-13758](https://issues.apache.org/jira/browse/HDFS-13758) | DatanodeManager should throw exception if it has BlockRecoveryCommand but the block is not under construction | Major | namenode | Wei-Chiu Chuang | chencan | +| [YARN-8612](https://issues.apache.org/jira/browse/YARN-8612) | Fix NM Collector Service Port issue in YarnConfiguration | Major | ATSv2 | Prabha Manepalli | Prabha Manepalli | +| [HADOOP-15674](https://issues.apache.org/jira/browse/HADOOP-15674) | Test failure TestSSLHttpServer.testExcludedCiphers with TLS\_ECDHE\_RSA\_WITH\_AES\_128\_CBC\_SHA256 cipher suite | Major | common | Gabor Bota | Szilard Nemeth | +| [YARN-8640](https://issues.apache.org/jira/browse/YARN-8640) | Restore previous state in container-executor after failure | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8679](https://issues.apache.org/jira/browse/YARN-8679) | [ATSv2] If HBase cluster is down for long time, high chances that NM ContainerManager dispatcher get blocked | Major | . | Rohith Sharma K S | Wangda Tan | +| [HADOOP-14314](https://issues.apache.org/jira/browse/HADOOP-14314) | The OpenSolaris taxonomy link is dead in InterfaceClassification.md | Major | documentation | Daniel Templeton | Rui Gao | +| [YARN-8649](https://issues.apache.org/jira/browse/YARN-8649) | NPE in localizer hearbeat processing if a container is killed while localizing | Major | . | lujie | lujie | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13337](https://issues.apache.org/jira/browse/HDFS-13337) | Backport HDFS-4275 to branch-2.9 | Minor | . | Íñigo Goiri | Xiao Liang | +| [HDFS-13503](https://issues.apache.org/jira/browse/HDFS-13503) | Fix TestFsck test failures on Windows | Major | hdfs | Xiao Liang | Xiao Liang | +| [HDFS-13542](https://issues.apache.org/jira/browse/HDFS-13542) | TestBlockManager#testNeededReplicationWhileAppending fails due to improper cluster shutdown in TestBlockManager#testBlockManagerMachinesArray on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13551](https://issues.apache.org/jira/browse/HDFS-13551) | TestMiniDFSCluster#testClusterSetStorageCapacity does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-11700](https://issues.apache.org/jira/browse/HDFS-11700) | TestHDFSServerPorts#testBackupNodePorts doesn't pass on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13548](https://issues.apache.org/jira/browse/HDFS-13548) | TestResolveHdfsSymlink#testFcResolveAfs fails on Windows | Minor | . 
| Anbang Hu | Anbang Hu | +| [HDFS-13567](https://issues.apache.org/jira/browse/HDFS-13567) | TestNameNodeMetrics#testGenerateEDEKTime,TestNameNodeMetrics#testResourceCheck should use a different cluster basedir | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13557](https://issues.apache.org/jira/browse/HDFS-13557) | TestDFSAdmin#testListOpenFiles fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13550](https://issues.apache.org/jira/browse/HDFS-13550) | TestDebugAdmin#testComputeMetaCommand fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13559](https://issues.apache.org/jira/browse/HDFS-13559) | TestBlockScanner does not close TestContext properly | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13570](https://issues.apache.org/jira/browse/HDFS-13570) | TestQuotaByStorageType,TestQuota,TestDFSOutputStream fail on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13558](https://issues.apache.org/jira/browse/HDFS-13558) | TestDatanodeHttpXFrame does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13554](https://issues.apache.org/jira/browse/HDFS-13554) | TestDatanodeRegistration#testForcedRegistration does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13556](https://issues.apache.org/jira/browse/HDFS-13556) | TestNestedEncryptionZones does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13560](https://issues.apache.org/jira/browse/HDFS-13560) | Insufficient system resources exist to complete the requested service for some tests on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13592](https://issues.apache.org/jira/browse/HDFS-13592) | TestNameNodePrunesMissingStorages#testNameNodePrunesUnreportedStorages does not shut down cluster properly | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13593](https://issues.apache.org/jira/browse/HDFS-13593) | TestBlockReaderLocalLegacy#testBlockReaderLocalLegacyWithAppend fails on Windows | Minor | test | Anbang Hu | Anbang Hu | +| [HDFS-13587](https://issues.apache.org/jira/browse/HDFS-13587) | TestQuorumJournalManager fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13620](https://issues.apache.org/jira/browse/HDFS-13620) | Randomize the test directory path for TestHDFSFileSystemContract | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13591](https://issues.apache.org/jira/browse/HDFS-13591) | TestDFSShell#testSetrepLow fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13632](https://issues.apache.org/jira/browse/HDFS-13632) | Randomize baseDir for MiniJournalCluster in MiniQJMHACluster for TestDFSAdminWithHA | Minor | . | Anbang Hu | Anbang Hu | +| [MAPREDUCE-7102](https://issues.apache.org/jira/browse/MAPREDUCE-7102) | Fix TestJavaSerialization for Windows due a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-13652](https://issues.apache.org/jira/browse/HDFS-13652) | Randomize baseDir for MiniDFSCluster in TestBlockScanner | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8370](https://issues.apache.org/jira/browse/YARN-8370) | Some Node Manager tests fail on Windows due to improper path/file separator | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8422](https://issues.apache.org/jira/browse/YARN-8422) | TestAMSimulator failing with NPE | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15532](https://issues.apache.org/jira/browse/HADOOP-15532) | TestBasicDiskValidator fails with NoSuchFileException | Minor | . 
| Íñigo Goiri | Giovanni Matteo Fumarola | +| [HDFS-13563](https://issues.apache.org/jira/browse/HDFS-13563) | TestDFSAdminWithHA times out on Windows | Minor | . | Anbang Hu | Lukas Majercak | +| [HDFS-13681](https://issues.apache.org/jira/browse/HDFS-13681) | Fix TestStartup.testNNFailToStartOnReadOnlyNNDir test failure on Windows | Major | test | Xiao Liang | Xiao Liang | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-14799](https://issues.apache.org/jira/browse/HADOOP-14799) | Update nimbus-jose-jwt to 4.41.1 | Major | . | Ray Chiang | Ray Chiang | +| [HADOOP-14997](https://issues.apache.org/jira/browse/HADOOP-14997) | Add hadoop-aliyun as dependency of hadoop-cloud-storage | Minor | fs/oss | Genmao Yu | Genmao Yu | +| [HDFS-12801](https://issues.apache.org/jira/browse/HDFS-12801) | RBF: Set MountTableResolver as default file resolver | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-7430](https://issues.apache.org/jira/browse/YARN-7430) | Enable user re-mapping for Docker containers by default | Blocker | security, yarn | Eric Yang | Eric Yang | +| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | Sammi Chen | Sammi Chen | +| [HDFS-12858](https://issues.apache.org/jira/browse/HDFS-12858) | RBF: Add router admin commands usage in HDFS commands reference doc | Minor | documentation | Yiqun Lin | Yiqun Lin | +| [HDFS-12835](https://issues.apache.org/jira/browse/HDFS-12835) | RBF: Fix Javadoc parameter errors | Minor | . | Wei Yan | Wei Yan | +| [HDFS-12396](https://issues.apache.org/jira/browse/HDFS-12396) | Webhdfs file system should get delegation token from kms provider. | Major | encryption, kms, webhdfs | Rushabh S Shah | Rushabh S Shah | +| [YARN-6704](https://issues.apache.org/jira/browse/YARN-6704) | Add support for work preserving NM restart when FederationInterceptor is enabled in AMRMProxyService | Major | . | Botong Huang | Botong Huang | +| [HDFS-12875](https://issues.apache.org/jira/browse/HDFS-12875) | RBF: Complete logic for -readonly option of dfsrouteradmin add command | Major | . | Yiqun Lin | Íñigo Goiri | +| [YARN-7630](https://issues.apache.org/jira/browse/YARN-7630) | Fix AMRMToken rollover handling in AMRMProxy | Minor | . | Botong Huang | Botong Huang | +| [HDFS-12937](https://issues.apache.org/jira/browse/HDFS-12937) | RBF: Add more unit tests for router admin commands | Major | test | Yiqun Lin | Yiqun Lin | +| [YARN-7032](https://issues.apache.org/jira/browse/YARN-7032) | [ATSv2] NPE while starting hbase co-processor when HBase authorization is enabled. | Critical | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-12988](https://issues.apache.org/jira/browse/HDFS-12988) | RBF: Mount table entries not properly updated in the local cache | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-12802](https://issues.apache.org/jira/browse/HDFS-12802) | RBF: Control MountTableResolver cache size | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-12934](https://issues.apache.org/jira/browse/HDFS-12934) | RBF: Federation supports global quota | Major | . | Yiqun Lin | Yiqun Lin | +| [HDFS-12972](https://issues.apache.org/jira/browse/HDFS-12972) | RBF: Display mount table quota info in Web UI and admin command | Major | . 
| Yiqun Lin | Yiqun Lin | +| [YARN-6736](https://issues.apache.org/jira/browse/YARN-6736) | Consider writing to both ats v1 & v2 from RM for smoother upgrades | Major | timelineserver | Vrushali C | Aaron Gresch | +| [HADOOP-15027](https://issues.apache.org/jira/browse/HADOOP-15027) | AliyunOSS: Support multi-thread pre-read to improve sequential read from Hadoop to Aliyun OSS performance | Major | fs/oss | wujinhu | wujinhu | +| [HDFS-12973](https://issues.apache.org/jira/browse/HDFS-12973) | RBF: Document global quota supporting in federation | Major | . | Yiqun Lin | Yiqun Lin | +| [HDFS-13028](https://issues.apache.org/jira/browse/HDFS-13028) | RBF: Fix spurious TestRouterRpc#testProxyGetStats | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-12772](https://issues.apache.org/jira/browse/HDFS-12772) | RBF: Federation Router State State Store internal API | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13042](https://issues.apache.org/jira/browse/HDFS-13042) | RBF: Heartbeat Router State | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13049](https://issues.apache.org/jira/browse/HDFS-13049) | RBF: Inconsistent Router OPTS config in branch-2 and branch-3 | Minor | . | Wei Yan | Wei Yan | +| [HDFS-12574](https://issues.apache.org/jira/browse/HDFS-12574) | Add CryptoInputStream to WebHdfsFileSystem read call. | Major | encryption, kms, webhdfs | Rushabh S Shah | Rushabh S Shah | +| [HDFS-13044](https://issues.apache.org/jira/browse/HDFS-13044) | RBF: Add a safe mode for the Router | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13043](https://issues.apache.org/jira/browse/HDFS-13043) | RBF: Expose the state of the Routers in the federation | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13068](https://issues.apache.org/jira/browse/HDFS-13068) | RBF: Add router admin option to manage safe mode | Major | . | Íñigo Goiri | Yiqun Lin | +| [HDFS-13119](https://issues.apache.org/jira/browse/HDFS-13119) | RBF: Manage unavailable clusters | Major | . | Íñigo Goiri | Yiqun Lin | +| [HDFS-13187](https://issues.apache.org/jira/browse/HDFS-13187) | RBF: Fix Routers information shown in the web UI | Minor | . | Wei Yan | Wei Yan | +| [HDFS-13184](https://issues.apache.org/jira/browse/HDFS-13184) | RBF: Improve the unit test TestRouterRPCClientRetries | Minor | test | Yiqun Lin | Yiqun Lin | +| [HDFS-13199](https://issues.apache.org/jira/browse/HDFS-13199) | RBF: Fix the hdfs router page missing label icon issue | Major | federation, hdfs | maobaolong | maobaolong | +| [HADOOP-15090](https://issues.apache.org/jira/browse/HADOOP-15090) | Add ADL troubleshooting doc | Major | documentation, fs/adl | Steve Loughran | Steve Loughran | +| [YARN-7919](https://issues.apache.org/jira/browse/YARN-7919) | Refactor timelineservice-hbase module into submodules | Major | timelineservice | Haibo Chen | Haibo Chen | +| [YARN-8003](https://issues.apache.org/jira/browse/YARN-8003) | Backport the code structure changes in YARN-7346 to branch-2 | Major | . | Haibo Chen | Haibo Chen | +| [HDFS-13214](https://issues.apache.org/jira/browse/HDFS-13214) | RBF: Complete document of Router configuration | Major | . | Tao Jie | Yiqun Lin | +| [HADOOP-15267](https://issues.apache.org/jira/browse/HADOOP-15267) | S3A multipart upload fails when SSE-C encryption is enabled | Critical | fs/s3 | Anis Elleuch | Anis Elleuch | +| [HDFS-13230](https://issues.apache.org/jira/browse/HDFS-13230) | RBF: ConnectionManager's cleanup task will compare each pool's own active conns with its total conns | Minor | . 
| Wei Yan | Chao Sun | +| [HDFS-13233](https://issues.apache.org/jira/browse/HDFS-13233) | RBF: MountTableResolver doesn't return the correct mount point of the given path | Major | hdfs | wangzhiyuan | wangzhiyuan | +| [HDFS-13212](https://issues.apache.org/jira/browse/HDFS-13212) | RBF: Fix router location cache issue | Major | federation, hdfs | Weiwei Wu | Weiwei Wu | +| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection | Minor | . | Wei Yan | Ekanth Sethuramalingam | +| [HDFS-13240](https://issues.apache.org/jira/browse/HDFS-13240) | RBF: Update some inaccurate document descriptions | Minor | . | Yiqun Lin | Yiqun Lin | +| [HDFS-11399](https://issues.apache.org/jira/browse/HDFS-11399) | Many tests fails in Windows due to injecting disk failures | Major | . | Yiqun Lin | Yiqun Lin | +| [HDFS-13241](https://issues.apache.org/jira/browse/HDFS-13241) | RBF: TestRouterSafemode failed if the port 8888 is in use | Major | hdfs, test | maobaolong | maobaolong | +| [HDFS-13253](https://issues.apache.org/jira/browse/HDFS-13253) | RBF: Quota management incorrect parent-child relationship judgement | Major | . | Yiqun Lin | Yiqun Lin | +| [HDFS-13226](https://issues.apache.org/jira/browse/HDFS-13226) | RBF: Throw the exception if mount table entry validated failed | Major | hdfs | maobaolong | maobaolong | +| [HADOOP-15308](https://issues.apache.org/jira/browse/HADOOP-15308) | TestConfiguration fails on Windows because of paths | Major | test | Íñigo Goiri | Xiao Liang | +| [HDFS-12773](https://issues.apache.org/jira/browse/HDFS-12773) | RBF: Improve State Store FS implementation | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13198](https://issues.apache.org/jira/browse/HDFS-13198) | RBF: RouterHeartbeatService throws out CachedStateStore related exceptions when starting router | Minor | . | Wei Yan | Wei Yan | +| [HDFS-13224](https://issues.apache.org/jira/browse/HDFS-13224) | RBF: Resolvers to support mount points across multiple subclusters | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-15262](https://issues.apache.org/jira/browse/HADOOP-15262) | AliyunOSS: move files under a directory in parallel when rename a directory | Major | fs/oss | wujinhu | wujinhu | +| [HDFS-13215](https://issues.apache.org/jira/browse/HDFS-13215) | RBF: Move Router to its own module | Major | . | Íñigo Goiri | Wei Yan | +| [HDFS-13307](https://issues.apache.org/jira/browse/HDFS-13307) | RBF: Improve the use of setQuota command | Major | . | liuhongtong | liuhongtong | +| [HDFS-13250](https://issues.apache.org/jira/browse/HDFS-13250) | RBF: Router to manage requests across multiple subclusters | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf | Minor | . | Íñigo Goiri | Ekanth Sethuramalingam | +| [HDFS-12792](https://issues.apache.org/jira/browse/HDFS-12792) | RBF: Test Router-based federation using HDFSContract | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-7581](https://issues.apache.org/jira/browse/YARN-7581) | HBase filters are not constructed correctly in ATSv2 | Major | ATSv2 | Haibo Chen | Haibo Chen | +| [YARN-7986](https://issues.apache.org/jira/browse/YARN-7986) | ATSv2 REST API queries do not return results for uppercase application tags | Critical | . 
| Charan Hebri | Charan Hebri | +| [HDFS-12512](https://issues.apache.org/jira/browse/HDFS-12512) | RBF: Add WebHDFS | Major | fs | Íñigo Goiri | Wei Yan | +| [HDFS-13291](https://issues.apache.org/jira/browse/HDFS-13291) | RBF: Implement available space based OrderResolver | Major | . | Yiqun Lin | Yiqun Lin | +| [HDFS-13204](https://issues.apache.org/jira/browse/HDFS-13204) | RBF: Optimize name service safe mode icon | Minor | . | liuhongtong | liuhongtong | +| [HDFS-13352](https://issues.apache.org/jira/browse/HDFS-13352) | RBF: Add xsl stylesheet for hdfs-rbf-default.xml | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8010](https://issues.apache.org/jira/browse/YARN-8010) | Add config in FederationRMFailoverProxy to not bypass facade cache when failing over | Minor | . | Botong Huang | Botong Huang | +| [HDFS-13347](https://issues.apache.org/jira/browse/HDFS-13347) | RBF: Cache datanode reports | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13289](https://issues.apache.org/jira/browse/HDFS-13289) | RBF: TestConnectionManager#testCleanup() test case need correction | Minor | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HDFS-13364](https://issues.apache.org/jira/browse/HDFS-13364) | RBF: Support NamenodeProtocol in the Router | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-6936](https://issues.apache.org/jira/browse/YARN-6936) | [Atsv2] Retrospect storing entities into sub application table from client perspective | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13353](https://issues.apache.org/jira/browse/HDFS-13353) | RBF: TestRouterWebHDFSContractCreate failed | Major | test | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8107](https://issues.apache.org/jira/browse/YARN-8107) | Give an informative message when incorrect format is used in ATSv2 filter attributes | Major | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8110](https://issues.apache.org/jira/browse/YARN-8110) | AMRMProxy recover should catch for all throwable to avoid premature exit | Major | . | Botong Huang | Botong Huang | +| [HDFS-13402](https://issues.apache.org/jira/browse/HDFS-13402) | RBF: Fix java doc for StateStoreFileSystemImpl | Minor | hdfs | Yiran Wu | Yiran Wu | +| [HDFS-13380](https://issues.apache.org/jira/browse/HDFS-13380) | RBF: mv/rm fail after the directory exceeded the quota limit | Major | . | Weiwei Wu | Yiqun Lin | +| [HDFS-13410](https://issues.apache.org/jira/browse/HDFS-13410) | RBF: Support federation with no subclusters | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13384](https://issues.apache.org/jira/browse/HDFS-13384) | RBF: Improve timeout RPC call mechanism | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13045](https://issues.apache.org/jira/browse/HDFS-13045) | RBF: Improve error message returned from subcluster | Minor | . | Wei Yan | Íñigo Goiri | +| [HDFS-13428](https://issues.apache.org/jira/browse/HDFS-13428) | RBF: Remove LinkedList From StateStoreFileImpl.java | Trivial | federation | BELUGA BEHR | BELUGA BEHR | +| [HADOOP-14999](https://issues.apache.org/jira/browse/HADOOP-14999) | AliyunOSS: provide one asynchronous multi-part based uploading mechanism | Major | fs/oss | Genmao Yu | Genmao Yu | +| [YARN-7810](https://issues.apache.org/jira/browse/YARN-7810) | TestDockerContainerRuntime test failures due to UID lookup of a non-existent user | Major | . 
| Shane Kumpf | Shane Kumpf | +| [HDFS-13435](https://issues.apache.org/jira/browse/HDFS-13435) | RBF: Improve the error loggings for printing the stack trace | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-7189](https://issues.apache.org/jira/browse/YARN-7189) | Container-executor doesn't remove Docker containers that error out early | Major | yarn | Eric Badger | Eric Badger | +| [HDFS-13466](https://issues.apache.org/jira/browse/HDFS-13466) | RBF: Add more router-related information to the UI | Minor | . | Wei Yan | Wei Yan | +| [HDFS-13453](https://issues.apache.org/jira/browse/HDFS-13453) | RBF: getMountPointDates should fetch latest subdir time/date when parent dir is not present but /parent/child dirs are present in mount table | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HDFS-13478](https://issues.apache.org/jira/browse/HDFS-13478) | RBF: Disabled Nameservice store API | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13490](https://issues.apache.org/jira/browse/HDFS-13490) | RBF: Fix setSafeMode in the Router | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13484](https://issues.apache.org/jira/browse/HDFS-13484) | RBF: Disable Nameservices from the federation | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13326](https://issues.apache.org/jira/browse/HDFS-13326) | RBF: Improve the interfaces to modify and view mount tables | Minor | . | Wei Yan | Gang Li | +| [HDFS-13499](https://issues.apache.org/jira/browse/HDFS-13499) | RBF: Show disabled name services in the UI | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-8215](https://issues.apache.org/jira/browse/YARN-8215) | ATS v2 returns invalid YARN\_CONTAINER\_ALLOCATED\_HOST\_HTTP\_ADDRESS from NM | Critical | ATSv2 | Yesha Vora | Rohith Sharma K S | +| [HDFS-13508](https://issues.apache.org/jira/browse/HDFS-13508) | RBF: Normalize paths (automatically) when adding, updating, removing or listing mount table entries | Minor | . | Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [HDFS-13434](https://issues.apache.org/jira/browse/HDFS-13434) | RBF: Fix dead links in RBF document | Major | documentation | Akira Ajisaka | Chetna Chaudhari | +| [HDFS-13488](https://issues.apache.org/jira/browse/HDFS-13488) | RBF: Reject requests when a Router is overloaded | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13525](https://issues.apache.org/jira/browse/HDFS-13525) | RBF: Add unit test TestStateStoreDisabledNameservice | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-8253](https://issues.apache.org/jira/browse/YARN-8253) | HTTPS Ats v2 api call fails with "bad HTTP parsed" | Critical | ATSv2 | Yesha Vora | Charan Hebri | +| [HADOOP-15454](https://issues.apache.org/jira/browse/HADOOP-15454) | TestRollingFileSystemSinkWithLocal fails on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HDFS-13346](https://issues.apache.org/jira/browse/HDFS-13346) | RBF: Fix synchronization of router quota and nameservice quota | Major | . | liuhongtong | Yiqun Lin | +| [YARN-8247](https://issues.apache.org/jira/browse/YARN-8247) | Incorrect HTTP status code returned by ATSv2 for non-whitelisted users | Critical | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8130](https://issues.apache.org/jira/browse/YARN-8130) | Race condition when container events are published for KILLED applications | Major | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-7900](https://issues.apache.org/jira/browse/YARN-7900) | [AMRMProxy] AMRMClientRelayer for stateful FederationInterceptor | Major | . 
| Botong Huang | Botong Huang | +| [HADOOP-15498](https://issues.apache.org/jira/browse/HADOOP-15498) | TestHadoopArchiveLogs (#testGenerateScript, #testPrepareWorkingDir) fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13637](https://issues.apache.org/jira/browse/HDFS-13637) | RBF: Router fails when threadIndex (in ConnectionPool) wraps around Integer.MIN\_VALUE | Critical | federation | CR Hota | CR Hota | +| [YARN-4781](https://issues.apache.org/jira/browse/YARN-4781) | Support intra-queue preemption for fairness ordering policy. | Major | scheduler | Wangda Tan | Eric Payne | +| [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks | Minor | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [HADOOP-15529](https://issues.apache.org/jira/browse/HADOOP-15529) | ContainerLaunch#testInvalidEnvVariableSubstitutionType is not supported in Windows | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15533](https://issues.apache.org/jira/browse/HADOOP-15533) | Make WASB listStatus messages consistent | Trivial | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [HADOOP-15458](https://issues.apache.org/jira/browse/HADOOP-15458) | TestLocalFileSystem#testFSOutputStreamBuilder fails on Windows | Minor | test | Xiao Liang | Xiao Liang | +| [HDFS-13528](https://issues.apache.org/jira/browse/HDFS-13528) | RBF: If a directory exceeds quota limit then quota usage is not refreshed for other mount entries | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HDFS-13710](https://issues.apache.org/jira/browse/HDFS-13710) | RBF: setQuota and getQuotaUsage should check the dfs.federation.router.quota.enable | Major | federation, hdfs | yanghuafeng | yanghuafeng | +| [HDFS-13726](https://issues.apache.org/jira/browse/HDFS-13726) | RBF: Fix RBF configuration links | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13475](https://issues.apache.org/jira/browse/HDFS-13475) | RBF: Admin cannot enforce Router enter SafeMode | Major | . | Wei Yan | Chao Sun | +| [HDFS-13733](https://issues.apache.org/jira/browse/HDFS-13733) | RBF: Add Web UI configurations and descriptions to RBF document | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13743](https://issues.apache.org/jira/browse/HDFS-13743) | RBF: Router throws NullPointerException due to the invalid initialization of MountTableResolver | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13583](https://issues.apache.org/jira/browse/HDFS-13583) | RBF: Router admin clrQuota is not synchronized with nameservice | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HDFS-13750](https://issues.apache.org/jira/browse/HDFS-13750) | RBF: Router ID in RouterRpcClient is always null | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8129](https://issues.apache.org/jira/browse/YARN-8129) | Improve error message for invalid value in fields attribute | Minor | ATSv2 | Charan Hebri | Abhishek Modi | +| [HDFS-13848](https://issues.apache.org/jira/browse/HDFS-13848) | Refactor NameNode failover proxy providers | Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-13634](https://issues.apache.org/jira/browse/HDFS-13634) | RBF: Configurable value in xml for async connection request queue size. 
| Major | federation | CR Hota | CR Hota | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15149](https://issues.apache.org/jira/browse/HADOOP-15149) | CryptoOutputStream should implement StreamCapabilities | Major | fs | Mike Drob | Xiao Chen | +| [HADOOP-15177](https://issues.apache.org/jira/browse/HADOOP-15177) | Update the release year to 2018 | Blocker | build | Akira Ajisaka | Bharat Viswanadham | +| [YARN-8412](https://issues.apache.org/jira/browse/YARN-8412) | Move ResourceRequest.clone logic everywhere into a proper API | Minor | . | Botong Huang | Botong Huang | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.10.0/RELEASENOTES.2.10.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.10.0/RELEASENOTES.2.10.0.md new file mode 100644 index 00000000000..ca8949646a1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.10.0/RELEASENOTES.2.10.0.md @@ -0,0 +1,115 @@ + + +# Apache Hadoop 2.10.0 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HDFS-12883](https://issues.apache.org/jira/browse/HDFS-12883) | *Major* | **RBF: Document Router and State Store metrics** + +This JIRA makes the following change: +Change the Router metrics context from 'router' to 'dfs'. + + +--- + +* [HDFS-12895](https://issues.apache.org/jira/browse/HDFS-12895) | *Major* | **RBF: Add ACL support for mount table** + +Mount tables now support ACLs. Pre-existing (previously permission-less) mount table entries are assigned the defaults owner:superuser, group:supergroup, permission:755, so ordinary users cannot modify their own old entries; log in as the superuser to modify these mount table entries. + + +--- + +* [HADOOP-15027](https://issues.apache.org/jira/browse/HADOOP-15027) | *Major* | **AliyunOSS: Support multi-thread pre-read to improve sequential read from Hadoop to Aliyun OSS performance** + +Support multi-thread pre-read in AliyunOSSInputStream to improve the sequential read performance from Hadoop to Aliyun OSS. + + +--- + +* [MAPREDUCE-7029](https://issues.apache.org/jira/browse/MAPREDUCE-7029) | *Minor* | **FileOutputCommitter is slow on filesystems lacking recursive delete** + +MapReduce jobs that output to filesystems without direct support for recursive delete can set mapreduce.fileoutputcommitter.task.cleanup.enabled=true to have each task delete its intermediate work directory rather than waiting for the ApplicationMaster to clean up at the end of the job. This can significantly speed up the cleanup phase for large jobs on such filesystems. + + +--- + +* [HDFS-12528](https://issues.apache.org/jira/browse/HDFS-12528) | *Major* | **Add an option to not disable short-circuit reads on failures** + +Added an option to not disable short-circuit reads on failures: set dfs.domain.socket.disable.interval.seconds to 0. + + +--- + +* [HDFS-13083](https://issues.apache.org/jira/browse/HDFS-13083) | *Major* | **RBF: Fix doc error setting up client** + +Fixed a documentation error in setting up HDFS Router-based Federation. + + +--- + +* [HDFS-13099](https://issues.apache.org/jira/browse/HDFS-13099) | *Minor* | **RBF: Use the ZooKeeper as the default State Store** + +Changed the default State Store from the local file implementation to ZooKeeper. This requires an additional ZooKeeper address to be configured.
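The MAPREDUCE-7029 and HDFS-12528 notes above are configuration how-tos; the following is a minimal sketch of applying them. The jar name, main class, and paths are placeholders, the `-D` form assumes the job parses generic options via ToolRunner/GenericOptionsParser, and the XML fragment is illustrative rather than a complete hdfs-site.xml.

```bash
# Placeholder jar, class, and paths: enable per-task cleanup of the
# FileOutputCommitter work directory for one job submission (assumes the job
# parses generic options via ToolRunner/GenericOptionsParser).
hadoop jar my-app.jar com.example.MyJob \
  -D mapreduce.fileoutputcommitter.task.cleanup.enabled=true \
  /input /output

# Keep short-circuit reads enabled after failures: illustrative hdfs-site.xml
# fragment (not a complete file) setting the disable interval to 0.
cat <<'EOF'
<property>
  <name>dfs.domain.socket.disable.interval.seconds</name>
  <value>0</value>
</property>
EOF
```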
+ + +--- + +* [YARN-7919](https://issues.apache.org/jira/browse/YARN-7919) | *Major* | **Refactor timelineservice-hbase module into submodules** + +The HBase integration module previously mixed hbase-server and hbase-client dependencies. This JIRA splits it into submodules so that hbase-client-dependent and hbase-server-dependent modules are separated, which allows conditional compilation against different versions of HBase. + + +--- + +* [HDFS-13492](https://issues.apache.org/jira/browse/HDFS-13492) | *Major* | **Limit httpfs binds to certain IP addresses in branch-2** + +Use the environment variable HTTPFS\_HTTP\_HOSTNAME to limit the IP addresses the httpfs server binds to. By default, the httpfs server binds to all IP addresses on the host. + + +--- + +* [HADOOP-15446](https://issues.apache.org/jira/browse/HADOOP-15446) | *Major* | **WASB: PageBlobInputStream.skip breaks HBASE replication** + +WASB: Bug fix to support non-sequential page blob reads. Required for HBASE replication. + + +--- + +* [HADOOP-15478](https://issues.apache.org/jira/browse/HADOOP-15478) | *Major* | **WASB: hflush() and hsync() regression** + +WASB: Bug fix for recent regression in hflush() and hsync(). + + +--- + +* [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | *Minor* | **Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks** + +WASB: Fix Spark process hang at shutdown due to use of non-daemon threads by updating Azure Storage Java SDK to 7.0. + + +--- + +* [HDFS-13553](https://issues.apache.org/jira/browse/HDFS-13553) | *Major* | **RBF: Support global quota** + +Federation supports and controls global quotas at the mount table level. + +In a federated environment, a folder can be spread across multiple subclusters. The Router aggregates the quota usage queried from these subclusters and uses it for quota verification.
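As a minimal sketch of the HDFS-13492 and HDFS-13553 notes above: the address, mount path, and quota values below are placeholders, the httpfs-env.sh location varies by layout, and the dfsrouteradmin quota options are assumed to mirror the dfsadmin quota commands.

```bash
# Bind the httpfs server to a single address (placeholder); typically exported
# in httpfs-env.sh before starting the server.
export HTTPFS_HTTP_HOSTNAME=10.0.0.12

# Set and clear a global quota on a federated mount point via the Router admin
# tool (placeholder path; 100000 names, 10 GB of space given as raw bytes).
hdfs dfsrouteradmin -setQuota /apps/logs -nsQuota 100000 -ssQuota 10737418240
hdfs dfsrouteradmin -clrQuota /apps/logs
```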
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.0/CHANGES.2.2.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.0/CHANGELOG.2.2.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.0/CHANGES.2.2.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.0/CHANGELOG.2.2.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.1/CHANGES.2.2.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.1/CHANGELOG.2.2.1.md similarity index 96% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.1/CHANGES.2.2.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.1/CHANGELOG.2.2.1.md index 4284bb8bb41..bf20c21d4e4 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.1/CHANGES.2.2.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.2.1/CHANGELOG.2.2.1.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 2.2.1 - Unreleased (as of 2017-08-28) +## Release 2.2.1 - Unreleased (as of 2018-09-02) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.3.0/CHANGES.2.3.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.3.0/CHANGELOG.2.3.0.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.3.0/CHANGES.2.3.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.3.0/CHANGELOG.2.3.0.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.0/CHANGES.2.4.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.0/CHANGELOG.2.4.0.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.0/CHANGES.2.4.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.0/CHANGELOG.2.4.0.md index 4426ba90baf..884483d5757 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.0/CHANGES.2.4.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.0/CHANGELOG.2.4.0.md @@ -178,7 +178,7 @@ | [HADOOP-10328](https://issues.apache.org/jira/browse/HADOOP-10328) | loadGenerator exit code is not reliable | Major | tools | Arpit Gupta | Haohui Mai | | [HDFS-5944](https://issues.apache.org/jira/browse/HDFS-5944) | LeaseManager:findLeaseWithPrefixPath can't handle path like /a/b/ right and cause SecondaryNameNode failed do checkpoint | Major | namenode | yunjiong zhao | yunjiong zhao | | [HDFS-5982](https://issues.apache.org/jira/browse/HDFS-5982) | Need to update snapshot manager when applying editlog for deleting a snapshottable directory | Critical | namenode | Tassapol Athiapinya | Jing Zhao | -| [YARN-1398](https://issues.apache.org/jira/browse/YARN-1398) | Deadlock in capacity scheduler leaf queue and parent queue for getQueueInfo and completedContainer call | Blocker | resourcemanager | Sunil G | Vinod Kumar Vavilapalli | +| [YARN-1398](https://issues.apache.org/jira/browse/YARN-1398) | Deadlock in capacity scheduler leaf queue and parent queue for getQueueInfo and completedContainer call | Blocker | resourcemanager | Sunil Govindan | Vinod Kumar Vavilapalli | | [HDFS-5988](https://issues.apache.org/jira/browse/HDFS-5988) | Bad fsimage always generated after upgrade | Blocker | namenode | Andrew Wang | Andrew Wang | | [HADOOP-10355](https://issues.apache.org/jira/browse/HADOOP-10355) | 
TestLoadGenerator#testLoadGenerator fails | Major | . | Akira Ajisaka | Haohui Mai | | [HADOOP-10352](https://issues.apache.org/jira/browse/HADOOP-10352) | Recursive setfacl erroneously attempts to apply default ACL to files. | Major | fs | Chris Nauroth | Chris Nauroth | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.1/CHANGES.2.4.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.1/CHANGELOG.2.4.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.1/CHANGES.2.4.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.4.1/CHANGELOG.2.4.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.0/CHANGES.2.5.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.0/CHANGELOG.2.5.0.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.0/CHANGES.2.5.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.0/CHANGELOG.2.5.0.md index b27686a3c0e..13b8f3035c3 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.0/CHANGES.2.5.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.0/CHANGELOG.2.5.0.md @@ -520,7 +520,7 @@ | [HADOOP-10649](https://issues.apache.org/jira/browse/HADOOP-10649) | Allow overriding the default ACL for service authorization | Major | security | Benoy Antony | Benoy Antony | | [YARN-2052](https://issues.apache.org/jira/browse/YARN-2052) | ContainerId creation after work preserving restart is broken | Major | resourcemanager | Tsuyoshi Ozawa | Tsuyoshi Ozawa | | [YARN-1713](https://issues.apache.org/jira/browse/YARN-1713) | Implement getnewapplication and submitapp as part of RM web service | Blocker | . | Varun Vasudev | Varun Vasudev | -| [YARN-2022](https://issues.apache.org/jira/browse/YARN-2022) | Preempting an Application Master container can be kept as least priority when multiple applications are marked for preemption by ProportionalCapacityPreemptionPolicy | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-2022](https://issues.apache.org/jira/browse/YARN-2022) | Preempting an Application Master container can be kept as least priority when multiple applications are marked for preemption by ProportionalCapacityPreemptionPolicy | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [MAPREDUCE-5900](https://issues.apache.org/jira/browse/MAPREDUCE-5900) | Container preemption interpreted as task failures and eventually job failures | Major | applicationmaster, mr-am, mrv2 | Mayank Bansal | Mayank Bansal | | [HADOOP-9711](https://issues.apache.org/jira/browse/HADOOP-9711) | Write contract tests for S3Native; fix places where it breaks | Minor | fs/s3 | Steve Loughran | Steve Loughran | | [HADOOP-9371](https://issues.apache.org/jira/browse/HADOOP-9371) | Define Semantics of FileSystem more rigorously | Major | fs | Steve Loughran | Steve Loughran | @@ -530,7 +530,7 @@ | [HDFS-5411](https://issues.apache.org/jira/browse/HDFS-5411) | Update Bookkeeper dependency to 4.2.3 | Minor | . 
| Robert Rati | Rakesh R | | [YARN-1366](https://issues.apache.org/jira/browse/YARN-1366) | AM should implement Resync with the ApplicationMasterService instead of shutting down | Major | resourcemanager | Bikas Saha | Rohith Sharma K S | | [YARN-2233](https://issues.apache.org/jira/browse/YARN-2233) | Implement web services to create, renew and cancel delegation tokens | Blocker | resourcemanager | Varun Vasudev | Varun Vasudev | -| [YARN-1408](https://issues.apache.org/jira/browse/YARN-1408) | Preemption caused Invalid State Event: ACQUIRED at KILLED and caused a task timeout for 30mins | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-1408](https://issues.apache.org/jira/browse/YARN-1408) | Preemption caused Invalid State Event: ACQUIRED at KILLED and caused a task timeout for 30mins | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HADOOP-10864](https://issues.apache.org/jira/browse/HADOOP-10864) | Tool documentenation is broken | Minor | documentation | Allen Wittenauer | Akira Ajisaka | | [HADOOP-10894](https://issues.apache.org/jira/browse/HADOOP-10894) | Fix dead link in ToolRunner documentation | Minor | documentation | Akira Ajisaka | Akira Ajisaka | | [YARN-2247](https://issues.apache.org/jira/browse/YARN-2247) | Allow RM web services users to authenticate using delegation tokens | Blocker | . | Varun Vasudev | Varun Vasudev | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.1/CHANGES.2.5.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.1/CHANGELOG.2.5.1.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.1/CHANGES.2.5.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.1/CHANGELOG.2.5.1.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.2/CHANGES.2.5.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.2/CHANGELOG.2.5.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.2/CHANGES.2.5.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.5.2/CHANGELOG.2.5.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.0/CHANGES.2.6.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.0/CHANGELOG.2.6.0.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.0/CHANGES.2.6.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.0/CHANGELOG.2.6.0.md index 91aa76df035..c5dc4999757 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.0/CHANGES.2.6.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.0/CHANGELOG.2.6.0.md @@ -941,7 +941,7 @@ | [YARN-2795](https://issues.apache.org/jira/browse/YARN-2795) | Resource Manager fails startup with HDFS label storage and secure cluster | Major | resourcemanager | Phil D'Amore | Wangda Tan | | [YARN-2678](https://issues.apache.org/jira/browse/YARN-2678) | Improved Yarn Registry service record structure | Major | api, resourcemanager | Gour Saha | Steve Loughran | | [YARN-2744](https://issues.apache.org/jira/browse/YARN-2744) | Under some scenario, it is possible to end up with capacity scheduler configuration that uses labels that no longer exist | Critical | capacityscheduler | Sumit Mohanty | Wangda Tan | -| [YARN-2647](https://issues.apache.org/jira/browse/YARN-2647) | Add yarn queue CLI to get queue infos 
| Major | client | Wangda Tan | Sunil G | +| [YARN-2647](https://issues.apache.org/jira/browse/YARN-2647) | Add yarn queue CLI to get queue infos | Major | client | Wangda Tan | Sunil Govindan | | [YARN-2824](https://issues.apache.org/jira/browse/YARN-2824) | Capacity of labels should be zero by default | Critical | resourcemanager | Wangda Tan | Wangda Tan | | [YARN-2753](https://issues.apache.org/jira/browse/YARN-2753) | Fix potential issues and code clean up for \*NodeLabelsManager | Major | . | zhihai xu | zhihai xu | | [YARN-2632](https://issues.apache.org/jira/browse/YARN-2632) | Document NM Restart feature | Blocker | nodemanager | Junping Du | Junping Du | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.1/CHANGES.2.6.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.1/CHANGELOG.2.6.1.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.1/CHANGES.2.6.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.1/CHANGELOG.2.6.1.md index c972e565ce1..0e63de66df2 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.1/CHANGES.2.6.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.1/CHANGELOG.2.6.1.md @@ -66,7 +66,7 @@ | [YARN-2905](https://issues.apache.org/jira/browse/YARN-2905) | AggregatedLogsBlock page can infinitely loop if the aggregated log file is corrupted | Blocker | . | Jason Lowe | Varun Saxena | | [YARN-2894](https://issues.apache.org/jira/browse/YARN-2894) | When ACL's are enabled, if RM switches then application can not be viewed from web. | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | | [YARN-2874](https://issues.apache.org/jira/browse/YARN-2874) | Dead lock in "DelegationTokenRenewer" which blocks RM to execute any further apps | Blocker | resourcemanager | Naganarasimha G R | Naganarasimha G R | -| [HADOOP-11343](https://issues.apache.org/jira/browse/HADOOP-11343) | Overflow is not properly handled in caclulating final iv for AES CTR | Blocker | security | Jerry Chen | Jerry Chen | +| [HADOOP-11343](https://issues.apache.org/jira/browse/HADOOP-11343) | Overflow is not properly handled in caclulating final iv for AES CTR | Blocker | security | Haifeng Chen | Haifeng Chen | | [HADOOP-11368](https://issues.apache.org/jira/browse/HADOOP-11368) | Fix SSLFactory truststore reloader thread leak in KMSClientProvider | Major | kms | Arun Suresh | Arun Suresh | | [HDFS-7489](https://issues.apache.org/jira/browse/HDFS-7489) | Incorrect locking in FsVolumeList#checkDirs can hang datanodes | Critical | datanode | Noah Lorang | Noah Lorang | | [YARN-2910](https://issues.apache.org/jira/browse/YARN-2910) | FSLeafQueue can throw ConcurrentModificationException | Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.2/CHANGES.2.6.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.2/CHANGELOG.2.6.2.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.2/CHANGES.2.6.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.2/CHANGELOG.2.6.2.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.3/CHANGES.2.6.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.3/CHANGELOG.2.6.3.md similarity index 100% rename from 
hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.3/CHANGES.2.6.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.3/CHANGELOG.2.6.3.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.4/CHANGES.2.6.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.4/CHANGELOG.2.6.4.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.4/CHANGES.2.6.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.4/CHANGELOG.2.6.4.md index c87a205979c..14054e77e32 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.4/CHANGES.2.6.4.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.4/CHANGELOG.2.6.4.md @@ -50,7 +50,7 @@ | [YARN-3842](https://issues.apache.org/jira/browse/YARN-3842) | NMProxy should retry on NMNotYetReadyException | Critical | . | Karthik Kambatla | Robert Kanter | | [YARN-3695](https://issues.apache.org/jira/browse/YARN-3695) | ServerProxy (NMProxy, etc.) shouldn't retry forever for non network exception. | Major | . | Junping Du | Raju Bairishetti | | [HADOOP-12107](https://issues.apache.org/jira/browse/HADOOP-12107) | long running apps may have a huge number of StatisticsData instances under FileSystem | Critical | fs | Sangjin Lee | Sangjin Lee | -| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil G | Sunil G | +| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil Govindan | Sunil Govindan | | [HDFS-8767](https://issues.apache.org/jira/browse/HDFS-8767) | RawLocalFileSystem.listStatus() returns null for UNIX pipefile | Critical | . 
| Haohui Mai | Kanaka Kumar Avvaru | | [YARN-3535](https://issues.apache.org/jira/browse/YARN-3535) | Scheduler must re-request container resources when RMContainer transitions from ALLOCATED to KILLED | Critical | capacityscheduler, fairscheduler, resourcemanager | Peng Zhang | Peng Zhang | | [YARN-3857](https://issues.apache.org/jira/browse/YARN-3857) | Memory leak in ResourceManager with SIMPLE mode | Critical | resourcemanager | mujunchao | mujunchao | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.5/CHANGES.2.6.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.5/CHANGELOG.2.6.5.md similarity index 100% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.5/CHANGES.2.6.5.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.5/CHANGELOG.2.6.5.md diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.6/CHANGES.2.6.6.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.6/CHANGELOG.2.6.6.md similarity index 77% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.6/CHANGES.2.6.6.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.6/CHANGELOG.2.6.6.md index 262e66109bb..d437bc84dd5 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.6/CHANGES.2.6.6.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.6.6/CHANGELOG.2.6.6.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 2.6.6 - Unreleased (as of 2017-08-28) +## Release 2.6.6 - Unreleased (as of 2018-09-02) ### INCOMPATIBLE CHANGES: @@ -27,10 +27,19 @@ | [HADOOP-13812](https://issues.apache.org/jira/browse/HADOOP-13812) | Upgrade Tomcat to 6.0.48 | Blocker | kms | John Zhuge | John Zhuge | +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-8865](https://issues.apache.org/jira/browse/HDFS-8865) | Improve quota initialization performance | Major | . | Kihwal Lee | Kihwal Lee | +| [YARN-7590](https://issues.apache.org/jira/browse/YARN-7590) | Improve container-executor validation check | Major | security, yarn | Eric Yang | Eric Yang | + + ### BUG FIXES: | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-9003](https://issues.apache.org/jira/browse/HDFS-9003) | ForkJoin thread pool leaks | Major | . | Kihwal Lee | Kihwal Lee | | [YARN-4328](https://issues.apache.org/jira/browse/YARN-4328) | Findbugs warning in resourcemanager in branch-2.7 and branch-2.6 | Minor | resourcemanager | Varun Saxena | Akira Ajisaka | | [HDFS-11180](https://issues.apache.org/jira/browse/HDFS-11180) | Intermittent deadlock in NameNode when failover happens. | Blocker | namenode | Abhishek Modi | Akira Ajisaka | | [HDFS-11352](https://issues.apache.org/jira/browse/HDFS-11352) | Potential deadlock in NN when failing over | Critical | namenode | Erik Krogen | Erik Krogen | @@ -38,6 +47,7 @@ | [YARN-6056](https://issues.apache.org/jira/browse/YARN-6056) | Yarn NM using LCE shows a failure when trying to delete a non-existing dir | Major | yarn | Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [YARN-6615](https://issues.apache.org/jira/browse/YARN-6615) | AmIpFilter drops query parameters on redirect | Major | . 
| Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [HADOOP-14474](https://issues.apache.org/jira/browse/HADOOP-14474) | Use OpenJDK 7 instead of Oracle JDK 7 to avoid oracle-java7-installer failures | Major | build | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-15143](https://issues.apache.org/jira/browse/HADOOP-15143) | NPE due to Invalid KerberosTicket in UGI | Major | . | Jitendra Nath Pandey | Mukul Kumar Singh | ### TESTS: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/CHANGES.2.7.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/CHANGELOG.2.7.0.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/CHANGES.2.7.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/CHANGELOG.2.7.0.md index b0877829f6c..a4e7b633468 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/CHANGES.2.7.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/CHANGELOG.2.7.0.md @@ -28,6 +28,7 @@ | [HADOOP-11311](https://issues.apache.org/jira/browse/HADOOP-11311) | Restrict uppercase key names from being created with JCEKS | Major | security | Andrew Wang | Andrew Wang | | [HDFS-7210](https://issues.apache.org/jira/browse/HDFS-7210) | Avoid two separate RPC's namenode.append() and namenode.getFileInfo() for an append call from DFSClient | Major | hdfs-client, namenode | Vinayakumar B | Vinayakumar B | | [HADOOP-10530](https://issues.apache.org/jira/browse/HADOOP-10530) | Make hadoop trunk build on Java7+ only | Blocker | build | Steve Loughran | Steve Loughran | +| [MAPREDUCE-5420](https://issues.apache.org/jira/browse/MAPREDUCE-5420) | Remove mapreduce.task.tmp.dir from mapred-default.xml | Major | . | Sandy Ryza | James Carman | | [HADOOP-11385](https://issues.apache.org/jira/browse/HADOOP-11385) | Prevent cross site scripting attack on JMXJSONServlet | Critical | . | Haohui Mai | Haohui Mai | | [HADOOP-11498](https://issues.apache.org/jira/browse/HADOOP-11498) | Bump the version of HTrace to 3.1.0-incubating | Major | tracing | Masatake Iwasaki | Masatake Iwasaki | | [HDFS-6651](https://issues.apache.org/jira/browse/HDFS-6651) | Deletion failure can leak inodes permanently | Critical | . | Kihwal Lee | Jing Zhao | @@ -193,13 +194,13 @@ | [HADOOP-10626](https://issues.apache.org/jira/browse/HADOOP-10626) | Limit Returning Attributes for LDAP search | Major | security | Jason Hubbard | Jason Hubbard | | [HDFS-7675](https://issues.apache.org/jira/browse/HDFS-7675) | Remove unused member DFSClient#spanReceiverHost | Trivial | hdfs-client | Konstantin Shvachko | Colin P. McCabe | | [HADOOP-10525](https://issues.apache.org/jira/browse/HADOOP-10525) | Remove DRFA.MaxBackupIndex config from log4j.properties | Minor | . | Akira Ajisaka | Akira Ajisaka | -| [HADOOP-11441](https://issues.apache.org/jira/browse/HADOOP-11441) | Hadoop-azure: Change few methods scope to public | Minor | tools | shashank | shashank | +| [HADOOP-11441](https://issues.apache.org/jira/browse/HADOOP-11441) | Hadoop-azure: Change few methods scope to public | Minor | tools | Shashank Khandelwal | Shashank Khandelwal | | [MAPREDUCE-6150](https://issues.apache.org/jira/browse/MAPREDUCE-6150) | Update document of Rumen | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | | [YARN-3108](https://issues.apache.org/jira/browse/YARN-3108) | ApplicationHistoryServer doesn't process -D arguments | Major | . 
| Chang Li | Chang Li | | [MAPREDUCE-6151](https://issues.apache.org/jira/browse/MAPREDUCE-6151) | Update document of GridMix | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | | [HDFS-7706](https://issues.apache.org/jira/browse/HDFS-7706) | Switch BlockManager logging to use slf4j | Minor | namenode | Andrew Wang | Andrew Wang | | [YARN-3077](https://issues.apache.org/jira/browse/YARN-3077) | RM should create yarn.resourcemanager.zk-state-store.parent-path recursively | Major | resourcemanager | Chun Chen | Chun Chen | -| [HADOOP-11442](https://issues.apache.org/jira/browse/HADOOP-11442) | hadoop-azure: Create test jar | Major | tools | shashank | shashank | +| [HADOOP-11442](https://issues.apache.org/jira/browse/HADOOP-11442) | hadoop-azure: Create test jar | Major | tools | Shashank Khandelwal | Shashank Khandelwal | | [MAPREDUCE-6143](https://issues.apache.org/jira/browse/MAPREDUCE-6143) | add configuration for mapreduce speculative execution in MR2 | Major | mrv2 | zhihai xu | zhihai xu | | [YARN-3085](https://issues.apache.org/jira/browse/YARN-3085) | Application summary should include the application type | Major | resourcemanager | Jason Lowe | Rohith Sharma K S | | [HADOOP-11045](https://issues.apache.org/jira/browse/HADOOP-11045) | Introducing a tool to detect flaky tests of hadoop jenkins test job | Major | build, tools | Yongjun Zhang | Yongjun Zhang | @@ -288,7 +289,7 @@ | [HDFS-7434](https://issues.apache.org/jira/browse/HDFS-7434) | DatanodeID hashCode should not be mutable | Major | namenode | Daryn Sharp | Daryn Sharp | | [HADOOP-11648](https://issues.apache.org/jira/browse/HADOOP-11648) | Set DomainSocketWatcher thread name explicitly | Major | net | Liang Xie | Liang Xie | | [YARN-3249](https://issues.apache.org/jira/browse/YARN-3249) | Add a "kill application" button to Resource Manager's Web UI | Minor | resourcemanager | Ryu Kobayashi | Ryu Kobayashi | -| [HADOOP-11642](https://issues.apache.org/jira/browse/HADOOP-11642) | Upgrade azure sdk version from 0.6.0 to 2.0.0 | Major | tools | shashank | shashank | +| [HADOOP-11642](https://issues.apache.org/jira/browse/HADOOP-11642) | Upgrade azure sdk version from 0.6.0 to 2.0.0 | Major | tools | Shashank Khandelwal | Shashank Khandelwal | | [HDFS-7411](https://issues.apache.org/jira/browse/HDFS-7411) | Refactor and improve decommissioning logic into DecommissionManager | Major | . | Andrew Wang | Andrew Wang | | [HDFS-7898](https://issues.apache.org/jira/browse/HDFS-7898) | Change TestAppendSnapshotTruncate to fail-fast | Minor | test | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [HDFS-6806](https://issues.apache.org/jira/browse/HDFS-6806) | HDFS Rolling upgrade document should mention the versions available | Minor | documentation | Akira Ajisaka | J.Andreina | @@ -414,7 +415,7 @@ | [HADOOP-11355](https://issues.apache.org/jira/browse/HADOOP-11355) | When accessing data in HDFS and the key has been deleted, a Null Pointer Exception is shown. | Minor | . | Arun Suresh | Arun Suresh | | [YARN-2461](https://issues.apache.org/jira/browse/YARN-2461) | Fix PROCFS\_USE\_SMAPS\_BASED\_RSS\_ENABLED property in YarnConfiguration | Minor | . 
| Ray Chiang | Ray Chiang | | [YARN-2869](https://issues.apache.org/jira/browse/YARN-2869) | CapacityScheduler should trim sub queue names when parse configuration | Major | capacityscheduler, resourcemanager | Wangda Tan | Wangda Tan | -| [HADOOP-11343](https://issues.apache.org/jira/browse/HADOOP-11343) | Overflow is not properly handled in caclulating final iv for AES CTR | Blocker | security | Jerry Chen | Jerry Chen | +| [HADOOP-11343](https://issues.apache.org/jira/browse/HADOOP-11343) | Overflow is not properly handled in caclulating final iv for AES CTR | Blocker | security | Haifeng Chen | Haifeng Chen | | [MAPREDUCE-6177](https://issues.apache.org/jira/browse/MAPREDUCE-6177) | Minor typo in the EncryptedShuffle document about ssl-client.xml | Trivial | documentation | wyp | wyp | | [HDFS-7473](https://issues.apache.org/jira/browse/HDFS-7473) | Document setting dfs.namenode.fs-limits.max-directory-items to 0 is invalid | Major | documentation | Jason Keller | Akira Ajisaka | | [HADOOP-11354](https://issues.apache.org/jira/browse/HADOOP-11354) | ThrottledInputStream doesn't perform effective throttling | Major | . | Ted Yu | Ted Yu | @@ -444,7 +445,7 @@ | [HADOOP-11238](https://issues.apache.org/jira/browse/HADOOP-11238) | Update the NameNode's Group Cache in the background when possible | Minor | . | Chris Li | Chris Li | | [HADOOP-11394](https://issues.apache.org/jira/browse/HADOOP-11394) | hadoop-aws documentation missing. | Major | documentation | Chris Nauroth | Chris Nauroth | | [MAPREDUCE-4879](https://issues.apache.org/jira/browse/MAPREDUCE-4879) | TeraOutputFormat may overwrite an existing output directory | Major | examples | Gera Shegalov | Gera Shegalov | -| [YARN-2356](https://issues.apache.org/jira/browse/YARN-2356) | yarn status command for non-existent application/application attempt/container is too verbose | Minor | client | Sunil G | Sunil G | +| [YARN-2356](https://issues.apache.org/jira/browse/YARN-2356) | yarn status command for non-existent application/application attempt/container is too verbose | Minor | client | Sunil Govindan | Sunil Govindan | | [HDFS-7516](https://issues.apache.org/jira/browse/HDFS-7516) | Fix findbugs warnings in hadoop-nfs project | Major | nfs | Brandon Li | Brandon Li | | [HADOOP-10852](https://issues.apache.org/jira/browse/HADOOP-10852) | NetgroupCache is not thread-safe | Major | security | Benoy Antony | Benoy Antony | | [MAPREDUCE-6166](https://issues.apache.org/jira/browse/MAPREDUCE-6166) | Reducers do not validate checksum of map outputs when fetching directly to disk | Major | mrv2 | Eric Payne | Eric Payne | @@ -976,7 +977,6 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | | [HDFS-2486](https://issues.apache.org/jira/browse/HDFS-2486) | Review issues with UnderReplicatedBlocks | Minor | namenode | Steve Loughran | Uma Maheswara Rao G | -| [MAPREDUCE-5420](https://issues.apache.org/jira/browse/MAPREDUCE-5420) | Remove mapreduce.task.tmp.dir from mapred-default.xml | Major | . 
| Sandy Ryza | James Carman | | [YARN-2949](https://issues.apache.org/jira/browse/YARN-2949) | Add documentation for CGroups | Major | documentation, nodemanager | Varun Vasudev | Varun Vasudev | | [HADOOP-11489](https://issues.apache.org/jira/browse/HADOOP-11489) | Dropping dependency on io.netty from hadoop-nfs' pom.xml | Minor | nfs | Ted Yu | Ted Yu | | [HADOOP-11463](https://issues.apache.org/jira/browse/HADOOP-11463) | Replace method-local TransferManager object with S3AFileSystem#transfers | Major | fs/s3 | Ted Yu | Ted Yu | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/RELEASENOTES.2.7.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/RELEASENOTES.2.7.0.md index b321477fc64..9ff52f754f0 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/RELEASENOTES.2.7.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.0/RELEASENOTES.2.7.0.md @@ -63,6 +63,13 @@ Keys with uppercase names can no longer be created when using the JavaKeyStorePr **WARNING: No release note provided for this change.** +--- + +* [MAPREDUCE-5420](https://issues.apache.org/jira/browse/MAPREDUCE-5420) | *Major* | **Remove mapreduce.task.tmp.dir from mapred-default.xml** + +**WARNING: No release note provided for this change.** + + --- * [HADOOP-9629](https://issues.apache.org/jira/browse/HADOOP-9629) | *Major* | **Support Windows Azure Storage - Blob as a file system in Hadoop** diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/CHANGES.2.7.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/CHANGELOG.2.7.1.md similarity index 98% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/CHANGES.2.7.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/CHANGELOG.2.7.1.md index 131647dddae..c1728612be1 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/CHANGES.2.7.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/CHANGELOG.2.7.1.md @@ -27,13 +27,6 @@ | [HDFS-8226](https://issues.apache.org/jira/browse/HDFS-8226) | Non-HA rollback compatibility broken | Blocker | . | J.Andreina | J.Andreina | -### IMPORTANT ISSUES: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [YARN-6959](https://issues.apache.org/jira/browse/YARN-6959) | RM may allocate wrong AM Container for new attempt | Major | capacity scheduler, fairscheduler, scheduler | Yuqi Wang | Yuqi Wang | - - ### IMPROVEMENTS: | JIRA | Summary | Priority | Component | Reporter | Contributor | @@ -173,7 +166,7 @@ | [YARN-3301](https://issues.apache.org/jira/browse/YARN-3301) | Fix the format issue of the new RM web UI and AHS web UI after YARN-3272 / YARN-3262 | Major | resourcemanager | Xuan Gong | Xuan Gong | | [YARN-2918](https://issues.apache.org/jira/browse/YARN-2918) | Don't fail RM if queue's configured labels are not existed in cluster-node-labels | Major | resourcemanager | Rohith Sharma K S | Wangda Tan | | [YARN-3609](https://issues.apache.org/jira/browse/YARN-3609) | Move load labels from storage from serviceInit to serviceStart to make it works with RM HA case. 
| Major | resourcemanager | Wangda Tan | Wangda Tan | -| [YARN-3686](https://issues.apache.org/jira/browse/YARN-3686) | CapacityScheduler should trim default\_node\_label\_expression | Critical | api, client, resourcemanager | Wangda Tan | Sunil G | +| [YARN-3686](https://issues.apache.org/jira/browse/YARN-3686) | CapacityScheduler should trim default\_node\_label\_expression | Critical | api, client, resourcemanager | Wangda Tan | Sunil Govindan | | [YARN-2900](https://issues.apache.org/jira/browse/YARN-2900) | Application (Attempt and Container) Not Found in AHS results in Internal Server Error (500) | Major | timelineserver | Jonathan Eagles | Mit Desai | | [YARN-3711](https://issues.apache.org/jira/browse/YARN-3711) | Documentation of ResourceManager HA should explain configurations about listen addresses | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | | [HDFS-8597](https://issues.apache.org/jira/browse/HDFS-8597) | Fix TestFSImage#testZeroBlockSize on Windows | Major | datanode, test | Xiaoyu Yao | Xiaoyu Yao | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/RELEASENOTES.2.7.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/RELEASENOTES.2.7.1.md index d022103bd2b..a8f4b0a9e31 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/RELEASENOTES.2.7.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.1/RELEASENOTES.2.7.1.md @@ -45,11 +45,4 @@ Public service notice: Proxy level retries will not be done on AlreadyBeingCreatedExeption for create() op. ---- - -* [YARN-6959](https://issues.apache.org/jira/browse/YARN-6959) | *Major* | **RM may allocate wrong AM Container for new attempt** - -ResourceManager will now record ResourceRequests from different attempts into different objects. - - diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.2/CHANGES.2.7.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.2/CHANGELOG.2.7.2.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.2/CHANGES.2.7.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.2/CHANGELOG.2.7.2.md index 60795d2da7a..7980983857a 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.2/CHANGES.2.7.2.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.2/CHANGELOG.2.7.2.md @@ -145,7 +145,7 @@ | [HDFS-9273](https://issues.apache.org/jira/browse/HDFS-9273) | ACLs on root directory may be lost after NN restart | Critical | namenode | Xiao Chen | Xiao Chen | | [YARN-4000](https://issues.apache.org/jira/browse/YARN-4000) | RM crashes with NPE if leaf queue becomes parent queue during restart | Major | capacityscheduler, resourcemanager | Jason Lowe | Varun Saxena | | [YARN-4009](https://issues.apache.org/jira/browse/YARN-4009) | CORS support for ResourceManager REST API | Major | . | Prakash Ramachandran | Varun Vasudev | -| [YARN-4041](https://issues.apache.org/jira/browse/YARN-4041) | Slow delegation token renewal can severely prolong RM recovery | Major | resourcemanager | Jason Lowe | Sunil G | +| [YARN-4041](https://issues.apache.org/jira/browse/YARN-4041) | Slow delegation token renewal can severely prolong RM recovery | Major | resourcemanager | Jason Lowe | Sunil Govindan | | [HDFS-9290](https://issues.apache.org/jira/browse/HDFS-9290) | DFSClient#callAppend() is not backward compatible for slightly older NameNodes | Blocker | . 
| Tony Wu | Tony Wu | | [HDFS-9305](https://issues.apache.org/jira/browse/HDFS-9305) | Delayed heartbeat processing causes storm of subsequent heartbeats | Major | datanode | Chris Nauroth | Arpit Agarwal | | [HDFS-9317](https://issues.apache.org/jira/browse/HDFS-9317) | Document fsck -blockId and -storagepolicy options in branch-2.7 | Major | documentation | Akira Ajisaka | Akira Ajisaka | @@ -194,7 +194,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | -| [YARN-3136](https://issues.apache.org/jira/browse/YARN-3136) | getTransferredContainers can be a bottleneck during AM registration | Major | scheduler | Jason Lowe | Sunil G | +| [YARN-3136](https://issues.apache.org/jira/browse/YARN-3136) | getTransferredContainers can be a bottleneck during AM registration | Major | scheduler | Jason Lowe | Sunil Govindan | | [YARN-3700](https://issues.apache.org/jira/browse/YARN-3700) | ATS Web Performance issue at load time when large number of jobs | Major | resourcemanager, webapp, yarn | Xuan Gong | Xuan Gong | | [YARN-3740](https://issues.apache.org/jira/browse/YARN-3740) | Fixed the typo with the configuration name: APPLICATION\_HISTORY\_PREFIX\_MAX\_APPS | Major | resourcemanager, webapp, yarn | Xuan Gong | Xuan Gong | | [YARN-3969](https://issues.apache.org/jira/browse/YARN-3969) | Allow jobs to be submitted to reservation that is active but does not have any allocations | Major | capacityscheduler, fairscheduler, resourcemanager | Subru Krishnan | Subru Krishnan | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.3/CHANGES.2.7.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.3/CHANGELOG.2.7.3.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.3/CHANGES.2.7.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.3/CHANGELOG.2.7.3.md index 799a9a43d36..0dfe2d2fbac 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.3/CHANGES.2.7.3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.3/CHANGELOG.2.7.3.md @@ -81,7 +81,7 @@ | [MAPREDUCE-6413](https://issues.apache.org/jira/browse/MAPREDUCE-6413) | TestLocalJobSubmission is failing with unknown host | Major | test | Jason Lowe | zhihai xu | | [YARN-3695](https://issues.apache.org/jira/browse/YARN-3695) | ServerProxy (NMProxy, etc.) shouldn't retry forever for non network exception. | Major | . | Junping Du | Raju Bairishetti | | [HADOOP-12107](https://issues.apache.org/jira/browse/HADOOP-12107) | long running apps may have a huge number of StatisticsData instances under FileSystem | Critical | fs | Sangjin Lee | Sangjin Lee | -| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil G | Sunil G | +| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil Govindan | Sunil Govindan | | [HDFS-8772](https://issues.apache.org/jira/browse/HDFS-8772) | Fix TestStandbyIsHot#testDatanodeRestarts which occasionally fails | Major | . 
| Walter Su | Walter Su | | [MAPREDUCE-5817](https://issues.apache.org/jira/browse/MAPREDUCE-5817) | Mappers get rescheduled on node transition even after all reducers are completed | Major | applicationmaster | Sangjin Lee | Sangjin Lee | | [HDFS-8845](https://issues.apache.org/jira/browse/HDFS-8845) | DiskChecker should not traverse the entire tree | Major | . | Chang Li | Chang Li | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/CHANGES.2.7.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/CHANGELOG.2.7.4.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/CHANGES.2.7.4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/CHANGELOG.2.7.4.md index d210ee604d8..e6063008000 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/CHANGES.2.7.4.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/CHANGELOG.2.7.4.md @@ -26,6 +26,7 @@ |:---- |:---- | :--- |:---- |:---- |:---- | | [HDFS-7933](https://issues.apache.org/jira/browse/HDFS-7933) | fsck should also report decommissioning replicas. | Major | namenode | Jitendra Nath Pandey | Xiaoyu Yao | | [HADOOP-13812](https://issues.apache.org/jira/browse/HADOOP-13812) | Upgrade Tomcat to 6.0.48 | Blocker | kms | John Zhuge | John Zhuge | +| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . | Jeffrey E Rodriguez | Yuanbo Liu | | [HADOOP-14138](https://issues.apache.org/jira/browse/HADOOP-14138) | Remove S3A ref from META-INF service discovery, rely on existing core-default entry | Critical | fs/s3 | Steve Loughran | Steve Loughran | @@ -52,7 +53,7 @@ | [HDFS-9726](https://issues.apache.org/jira/browse/HDFS-9726) | Refactor IBR code to a new class | Minor | datanode | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [HADOOP-12668](https://issues.apache.org/jira/browse/HADOOP-12668) | Support excluding weak Ciphers in HttpServer2 through ssl-server.xml | Critical | security | Vijay Singh | Vijay Singh | | [HDFS-9710](https://issues.apache.org/jira/browse/HDFS-9710) | Change DN to send block receipt IBRs in batches | Major | datanode | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | -| [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | getBlocks occupies FSLock and takes too long to complete | Major | . | He Tianyi | He Tianyi | +| [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | getBlocks occupies FSLock and takes too long to complete | Major | balancer & mover, namenode | He Tianyi | He Tianyi | | [HDFS-9902](https://issues.apache.org/jira/browse/HDFS-9902) | Support different values of dfs.datanode.du.reserved per storage type | Major | datanode | Pan Yuxuan | Brahma Reddy Battula | | [HADOOP-13290](https://issues.apache.org/jira/browse/HADOOP-13290) | Appropriate use of generics in FairCallQueue | Major | ipc | Konstantin Shvachko | Jonathan Hung | | [YARN-5483](https://issues.apache.org/jira/browse/YARN-5483) | Optimize RMAppAttempt#pullJustFinishedContainers | Major | . | sandflee | sandflee | @@ -115,7 +116,7 @@ | [YARN-4562](https://issues.apache.org/jira/browse/YARN-4562) | YARN WebApp ignores the configuration passed to it for keystore settings | Major | . 
| Sergey Shelukhin | Sergey Shelukhin | | [HDFS-10270](https://issues.apache.org/jira/browse/HDFS-10270) | TestJMXGet:testNameNode() fails | Minor | test | Andras Bokor | Gergely Novák | | [HADOOP-13026](https://issues.apache.org/jira/browse/HADOOP-13026) | Should not wrap IOExceptions into a AuthenticationException in KerberosAuthenticator | Critical | . | Xuan Gong | Xuan Gong | -| [HDFS-10276](https://issues.apache.org/jira/browse/HDFS-10276) | HDFS should not expose path info that user has no permission to see. | Major | . | Kevin Cox | Yuanbo Liu | +| [HDFS-10276](https://issues.apache.org/jira/browse/HDFS-10276) | HDFS should not expose path info that user has no permission to see. | Major | fs, security | Kevin Cox | Yuanbo Liu | | [YARN-5197](https://issues.apache.org/jira/browse/YARN-5197) | RM leaks containers if running container disappears from node update | Critical | resourcemanager | Jason Lowe | Jason Lowe | | [YARN-5262](https://issues.apache.org/jira/browse/YARN-5262) | Optimize sending RMNodeFinishedContainersPulledByAMEvent for every AM heartbeat | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | | [HDFS-10396](https://issues.apache.org/jira/browse/HDFS-10396) | Using -diff option with DistCp may get "Comparison method violates its general contract" exception | Major | . | Yongjun Zhang | Yongjun Zhang | @@ -212,7 +213,6 @@ | [HDFS-11377](https://issues.apache.org/jira/browse/HDFS-11377) | Balancer hung due to no available mover threads | Major | balancer & mover | yunjiong zhao | yunjiong zhao | | [YARN-6152](https://issues.apache.org/jira/browse/YARN-6152) | Used queue percentage not accurate in UI for 2.7 and below when using DominantResourceCalculator | Major | . | Jonathan Hung | Jonathan Hung | | [HADOOP-13433](https://issues.apache.org/jira/browse/HADOOP-13433) | Race in UGI.reloginFromKeytab | Major | security | Duo Zhang | Duo Zhang | -| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . | Jeffrey E Rodriguez | Yuanbo Liu | | [HDFS-11379](https://issues.apache.org/jira/browse/HDFS-11379) | DFSInputStream may infinite loop requesting block locations | Critical | hdfs-client | Daryn Sharp | Daryn Sharp | | [YARN-1728](https://issues.apache.org/jira/browse/YARN-1728) | Workaround guice3x-undecoded pathInfo in YARN WebApp | Major | . | Abraham Elmahrek | Yuanbo Liu | | [YARN-6310](https://issues.apache.org/jira/browse/YARN-6310) | OutputStreams in AggregatedLogFormat.LogWriter can be left open upon exceptions | Major | yarn | Haibo Chen | Haibo Chen | @@ -246,7 +246,7 @@ | [HDFS-11849](https://issues.apache.org/jira/browse/HDFS-11849) | JournalNode startup failure exception should be logged in log file | Major | journal-node | Surendra Singh Lilhore | Surendra Singh Lilhore | | [HDFS-11864](https://issues.apache.org/jira/browse/HDFS-11864) | Document Metrics to track usage of memory for writes | Major | documentation | Brahma Reddy Battula | Yiqun Lin | | [YARN-6615](https://issues.apache.org/jira/browse/YARN-6615) | AmIpFilter drops query parameters on redirect | Major | . | Wilfred Spiegelenburg | Wilfred Spiegelenburg | -| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health stauts as corrupt even one replica is corrupt | Critical | . 
| Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health status as corrupt even one replica is corrupt | Critical | . | Brahma Reddy Battula | Brahma Reddy Battula | | [HDFS-11078](https://issues.apache.org/jira/browse/HDFS-11078) | Fix NPE in LazyPersistFileScrubber | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-5042](https://issues.apache.org/jira/browse/HDFS-5042) | Completed files lost after power failure | Critical | . | Dave Latham | Vinayakumar B | | [HDFS-11893](https://issues.apache.org/jira/browse/HDFS-11893) | Fix TestDFSShell.testMoveWithTargetPortEmpty failure. | Major | test | Konstantin Shvachko | Brahma Reddy Battula | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/RELEASENOTES.2.7.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/RELEASENOTES.2.7.4.md index 965551194e9..5bd8fd7b199 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/RELEASENOTES.2.7.4.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.4/RELEASENOTES.2.7.4.md @@ -45,6 +45,13 @@ The Code Changes include following: - Modfied associated test classes to owrk with existing code and also cover the newfunctionality in junit +--- + +* [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | *Major* | **getBlocks occupies FSLock and takes too long to complete** + +Skip blocks with size below dfs.balancer.getBlocks.min-block-size (default 10MB) when a balancer asks for a list of blocks. + + --- * [HDFS-9902](https://issues.apache.org/jira/browse/HDFS-9902) | *Major* | **Support different values of dfs.datanode.du.reserved per storage type** @@ -108,6 +115,13 @@ The fix for HDFS-11056 reads meta file to load last partial chunk checksum when Fixed a race condition that caused VolumeScanner to recognize a good replica as a bad one if the replica is also being written concurrently. 
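The HDFS-9412 note above introduces dfs.balancer.getBlocks.min-block-size; a minimal sketch of overriding it follows, where the 100 MB value is an arbitrary example.

```bash
# Illustrative hdfs-site.xml fragment (not a complete file): raise the minimum
# block size the balancer asks the NameNode to include in getBlocks responses
# from the 10MB default to 100MB.
cat <<'EOF'
<property>
  <name>dfs.balancer.getBlocks.min-block-size</name>
  <value>104857600</value>
</property>
EOF
```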
+--- + +* [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | *Major* | **Add ability to secure log servlet using proxy users** + +**WARNING: No release note provided for this change.** + + --- * [HADOOP-14138](https://issues.apache.org/jira/browse/HADOOP-14138) | *Critical* | **Remove S3A ref from META-INF service discovery, rely on existing core-default entry** diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/CHANGELOG.2.7.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/CHANGELOG.2.7.5.md new file mode 100644 index 00000000000..c78509149c4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/CHANGELOG.2.7.5.md @@ -0,0 +1,93 @@ + + +# Apache Hadoop Changelog + +## Release 2.7.5 - 2017-12-14 + + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-6959](https://issues.apache.org/jira/browse/YARN-6959) | RM may allocate wrong AM Container for new attempt | Major | capacity scheduler, fairscheduler, scheduler | Yuqi Wang | Yuqi Wang | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-8829](https://issues.apache.org/jira/browse/HDFS-8829) | Make SO\_RCVBUF and SO\_SNDBUF size configurable for DataTransferProtocol sockets and allow configuring auto-tuning | Major | datanode | He Tianyi | He Tianyi | +| [HADOOP-13442](https://issues.apache.org/jira/browse/HADOOP-13442) | Optimize UGI group lookups | Major | . | Daryn Sharp | Daryn Sharp | +| [MAPREDUCE-6937](https://issues.apache.org/jira/browse/MAPREDUCE-6937) | Backport MAPREDUCE-6870 to branch-2 while preserving compatibility | Major | . | Zhe Zhang | Peter Bacsko | +| [HADOOP-14827](https://issues.apache.org/jira/browse/HADOOP-14827) | Allow StopWatch to accept a Timer parameter for tests | Minor | common, test | Erik Krogen | Erik Krogen | +| [HDFS-12131](https://issues.apache.org/jira/browse/HDFS-12131) | Add some of the FSNamesystem JMX values as metrics | Minor | hdfs, namenode | Erik Krogen | Erik Krogen | +| [HDFS-8865](https://issues.apache.org/jira/browse/HDFS-8865) | Improve quota initialization performance | Major | . | Kihwal Lee | Kihwal Lee | +| [HDFS-12420](https://issues.apache.org/jira/browse/HDFS-12420) | Add an option to disallow 'namenode format -force' | Major | . | Ajay Kumar | Ajay Kumar | +| [MAPREDUCE-6975](https://issues.apache.org/jira/browse/MAPREDUCE-6975) | Logging task counters | Major | task | Prabhu Joseph | Prabhu Joseph | +| [HDFS-12823](https://issues.apache.org/jira/browse/HDFS-12823) | Backport HDFS-9259 "Make SO\_SNDBUF size configurable at DFSClient" to branch-2.7 | Major | hdfs, hdfs-client | Erik Krogen | Erik Krogen | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [MAPREDUCE-6165](https://issues.apache.org/jira/browse/MAPREDUCE-6165) | [JDK8] TestCombineFileInputFormat failed on JDK8 | Minor | . | Wei Yan | Akira Ajisaka | +| [HDFS-8797](https://issues.apache.org/jira/browse/HDFS-8797) | WebHdfsFileSystem creates too many connections for pread | Major | webhdfs | Jing Zhao | Jing Zhao | +| [HDFS-9003](https://issues.apache.org/jira/browse/HDFS-9003) | ForkJoin thread pool leaks | Major | . 
| Kihwal Lee | Kihwal Lee | +| [HDFS-9107](https://issues.apache.org/jira/browse/HDFS-9107) | Prevent NN's unrecoverable death spiral after full GC | Critical | namenode | Daryn Sharp | Daryn Sharp | +| [MAPREDUCE-6750](https://issues.apache.org/jira/browse/MAPREDUCE-6750) | TestHSAdminServer.testRefreshSuperUserGroups is failing | Minor | test | Kihwal Lee | Kihwal Lee | +| [HDFS-10738](https://issues.apache.org/jira/browse/HDFS-10738) | Fix TestRefreshUserMappings.testRefreshSuperUserGroupsConfiguration test failure | Major | test | Rakesh R | Rakesh R | +| [HADOOP-14702](https://issues.apache.org/jira/browse/HADOOP-14702) | Fix formatting issue and regression caused by conversion from APT to Markdown | Minor | documentation | Doris Gu | Doris Gu | +| [HDFS-12157](https://issues.apache.org/jira/browse/HDFS-12157) | Do fsyncDirectory(..) outside of FSDataset lock | Critical | datanode | Vinayakumar B | Vinayakumar B | +| [MAPREDUCE-6931](https://issues.apache.org/jira/browse/MAPREDUCE-6931) | Remove TestDFSIO "Total Throughput" calculation | Critical | benchmarks, test | Dennis Huo | Dennis Huo | +| [HADOOP-14867](https://issues.apache.org/jira/browse/HADOOP-14867) | Update HDFS Federation setup document, for incorrect property name for secondary name node http address | Major | . | Bharat Viswanadham | Bharat Viswanadham | +| [MAPREDUCE-6957](https://issues.apache.org/jira/browse/MAPREDUCE-6957) | shuffle hangs after a node manager connection timeout | Major | mrv2 | Jooseong Kim | Jooseong Kim | +| [HDFS-12323](https://issues.apache.org/jira/browse/HDFS-12323) | NameNode terminates after full GC thinking QJM unresponsive if full GC is much longer than timeout | Major | namenode, qjm | Erik Krogen | Erik Krogen | +| [YARN-5195](https://issues.apache.org/jira/browse/YARN-5195) | RM intermittently crashed with NPE while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | resourcemanager | Karam Singh | sandflee | +| [HADOOP-14902](https://issues.apache.org/jira/browse/HADOOP-14902) | LoadGenerator#genFile write close timing is incorrectly calculated | Major | fs | Jason Lowe | Hanisha Koneru | +| [YARN-7084](https://issues.apache.org/jira/browse/YARN-7084) | TestSchedulingMonitor#testRMStarts fails sporadically | Major | . | Jason Lowe | Jason Lowe | +| [HDFS-12578](https://issues.apache.org/jira/browse/HDFS-12578) | TestDeadDatanode#testNonDFSUsedONDeadNodeReReg failing in branch-2.7 | Blocker | test | Xiao Chen | Ajay Kumar | +| [HADOOP-14919](https://issues.apache.org/jira/browse/HADOOP-14919) | BZip2 drops records when reading data in splits | Critical | . | Aki Tanaka | Jason Lowe | +| [HDFS-12832](https://issues.apache.org/jira/browse/HDFS-12832) | INode.getFullPathName may throw ArrayIndexOutOfBoundsException lead to NameNode exit | Critical | namenode | DENG FEI | Konstantin Shvachko | +| [HDFS-12638](https://issues.apache.org/jira/browse/HDFS-12638) | Delete copy-on-truncate block along with the original block, when deleting a file being truncated | Blocker | hdfs | Jiandan Yang | Konstantin Shvachko | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-9153](https://issues.apache.org/jira/browse/HDFS-9153) | Pretty-format the output for DFSIO | Major | . 
| Kai Zheng | Kai Zheng | +| [HDFS-12596](https://issues.apache.org/jira/browse/HDFS-12596) | Add TestFsck#testFsckCorruptWhenOneReplicaIsCorrupt back to branch-2.7 | Major | test | Xiao Chen | Xiao Chen | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-10711](https://issues.apache.org/jira/browse/HDFS-10711) | Optimize FSPermissionChecker group membership check | Major | hdfs | Daryn Sharp | Daryn Sharp | +| [HADOOP-14881](https://issues.apache.org/jira/browse/HADOOP-14881) | LoadGenerator should use Time.monotonicNow() to measure durations | Major | . | Chetna Chaudhari | Bharat Viswanadham | +| [YARN-5402](https://issues.apache.org/jira/browse/YARN-5402) | Fix NoSuchMethodError in ClusterMetricsInfo | Major | webapp | Weiwei Yang | Weiwei Yang | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-10984](https://issues.apache.org/jira/browse/HDFS-10984) | Expose nntop output as metrics | Major | namenode | Siddharth Wagle | Siddharth Wagle | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/CHANGES.2.7.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/CHANGES.2.7.5.md deleted file mode 100644 index 65d401a6349..00000000000 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/CHANGES.2.7.5.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# Apache Hadoop Changelog - -## Release 2.7.5 - Unreleased (as of 2017-08-28) - - - -### BUG FIXES: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-14702](https://issues.apache.org/jira/browse/HADOOP-14702) | Fix formatting issue and regression caused by conversion from APT to Markdown | Minor | documentation | Doris Gu | Doris Gu | -| [HDFS-12157](https://issues.apache.org/jira/browse/HDFS-12157) | Do fsyncDirectory(..) outside of FSDataset lock | Critical | datanode | Vinayakumar B | Vinayakumar B | - - diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/RELEASENOTES.2.7.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/RELEASENOTES.2.7.5.md index e6b5d818f0b..4cd569330be 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/RELEASENOTES.2.7.5.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.5/RELEASENOTES.2.7.5.md @@ -21,4 +21,16 @@ These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. +--- + +* [HDFS-8829](https://issues.apache.org/jira/browse/HDFS-8829) | *Major* | **Make SO\_RCVBUF and SO\_SNDBUF size configurable for DataTransferProtocol sockets and allow configuring auto-tuning** + +HDFS-8829 introduces two new configuration settings: dfs.datanode.transfer.socket.send.buffer.size and dfs.datanode.transfer.socket.recv.buffer.size. These settings can be used to control the socket send buffer and receive buffer sizes respectively on the DataNode for client-DataNode and DataNode-DataNode connections. The default values of both settings are 128KB for backwards compatibility. For optimum performance it is recommended to set these values to zero to enable the OS networking stack to auto-tune buffer sizes. 
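
The HDFS-8829 note above is essentially a configuration how-to. As a minimal, hypothetical sketch (not part of this patch), the two keys can also be set programmatically through Hadoop's `Configuration` API; the property names, the 128KB default, and the recommendation to use 0 for OS auto-tuning are taken from the note above, while the class name is made up for illustration.

```java
import org.apache.hadoop.conf.Configuration;

// Sketch only: tune the DataNode transfer socket buffers described in HDFS-8829.
// A value of 0 lets the OS networking stack auto-tune the buffer sizes; leaving
// the keys unset keeps the backwards-compatible 128KB defaults.
public class TransferSocketBufferSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setInt("dfs.datanode.transfer.socket.send.buffer.size", 0);
    conf.setInt("dfs.datanode.transfer.socket.recv.buffer.size", 0);
    System.out.println("send buffer = "
        + conf.getInt("dfs.datanode.transfer.socket.send.buffer.size", 128 * 1024));
  }
}
```

In a real cluster these keys would normally be set in `hdfs-site.xml` on the DataNodes; the programmatic form is used here only to keep the sketch self-contained.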
+ + +--- + +* [YARN-6959](https://issues.apache.org/jira/browse/YARN-6959) | *Major* | **RM may allocate wrong AM Container for new attempt** + +ResourceManager will now record ResourceRequests from different attempts into different objects. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.6/CHANGELOG.2.7.6.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.6/CHANGELOG.2.7.6.md new file mode 100644 index 00000000000..8424211baea --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.6/CHANGELOG.2.7.6.md @@ -0,0 +1,92 @@ + + +# Apache Hadoop Changelog + +## Release 2.7.6 - 2018-04-16 + + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-9477](https://issues.apache.org/jira/browse/HADOOP-9477) | Add posixGroups support for LDAP groups mapping service | Major | . | Kai Zheng | Dapeng Sun | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-12472](https://issues.apache.org/jira/browse/HADOOP-12472) | Make GenericTestUtils.assertExceptionContains robust | Minor | test | Steve Loughran | Steve Loughran | +| [HADOOP-12568](https://issues.apache.org/jira/browse/HADOOP-12568) | Update core-default.xml to describe posixGroups support | Minor | documentation | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-13105](https://issues.apache.org/jira/browse/HADOOP-13105) | Support timeouts in LDAP queries in LdapGroupsMapping. | Major | security | Chris Nauroth | Mingliang Liu | +| [HADOOP-13263](https://issues.apache.org/jira/browse/HADOOP-13263) | Reload cached groups in background after expiry | Major | . | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-11003](https://issues.apache.org/jira/browse/HDFS-11003) | Expose "XmitsInProgress" through DataNodeMXBean | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HADOOP-14246](https://issues.apache.org/jira/browse/HADOOP-14246) | Authentication Tokens should use SecureRandom instead of Random and 256 bit secrets | Major | security | Robert Kanter | Robert Kanter | +| [YARN-7590](https://issues.apache.org/jira/browse/YARN-7590) | Improve container-executor validation check | Major | security, yarn | Eric Yang | Eric Yang | +| [HADOOP-15212](https://issues.apache.org/jira/browse/HADOOP-15212) | Add independent secret manager method for logging expired tokens | Major | security | Daryn Sharp | Daryn Sharp | +| [MAPREDUCE-7048](https://issues.apache.org/jira/browse/MAPREDUCE-7048) | Uber AM can crash due to unknown task in statusUpdate | Major | mr-am | Peter Bacsko | Peter Bacsko | +| [HDFS-11187](https://issues.apache.org/jira/browse/HDFS-11187) | Optimize disk access for last partial chunk checksum of Finalized replica | Major | datanode | Wei-Chiu Chuang | Gabor Bota | +| [HADOOP-15279](https://issues.apache.org/jira/browse/HADOOP-15279) | increase maven heap size recommendations | Minor | build, documentation, test | Allen Wittenauer | Allen Wittenauer | +| [HDFS-12884](https://issues.apache.org/jira/browse/HDFS-12884) | BlockUnderConstructionFeature.truncateBlock should be of type BlockInfo | Major | namenode | Konstantin Shvachko | chencan | +| [HADOOP-15345](https://issues.apache.org/jira/browse/HADOOP-15345) | Backport HADOOP-12185 to branch-2.7: NetworkTopology is not efficient adding/getting/removing nodes | Major | . 
| He Xiaoqiao | He Xiaoqiao | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-3425](https://issues.apache.org/jira/browse/YARN-3425) | NPE from RMNodeLabelsManager.serviceStop when NodeLabelsManager.serviceInit failed | Minor | resourcemanager | Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-12001](https://issues.apache.org/jira/browse/HADOOP-12001) | Limiting LDAP search conflicts with posixGroup addition | Blocker | security | Patrick White | Patrick White | +| [YARN-4167](https://issues.apache.org/jira/browse/YARN-4167) | NPE on RMActiveServices#serviceStop when store is null | Minor | . | Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-12181](https://issues.apache.org/jira/browse/HADOOP-12181) | Fix intermittent test failure of TestZKSignerSecretProvider | Minor | . | Masatake Iwasaki | Masatake Iwasaki | +| [HDFS-7959](https://issues.apache.org/jira/browse/HDFS-7959) | WebHdfs logging is missing on Datanode | Critical | . | Kihwal Lee | Kihwal Lee | +| [HDFS-4210](https://issues.apache.org/jira/browse/HDFS-4210) | Throw helpful exception when DNS entry for JournalNode cannot be resolved | Trivial | ha, journal-node, namenode | Damien Hardy | John Zhuge | +| [HADOOP-13375](https://issues.apache.org/jira/browse/HADOOP-13375) | o.a.h.security.TestGroupsCaching.testBackgroundRefreshCounters seems flaky | Major | security, test | Mingliang Liu | Weiwei Yang | +| [HADOOP-12611](https://issues.apache.org/jira/browse/HADOOP-12611) | TestZKSignerSecretProvider#testMultipleInit occasionally fail | Major | . | Wei-Chiu Chuang | Eric Badger | +| [HADOOP-13508](https://issues.apache.org/jira/browse/HADOOP-13508) | FsPermission string constructor does not recognize sticky bit | Major | . | Atul Sikaria | Atul Sikaria | +| [HDFS-12299](https://issues.apache.org/jira/browse/HDFS-12299) | Race Between update pipeline and DN Re-Registration | Critical | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-12371](https://issues.apache.org/jira/browse/HDFS-12371) | "BlockVerificationFailures" and "BlocksVerified" show up as 0 in Datanode JMX | Major | metrics | Sai Nukavarapu | Hanisha Koneru | +| [MAPREDUCE-5124](https://issues.apache.org/jira/browse/MAPREDUCE-5124) | AM lacks flow control for task events | Major | mr-am | Jason Lowe | Peter Bacsko | +| [HDFS-12881](https://issues.apache.org/jira/browse/HDFS-12881) | Output streams closed with IOUtils suppressing write errors | Major | . | Jason Lowe | Ajay Kumar | +| [YARN-7661](https://issues.apache.org/jira/browse/YARN-7661) | NodeManager metrics return wrong value after update node resource | Major | . | Yang Wang | Yang Wang | +| [HDFS-12347](https://issues.apache.org/jira/browse/HDFS-12347) | TestBalancerRPCDelay#testBalancerRPCDelay fails very frequently | Critical | test | Xiao Chen | Bharat Viswanadham | +| [YARN-6632](https://issues.apache.org/jira/browse/YARN-6632) | Backport YARN-3425 to branch 2.7 | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-15143](https://issues.apache.org/jira/browse/HADOOP-15143) | NPE due to Invalid KerberosTicket in UGI | Major | . 
| Jitendra Nath Pandey | Mukul Kumar Singh | +| [MAPREDUCE-7028](https://issues.apache.org/jira/browse/MAPREDUCE-7028) | Concurrent task progress updates causing NPE in Application Master | Blocker | mr-am | Gergo Repas | Gergo Repas | +| [HADOOP-12751](https://issues.apache.org/jira/browse/HADOOP-12751) | While using kerberos Hadoop incorrectly assumes names with '@' to be non-simple | Critical | security | Bolke de Bruin | Bolke de Bruin | +| [MAPREDUCE-7020](https://issues.apache.org/jira/browse/MAPREDUCE-7020) | Task timeout in uber mode can crash AM | Major | mr-am | Akira Ajisaka | Peter Bacsko | +| [HDFS-13126](https://issues.apache.org/jira/browse/HDFS-13126) | Backport [HDFS-7959] to branch-2.7 to re-enable HTTP request logging for WebHDFS | Major | datanode, webhdfs | Erik Krogen | Erik Krogen | +| [HDFS-13120](https://issues.apache.org/jira/browse/HDFS-13120) | Snapshot diff could be corrupted after concat | Major | namenode, snapshots | Xiaoyu Yao | Xiaoyu Yao | +| [HDFS-10453](https://issues.apache.org/jira/browse/HDFS-10453) | ReplicationMonitor thread could stuck for long time due to the race between replication and delete of same file in a large cluster. | Major | namenode | He Xiaoqiao | He Xiaoqiao | +| [MAPREDUCE-7052](https://issues.apache.org/jira/browse/MAPREDUCE-7052) | TestFixedLengthInputFormat#testFormatCompressedIn is flaky | Major | client, test | Peter Bacsko | Peter Bacsko | +| [HDFS-13112](https://issues.apache.org/jira/browse/HDFS-13112) | Token expiration edits may cause log corruption or deadlock | Critical | namenode | Daryn Sharp | Daryn Sharp | +| [HADOOP-15206](https://issues.apache.org/jira/browse/HADOOP-15206) | BZip2 drops and duplicates records when input split size is small | Major | . | Aki Tanaka | Aki Tanaka | +| [HADOOP-15283](https://issues.apache.org/jira/browse/HADOOP-15283) | Upgrade from findbugs 3.0.1 to spotbugs 3.1.2 in branch-2 to fix docker image build | Major | . | Xiao Chen | Akira Ajisaka | +| [HDFS-13195](https://issues.apache.org/jira/browse/HDFS-13195) | DataNode conf page cannot display the current value after reconfig | Minor | datanode | maobaolong | maobaolong | +| [HADOOP-12862](https://issues.apache.org/jira/browse/HADOOP-12862) | LDAP Group Mapping over SSL can not specify trust store | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-7249](https://issues.apache.org/jira/browse/YARN-7249) | Fix CapacityScheduler NPE issue when a container preempted while the node is being removed | Blocker | . | Wangda Tan | Wangda Tan | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-6633](https://issues.apache.org/jira/browse/YARN-6633) | Backport YARN-4167 to branch 2.7 | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-15177](https://issues.apache.org/jira/browse/HADOOP-15177) | Update the release year to 2018 | Blocker | build | Akira Ajisaka | Bharat Viswanadham | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.6/RELEASENOTES.2.7.6.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.6/RELEASENOTES.2.7.6.md new file mode 100644 index 00000000000..93b91391736 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.6/RELEASENOTES.2.7.6.md @@ -0,0 +1,42 @@ + + +# Apache Hadoop 2.7.6 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. 
+ + +--- + +* [HADOOP-9477](https://issues.apache.org/jira/browse/HADOOP-9477) | *Major* | **Add posixGroups support for LDAP groups mapping service** + +Add posixGroups support for LDAP groups mapping service. The change in LDAPGroupMapping is compatible with previous scenario. In LDAP, the group mapping between {{posixAccount}} and {{posixGroup}} is different from the general LDAPGroupMapping, one of the differences is the {{"memberUid"}} will be used to mapping {{posixAccount}} and {{posixGroup}}. The feature will handle the mapping in internal when configuration {{hadoop.security.group.mapping.ldap.search.filter.user}} is set as "posixAccount" and {{hadoop.security.group.mapping.ldap.search.filter.group}} is "posixGroup". + + +--- + +* [HADOOP-13105](https://issues.apache.org/jira/browse/HADOOP-13105) | *Major* | **Support timeouts in LDAP queries in LdapGroupsMapping.** + +This patch adds two new config keys for supporting timeouts in LDAP query operations. The property "hadoop.security.group.mapping.ldap.connection.timeout.ms" is the connection timeout (in milliseconds), within which period if the LDAP provider doesn't establish a connection, it will abort the connect attempt. The property "hadoop.security.group.mapping.ldap.read.timeout.ms" is the read timeout (in milliseconds), within which period if the LDAP provider doesn't get a LDAP response, it will abort the read attempt. + + +--- + +* [HADOOP-13263](https://issues.apache.org/jira/browse/HADOOP-13263) | *Major* | **Reload cached groups in background after expiry** + +hadoop.security.groups.cache.background.reload can be set to true to enable background reload of expired groups cache entries. This setting can improve the performance of services that use Groups.java (e.g. the NameNode) when group lookups are slow. The setting is disabled by default. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.7/CHANGELOG.2.7.7.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.7/CHANGELOG.2.7.7.md new file mode 100644 index 00000000000..daa1cd6290e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.7/CHANGELOG.2.7.7.md @@ -0,0 +1,47 @@ + + +# Apache Hadoop Changelog + +## Release 2.7.7 - 2018-06-02 + + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15486](https://issues.apache.org/jira/browse/HADOOP-15486) | Make NetworkTopology#netLock fair | Major | net | Nanda kumar | Nanda kumar | +| [HDFS-13602](https://issues.apache.org/jira/browse/HDFS-13602) | Add checkOperation(WRITE) checks in FSNamesystem | Major | ha, namenode | Erik Krogen | Chao Sun | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-12156](https://issues.apache.org/jira/browse/HDFS-12156) | TestFSImage fails without -Pnative | Major | test | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-14970](https://issues.apache.org/jira/browse/HADOOP-14970) | MiniHadoopClusterManager doesn't respect lack of format option | Minor | . | Erik Krogen | Erik Krogen | +| [HDFS-13486](https://issues.apache.org/jira/browse/HDFS-13486) | Backport HDFS-11817 (A faulty node can cause a lease leak and NPE on accessing data) to branch-2.7 | Major | . 
| Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-15473](https://issues.apache.org/jira/browse/HADOOP-15473) | Configure serialFilter in KeyProvider to avoid UnrecoverableKeyException caused by JDK-8189997 | Critical | kms | Gabor Bota | Gabor Bota | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15509](https://issues.apache.org/jira/browse/HADOOP-15509) | Release Hadoop 2.7.7 | Major | build | Steve Loughran | Steve Loughran | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.7/RELEASENOTES.2.7.7.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.7/RELEASENOTES.2.7.7.md new file mode 100644 index 00000000000..a9cfd25d20f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.7/RELEASENOTES.2.7.7.md @@ -0,0 +1,21 @@ + + +# Apache Hadoop 2.7.7 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.8/CHANGELOG.2.7.8.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.8/CHANGELOG.2.7.8.md new file mode 100644 index 00000000000..1e0953f0e8f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.8/CHANGELOG.2.7.8.md @@ -0,0 +1,31 @@ + + +# Apache Hadoop Changelog + +## Release 2.7.8 - Unreleased (as of 2018-09-02) + + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-14891](https://issues.apache.org/jira/browse/HADOOP-14891) | Remove references to Guava Objects.toStringHelper | Major | . | Jonathan Eagles | Jonathan Eagles | +| [HADOOP-15674](https://issues.apache.org/jira/browse/HADOOP-15674) | Test failure TestSSLHttpServer.testExcludedCiphers with TLS\_ECDHE\_RSA\_WITH\_AES\_128\_CBC\_SHA256 cipher suite | Major | common | Gabor Bota | Szilard Nemeth | +| [YARN-8640](https://issues.apache.org/jira/browse/YARN-8640) | Restore previous state in container-executor after failure | Major | . | Jim Brennan | Jim Brennan | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.8/RELEASENOTES.2.7.8.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.8/RELEASENOTES.2.7.8.md new file mode 100644 index 00000000000..3da2cc704d4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.7.8/RELEASENOTES.2.7.8.md @@ -0,0 +1,21 @@ + + +# Apache Hadoop 2.7.8 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. 
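
The 2.7.6 release notes above (HADOOP-9477, HADOOP-13105, HADOOP-13263) describe a set of LDAP group-mapping properties: the posixAccount/posixGroup search filters, LDAP connection and read timeouts, and background reload of expired group cache entries. A hedged, illustrative sketch of wiring them together follows; the property names and filter values come from those notes, while the class name and the timeout values are made-up placeholders.

```java
import org.apache.hadoop.conf.Configuration;

// Sketch only: combine the LDAP group-mapping settings described in the
// 2.7.6 release notes (HADOOP-9477, HADOOP-13105, HADOOP-13263).
public class LdapGroupMappingSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // HADOOP-9477: these filter values switch LdapGroupsMapping to its
    // posixAccount/posixGroup handling.
    conf.set("hadoop.security.group.mapping.ldap.search.filter.user", "posixAccount");
    conf.set("hadoop.security.group.mapping.ldap.search.filter.group", "posixGroup");

    // HADOOP-13105: abort LDAP connect/read attempts that exceed these limits
    // (the 5000 ms values are placeholders, not recommended defaults).
    conf.setInt("hadoop.security.group.mapping.ldap.connection.timeout.ms", 5000);
    conf.setInt("hadoop.security.group.mapping.ldap.read.timeout.ms", 5000);

    // HADOOP-13263: reload expired group cache entries in the background;
    // the notes above state this is disabled by default.
    conf.setBoolean("hadoop.security.groups.cache.background.reload", true);
  }
}
```

In practice these keys would typically live in `core-site.xml`; the programmatic form above is only meant to make the example self-contained.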
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/CHANGES.2.8.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/CHANGELOG.2.8.0.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/CHANGES.2.8.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/CHANGELOG.2.8.0.md index 803257f092b..7b1c8939a98 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/CHANGES.2.8.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/CHANGELOG.2.8.0.md @@ -113,6 +113,7 @@ | [HADOOP-14049](https://issues.apache.org/jira/browse/HADOOP-14049) | Honour AclBit flag associated to file/folder permission for Azure datalake account | Major | fs/adl | Vishwajeet Dusane | Vishwajeet Dusane | | [HADOOP-14048](https://issues.apache.org/jira/browse/HADOOP-14048) | REDO operation of WASB#AtomicRename should create placeholder blob for destination folder | Critical | fs/azure | NITIN VERMA | NITIN VERMA | | [MAPREDUCE-6304](https://issues.apache.org/jira/browse/MAPREDUCE-6304) | Specifying node labels when submitting MR jobs | Major | job submission | Jian Fang | Naganarasimha G R | +| [YARN-1963](https://issues.apache.org/jira/browse/YARN-1963) | Support priorities across applications within the same queue | Major | api, resourcemanager | Arun C Murthy | Sunil Govindan | ### IMPROVEMENTS: @@ -304,7 +305,7 @@ | [MAPREDUCE-6408](https://issues.apache.org/jira/browse/MAPREDUCE-6408) | Queue name and user name should be printed on the job page | Major | applicationmaster | Siqi Li | Siqi Li | | [HDFS-8639](https://issues.apache.org/jira/browse/HDFS-8639) | Option for HTTP port of NameNode by MiniDFSClusterManager | Minor | test | Kai Sasaki | Kai Sasaki | | [YARN-3360](https://issues.apache.org/jira/browse/YARN-3360) | Add JMX metrics to TimelineDataManager | Major | timelineserver | Jason Lowe | Jason Lowe | -| [HADOOP-12049](https://issues.apache.org/jira/browse/HADOOP-12049) | Control http authentication cookie persistence via configuration | Major | security | Benoy Antony | Huizhi Lu | +| [HADOOP-12049](https://issues.apache.org/jira/browse/HADOOP-12049) | Control http authentication cookie persistence via configuration | Major | security | Benoy Antony | H Lu | | [HDFS-8462](https://issues.apache.org/jira/browse/HDFS-8462) | Implement GETXATTRS and LISTXATTRS operations for WebImageViewer | Major | . | Akira Ajisaka | Jagadesh Kiran N | | [HDFS-8640](https://issues.apache.org/jira/browse/HDFS-8640) | Make reserved RBW space visible through JMX | Major | . 
| Kanaka Kumar Avvaru | Kanaka Kumar Avvaru | | [HDFS-8546](https://issues.apache.org/jira/browse/HDFS-8546) | Use try with resources in DataStorage and Storage | Minor | datanode | Andrew Wang | Andrew Wang | @@ -371,7 +372,7 @@ | [HDFS-6407](https://issues.apache.org/jira/browse/HDFS-6407) | Add sorting and pagination in the datanode tab of the NN Web UI | Critical | namenode | Nathan Roberts | Haohui Mai | | [HDFS-8880](https://issues.apache.org/jira/browse/HDFS-8880) | NameNode metrics logging | Major | namenode | Arpit Agarwal | Arpit Agarwal | | [YARN-4057](https://issues.apache.org/jira/browse/YARN-4057) | If ContainersMonitor is not enabled, only print related log info one time | Minor | nodemanager | Jun Gong | Jun Gong | -| [HADOOP-12050](https://issues.apache.org/jira/browse/HADOOP-12050) | Enable MaxInactiveInterval for hadoop http auth token | Major | security | Benoy Antony | Huizhi Lu | +| [HADOOP-12050](https://issues.apache.org/jira/browse/HADOOP-12050) | Enable MaxInactiveInterval for hadoop http auth token | Major | security | Benoy Antony | H Lu | | [HDFS-8435](https://issues.apache.org/jira/browse/HDFS-8435) | Support CreateFlag in WebHdfs | Major | webhdfs | Vinoth Sathappan | Jakob Homan | | [HDFS-8911](https://issues.apache.org/jira/browse/HDFS-8911) | NameNode Metric : Add Editlog counters as a JMX metric | Major | namenode | Anu Engineer | Anu Engineer | | [HDFS-8917](https://issues.apache.org/jira/browse/HDFS-8917) | Cleanup BlockInfoUnderConstruction from comments and tests | Minor | namenode | Zhe Zhang | Zhe Zhang | @@ -380,7 +381,6 @@ | [HDFS-8924](https://issues.apache.org/jira/browse/HDFS-8924) | Add pluggable interface for reading replicas in DFSClient | Major | hdfs-client | Colin P. McCabe | Colin P. McCabe | | [HDFS-8928](https://issues.apache.org/jira/browse/HDFS-8928) | Improvements for BlockUnderConstructionFeature: ReplicaUnderConstruction as a separate class and replicas as an array | Minor | namenode | Zhe Zhang | Jing Zhao | | [HDFS-2390](https://issues.apache.org/jira/browse/HDFS-2390) | dfsadmin -setBalancerBandwidth doesnot validate -ve value | Minor | balancer & mover | Rajit Saha | Gautam Gopalakrishnan | -| [HDFS-8865](https://issues.apache.org/jira/browse/HDFS-8865) | Improve quota initialization performance | Major | . | Kihwal Lee | Kihwal Lee | | [HDFS-8983](https://issues.apache.org/jira/browse/HDFS-8983) | NameNode support for protected directories | Major | namenode | Arpit Agarwal | Arpit Agarwal | | [HDFS-8946](https://issues.apache.org/jira/browse/HDFS-8946) | Improve choosing datanode storage for block placement | Major | namenode | Yi Liu | Yi Liu | | [HDFS-8965](https://issues.apache.org/jira/browse/HDFS-8965) | Harden edit log reading code against out of memory errors | Major | . | Colin P. McCabe | Colin P. McCabe | @@ -509,7 +509,7 @@ | [HDFS-9024](https://issues.apache.org/jira/browse/HDFS-9024) | Deprecate the TotalFiles metric | Major | . | Akira Ajisaka | Akira Ajisaka | | [HDFS-7988](https://issues.apache.org/jira/browse/HDFS-7988) | Replace usage of ExactSizeInputStream with LimitInputStream. | Minor | . | Chris Nauroth | Walter Su | | [HDFS-9314](https://issues.apache.org/jira/browse/HDFS-9314) | Improve BlockPlacementPolicyDefault's picking of excess replicas | Major | . 
| Ming Ma | Xiao Chen | -| [MAPREDUCE-5870](https://issues.apache.org/jira/browse/MAPREDUCE-5870) | Support for passing Job priority through Application Submission Context in Mapreduce Side | Major | client | Sunil G | Sunil G | +| [MAPREDUCE-5870](https://issues.apache.org/jira/browse/MAPREDUCE-5870) | Support for passing Job priority through Application Submission Context in Mapreduce Side | Major | client | Sunil Govindan | Sunil Govindan | | [HDFS-9434](https://issues.apache.org/jira/browse/HDFS-9434) | Recommission a datanode with 500k blocks may pause NN for 30 seconds | Major | namenode | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [YARN-4132](https://issues.apache.org/jira/browse/YARN-4132) | Separate configs for nodemanager to resourcemanager connection timeout and retries | Major | nodemanager | Chang Li | Chang Li | | [HDFS-8512](https://issues.apache.org/jira/browse/HDFS-8512) | WebHDFS : GETFILESTATUS should return LocatedBlock with storage type info | Major | webhdfs | Sumana Sathish | Xiaoyu Yao | @@ -533,7 +533,7 @@ | [HDFS-9552](https://issues.apache.org/jira/browse/HDFS-9552) | Document types of permission checks performed for HDFS operations. | Major | documentation | Chris Nauroth | Chris Nauroth | | [HADOOP-12570](https://issues.apache.org/jira/browse/HADOOP-12570) | HDFS Secure Mode Documentation updates | Major | documentation | Arpit Agarwal | Arpit Agarwal | | [YARN-4480](https://issues.apache.org/jira/browse/YARN-4480) | Clean up some inappropriate imports | Major | . | Kai Zheng | Kai Zheng | -| [YARN-4290](https://issues.apache.org/jira/browse/YARN-4290) | Add -showDetails option to YARN Nodes CLI to print all nodes reports information | Major | client | Wangda Tan | Sunil G | +| [YARN-4290](https://issues.apache.org/jira/browse/YARN-4290) | Add -showDetails option to YARN Nodes CLI to print all nodes reports information | Major | client | Wangda Tan | Sunil Govindan | | [YARN-4400](https://issues.apache.org/jira/browse/YARN-4400) | AsyncDispatcher.waitForDrained should be final | Trivial | yarn | Daniel Templeton | Daniel Templeton | | [MAPREDUCE-6584](https://issues.apache.org/jira/browse/MAPREDUCE-6584) | Remove trailing whitespaces from mapred-default.xml | Major | documentation | Akira Ajisaka | Akira Ajisaka | | [HADOOP-12686](https://issues.apache.org/jira/browse/HADOOP-12686) | Update FileSystemShell documentation to mention the meaning of each columns of fs -du | Minor | documentation, fs | Daisuke Kobayashi | Daisuke Kobayashi | @@ -546,7 +546,7 @@ | [HDFS-9415](https://issues.apache.org/jira/browse/HDFS-9415) | Document dfs.cluster.administrators and dfs.permissions.superusergroup | Major | documentation | Arpit Agarwal | Xiaobing Zhou | | [HDFS-6054](https://issues.apache.org/jira/browse/HDFS-6054) | MiniQJMHACluster should not use static port to avoid binding failure in unit test | Major | test | Brandon Li | Yongjun Zhang | | [YARN-4492](https://issues.apache.org/jira/browse/YARN-4492) | Add documentation for preemption supported in Capacity scheduler | Minor | capacity scheduler | Naganarasimha G R | Naganarasimha G R | -| [YARN-4371](https://issues.apache.org/jira/browse/YARN-4371) | "yarn application -kill" should take multiple application ids | Major | . | Tsuyoshi Ozawa | Sunil G | +| [YARN-4371](https://issues.apache.org/jira/browse/YARN-4371) | "yarn application -kill" should take multiple application ids | Major | . 
| Tsuyoshi Ozawa | Sunil Govindan | | [HDFS-9653](https://issues.apache.org/jira/browse/HDFS-9653) | Expose the number of blocks pending deletion through dfsadmin report command | Major | hdfs-client, tools | Weiwei Yang | Weiwei Yang | | [HADOOP-12731](https://issues.apache.org/jira/browse/HADOOP-12731) | Remove useless boxing/unboxing code | Minor | performance | Kousuke Saruta | Kousuke Saruta | | [HDFS-9654](https://issues.apache.org/jira/browse/HDFS-9654) | Code refactoring for HDFS-8578 | Minor | datanode | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | @@ -639,7 +639,7 @@ | [HADOOP-12963](https://issues.apache.org/jira/browse/HADOOP-12963) | Allow using path style addressing for accessing the s3 endpoint | Minor | fs/s3 | Andrew Baptist | Stephen Montgomery | | [HDFS-10280](https://issues.apache.org/jira/browse/HDFS-10280) | Document new dfsadmin command -evictWriters | Minor | documentation | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-10292](https://issues.apache.org/jira/browse/HDFS-10292) | Add block id when client got Unable to close file exception | Minor | . | Brahma Reddy Battula | Brahma Reddy Battula | -| [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | getBlocks occupies FSLock and takes too long to complete | Major | . | He Tianyi | He Tianyi | +| [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | getBlocks occupies FSLock and takes too long to complete | Major | balancer & mover, namenode | He Tianyi | He Tianyi | | [HDFS-10302](https://issues.apache.org/jira/browse/HDFS-10302) | BlockPlacementPolicyDefault should use default replication considerload value | Trivial | . | Yiqun Lin | Yiqun Lin | | [HDFS-10264](https://issues.apache.org/jira/browse/HDFS-10264) | Logging improvements in FSImageFormatProtobuf.Saver | Major | namenode | Konstantin Shvachko | Xiaobing Zhou | | [HADOOP-12985](https://issues.apache.org/jira/browse/HADOOP-12985) | Support MetricsSource interface for DecayRpcScheduler Metrics | Major | . | Xiaoyu Yao | Xiaoyu Yao | @@ -705,7 +705,7 @@ | [HDFS-10676](https://issues.apache.org/jira/browse/HDFS-10676) | Add namenode metric to measure time spent in generating EDEKs | Major | namenode | Hanisha Koneru | Hanisha Koneru | | [MAPREDUCE-6746](https://issues.apache.org/jira/browse/MAPREDUCE-6746) | Replace org.apache.commons.io.Charsets with java.nio.charset.StandardCharsets | Minor | . | Vincent Poon | Vincent Poon | | [HDFS-10703](https://issues.apache.org/jira/browse/HDFS-10703) | HA NameNode Web UI should show last checkpoint time | Minor | ui | John Zhuge | John Zhuge | -| [MAPREDUCE-6729](https://issues.apache.org/jira/browse/MAPREDUCE-6729) | Accurately compute the test execute time in DFSIO | Minor | benchmarks, performance, test | mingleizhang | mingleizhang | +| [MAPREDUCE-6729](https://issues.apache.org/jira/browse/MAPREDUCE-6729) | Accurately compute the test execute time in DFSIO | Minor | benchmarks, performance, test | zhangminglei | zhangminglei | | [HADOOP-13444](https://issues.apache.org/jira/browse/HADOOP-13444) | Replace org.apache.commons.io.Charsets with java.nio.charset.StandardCharsets | Minor | . | Vincent Poon | Vincent Poon | | [YARN-5456](https://issues.apache.org/jira/browse/YARN-5456) | container-executor support for FreeBSD, NetBSD, and others if conf path is absolute | Major | nodemanager, security | Allen Wittenauer | Allen Wittenauer | | [MAPREDUCE-6730](https://issues.apache.org/jira/browse/MAPREDUCE-6730) | Use StandardCharsets instead of String overload in TextOutputFormat | Minor | . 
| Sahil Kang | Sahil Kang | @@ -771,6 +771,7 @@ | [HADOOP-13669](https://issues.apache.org/jira/browse/HADOOP-13669) | KMS Server should log exceptions before throwing | Major | kms | Xiao Chen | Suraj Acharya | | [HADOOP-13502](https://issues.apache.org/jira/browse/HADOOP-13502) | Split fs.contract.is-blobstore flag into more descriptive flags for use by contract tests. | Minor | test | Chris Nauroth | Chris Nauroth | | [HADOOP-13017](https://issues.apache.org/jira/browse/HADOOP-13017) | Implementations of InputStream.read(buffer, offset, bytes) to exit 0 if bytes==0 | Major | fs, io | Steve Loughran | Steve Loughran | +| [HDFS-11069](https://issues.apache.org/jira/browse/HDFS-11069) | Tighten the authorization of datanode RPC | Major | datanode, security | Kihwal Lee | Kihwal Lee | | [HDFS-11055](https://issues.apache.org/jira/browse/HDFS-11055) | Update default-log4j.properties for httpfs to imporve test logging | Major | httpfs, test | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4963](https://issues.apache.org/jira/browse/YARN-4963) | capacity scheduler: Make number of OFF\_SWITCH assignments per heartbeat configurable | Major | capacityscheduler | Nathan Roberts | Nathan Roberts | | [HDFS-11047](https://issues.apache.org/jira/browse/HDFS-11047) | Remove deep copies of FinalizedReplica to alleviate heap consumption on DataNode | Major | datanode | Xiaobing Zhou | Xiaobing Zhou | @@ -817,6 +818,8 @@ | [HADOOP-11694](https://issues.apache.org/jira/browse/HADOOP-11694) | Über-jira: S3a phase II: robustness, scale and performance | Major | fs/s3 | Steve Loughran | Steve Loughran | | [HDFS-8873](https://issues.apache.org/jira/browse/HDFS-8873) | Allow the directoryScanner to be rate-limited | Major | datanode | Nathan Roberts | Daniel Templeton | | [HADOOP-12825](https://issues.apache.org/jira/browse/HADOOP-12825) | Log slow name resolutions | Major | . | Sidharta Seethana | Sidharta Seethana | +| [HDFS-8865](https://issues.apache.org/jira/browse/HDFS-8865) | Improve quota initialization performance | Major | . | Kihwal Lee | Kihwal Lee | +| [HADOOP-12806](https://issues.apache.org/jira/browse/HADOOP-12806) | Hadoop fs s3a lib not working with temporary credentials in AWS Lambda | Major | fs/s3 | Nikolaos Tsipas | | ### BUG FIXES: @@ -1012,7 +1015,7 @@ | [HADOOP-11922](https://issues.apache.org/jira/browse/HADOOP-11922) | Misspelling of threshold in log4j.properties for tests in hadoop-tools | Minor | . | Brahma Reddy Battula | Gabor Liptak | | [HDFS-8257](https://issues.apache.org/jira/browse/HDFS-8257) | Namenode rollingUpgrade option is incorrect in document | Major | documentation | J.Andreina | J.Andreina | | [HDFS-8067](https://issues.apache.org/jira/browse/HDFS-8067) | haadmin prints out stale help messages | Minor | hdfs-client | Ajith S | Ajith S | -| [YARN-3592](https://issues.apache.org/jira/browse/YARN-3592) | Fix typos in RMNodeLabelsManager | Trivial | resourcemanager | Junping Du | Sunil G | +| [YARN-3592](https://issues.apache.org/jira/browse/YARN-3592) | Fix typos in RMNodeLabelsManager | Trivial | resourcemanager | Junping Du | Sunil Govindan | | [HDFS-8174](https://issues.apache.org/jira/browse/HDFS-8174) | Update replication count to live rep count in fsck report | Minor | . 
| J.Andreina | J.Andreina | | [HDFS-6291](https://issues.apache.org/jira/browse/HDFS-6291) | FSImage may be left unclosed in BootstrapStandby#doRun() | Minor | ha | Ted Yu | Sanghyun Yun | | [YARN-3358](https://issues.apache.org/jira/browse/YARN-3358) | Audit log not present while refreshing Service ACLs | Minor | resourcemanager | Varun Saxena | Varun Saxena | @@ -1040,7 +1043,7 @@ | [MAPREDUCE-2094](https://issues.apache.org/jira/browse/MAPREDUCE-2094) | LineRecordReader should not seek into non-splittable, compressed streams. | Major | task | Niels Basjes | Niels Basjes | | [HDFS-8245](https://issues.apache.org/jira/browse/HDFS-8245) | Standby namenode doesn't process DELETED\_BLOCK if the add block request is in edit log. | Major | . | Rushabh S Shah | Rushabh S Shah | | [YARN-3018](https://issues.apache.org/jira/browse/YARN-3018) | Unify the default value for yarn.scheduler.capacity.node-locality-delay in code and default xml file | Trivial | capacityscheduler | nijel | nijel | -| [HDFS-8326](https://issues.apache.org/jira/browse/HDFS-8326) | Documentation about when checkpoints are run is out of date | Major | documentation | Misty Stanley-Jones | Misty Stanley-Jones | +| [HDFS-8326](https://issues.apache.org/jira/browse/HDFS-8326) | Documentation about when checkpoints are run is out of date | Major | documentation | Misty Linville | Misty Linville | | [YARN-3604](https://issues.apache.org/jira/browse/YARN-3604) | removeApplication in ZKRMStateStore should also disable watch. | Minor | resourcemanager | zhihai xu | zhihai xu | | [YARN-3476](https://issues.apache.org/jira/browse/YARN-3476) | Nodemanager can fail to delete local logs if log aggregation fails | Major | log-aggregation, nodemanager | Jason Lowe | Rohith Sharma K S | | [YARN-3473](https://issues.apache.org/jira/browse/YARN-3473) | Fix RM Web UI configuration for some properties | Minor | resourcemanager | Ray Chiang | Ray Chiang | @@ -1128,7 +1131,7 @@ | [HDFS-8470](https://issues.apache.org/jira/browse/HDFS-8470) | fsimage loading progress should update inode, delegation token and cache pool count. | Minor | namenode | tongshiquan | Surendra Singh Lilhore | | [HDFS-8523](https://issues.apache.org/jira/browse/HDFS-8523) | Remove usage information on unsupported operation "fsck -showprogress" from branch-2 | Major | documentation | J.Andreina | J.Andreina | | [HDFS-3716](https://issues.apache.org/jira/browse/HDFS-3716) | Purger should remove stale fsimage ckpt files | Minor | namenode | suja s | J.Andreina | -| [YARN-3751](https://issues.apache.org/jira/browse/YARN-3751) | TestAHSWebServices fails after YARN-3467 | Major | . | Zhijie Shen | Sunil G | +| [YARN-3751](https://issues.apache.org/jira/browse/YARN-3751) | TestAHSWebServices fails after YARN-3467 | Major | . | Zhijie Shen | Sunil Govindan | | [YARN-3585](https://issues.apache.org/jira/browse/YARN-3585) | NodeManager cannot exit on SHUTDOWN event triggered and NM recovery is enabled | Critical | . | Peng Zhang | Rohith Sharma K S | | [MAPREDUCE-6374](https://issues.apache.org/jira/browse/MAPREDUCE-6374) | Distributed Cache File visibility should check permission of full path | Major | . 
| Chang Li | Chang Li | | [YARN-3762](https://issues.apache.org/jira/browse/YARN-3762) | FairScheduler: CME on FSParentQueue#getQueueUserAclInfo | Critical | fairscheduler | Karthik Kambatla | Karthik Kambatla | @@ -1161,7 +1164,7 @@ | [MAPREDUCE-6350](https://issues.apache.org/jira/browse/MAPREDUCE-6350) | JobHistory doesn't support fully-functional search | Critical | jobhistoryserver | Siqi Li | Siqi Li | | [MAPREDUCE-6389](https://issues.apache.org/jira/browse/MAPREDUCE-6389) | Fix BaileyBorweinPlouffe CLI usage message | Trivial | . | Brahma Reddy Battula | Brahma Reddy Battula | | [HADOOP-12052](https://issues.apache.org/jira/browse/HADOOP-12052) | IPC client downgrades all exception types to IOE, breaks callers trying to use them | Critical | . | Steve Loughran | Brahma Reddy Battula | -| [YARN-3785](https://issues.apache.org/jira/browse/YARN-3785) | Support for Resource as an argument during submitApp call in MockRM test class | Minor | resourcemanager | Sunil G | Sunil G | +| [YARN-3785](https://issues.apache.org/jira/browse/YARN-3785) | Support for Resource as an argument during submitApp call in MockRM test class | Minor | resourcemanager | Sunil Govindan | Sunil Govindan | | [HADOOP-12074](https://issues.apache.org/jira/browse/HADOOP-12074) | in Shell.java#runCommand() rethrow InterruptedException as InterruptedIOException | Minor | . | Lavkesh Lahngir | Lavkesh Lahngir | | [HDFS-8566](https://issues.apache.org/jira/browse/HDFS-8566) | HDFS documentation about debug commands wrongly identifies them as "hdfs dfs" commands | Major | documentation | Surendra Singh Lilhore | Surendra Singh Lilhore | | [HDFS-8583](https://issues.apache.org/jira/browse/HDFS-8583) | Document that NFS gateway does not work with rpcbind on SLES 11 | Major | documentation | Arpit Agarwal | Arpit Agarwal | @@ -1266,7 +1269,7 @@ | [HDFS-8749](https://issues.apache.org/jira/browse/HDFS-8749) | Fix findbugs warning in BlockManager.java | Minor | . | Akira Ajisaka | Brahma Reddy Battula | | [HDFS-2956](https://issues.apache.org/jira/browse/HDFS-2956) | calling fetchdt without a --renewer argument throws NPE | Major | security | Todd Lipcon | Vinayakumar B | | [HDFS-8751](https://issues.apache.org/jira/browse/HDFS-8751) | Remove setBlocks API from INodeFile and misc code cleanup | Major | namenode | Zhe Zhang | Zhe Zhang | -| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil G | Sunil G | +| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil Govindan | Sunil Govindan | | [YARN-3917](https://issues.apache.org/jira/browse/YARN-3917) | getResourceCalculatorPlugin for the default should intercept all exceptions | Major | . | Gera Shegalov | Gera Shegalov | | [YARN-3894](https://issues.apache.org/jira/browse/YARN-3894) | RM startup should fail for wrong CS xml NodeLabel capacity configuration | Critical | capacityscheduler | Bibin A Chundatt | Bibin A Chundatt | | [MAPREDUCE-6421](https://issues.apache.org/jira/browse/MAPREDUCE-6421) | Fix findbugs warning in RMContainerAllocator.reduceNodeLabelExpression | Major | . | Ray Chiang | Brahma Reddy Battula | @@ -1297,7 +1300,7 @@ | [YARN-3954](https://issues.apache.org/jira/browse/YARN-3954) | TestYarnConfigurationFields#testCompareConfigurationClassAgainstXml fails in trunk | Major | . 
| Varun Saxena | Varun Saxena | | [YARN-2019](https://issues.apache.org/jira/browse/YARN-2019) | Retrospect on decision of making RM crashed if any exception throw in ZKRMStateStore | Critical | . | Junping Du | Jian He | | [HDFS-8797](https://issues.apache.org/jira/browse/HDFS-8797) | WebHdfsFileSystem creates too many connections for pread | Major | webhdfs | Jing Zhao | Jing Zhao | -| [YARN-3941](https://issues.apache.org/jira/browse/YARN-3941) | Proportional Preemption policy should try to avoid sending duplicate PREEMPT\_CONTAINER event to scheduler | Major | capacityscheduler | Sunil G | Sunil G | +| [YARN-3941](https://issues.apache.org/jira/browse/YARN-3941) | Proportional Preemption policy should try to avoid sending duplicate PREEMPT\_CONTAINER event to scheduler | Major | capacityscheduler | Sunil Govindan | Sunil Govindan | | [YARN-3900](https://issues.apache.org/jira/browse/YARN-3900) | Protobuf layout of yarn\_security\_token causes errors in other protos that include it | Major | . | Anubhav Dhoot | Anubhav Dhoot | | [YARN-3845](https://issues.apache.org/jira/browse/YARN-3845) | Scheduler page does not render RGBA color combinations in IE11 | Minor | . | Jagadesh Kiran N | Mohammad Shahid Khan | | [HDFS-8806](https://issues.apache.org/jira/browse/HDFS-8806) | Inconsistent metrics: number of missing blocks with replication factor 1 not properly cleared | Major | . | Zhe Zhang | Zhe Zhang | @@ -1418,13 +1421,13 @@ | [MAPREDUCE-6481](https://issues.apache.org/jira/browse/MAPREDUCE-6481) | LineRecordReader may give incomplete record and wrong position/key information for uncompressed input sometimes. | Critical | mrv2 | zhihai xu | zhihai xu | | [MAPREDUCE-5002](https://issues.apache.org/jira/browse/MAPREDUCE-5002) | AM could potentially allocate a reduce container to a map attempt | Major | mr-am | Jason Lowe | Chang Li | | [MAPREDUCE-5982](https://issues.apache.org/jira/browse/MAPREDUCE-5982) | Task attempts that fail from the ASSIGNED state can disappear | Major | mr-am | Jason Lowe | Chang Li | -| [HADOOP-12386](https://issues.apache.org/jira/browse/HADOOP-12386) | RetryPolicies.RETRY\_FOREVER should be able to specify a retry interval | Major | . | Wangda Tan | Sunil G | +| [HADOOP-12386](https://issues.apache.org/jira/browse/HADOOP-12386) | RetryPolicies.RETRY\_FOREVER should be able to specify a retry interval | Major | . | Wangda Tan | Sunil Govindan | | [YARN-3697](https://issues.apache.org/jira/browse/YARN-3697) | FairScheduler: ContinuousSchedulingThread can fail to shutdown | Critical | fairscheduler | zhihai xu | zhihai xu | | [HDFS-6955](https://issues.apache.org/jira/browse/HDFS-6955) | DN should reserve disk space for a full block when creating tmp files | Major | datanode | Arpit Agarwal | Kanaka Kumar Avvaru | | [HDFS-5802](https://issues.apache.org/jira/browse/HDFS-5802) | NameNode does not check for inode type before traversing down a path | Trivial | namenode | Harsh J | Xiao Chen | | [MAPREDUCE-6460](https://issues.apache.org/jira/browse/MAPREDUCE-6460) | TestRMContainerAllocator.testAttemptNotFoundCausesRMCommunicatorException fails | Major | test | zhihai xu | zhihai xu | | [YARN-4167](https://issues.apache.org/jira/browse/YARN-4167) | NPE on RMActiveServices#serviceStop when store is null | Minor | . | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-4113](https://issues.apache.org/jira/browse/YARN-4113) | RM should respect retry-interval when uses RetryPolicies.RETRY\_FOREVER | Critical | . 
| Wangda Tan | Sunil G | +| [YARN-4113](https://issues.apache.org/jira/browse/YARN-4113) | RM should respect retry-interval when uses RetryPolicies.RETRY\_FOREVER | Critical | . | Wangda Tan | Sunil Govindan | | [YARN-4188](https://issues.apache.org/jira/browse/YARN-4188) | MoveApplicationAcrossQueuesResponse should be an abstract class | Minor | resourcemanager | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [HDFS-9043](https://issues.apache.org/jira/browse/HDFS-9043) | Doc updation for commands in HDFS Federation | Minor | documentation | J.Andreina | J.Andreina | | [HDFS-9013](https://issues.apache.org/jira/browse/HDFS-9013) | Deprecate NameNodeMXBean#getNNStarted in branch2 and remove from trunk | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | @@ -1432,7 +1435,7 @@ | [HADOOP-12438](https://issues.apache.org/jira/browse/HADOOP-12438) | Reset RawLocalFileSystem.useDeprecatedFileStatus in TestLocalFileSystem | Trivial | test | Chris Nauroth | Chris Nauroth | | [HDFS-9128](https://issues.apache.org/jira/browse/HDFS-9128) | TestWebHdfsFileContextMainOperations and TestSWebHdfsFileContextMainOperations fail due to invalid HDFS path on Windows. | Trivial | test | Chris Nauroth | Chris Nauroth | | [YARN-4152](https://issues.apache.org/jira/browse/YARN-4152) | NM crash with NPE when LogAggregationService#stopContainer called for absent container | Critical | log-aggregation, nodemanager | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-4044](https://issues.apache.org/jira/browse/YARN-4044) | Running applications information changes such as movequeue is not published to TimeLine server | Critical | resourcemanager, timelineserver | Sunil G | Sunil G | +| [YARN-4044](https://issues.apache.org/jira/browse/YARN-4044) | Running applications information changes such as movequeue is not published to TimeLine server | Critical | resourcemanager, timelineserver | Sunil Govindan | Sunil Govindan | | [HDFS-9076](https://issues.apache.org/jira/browse/HDFS-9076) | Log full path instead of inodeId in DFSClient#closeAllFilesBeingWritten() | Major | hdfs-client | Surendra Singh Lilhore | Surendra Singh Lilhore | | [MAPREDUCE-6484](https://issues.apache.org/jira/browse/MAPREDUCE-6484) | Yarn Client uses local address instead of RM address as token renewer in a secure cluster when RM HA is enabled. | Major | client, security | zhihai xu | zhihai xu | | [HADOOP-12437](https://issues.apache.org/jira/browse/HADOOP-12437) | Allow SecurityUtil to lookup alternate hostnames | Major | net, security | Arpit Agarwal | Arpit Agarwal | @@ -1523,7 +1526,7 @@ | [HADOOP-12484](https://issues.apache.org/jira/browse/HADOOP-12484) | Single File Rename Throws Incorrectly In Potential Race Condition Scenarios | Major | tools | Gaurav Kanade | Gaurav Kanade | | [HDFS-9286](https://issues.apache.org/jira/browse/HDFS-9286) | HttpFs does not parse ACL syntax correctly for operation REMOVEACLENTRIES | Major | fs | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4009](https://issues.apache.org/jira/browse/YARN-4009) | CORS support for ResourceManager REST API | Major | . 
| Prakash Ramachandran | Varun Vasudev | -| [YARN-4041](https://issues.apache.org/jira/browse/YARN-4041) | Slow delegation token renewal can severely prolong RM recovery | Major | resourcemanager | Jason Lowe | Sunil G | +| [YARN-4041](https://issues.apache.org/jira/browse/YARN-4041) | Slow delegation token renewal can severely prolong RM recovery | Major | resourcemanager | Jason Lowe | Sunil Govindan | | [HDFS-9290](https://issues.apache.org/jira/browse/HDFS-9290) | DFSClient#callAppend() is not backward compatible for slightly older NameNodes | Blocker | . | Tony Wu | Tony Wu | | [HDFS-9301](https://issues.apache.org/jira/browse/HDFS-9301) | HDFS clients can't construct HdfsConfiguration instances | Major | . | Steve Loughran | Mingliang Liu | | [YARN-4294](https://issues.apache.org/jira/browse/YARN-4294) | [JDK8] Fix javadoc errors caused by wrong reference and illegal tag | Blocker | build, documentation | Akira Ajisaka | Akira Ajisaka | @@ -1549,7 +1552,7 @@ | [HDFS-9044](https://issues.apache.org/jira/browse/HDFS-9044) | Give Priority to FavouredNodes , before selecting nodes from FavouredNode's Node Group | Major | . | J.Andreina | J.Andreina | | [YARN-4130](https://issues.apache.org/jira/browse/YARN-4130) | Duplicate declaration of ApplicationId in RMAppManager#submitApplication method | Trivial | resourcemanager | Kai Sasaki | Kai Sasaki | | [YARN-4288](https://issues.apache.org/jira/browse/YARN-4288) | NodeManager restart should keep retrying to register to RM while connection exception happens during RM failed over. | Critical | nodemanager | Junping Du | Junping Du | -| [MAPREDUCE-6515](https://issues.apache.org/jira/browse/MAPREDUCE-6515) | Update Application priority in AM side from AM-RM heartbeat | Major | applicationmaster | Sunil G | Sunil G | +| [MAPREDUCE-6515](https://issues.apache.org/jira/browse/MAPREDUCE-6515) | Update Application priority in AM side from AM-RM heartbeat | Major | applicationmaster | Sunil Govindan | Sunil Govindan | | [HDFS-9332](https://issues.apache.org/jira/browse/HDFS-9332) | Fix Precondition failures from NameNodeEditLogRoller while saving namespace | Major | . | Andrew Wang | Andrew Wang | | [YARN-4313](https://issues.apache.org/jira/browse/YARN-4313) | Race condition in MiniMRYarnCluster when getting history server address | Major | . | Jian He | Jian He | | [YARN-4312](https://issues.apache.org/jira/browse/YARN-4312) | TestSubmitApplicationWithRMHA fails on branch-2.7 and branch-2.6 as some of the test cases time out | Major | . | Varun Saxena | Varun Saxena | @@ -1619,7 +1622,7 @@ | [HADOOP-12577](https://issues.apache.org/jira/browse/HADOOP-12577) | Bump up commons-collections version to 3.2.2 to address a security flaw | Blocker | build, security | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4344](https://issues.apache.org/jira/browse/YARN-4344) | NMs reconnecting with changed capabilities can lead to wrong cluster resource calculations | Critical | resourcemanager | Varun Vasudev | Varun Vasudev | | [HADOOP-9822](https://issues.apache.org/jira/browse/HADOOP-9822) | create constant MAX\_CAPACITY in RetryCache rather than hard-coding 16 in RetryCache constructor | Minor | . | Tsuyoshi Ozawa | Tsuyoshi Ozawa | -| [YARN-4298](https://issues.apache.org/jira/browse/YARN-4298) | Fix findbugs warnings in hadoop-yarn-common | Minor | . | Varun Saxena | Sunil G | +| [YARN-4298](https://issues.apache.org/jira/browse/YARN-4298) | Fix findbugs warnings in hadoop-yarn-common | Minor | . 
| Varun Saxena | Sunil Govindan | | [YARN-4387](https://issues.apache.org/jira/browse/YARN-4387) | Fix typo in FairScheduler log message | Minor | fairscheduler | Xin Wang | Xin Wang | | [HDFS-6101](https://issues.apache.org/jira/browse/HDFS-6101) | TestReplaceDatanodeOnFailure fails occasionally | Major | test | Arpit Agarwal | Wei-Chiu Chuang | | [HDFS-8855](https://issues.apache.org/jira/browse/HDFS-8855) | Webhdfs client leaks active NameNode connections | Major | webhdfs | Bob Hansen | Xiaobing Zhou | @@ -1661,7 +1664,7 @@ | [HDFS-9519](https://issues.apache.org/jira/browse/HDFS-9519) | Some coding improvement in SecondaryNameNode#main | Major | namenode | Yongjun Zhang | Xiao Chen | | [HDFS-9514](https://issues.apache.org/jira/browse/HDFS-9514) | TestDistributedFileSystem.testDFSClientPeerWriteTimeout failing; exception being swallowed | Major | hdfs-client, test | Steve Loughran | Wei-Chiu Chuang | | [HDFS-9535](https://issues.apache.org/jira/browse/HDFS-9535) | Newly completed blocks in IBR should not be considered under-replicated too quickly | Major | namenode | Jing Zhao | Mingliang Liu | -| [YARN-4418](https://issues.apache.org/jira/browse/YARN-4418) | AM Resource Limit per partition can be updated to ResourceUsage as well | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4418](https://issues.apache.org/jira/browse/YARN-4418) | AM Resource Limit per partition can be updated to ResourceUsage as well | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-4403](https://issues.apache.org/jira/browse/YARN-4403) | (AM/NM/Container)LivelinessMonitor should use monotonic time when calculating period | Critical | . | Junping Du | Junping Du | | [YARN-4402](https://issues.apache.org/jira/browse/YARN-4402) | TestNodeManagerShutdown And TestNodeManagerResync fails with bind exception | Major | test | Brahma Reddy Battula | Brahma Reddy Battula | | [YARN-4439](https://issues.apache.org/jira/browse/YARN-4439) | Clarify NMContainerStatus#toString method. | Major | . | Jian He | Jian He | @@ -1687,6 +1690,7 @@ | [YARN-4477](https://issues.apache.org/jira/browse/YARN-4477) | FairScheduler: Handle condition which can result in an infinite loop in attemptScheduling. | Major | fairscheduler | Tao Jie | Tao Jie | | [HDFS-9589](https://issues.apache.org/jira/browse/HDFS-9589) | Block files which have been hardlinked should be duplicated before the DataNode appends to the them | Major | datanode | Colin P. McCabe | Colin P. McCabe | | [HDFS-9458](https://issues.apache.org/jira/browse/HDFS-9458) | TestBackupNode always binds to port 50070, which can cause bind failures. | Major | test | Chris Nauroth | Xiao Chen | +| [HDFS-9034](https://issues.apache.org/jira/browse/HDFS-9034) | "StorageTypeStats" Metric should not count failed storage. | Major | namenode | Archana T | Surendra Singh Lilhore | | [YARN-4109](https://issues.apache.org/jira/browse/YARN-4109) | Exception on RM scheduler page loading with labels | Minor | . 
| Bibin A Chundatt | Mohammad Shahid Khan | | [MAPREDUCE-6419](https://issues.apache.org/jira/browse/MAPREDUCE-6419) | JobHistoryServer doesn't sort properly based on Job ID when Job id's exceed 9999 | Major | webapps | Devaraj K | Mohammad Shahid Khan | | [HDFS-9597](https://issues.apache.org/jira/browse/HDFS-9597) | BaseReplicationPolicyTest should update data node stats after adding a data node | Minor | datanode, test | Wei-Chiu Chuang | Wei-Chiu Chuang | @@ -1736,7 +1740,7 @@ | [HADOOP-12706](https://issues.apache.org/jira/browse/HADOOP-12706) | TestLocalFsFCStatistics#testStatisticsThreadLocalDataCleanUp times out occasionally | Major | test | Jason Lowe | Sangjin Lee | | [YARN-4581](https://issues.apache.org/jira/browse/YARN-4581) | AHS writer thread leak makes RM crash while RM is recovering | Major | resourcemanager | sandflee | sandflee | | [MAPREDUCE-6554](https://issues.apache.org/jira/browse/MAPREDUCE-6554) | MRAppMaster servicestart failing with NPE in MRAppMaster#parsePreviousJobHistory | Critical | . | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-4389](https://issues.apache.org/jira/browse/YARN-4389) | "yarn.am.blacklisting.enabled" and "yarn.am.blacklisting.disable-failure-threshold" should be app specific rather than a setting for whole YARN cluster | Critical | applications | Junping Du | Sunil G | +| [YARN-4389](https://issues.apache.org/jira/browse/YARN-4389) | "yarn.am.blacklisting.enabled" and "yarn.am.blacklisting.disable-failure-threshold" should be app specific rather than a setting for whole YARN cluster | Critical | applications | Junping Du | Sunil Govindan | | [HDFS-9612](https://issues.apache.org/jira/browse/HDFS-9612) | DistCp worker threads are not terminated after jobs are done. | Major | distcp | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HADOOP-12712](https://issues.apache.org/jira/browse/HADOOP-12712) | Fix some cmake plugin and native build warnings | Minor | native | Colin P. McCabe | Colin P. McCabe | | [YARN-4538](https://issues.apache.org/jira/browse/YARN-4538) | QueueMetrics pending cores and memory metrics wrong | Major | . | Bibin A Chundatt | Bibin A Chundatt | @@ -1894,7 +1898,7 @@ | [HDFS-10197](https://issues.apache.org/jira/browse/HDFS-10197) | TestFsDatasetCache failing intermittently due to timeout | Major | test | Yiqun Lin | Yiqun Lin | | [HDFS-9478](https://issues.apache.org/jira/browse/HDFS-9478) | Reason for failing ipc.FairCallQueue contruction should be thrown | Minor | . | Archana T | Ajith S | | [HDFS-10228](https://issues.apache.org/jira/browse/HDFS-10228) | TestHDFSCLI fails | Major | test | Akira Ajisaka | Akira Ajisaka | -| [YARN-4865](https://issues.apache.org/jira/browse/YARN-4865) | Track Reserved resources in ResourceUsage and QueueCapacities | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4865](https://issues.apache.org/jira/browse/YARN-4865) | Track Reserved resources in ResourceUsage and QueueCapacities | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HADOOP-12972](https://issues.apache.org/jira/browse/HADOOP-12972) | Lz4Compressor#getLibraryName returns the wrong version number | Trivial | native | John Zhuge | Colin P. McCabe | | [HDFS-5177](https://issues.apache.org/jira/browse/HDFS-5177) | blocksScheduled count should be decremented for abandoned blocks | Major | namenode | Vinayakumar B | Vinayakumar B | | [HDFS-10223](https://issues.apache.org/jira/browse/HDFS-10223) | peerFromSocketAndKey performs SASL exchange before setting connection timeouts | Major | . | Colin P. McCabe | Colin P. 
McCabe | @@ -1914,7 +1918,7 @@ | [YARN-4915](https://issues.apache.org/jira/browse/YARN-4915) | Fix typo in YARN Secure Containers documentation | Trivial | documentation, yarn | Takashi Ohnishi | Takashi Ohnishi | | [YARN-4917](https://issues.apache.org/jira/browse/YARN-4917) | Fix typos in documentation of Capacity Scheduler. | Minor | documentation | Takashi Ohnishi | Takashi Ohnishi | | [HDFS-10261](https://issues.apache.org/jira/browse/HDFS-10261) | TestBookKeeperHACheckpoints doesn't handle ephemeral HTTP ports | Major | . | Eric Badger | Eric Badger | -| [YARN-4699](https://issues.apache.org/jira/browse/YARN-4699) | Scheduler UI and REST o/p is not in sync when -replaceLabelsOnNode is used to change label of a node | Critical | capacity scheduler | Sunil G | Sunil G | +| [YARN-4699](https://issues.apache.org/jira/browse/YARN-4699) | Scheduler UI and REST o/p is not in sync when -replaceLabelsOnNode is used to change label of a node | Critical | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HADOOP-12022](https://issues.apache.org/jira/browse/HADOOP-12022) | fix site -Pdocs -Pdist in hadoop-project-dist; cleanout remaining forrest bits | Blocker | build | Allen Wittenauer | Allen Wittenauer | | [MAPREDUCE-6670](https://issues.apache.org/jira/browse/MAPREDUCE-6670) | TestJobListCache#testEviction sometimes fails on Windows with timeout | Minor | test | Gergely Novák | Gergely Novák | | [HDFS-6520](https://issues.apache.org/jira/browse/HDFS-6520) | hdfs fsck -move passes invalid length value when creating BlockReader | Major | . | Shengjun Xin | Xiao Chen | @@ -1936,7 +1940,7 @@ | [HADOOP-13026](https://issues.apache.org/jira/browse/HADOOP-13026) | Should not wrap IOExceptions into a AuthenticationException in KerberosAuthenticator | Critical | . | Xuan Gong | Xuan Gong | | [YARN-4940](https://issues.apache.org/jira/browse/YARN-4940) | yarn node -list -all failed if RM start with decommissioned node | Major | . | sandflee | sandflee | | [YARN-4965](https://issues.apache.org/jira/browse/YARN-4965) | Distributed shell AM failed due to ClientHandlerException thrown by jersey | Critical | . | Sumana Sathish | Junping Du | -| [YARN-4934](https://issues.apache.org/jira/browse/YARN-4934) | Reserved Resource for QueueMetrics needs to be handled correctly in few cases | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-4934](https://issues.apache.org/jira/browse/YARN-4934) | Reserved Resource for QueueMetrics needs to be handled correctly in few cases | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HDFS-10291](https://issues.apache.org/jira/browse/HDFS-10291) | TestShortCircuitLocalRead failing | Major | test | Steve Loughran | Steve Loughran | | [HDFS-10275](https://issues.apache.org/jira/browse/HDFS-10275) | TestDataNodeMetrics failing intermittently due to TotalWriteTime counted incorrectly | Major | test | Yiqun Lin | Yiqun Lin | | [MAPREDUCE-6649](https://issues.apache.org/jira/browse/MAPREDUCE-6649) | getFailureInfo not returning any failure info | Major | . 
| Eric Badger | Eric Badger | @@ -1982,7 +1986,6 @@ | [HDFS-10324](https://issues.apache.org/jira/browse/HDFS-10324) | Trash directory in an encryption zone should be pre-created with correct permissions | Major | encryption | Wei-Chiu Chuang | Wei-Chiu Chuang | | [MAPREDUCE-6514](https://issues.apache.org/jira/browse/MAPREDUCE-6514) | Job hangs as ask is not updated after ramping down of all reducers | Blocker | applicationmaster | Varun Saxena | Varun Saxena | | [HDFS-2043](https://issues.apache.org/jira/browse/HDFS-2043) | TestHFlush failing intermittently | Major | test | Aaron T. Myers | Yiqun Lin | -| [HADOOP-12751](https://issues.apache.org/jira/browse/HADOOP-12751) | While using kerberos Hadoop incorrectly assumes names with '@' to be non-simple | Critical | security | Bolke de Bruin | Bolke de Bruin | | [MAPREDUCE-6689](https://issues.apache.org/jira/browse/MAPREDUCE-6689) | MapReduce job can infinitely increase number of reducer resource requests | Blocker | . | Wangda Tan | Wangda Tan | | [YARN-4747](https://issues.apache.org/jira/browse/YARN-4747) | AHS error 500 due to NPE when container start event is missing | Major | timelineserver | Jason Lowe | Varun Saxena | | [HDFS-9939](https://issues.apache.org/jira/browse/HDFS-9939) | Increase DecompressorStream skip buffer size | Major | . | Yongjun Zhang | John Zhuge | @@ -2031,7 +2034,7 @@ | [YARN-3344](https://issues.apache.org/jira/browse/YARN-3344) | Fix warning - procfs stat file is not in the expected format | Major | . | Jon Bringhurst | Ravindra Kumar Naik | | [YARN-4459](https://issues.apache.org/jira/browse/YARN-4459) | container-executor should only kill process groups | Major | nodemanager | Jun Gong | Jun Gong | | [YARN-5166](https://issues.apache.org/jira/browse/YARN-5166) | javadoc:javadoc goal fails on hadoop-yarn-client | Major | . | Andras Bokor | Andras Bokor | -| [HDFS-10276](https://issues.apache.org/jira/browse/HDFS-10276) | HDFS should not expose path info that user has no permission to see. | Major | . | Kevin Cox | Yuanbo Liu | +| [HDFS-10276](https://issues.apache.org/jira/browse/HDFS-10276) | HDFS should not expose path info that user has no permission to see. | Major | fs, security | Kevin Cox | Yuanbo Liu | | [YARN-5132](https://issues.apache.org/jira/browse/YARN-5132) | Exclude generated protobuf sources from YARN Javadoc build | Critical | . | Subru Krishnan | Subru Krishnan | | [HADOOP-13132](https://issues.apache.org/jira/browse/HADOOP-13132) | Handle ClassCastException on AuthenticationException in LoadBalancingKMSClientProvider | Major | kms | Miklos Szurap | Wei-Chiu Chuang | | [HDFS-10415](https://issues.apache.org/jira/browse/HDFS-10415) | TestDistributedFileSystem#MyDistributedFileSystem attempts to set up statistics before initialize() is called | Major | test | Sangjin Lee | Mingliang Liu | @@ -2154,6 +2157,7 @@ | [HDFS-10731](https://issues.apache.org/jira/browse/HDFS-10731) | FSDirectory#verifyMaxDirItems does not log path name | Minor | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-5476](https://issues.apache.org/jira/browse/YARN-5476) | Not existed application reported as ACCEPTED state by YarnClientImpl | Critical | yarn | Yesha Vora | Junping Du | | [YARN-5491](https://issues.apache.org/jira/browse/YARN-5491) | Random Failure TestCapacityScheduler#testCSQueueBlocked | Major | test | Bibin A Chundatt | Bibin A Chundatt | +| [HDFS-9696](https://issues.apache.org/jira/browse/HDFS-9696) | Garbage snapshot records lingering forever | Critical | . 
| Kihwal Lee | Kihwal Lee | | [HADOOP-13333](https://issues.apache.org/jira/browse/HADOOP-13333) | testConf.xml ls comparators in wrong order | Trivial | fs | John Zhuge | Vrushali C | | [HADOOP-13470](https://issues.apache.org/jira/browse/HADOOP-13470) | GenericTestUtils$LogCapturer is flaky | Major | test, util | Mingliang Liu | Mingliang Liu | | [HADOOP-13494](https://issues.apache.org/jira/browse/HADOOP-13494) | ReconfigurableBase can log sensitive information | Major | security | Sean Mackrory | Sean Mackrory | @@ -2161,6 +2165,7 @@ | [HDFS-10549](https://issues.apache.org/jira/browse/HDFS-10549) | Correctly revoke file leases when closing files | Major | hdfs-client | Yiqun Lin | Yiqun Lin | | [HADOOP-13513](https://issues.apache.org/jira/browse/HADOOP-13513) | Java 1.7 support for org.apache.hadoop.fs.azure testcases | Minor | fs/azure | Tibor Kiss | Tibor Kiss | | [HADOOP-13512](https://issues.apache.org/jira/browse/HADOOP-13512) | ReloadingX509TrustManager should keep reloading in case of exception | Critical | security | Mingliang Liu | Mingliang Liu | +| [HDFS-10763](https://issues.apache.org/jira/browse/HDFS-10763) | Open files can leak permanently due to inconsistent lease update | Critical | . | Kihwal Lee | Kihwal Lee | | [YARN-4307](https://issues.apache.org/jira/browse/YARN-4307) | Display blacklisted nodes for AM container in the RM web UI | Major | resourcemanager, webapp | Naganarasimha G R | Naganarasimha G R | | [MAPREDUCE-6763](https://issues.apache.org/jira/browse/MAPREDUCE-6763) | Shuffle server listen queue is too small | Major | mrv2 | Jason Lowe | Jason Lowe | | [YARN-4837](https://issues.apache.org/jira/browse/YARN-4837) | User facing aspects of 'AM blacklisting' feature need fixing | Critical | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | @@ -2186,7 +2191,6 @@ | [MAPREDUCE-4784](https://issues.apache.org/jira/browse/MAPREDUCE-4784) | TestRecovery occasionally fails | Major | mrv2, test | Jason Lowe | Haibo Chen | | [HDFS-10760](https://issues.apache.org/jira/browse/HDFS-10760) | DataXceiver#run() should not log InvalidToken exception as an error | Major | . | Pan Yuxuan | Pan Yuxuan | | [HDFS-10729](https://issues.apache.org/jira/browse/HDFS-10729) | Improve log message for edit loading failures caused by FS limit checks. | Major | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | -| [YARN-5221](https://issues.apache.org/jira/browse/YARN-5221) | Expose UpdateResourceRequest API to allow AM to request for change in container properties | Major | . | Arun Suresh | Arun Suresh | | [HADOOP-13375](https://issues.apache.org/jira/browse/HADOOP-13375) | o.a.h.security.TestGroupsCaching.testBackgroundRefreshCounters seems flaky | Major | security, test | Mingliang Liu | Weiwei Yang | | [YARN-5555](https://issues.apache.org/jira/browse/YARN-5555) | Scheduler UI: "% of Queue" is inaccurate if leaf queue is hierarchically nested. | Minor | . | Eric Payne | Eric Payne | | [YARN-5549](https://issues.apache.org/jira/browse/YARN-5549) | AMLauncher#createAMContainerLaunchContext() should not log the command to be launched indiscriminately | Critical | resourcemanager | Daniel Templeton | Daniel Templeton | @@ -2222,6 +2226,7 @@ | [HDFS-10828](https://issues.apache.org/jira/browse/HDFS-10828) | Fix usage of FsDatasetImpl object lock in ReplicaMap | Blocker | . | Arpit Agarwal | Arpit Agarwal | | [YARN-5631](https://issues.apache.org/jira/browse/YARN-5631) | Missing refreshClusterMaxPriority usage in rmadmin help message | Minor | . 
| Kai Sasaki | Kai Sasaki | | [HDFS-9444](https://issues.apache.org/jira/browse/HDFS-9444) | Add utility to find set of available ephemeral ports to ServerSocketUtil | Major | . | Brahma Reddy Battula | Masatake Iwasaki | +| [HADOOP-11780](https://issues.apache.org/jira/browse/HADOOP-11780) | Prevent IPC reader thread death | Critical | ipc | Daryn Sharp | Daryn Sharp | | [HDFS-10824](https://issues.apache.org/jira/browse/HDFS-10824) | MiniDFSCluster#storageCapacities has no effects on real capacity | Major | . | Xiaobing Zhou | Xiaobing Zhou | | [HDFS-10914](https://issues.apache.org/jira/browse/HDFS-10914) | Move remnants of oah.hdfs.client to hadoop-hdfs-client | Critical | hdfs-client | Andrew Wang | Andrew Wang | | [MAPREDUCE-6771](https://issues.apache.org/jira/browse/MAPREDUCE-6771) | RMContainerAllocator sends container diagnostics event after corresponding completion event | Major | mrv2 | Haibo Chen | Haibo Chen | @@ -2235,7 +2240,7 @@ | [HDFS-10609](https://issues.apache.org/jira/browse/HDFS-10609) | Uncaught InvalidEncryptionKeyException during pipeline recovery may abort downstream applications | Major | encryption | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-10962](https://issues.apache.org/jira/browse/HDFS-10962) | TestRequestHedgingProxyProvider is flaky | Major | test | Andrew Wang | Andrew Wang | | [MAPREDUCE-6740](https://issues.apache.org/jira/browse/MAPREDUCE-6740) | Enforce mapreduce.task.timeout to be at least mapreduce.task.progress-report.interval | Minor | mr-am | Haibo Chen | Haibo Chen | -| [YARN-5101](https://issues.apache.org/jira/browse/YARN-5101) | YARN\_APPLICATION\_UPDATED event is parsed in ApplicationHistoryManagerOnTimelineStore#convertToApplicationReport with reversed order | Major | . | Xuan Gong | Sunil G | +| [YARN-5101](https://issues.apache.org/jira/browse/YARN-5101) | YARN\_APPLICATION\_UPDATED event is parsed in ApplicationHistoryManagerOnTimelineStore#convertToApplicationReport with reversed order | Major | . | Xuan Gong | Sunil Govindan | | [YARN-5659](https://issues.apache.org/jira/browse/YARN-5659) | getPathFromYarnURL should use standard methods | Major | . | Sergey Shelukhin | Sergey Shelukhin | | [HADOOP-12611](https://issues.apache.org/jira/browse/HADOOP-12611) | TestZKSignerSecretProvider#testMultipleInit occasionally fail | Major | . | Wei-Chiu Chuang | Eric Badger | | [HDFS-10797](https://issues.apache.org/jira/browse/HDFS-10797) | Disk usage summary of snapshots causes renamed blocks to get counted twice | Major | snapshots | Sean Mackrory | Sean Mackrory | @@ -2272,7 +2277,7 @@ | [YARN-4831](https://issues.apache.org/jira/browse/YARN-4831) | Recovered containers will be killed after NM stateful restart | Major | nodemanager | Siqi Li | Siqi Li | | [YARN-3432](https://issues.apache.org/jira/browse/YARN-3432) | Cluster metrics have wrong Total Memory when there is reserved memory on CS | Major | capacityscheduler, resourcemanager | Thomas Graves | Brahma Reddy Battula | | [HDFS-9500](https://issues.apache.org/jira/browse/HDFS-9500) | datanodesSoftwareVersions map may counting wrong when rolling upgrade | Major | . | Phil Yang | Erik Krogen | -| [MAPREDUCE-2631](https://issues.apache.org/jira/browse/MAPREDUCE-2631) | Potential resource leaks in BinaryProtocol$TeeOutputStream.java | Major | . | Ravi Teja Ch N V | Sunil G | +| [MAPREDUCE-2631](https://issues.apache.org/jira/browse/MAPREDUCE-2631) | Potential resource leaks in BinaryProtocol$TeeOutputStream.java | Major | . 
| Ravi Teja Ch N V | Sunil Govindan | | [HADOOP-13770](https://issues.apache.org/jira/browse/HADOOP-13770) | Shell.checkIsBashSupported swallowed an interrupted exception | Minor | util | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-5027](https://issues.apache.org/jira/browse/YARN-5027) | NM should clean up app log dirs after NM restart | Major | nodemanager | sandflee | sandflee | | [YARN-5767](https://issues.apache.org/jira/browse/YARN-5767) | Fix the order that resources are cleaned up from the local Public/Private caches | Major | . | Chris Trezzo | Chris Trezzo | @@ -2377,6 +2382,7 @@ | [HDFS-11431](https://issues.apache.org/jira/browse/HDFS-11431) | hadoop-hdfs-client JAR does not include ConfiguredFailoverProxyProvider | Blocker | build, hdfs-client | Steven Rand | Steven Rand | | [YARN-4925](https://issues.apache.org/jira/browse/YARN-4925) | ContainerRequest in AMRMClient, application should be able to specify nodes/racks together with nodeLabelExpression | Major | . | Bibin A Chundatt | Bibin A Chundatt | | [MAPREDUCE-6433](https://issues.apache.org/jira/browse/MAPREDUCE-6433) | launchTime may be negative | Major | jobhistoryserver, mrv2 | Allen Wittenauer | zhihai xu | +| [HADOOP-12751](https://issues.apache.org/jira/browse/HADOOP-12751) | While using kerberos Hadoop incorrectly assumes names with '@' to be non-simple | Critical | security | Bolke de Bruin | Bolke de Bruin | ### TESTS: @@ -2411,7 +2417,7 @@ | [YARN-2871](https://issues.apache.org/jira/browse/YARN-2871) | TestRMRestart#testRMRestartGetApplicationList sometime fails in trunk | Minor | . | Ted Yu | zhihai xu | | [YARN-3956](https://issues.apache.org/jira/browse/YARN-3956) | Fix TestNodeManagerHardwareUtils fails on Mac | Minor | nodemanager | Varun Vasudev | Varun Vasudev | | [HDFS-8834](https://issues.apache.org/jira/browse/HDFS-8834) | TestReplication#testReplicationWhenBlockCorruption is not valid after HDFS-6482 | Minor | datanode | Lei (Eddy) Xu | Lei (Eddy) Xu | -| [YARN-3992](https://issues.apache.org/jira/browse/YARN-3992) | TestApplicationPriority.testApplicationPriorityAllocation fails intermittently | Major | . | Zhijie Shen | Sunil G | +| [YARN-3992](https://issues.apache.org/jira/browse/YARN-3992) | TestApplicationPriority.testApplicationPriorityAllocation fails intermittently | Major | . | Zhijie Shen | Sunil Govindan | | [HDFS-2070](https://issues.apache.org/jira/browse/HDFS-2070) | Add more unit tests for FsShell getmerge | Major | test | XieXianshan | Daniel Templeton | | [MAPREDUCE-5045](https://issues.apache.org/jira/browse/MAPREDUCE-5045) | UtilTest#isCygwin method appears to be unused | Trivial | contrib/streaming, test | Chris Nauroth | Neelesh Srinivas Salian | | [YARN-3573](https://issues.apache.org/jira/browse/YARN-3573) | MiniMRYarnCluster constructor that starts the timeline server using a boolean should be marked deprecated | Major | timelineserver | Mit Desai | Brahma Reddy Battula | @@ -2449,7 +2455,7 @@ | [HADOOP-10980](https://issues.apache.org/jira/browse/HADOOP-10980) | TestActiveStandbyElector fails occasionally in trunk | Minor | . 
| Ted Yu | Eric Badger | | [HADOOP-13395](https://issues.apache.org/jira/browse/HADOOP-13395) | Enhance TestKMSAudit | Minor | kms | Xiao Chen | Xiao Chen | | [YARN-5492](https://issues.apache.org/jira/browse/YARN-5492) | TestSubmitApplicationWithRMHA is failing sporadically during precommit builds | Major | test | Jason Lowe | Vrushali C | -| [YARN-5544](https://issues.apache.org/jira/browse/YARN-5544) | TestNodeBlacklistingOnAMFailures fails on trunk | Major | test | Varun Saxena | Sunil G | +| [YARN-5544](https://issues.apache.org/jira/browse/YARN-5544) | TestNodeBlacklistingOnAMFailures fails on trunk | Major | test | Varun Saxena | Sunil Govindan | | [HDFS-9745](https://issues.apache.org/jira/browse/HDFS-9745) | TestSecureNNWithQJM#testSecureMode sometimes fails with timeouts | Minor | . | Xiao Chen | Xiao Chen | | [YARN-5389](https://issues.apache.org/jira/browse/YARN-5389) | TestYarnClient#testReservationDelete fails | Major | . | Rohith Sharma K S | Sean Po | | [YARN-5560](https://issues.apache.org/jira/browse/YARN-5560) | Clean up bad exception catching practices in TestYarnClient | Major | . | Sean Po | Sean Po | @@ -2499,7 +2505,7 @@ | [YARN-1402](https://issues.apache.org/jira/browse/YARN-1402) | Related Web UI, CLI changes on exposing client API to check log aggregation status | Major | . | Xuan Gong | Xuan Gong | | [YARN-2696](https://issues.apache.org/jira/browse/YARN-2696) | Queue sorting in CapacityScheduler should consider node label | Major | capacityscheduler, resourcemanager | Wangda Tan | Wangda Tan | | [YARN-3487](https://issues.apache.org/jira/browse/YARN-3487) | CapacityScheduler scheduler lock obtained unnecessarily when calling getQueue | Critical | capacityscheduler | Jason Lowe | Jason Lowe | -| [YARN-3136](https://issues.apache.org/jira/browse/YARN-3136) | getTransferredContainers can be a bottleneck during AM registration | Major | scheduler | Jason Lowe | Sunil G | +| [YARN-3136](https://issues.apache.org/jira/browse/YARN-3136) | getTransferredContainers can be a bottleneck during AM registration | Major | scheduler | Jason Lowe | Sunil Govindan | | [HDFS-8169](https://issues.apache.org/jira/browse/HDFS-8169) | Move LocatedBlocks and related classes to hdfs-client | Major | build, hdfs-client | Haohui Mai | Haohui Mai | | [YARN-3463](https://issues.apache.org/jira/browse/YARN-3463) | Integrate OrderingPolicy Framework with CapacityScheduler | Major | capacityscheduler | Craig Welch | Craig Welch | | [HDFS-8185](https://issues.apache.org/jira/browse/HDFS-8185) | Separate client related routines in HAUtil into a new class | Major | build, hdfs-client | Haohui Mai | Haohui Mai | @@ -2526,19 +2532,19 @@ | [YARN-644](https://issues.apache.org/jira/browse/YARN-644) | Basic null check is not performed on passed in arguments before using them in ContainerManagerImpl.startContainer | Minor | nodemanager | Omkar Vinit Joshi | Varun Saxena | | [YARN-3593](https://issues.apache.org/jira/browse/YARN-3593) | Add label-type and Improve "DEFAULT\_PARTITION" in Node Labels Page | Major | webapp | Naganarasimha G R | Naganarasimha G R | | [YARN-2331](https://issues.apache.org/jira/browse/YARN-2331) | Distinguish shutdown during supervision vs. 
shutdown for rolling upgrade | Major | nodemanager | Jason Lowe | Jason Lowe | -| [YARN-3579](https://issues.apache.org/jira/browse/YARN-3579) | CommonNodeLabelsManager should support NodeLabel instead of string label name when getting node-to-label/label-to-label mappings | Minor | resourcemanager | Sunil G | Sunil G | +| [YARN-3579](https://issues.apache.org/jira/browse/YARN-3579) | CommonNodeLabelsManager should support NodeLabel instead of string label name when getting node-to-label/label-to-label mappings | Minor | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-3505](https://issues.apache.org/jira/browse/YARN-3505) | Node's Log Aggregation Report with SUCCEED should not cached in RMApps | Critical | log-aggregation | Junping Du | Xuan Gong | | [HDFS-8403](https://issues.apache.org/jira/browse/HDFS-8403) | Eliminate retries in TestFileCreation#testOverwriteOpenForWrite | Major | test | Arpit Agarwal | Arpit Agarwal | | [HDFS-8157](https://issues.apache.org/jira/browse/HDFS-8157) | Writes to RAM DISK reserve locked memory for block files | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [YARN-3541](https://issues.apache.org/jira/browse/YARN-3541) | Add version info on timeline service / generic history web UI and REST API | Major | timelineserver | Zhijie Shen | Zhijie Shen | | [YARN-3565](https://issues.apache.org/jira/browse/YARN-3565) | NodeHeartbeatRequest/RegisterNodeManagerRequest should use NodeLabel object instead of String | Blocker | api, client, resourcemanager | Wangda Tan | Naganarasimha G R | -| [YARN-3583](https://issues.apache.org/jira/browse/YARN-3583) | Support of NodeLabel object instead of plain String in YarnClient side. | Major | client | Sunil G | Sunil G | +| [YARN-3583](https://issues.apache.org/jira/browse/YARN-3583) | Support of NodeLabel object instead of plain String in YarnClient side. | Major | client | Sunil Govindan | Sunil Govindan | | [YARN-3609](https://issues.apache.org/jira/browse/YARN-3609) | Move load labels from storage from serviceInit to serviceStart to make it works with RM HA case. | Major | resourcemanager | Wangda Tan | Wangda Tan | | [YARN-3684](https://issues.apache.org/jira/browse/YARN-3684) | Change ContainerExecutor's primary lifecycle methods to use a more extensible mechanism for passing information. 
| Major | yarn | Sidharta Seethana | Sidharta Seethana | | [HDFS-8454](https://issues.apache.org/jira/browse/HDFS-8454) | Remove unnecessary throttling in TestDatanodeDeath | Major | test | Arpit Agarwal | Arpit Agarwal | | [YARN-3632](https://issues.apache.org/jira/browse/YARN-3632) | Ordering policy should be allowed to reorder an application when demand changes | Major | capacityscheduler | Craig Welch | Craig Welch | -| [YARN-3686](https://issues.apache.org/jira/browse/YARN-3686) | CapacityScheduler should trim default\_node\_label\_expression | Critical | api, client, resourcemanager | Wangda Tan | Sunil G | -| [YARN-3647](https://issues.apache.org/jira/browse/YARN-3647) | RMWebServices api's should use updated api from CommonNodeLabelsManager to get NodeLabel object | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-3686](https://issues.apache.org/jira/browse/YARN-3686) | CapacityScheduler should trim default\_node\_label\_expression | Critical | api, client, resourcemanager | Wangda Tan | Sunil Govindan | +| [YARN-3647](https://issues.apache.org/jira/browse/YARN-3647) | RMWebServices api's should use updated api from CommonNodeLabelsManager to get NodeLabel object | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-3581](https://issues.apache.org/jira/browse/YARN-3581) | Deprecate -directlyAccessNodeLabelStore in RMAdminCLI | Major | api, client, resourcemanager | Wangda Tan | Naganarasimha G R | | [HDFS-8482](https://issues.apache.org/jira/browse/HDFS-8482) | Rename BlockInfoContiguous to BlockInfo | Major | . | Zhe Zhang | Zhe Zhang | | [YARN-3700](https://issues.apache.org/jira/browse/YARN-3700) | ATS Web Performance issue at load time when large number of jobs | Major | resourcemanager, webapp, yarn | Xuan Gong | Xuan Gong | @@ -2557,7 +2563,7 @@ | [HDFS-7164](https://issues.apache.org/jira/browse/HDFS-7164) | Feature documentation for HDFS-6581 | Major | documentation | Arpit Agarwal | Arpit Agarwal | | [HDFS-8238](https://issues.apache.org/jira/browse/HDFS-8238) | Move ClientProtocol to the hdfs-client | Major | build | Haohui Mai | Takanobu Asanuma | | [HDFS-6249](https://issues.apache.org/jira/browse/HDFS-6249) | Output AclEntry in PBImageXmlWriter | Minor | tools | Akira Ajisaka | Surendra Singh Lilhore | -| [YARN-3521](https://issues.apache.org/jira/browse/YARN-3521) | Support return structured NodeLabel objects in REST API | Major | api, client, resourcemanager | Wangda Tan | Sunil G | +| [YARN-3521](https://issues.apache.org/jira/browse/YARN-3521) | Support return structured NodeLabel objects in REST API | Major | api, client, resourcemanager | Wangda Tan | Sunil Govindan | | [HDFS-8192](https://issues.apache.org/jira/browse/HDFS-8192) | Eviction should key off used locked memory instead of ram disk free space | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [HDFS-8651](https://issues.apache.org/jira/browse/HDFS-8651) | Make hadoop-hdfs-project Native code -Wall-clean | Major | native | Alan Burlison | Alan Burlison | | [HADOOP-12036](https://issues.apache.org/jira/browse/HADOOP-12036) | Consolidate all of the cmake extensions in one directory | Major | . | Allen Wittenauer | Alan Burlison | @@ -2582,7 +2588,7 @@ | [YARN-3844](https://issues.apache.org/jira/browse/YARN-3844) | Make hadoop-yarn-project Native code -Wall-clean | Major | build | Alan Burlison | Alan Burlison | | [HDFS-8794](https://issues.apache.org/jira/browse/HDFS-8794) | Improve CorruptReplicasMap#corruptReplicasMap | Major | . 
| Yi Liu | Yi Liu | | [HDFS-7483](https://issues.apache.org/jira/browse/HDFS-7483) | Display information per tier on the Namenode UI | Major | . | Benoy Antony | Benoy Antony | -| [YARN-2003](https://issues.apache.org/jira/browse/YARN-2003) | Support for Application priority : Changes in RM and Capacity Scheduler | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-2003](https://issues.apache.org/jira/browse/YARN-2003) | Support for Application priority : Changes in RM and Capacity Scheduler | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-8721](https://issues.apache.org/jira/browse/HDFS-8721) | Add a metric for number of encryption zones | Major | encryption | Rakesh R | Rakesh R | | [YARN-1645](https://issues.apache.org/jira/browse/YARN-1645) | ContainerManager implementation to support container resizing | Major | nodemanager | Wangda Tan | MENG DING | | [HDFS-8495](https://issues.apache.org/jira/browse/HDFS-8495) | Consolidate append() related implementation into a single class | Major | namenode | Rakesh R | Rakesh R | @@ -2604,12 +2610,12 @@ | [YARN-3736](https://issues.apache.org/jira/browse/YARN-3736) | Add RMStateStore apis to store and load accepted reservations for failover | Major | capacityscheduler, fairscheduler, resourcemanager | Subru Krishnan | Anubhav Dhoot | | [YARN-1643](https://issues.apache.org/jira/browse/YARN-1643) | Make ContainersMonitor can support change monitoring size of an allocated container in NM side | Major | nodemanager | Wangda Tan | MENG DING | | [YARN-3974](https://issues.apache.org/jira/browse/YARN-3974) | Refactor the reservation system test cases to use parameterized base test | Major | capacityscheduler, fairscheduler | Subru Krishnan | Subru Krishnan | -| [YARN-3948](https://issues.apache.org/jira/browse/YARN-3948) | Display Application Priority in RM Web UI | Major | webapp | Sunil G | Sunil G | -| [YARN-3873](https://issues.apache.org/jira/browse/YARN-3873) | pendingApplications in LeafQueue should also use OrderingPolicy | Major | capacityscheduler | Sunil G | Sunil G | -| [YARN-3887](https://issues.apache.org/jira/browse/YARN-3887) | Support for changing Application priority during runtime | Major | capacityscheduler, resourcemanager | Sunil G | Sunil G | +| [YARN-3948](https://issues.apache.org/jira/browse/YARN-3948) | Display Application Priority in RM Web UI | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-3873](https://issues.apache.org/jira/browse/YARN-3873) | pendingApplications in LeafQueue should also use OrderingPolicy | Major | capacityscheduler | Sunil Govindan | Sunil Govindan | +| [YARN-3887](https://issues.apache.org/jira/browse/YARN-3887) | Support for changing Application priority during runtime | Major | capacityscheduler, resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-8805](https://issues.apache.org/jira/browse/HDFS-8805) | Archival Storage: getStoragePolicy should not need superuser privilege | Major | balancer & mover, namenode | Hui Zheng | Brahma Reddy Battula | | [HDFS-8052](https://issues.apache.org/jira/browse/HDFS-8052) | Move WebHdfsFileSystem into hadoop-hdfs-client | Major | build | Haohui Mai | Haohui Mai | -| [YARN-4023](https://issues.apache.org/jira/browse/YARN-4023) | Publish Application Priority to TimelineServer | Major | timelineserver | Sunil G | Sunil G | +| [YARN-4023](https://issues.apache.org/jira/browse/YARN-4023) | Publish Application Priority to TimelineServer | Major | timelineserver | Sunil Govindan | Sunil Govindan | | 
[HDFS-8824](https://issues.apache.org/jira/browse/HDFS-8824) | Do not use small blocks for balancing the cluster | Major | balancer & mover | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [YARN-3534](https://issues.apache.org/jira/browse/YARN-3534) | Collect memory/cpu usage on the node | Major | nodemanager, resourcemanager | Íñigo Goiri | Íñigo Goiri | | [HDFS-8801](https://issues.apache.org/jira/browse/HDFS-8801) | Convert BlockInfoUnderConstruction as a feature | Major | namenode | Zhe Zhang | Jing Zhao | @@ -2629,7 +2635,7 @@ | [HDFS-8951](https://issues.apache.org/jira/browse/HDFS-8951) | Move the shortcircuit package to hdfs-client | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-8248](https://issues.apache.org/jira/browse/HDFS-8248) | Store INodeId instead of the INodeFile object in BlockInfoContiguous | Major | . | Haohui Mai | Haohui Mai | | [HDFS-8962](https://issues.apache.org/jira/browse/HDFS-8962) | Clean up checkstyle warnings in o.a.h.hdfs.DfsClientConf | Major | build | Mingliang Liu | Mingliang Liu | -| [YARN-3250](https://issues.apache.org/jira/browse/YARN-3250) | Support admin cli interface in for Application Priority | Major | resourcemanager | Sunil G | Rohith Sharma K S | +| [YARN-3250](https://issues.apache.org/jira/browse/YARN-3250) | Support admin cli interface in for Application Priority | Major | resourcemanager | Sunil Govindan | Rohith Sharma K S | | [HDFS-8925](https://issues.apache.org/jira/browse/HDFS-8925) | Move BlockReaderLocal to hdfs-client | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-8980](https://issues.apache.org/jira/browse/HDFS-8980) | Remove unnecessary block replacement in INodeFile | Major | namenode | Jing Zhao | Jing Zhao | | [HDFS-8990](https://issues.apache.org/jira/browse/HDFS-8990) | Move RemoteBlockReader to hdfs-client module | Major | build | Mingliang Liu | Mingliang Liu | @@ -2639,7 +2645,7 @@ | [YARN-3893](https://issues.apache.org/jira/browse/YARN-3893) | Both RM in active state when Admin#transitionToActive failure from refeshAll() | Critical | resourcemanager | Bibin A Chundatt | Bibin A Chundatt | | [HDFS-8890](https://issues.apache.org/jira/browse/HDFS-8890) | Allow admin to specify which blockpools the balancer should run on | Major | balancer & mover | Chris Trezzo | Chris Trezzo | | [YARN-4101](https://issues.apache.org/jira/browse/YARN-4101) | RM should print alert messages if Zookeeper and Resourcemanager gets connection issue | Critical | yarn | Yesha Vora | Xuan Gong | -| [YARN-3970](https://issues.apache.org/jira/browse/YARN-3970) | REST api support for Application Priority | Major | webapp | Sunil G | Naganarasimha G R | +| [YARN-3970](https://issues.apache.org/jira/browse/YARN-3970) | REST api support for Application Priority | Major | webapp | Sunil Govindan | Naganarasimha G R | | [HDFS-9002](https://issues.apache.org/jira/browse/HDFS-9002) | Move o.a.h.hdfs.net/\*Peer classes to hdfs-client | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-9012](https://issues.apache.org/jira/browse/HDFS-9012) | Move o.a.h.hdfs.protocol.datatransfer.PipelineAck class to hadoop-hdfs-client module | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-8984](https://issues.apache.org/jira/browse/HDFS-8984) | Move replication queues related methods in FSNamesystem to BlockManager | Major | . | Haohui Mai | Haohui Mai | @@ -2669,7 +2675,7 @@ | [HADOOP-11918](https://issues.apache.org/jira/browse/HADOOP-11918) | Listing an empty s3a root directory throws FileNotFound. 
| Minor | fs/s3 | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HDFS-8053](https://issues.apache.org/jira/browse/HDFS-8053) | Move DFSIn/OutputStream and related classes to hadoop-hdfs-client | Major | build | Haohui Mai | Mingliang Liu | | [HDFS-8740](https://issues.apache.org/jira/browse/HDFS-8740) | Move DistributedFileSystem to hadoop-hdfs-client | Major | build | Yi Liu | Mingliang Liu | -| [YARN-4141](https://issues.apache.org/jira/browse/YARN-4141) | Runtime Application Priority change should not throw exception for applications at finishing states | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4141](https://issues.apache.org/jira/browse/YARN-4141) | Runtime Application Priority change should not throw exception for applications at finishing states | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-9165](https://issues.apache.org/jira/browse/HDFS-9165) | Move entries in META-INF/services/o.a.h.fs.FileSystem to hdfs-client | Major | build | Haohui Mai | Mingliang Liu | | [HDFS-9166](https://issues.apache.org/jira/browse/HDFS-9166) | Move hftp / hsftp filesystem to hfds-client | Major | build | Haohui Mai | Mingliang Liu | | [HDFS-8971](https://issues.apache.org/jira/browse/HDFS-8971) | Remove guards when calling LOG.debug() and LOG.trace() in client package | Major | build | Mingliang Liu | Mingliang Liu | @@ -2691,7 +2697,7 @@ | [HDFS-9157](https://issues.apache.org/jira/browse/HDFS-9157) | [OEV and OIV] : Unnecessary parsing for mandatory arguements if "-h" option is specified as the only option | Major | . | nijel | nijel | | [HADOOP-12475](https://issues.apache.org/jira/browse/HADOOP-12475) | Replace guava Cache with ConcurrentHashMap for caching Connection in ipc Client | Major | conf, io, ipc | Walter Su | Walter Su | | [YARN-4162](https://issues.apache.org/jira/browse/YARN-4162) | CapacityScheduler: Add resource usage by partition and queue capacity by partition to REST API | Major | api, client, resourcemanager | Naganarasimha G R | Naganarasimha G R | -| [YARN-4170](https://issues.apache.org/jira/browse/YARN-4170) | AM need to be notified with priority in AllocateResponse | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4170](https://issues.apache.org/jira/browse/YARN-4170) | AM need to be notified with priority in AllocateResponse | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-2556](https://issues.apache.org/jira/browse/YARN-2556) | Tool to measure the performance of the timeline server | Major | timelineserver | Jonathan Eagles | Chang Li | | [YARN-4262](https://issues.apache.org/jira/browse/YARN-4262) | Allow whitelisted users to run privileged docker containers. | Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-4267](https://issues.apache.org/jira/browse/YARN-4267) | Add additional logging to container launch implementations in container-executor | Major | yarn | Sidharta Seethana | Sidharta Seethana | @@ -2703,7 +2709,7 @@ | [YARN-3724](https://issues.apache.org/jira/browse/YARN-3724) | Use POSIX nftw(3) instead of fts(3) | Major | . 
| Malcolm Kavalsky | Alan Burlison | | [YARN-2729](https://issues.apache.org/jira/browse/YARN-2729) | Support script based NodeLabelsProvider Interface in Distributed Node Label Configuration Setup | Major | nodemanager | Naganarasimha G R | Naganarasimha G R | | [HDFS-9304](https://issues.apache.org/jira/browse/HDFS-9304) | Add HdfsClientConfigKeys class to TestHdfsConfigFields#configurationClasses | Major | build | Mingliang Liu | Mingliang Liu | -| [YARN-3216](https://issues.apache.org/jira/browse/YARN-3216) | Max-AM-Resource-Percentage should respect node labels | Critical | resourcemanager | Wangda Tan | Sunil G | +| [YARN-3216](https://issues.apache.org/jira/browse/YARN-3216) | Max-AM-Resource-Percentage should respect node labels | Critical | resourcemanager | Wangda Tan | Sunil Govindan | | [HADOOP-12457](https://issues.apache.org/jira/browse/HADOOP-12457) | [JDK8] Fix a failure of compiling common by javadoc | Major | . | Tsuyoshi Ozawa | Akira Ajisaka | | [HDFS-9168](https://issues.apache.org/jira/browse/HDFS-9168) | Move client side unit test to hadoop-hdfs-client | Major | build | Haohui Mai | Haohui Mai | | [HDFS-9343](https://issues.apache.org/jira/browse/HDFS-9343) | Empty caller context considered invalid | Major | . | Mingliang Liu | Mingliang Liu | @@ -2723,7 +2729,7 @@ | [HADOOP-11954](https://issues.apache.org/jira/browse/HADOOP-11954) | Solaris does not support RLIMIT\_MEMLOCK as in Linux | Major | . | Malcolm Kavalsky | Alan Burlison | | [YARN-4384](https://issues.apache.org/jira/browse/YARN-4384) | updateNodeResource CLI should not accept negative values for resource | Major | graceful, resourcemanager | Sushmitha Sreenivasan | Junping Du | | [HDFS-9438](https://issues.apache.org/jira/browse/HDFS-9438) | TestPipelinesFailover assumes Linux ifconfig | Minor | test | Alan Burlison | John Zhuge | -| [YARN-4292](https://issues.apache.org/jira/browse/YARN-4292) | ResourceUtilization should be a part of NodeInfo REST API | Major | . | Wangda Tan | Sunil G | +| [YARN-4292](https://issues.apache.org/jira/browse/YARN-4292) | ResourceUtilization should be a part of NodeInfo REST API | Major | . 
| Wangda Tan | Sunil Govindan | | [HDFS-9436](https://issues.apache.org/jira/browse/HDFS-9436) | Make NNThroughputBenchmark$BlockReportStats run with 10 datanodes by default | Minor | test | Mingliang Liu | Mingliang Liu | | [HDFS-9484](https://issues.apache.org/jira/browse/HDFS-9484) | NNThroughputBenchmark$BlockReportStats should not send empty block reports | Major | test | Mingliang Liu | Mingliang Liu | | [YARN-4405](https://issues.apache.org/jira/browse/YARN-4405) | Support node label store in non-appendable file system | Major | api, client, resourcemanager | Wangda Tan | Wangda Tan | @@ -2732,9 +2738,9 @@ | [YARN-4358](https://issues.apache.org/jira/browse/YARN-4358) | Improve relationship between SharingPolicy and ReservationAgent | Major | capacityscheduler, fairscheduler, resourcemanager | Carlo Curino | Carlo Curino | | [YARN-3946](https://issues.apache.org/jira/browse/YARN-3946) | Update exact reason as to why a submitted app is in ACCEPTED state to app's diagnostic message | Major | capacity scheduler, resourcemanager | Sumit Nigam | Naganarasimha G R | | [YARN-4309](https://issues.apache.org/jira/browse/YARN-4309) | Add container launch related debug information to container logs when a container fails | Major | nodemanager | Varun Vasudev | Varun Vasudev | -| [YARN-4293](https://issues.apache.org/jira/browse/YARN-4293) | ResourceUtilization should be a part of yarn node CLI | Major | . | Wangda Tan | Sunil G | +| [YARN-4293](https://issues.apache.org/jira/browse/YARN-4293) | ResourceUtilization should be a part of yarn node CLI | Major | . | Wangda Tan | Sunil Govindan | | [YARN-4416](https://issues.apache.org/jira/browse/YARN-4416) | Deadlock due to synchronised get Methods in AbstractCSQueue | Minor | capacity scheduler, resourcemanager | Naganarasimha G R | Naganarasimha G R | -| [YARN-3226](https://issues.apache.org/jira/browse/YARN-3226) | UI changes for decommissioning node | Major | graceful | Junping Du | Sunil G | +| [YARN-3226](https://issues.apache.org/jira/browse/YARN-3226) | UI changes for decommissioning node | Major | graceful | Junping Du | Sunil Govindan | | [YARN-4164](https://issues.apache.org/jira/browse/YARN-4164) | Retrospect update ApplicationPriority API return type | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | | [YARN-4234](https://issues.apache.org/jira/browse/YARN-4234) | New put APIs in TimelineClient for ats v1.5 | Major | timelineserver | Xuan Gong | Xuan Gong | | [YARN-4098](https://issues.apache.org/jira/browse/YARN-4098) | Document ApplicationPriority feature | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | @@ -2746,10 +2752,10 @@ | [YARN-4537](https://issues.apache.org/jira/browse/YARN-4537) | Pull out priority comparison from fifocomparator and use compound comparator for FifoOrdering policy | Major | capacity scheduler | Rohith Sharma K S | Rohith Sharma K S | | [HADOOP-11262](https://issues.apache.org/jira/browse/HADOOP-11262) | Enable YARN to use S3A | Major | fs/s3 | Thomas Demoor | Pieter Reuse | | [YARN-4265](https://issues.apache.org/jira/browse/YARN-4265) | Provide new timeline plugin storage to support fine-grained entity caching | Major | timelineserver | Li Lu | Li Lu | -| [YARN-4304](https://issues.apache.org/jira/browse/YARN-4304) | AM max resource configuration per partition to be displayed/updated correctly in UI and in various partition related metrics | Major | webapp | Sunil G | Sunil G | +| [YARN-4304](https://issues.apache.org/jira/browse/YARN-4304) | AM max resource 
configuration per partition to be displayed/updated correctly in UI and in various partition related metrics | Major | webapp | Sunil Govindan | Sunil Govindan | | [YARN-4557](https://issues.apache.org/jira/browse/YARN-4557) | Fix improper Queues sorting in PartitionedQueueComparator when accessible-node-labels=\* | Major | resourcemanager | Naganarasimha G R | Naganarasimha G R | | [HDFS-9601](https://issues.apache.org/jira/browse/HDFS-9601) | NNThroughputBenchmark.BlockReportStats should handle NotReplicatedYetException on adding block | Major | test | Masatake Iwasaki | Masatake Iwasaki | -| [YARN-4614](https://issues.apache.org/jira/browse/YARN-4614) | TestApplicationPriority#testApplicationPriorityAllocationWithChangeInPriority fails occasionally | Major | test | Jason Lowe | Sunil G | +| [YARN-4614](https://issues.apache.org/jira/browse/YARN-4614) | TestApplicationPriority#testApplicationPriorityAllocationWithChangeInPriority fails occasionally | Major | test | Jason Lowe | Sunil Govindan | | [HDFS-9672](https://issues.apache.org/jira/browse/HDFS-9672) | o.a.h.hdfs.TestLeaseRecovery2 fails intermittently | Major | test | Mingliang Liu | Mingliang Liu | | [YARN-4573](https://issues.apache.org/jira/browse/YARN-4573) | TestRMAppTransitions.testAppRunningKill and testAppKilledKilled fail on trunk | Major | resourcemanager, test | Takashi Ohnishi | Takashi Ohnishi | | [YARN-4643](https://issues.apache.org/jira/browse/YARN-4643) | Container recovery is broken with delegating container runtime | Critical | yarn | Sidharta Seethana | Sidharta Seethana | @@ -2782,7 +2788,7 @@ | [YARN-998](https://issues.apache.org/jira/browse/YARN-998) | Keep NM resource updated through dynamic resource config for RM/NM restart | Major | graceful, nodemanager, scheduler | Junping Du | Junping Du | | [MAPREDUCE-6543](https://issues.apache.org/jira/browse/MAPREDUCE-6543) | Migrate MR Client test cases part 2 | Trivial | test | Dustin Cote | Dustin Cote | | [YARN-4822](https://issues.apache.org/jira/browse/YARN-4822) | Refactor existing Preemption Policy of CS for easier adding new approach to select preemption candidates | Major | . | Wangda Tan | Wangda Tan | -| [YARN-4634](https://issues.apache.org/jira/browse/YARN-4634) | Scheduler UI/Metrics need to consider cases like non-queue label mappings | Major | . | Sunil G | Sunil G | +| [YARN-4634](https://issues.apache.org/jira/browse/YARN-4634) | Scheduler UI/Metrics need to consider cases like non-queue label mappings | Major | . 
| Sunil Govindan | Sunil Govindan | | [HADOOP-12169](https://issues.apache.org/jira/browse/HADOOP-12169) | ListStatus on empty dir in S3A lists itself instead of returning an empty list | Major | fs/s3 | Pieter Reuse | Pieter Reuse | | [HDFS-10186](https://issues.apache.org/jira/browse/HDFS-10186) | DirectoryScanner: Improve logs by adding full path of both actual and expected block directories | Minor | datanode | Rakesh R | Rakesh R | | [YARN-4826](https://issues.apache.org/jira/browse/YARN-4826) | Document configuration of ReservationSystem for CapacityScheduler | Minor | capacity scheduler | Subru Krishnan | Subru Krishnan | @@ -2842,7 +2848,7 @@ | [HADOOP-13283](https://issues.apache.org/jira/browse/HADOOP-13283) | Support reset operation for new global storage statistics and per FS storage stats | Major | fs | Mingliang Liu | Mingliang Liu | | [YARN-5080](https://issues.apache.org/jira/browse/YARN-5080) | Cannot obtain logs using YARN CLI -am for either KILLED or RUNNING AM | Critical | yarn | Sumana Sathish | Xuan Gong | | [HADOOP-13366](https://issues.apache.org/jira/browse/HADOOP-13366) | Fix dead link in o.a.h.fs.CommonConfigurationKeysPublic javadoc | Minor | documentation | Rakesh R | Rakesh R | -| [YARN-4484](https://issues.apache.org/jira/browse/YARN-4484) | Available Resource calculation for a queue is not correct when used with labels | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-4484](https://issues.apache.org/jira/browse/YARN-4484) | Available Resource calculation for a queue is not correct when used with labels | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HADOOP-13368](https://issues.apache.org/jira/browse/HADOOP-13368) | DFSOpsCountStatistics$OpType#fromSymbol and s3a.Statistic#fromSymbol should be O(1) operation | Major | fs | Mingliang Liu | Mingliang Liu | | [HADOOP-13212](https://issues.apache.org/jira/browse/HADOOP-13212) | Provide an option to set the socket buffers in S3AFileSystem | Minor | fs/s3 | Rajesh Balamohan | Rajesh Balamohan | | [HDFS-10653](https://issues.apache.org/jira/browse/HDFS-10653) | Optimize conversion from path string to components | Major | hdfs | Daryn Sharp | Daryn Sharp | @@ -2855,7 +2861,7 @@ | [HADOOP-13426](https://issues.apache.org/jira/browse/HADOOP-13426) | More efficiently build IPC responses | Major | . | Daryn Sharp | Daryn Sharp | | [HDFS-10656](https://issues.apache.org/jira/browse/HDFS-10656) | Optimize conversion of byte arrays back to path string | Major | hdfs | Daryn Sharp | Daryn Sharp | | [HDFS-10674](https://issues.apache.org/jira/browse/HDFS-10674) | Optimize creating a full path from an inode | Major | hdfs | Daryn Sharp | Daryn Sharp | -| [YARN-5342](https://issues.apache.org/jira/browse/YARN-5342) | Improve non-exclusive node partition resource allocation in Capacity Scheduler | Major | . | Wangda Tan | Sunil G | +| [YARN-5342](https://issues.apache.org/jira/browse/YARN-5342) | Improve non-exclusive node partition resource allocation in Capacity Scheduler | Major | . | Wangda Tan | Sunil Govindan | | [HADOOP-13438](https://issues.apache.org/jira/browse/HADOOP-13438) | Optimize IPC server protobuf decoding | Major | . | Daryn Sharp | Daryn Sharp | | [HADOOP-13418](https://issues.apache.org/jira/browse/HADOOP-13418) | Fix javadoc warnings by JDK8 in hadoop-nfs package | Major | . 
| Kai Sasaki | Kai Sasaki | | [HDFS-10724](https://issues.apache.org/jira/browse/HDFS-10724) | Document the caller context config keys | Minor | ipc, namenode | Mingliang Liu | Mingliang Liu | @@ -2945,7 +2951,7 @@ | [YARN-4390](https://issues.apache.org/jira/browse/YARN-4390) | Do surgical preemption based on reserved container in CapacityScheduler | Major | capacity scheduler | Eric Payne | Wangda Tan | | [HDFS-8630](https://issues.apache.org/jira/browse/HDFS-8630) | WebHDFS : Support get/set/unset StoragePolicy | Major | webhdfs | nijel | Surendra Singh Lilhore | | [HADOOP-13871](https://issues.apache.org/jira/browse/HADOOP-13871) | ITestS3AInputStreamPerformance.testTimeToOpenAndReadWholeFileBlocks performance awful | Major | fs/s3 | Steve Loughran | Steve Loughran | -| [YARN-2009](https://issues.apache.org/jira/browse/YARN-2009) | CapacityScheduler: Add intra-queue preemption for app priority support | Major | capacityscheduler | Devaraj K | Sunil G | +| [YARN-2009](https://issues.apache.org/jira/browse/YARN-2009) | CapacityScheduler: Add intra-queue preemption for app priority support | Major | capacityscheduler | Devaraj K | Sunil Govindan | | [YARN-4844](https://issues.apache.org/jira/browse/YARN-4844) | Add getMemorySize/getVirtualCoresSize to o.a.h.y.api.records.Resource | Blocker | api | Wangda Tan | Wangda Tan | | [YARN-4990](https://issues.apache.org/jira/browse/YARN-4990) | Re-direction of a particular log file within in a container in NM UI does not redirect properly to Log Server ( history ) on container completion | Major | . | Hitesh Shah | Xuan Gong | | [YARN-3866](https://issues.apache.org/jira/browse/YARN-3866) | AM-RM protocol changes to support container resizing | Blocker | api | MENG DING | MENG DING | @@ -2953,7 +2959,7 @@ | [HDFS-8377](https://issues.apache.org/jira/browse/HDFS-8377) | Support HTTP/2 in datanode | Major | . 
| Duo Zhang | Duo Zhang |
| [HADOOP-14019](https://issues.apache.org/jira/browse/HADOOP-14019) | fix some typos in the s3a docs | Minor | documentation, fs/s3 | Steve Loughran | Steve Loughran |
| [HADOOP-14081](https://issues.apache.org/jira/browse/HADOOP-14081) | S3A: Consider avoiding array copy in S3ABlockOutputStream (ByteArrayBlock) | Minor | fs/s3 | Rajesh Balamohan | Rajesh Balamohan |
-| [YARN-6143](https://issues.apache.org/jira/browse/YARN-6143) | Fix incompatible issue caused by YARN-3583 | Blocker | rolling upgrade | Wangda Tan | Sunil G |
+| [YARN-6143](https://issues.apache.org/jira/browse/YARN-6143) | Fix incompatible issue caused by YARN-3583 | Blocker | rolling upgrade | Wangda Tan | Sunil Govindan |
| [HADOOP-14113](https://issues.apache.org/jira/browse/HADOOP-14113) | review ADL Docs | Minor | documentation, fs/adl | Steve Loughran | Steve Loughran |
| [HADOOP-14123](https://issues.apache.org/jira/browse/HADOOP-14123) | Remove misplaced ADL service provider config file for FileSystem | Minor | fs/adl | John Zhuge | John Zhuge |
| [HADOOP-14153](https://issues.apache.org/jira/browse/HADOOP-14153) | ADL module has messed doc structure | Major | fs/adl | Mingliang Liu | Mingliang Liu |
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/RELEASENOTES.2.8.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/RELEASENOTES.2.8.0.md
index 646a9b6b93d..b0b03e81938 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/RELEASENOTES.2.8.0.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.0/RELEASENOTES.2.8.0.md
@@ -734,6 +734,13 @@ This new dfsadmin command, evictWriters, stops active block writing activities o
Add new flag to allow supporting path style addressing for s3a
+---
+
+* [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | *Major* | **getBlocks occupies FSLock and takes too long to complete**
+
+Skip blocks with size below dfs.balancer.getBlocks.min-block-size (default 10MB) when a balancer asks for a list of blocks.
+
+
---
* [HDFS-3702](https://issues.apache.org/jira/browse/HDFS-3702) | *Minor* | **Add an option for NOT writing the blocks locally if there is a datanode on the same box as the client**
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.1/CHANGES.2.8.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.1/CHANGELOG.2.8.1.md
similarity index 100%
rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.1/CHANGES.2.8.1.md
rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.1/CHANGELOG.2.8.1.md
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGES.2.8.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGELOG.2.8.2.md
similarity index 99%
rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGES.2.8.2.md
rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGELOG.2.8.2.md
index 844db2421ba..2047c8d29d6 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGES.2.8.2.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGELOG.2.8.2.md
@@ -16,7 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-->
-# "Apache Hadoop" Changelog
+# Apache Hadoop Changelog
## Release 2.8.2 - 2017-10-24
@@ -24,21 +24,10 @@
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . | Jeffrey E Rodriguez | Yuanbo Liu |
| [HADOOP-14174](https://issues.apache.org/jira/browse/HADOOP-14174) | Set default ADLS access token provider type to ClientCredential | Major | fs/adl | John Zhuge | John Zhuge |
-### IMPORTANT ISSUES:
-
-| JIRA | Summary | Priority | Component | Reporter | Contributor |
-|:---- |:---- | :--- |:---- |:---- |:---- |
-
-
-### NEW FEATURES:
-
-| JIRA | Summary | Priority | Component | Reporter | Contributor |
-|:---- |:---- | :--- |:---- |:---- |:---- |
-
-
### IMPROVEMENTS:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
@@ -59,7 +48,7 @@
| [HADOOP-14233](https://issues.apache.org/jira/browse/HADOOP-14233) | Delay construction of PreCondition.check failure message in Configuration#set | Major | . | Jonathan Eagles | Jonathan Eagles |
| [HADOOP-14240](https://issues.apache.org/jira/browse/HADOOP-14240) | Configuration#get return value optimization | Major | . | Jonathan Eagles | Jonathan Eagles |
| [YARN-6339](https://issues.apache.org/jira/browse/YARN-6339) | Improve performance for createAndGetApplicationReport | Major | . | yunjiong zhao | yunjiong zhao |
-| [HDFS-9705](https://issues.apache.org/jira/browse/HDFS-9705) | Refine the behaviour of getFileChecksum when length = 0 | Minor | . | Kai Zheng | SammiChen |
+| [HDFS-9705](https://issues.apache.org/jira/browse/HDFS-9705) | Refine the behaviour of getFileChecksum when length = 0 | Minor | . | Kai Zheng | Sammi Chen |
| [HDFS-11628](https://issues.apache.org/jira/browse/HDFS-11628) | Clarify the behavior of HDFS Mover in documentation | Major | documentation | Xiaobing Zhou | Xiaobing Zhou |
| [HADOOP-14104](https://issues.apache.org/jira/browse/HADOOP-14104) | Client should always ask namenode for kms provider path. | Major | kms | Rushabh S Shah | Rushabh S Shah |
| [HADOOP-14276](https://issues.apache.org/jira/browse/HADOOP-14276) | Add a nanosecond API to Time/Timer/FakeTimer | Minor | util | Erik Krogen | Erik Krogen |
@@ -122,7 +111,6 @@
| [YARN-6031](https://issues.apache.org/jira/browse/YARN-6031) | Application recovery has failed when node label feature is turned off during RM recovery | Minor | scheduler | Ying Zhang | Ying Zhang |
| [YARN-6137](https://issues.apache.org/jira/browse/YARN-6137) | Yarn client implicitly invoke ATS client which accesses HDFS | Major | . | Yesha Vora | Li Lu |
| [HADOOP-13433](https://issues.apache.org/jira/browse/HADOOP-13433) | Race in UGI.reloginFromKeytab | Major | security | Duo Zhang | Duo Zhang |
-| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . | Jeffrey E Rodriguez | Yuanbo Liu |
| [HADOOP-14058](https://issues.apache.org/jira/browse/HADOOP-14058) | Fix NativeS3FileSystemContractBaseTest#testDirWithDifferentMarkersWorks | Minor | fs/s3, test | Akira Ajisaka | Yiqun Lin |
| [HDFS-11084](https://issues.apache.org/jira/browse/HDFS-11084) | Add a regression test for sticky bit support of OIV ReverseXML processor | Major | tools | Wei-Chiu Chuang | Wei-Chiu Chuang |
| [HDFS-11391](https://issues.apache.org/jira/browse/HDFS-11391) | Numeric usernames do no work with WebHDFS FS (write access) | Major | webhdfs | Pierre Villard | Pierre Villard |
@@ -252,7 +240,7 @@
| [HDFS-11945](https://issues.apache.org/jira/browse/HDFS-11945) | Internal lease recovery may not be retried for a long time | Major | namenode | Kihwal Lee | Kihwal Lee |
| [HADOOP-14511](https://issues.apache.org/jira/browse/HADOOP-14511) | WritableRpcEngine.Invocation#toString NPE on null parameters | Minor | ipc | John Zhuge | John Zhuge |
| [HADOOP-14512](https://issues.apache.org/jira/browse/HADOOP-14512) | WASB atomic rename should not throw exception if the file is neither in src nor in dst when doing the rename | Major | fs/azure | Duo Xu | Duo Xu |
-| [YARN-6585](https://issues.apache.org/jira/browse/YARN-6585) | RM fails to start when upgrading from 2.7 to 2.8 for clusters with node labels. | Blocker | . | Eric Payne | Sunil G |
+| [YARN-6585](https://issues.apache.org/jira/browse/YARN-6585) | RM fails to start when upgrading from 2.7 to 2.8 for clusters with node labels. | Blocker | . | Eric Payne | Sunil Govindan |
| [HDFS-11967](https://issues.apache.org/jira/browse/HDFS-11967) | TestJMXGet fails occasionally | Major | . | Arpit Agarwal | Arpit Agarwal |
| [YARN-6719](https://issues.apache.org/jira/browse/YARN-6719) | Fix findbugs warnings in SLSCapacityScheduler.java | Major | . | Akira Ajisaka | Akira Ajisaka |
| [HADOOP-14540](https://issues.apache.org/jira/browse/HADOOP-14540) | Replace MRv1 specific terms in HostsFileReader | Minor | documentation | Akira Ajisaka | hu xiaodong |
@@ -296,9 +284,9 @@
| [HDFS-11896](https://issues.apache.org/jira/browse/HDFS-11896) | Non-dfsUsed will be doubled on dead node re-registration | Blocker | . | Brahma Reddy Battula | Brahma Reddy Battula |
| [YARN-5728](https://issues.apache.org/jira/browse/YARN-5728) | TestMiniYarnClusterNodeUtilization.testUpdateNodeUtilization timeout | Major | test | Akira Ajisaka | Akira Ajisaka |
| [YARN-6628](https://issues.apache.org/jira/browse/YARN-6628) | Unexpected jackson-core-2.2.3 dependency introduced | Blocker | timelineserver | Jason Lowe | Jonathan Eagles |
-| [YARN-5731](https://issues.apache.org/jira/browse/YARN-5731) | Preemption calculation is not accurate when reserved containers are present in queue. | Major | capacity scheduler | Sunil G | Wangda Tan |
+| [YARN-5731](https://issues.apache.org/jira/browse/YARN-5731) | Preemption calculation is not accurate when reserved containers are present in queue. | Major | capacity scheduler | Sunil Govindan | Wangda Tan |
| [HADOOP-14683](https://issues.apache.org/jira/browse/HADOOP-14683) | FileStatus.compareTo binary compatible issue | Blocker | . | Sergey Shelukhin | Akira Ajisaka |
-| [YARN-6872](https://issues.apache.org/jira/browse/YARN-6872) | Ensure apps could run given NodeLabels are disabled post RM switchover/restart | Major | resourcemanager | Sunil G | Sunil G |
+| [YARN-6872](https://issues.apache.org/jira/browse/YARN-6872) | Ensure apps could run given NodeLabels are disabled post RM switchover/restart | Major | resourcemanager | Sunil Govindan | Sunil Govindan |
| [YARN-6846](https://issues.apache.org/jira/browse/YARN-6846) | Nodemanager can fail to fully delete application local directories when applications are killed | Critical | nodemanager | Jason Lowe | Jason Lowe |
| [MAPREDUCE-6927](https://issues.apache.org/jira/browse/MAPREDUCE-6927) | MR job should only set tracking url if history was successfully written | Major | . | Eric Badger | Eric Badger |
| [YARN-6890](https://issues.apache.org/jira/browse/YARN-6890) | If UI is not secured, we allow user to kill other users' job even yarn cluster is secured. | Critical | . | Sumana Sathish | Junping Du |
@@ -326,12 +314,13 @@
| [HADOOP-14842](https://issues.apache.org/jira/browse/HADOOP-14842) | Hadoop 2.8.2 release build process get stuck due to java issue | Blocker | build | Junping Du | Junping Du |
| [YARN-5195](https://issues.apache.org/jira/browse/YARN-5195) | RM intermittently crashed with NPE while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | resourcemanager | Karam Singh | sandflee |
| [YARN-7034](https://issues.apache.org/jira/browse/YARN-7034) | DefaultLinuxContainerRuntime and DockerLinuxContainerRuntime sends client environment variables to container-executor | Blocker | nodemanager | Miklos Szegedi | Miklos Szegedi |
-| [YARN-7249](https://issues.apache.org/jira/browse/YARN-7249) | Fix CapacityScheduler NPE issue when a container preempted while the node is being removed | Blocker | . | Wangda Tan | Wangda Tan |
| [YARN-7325](https://issues.apache.org/jira/browse/YARN-7325) | Remove unused container variable in DockerLinuxContainerRuntime | Minor | nodemanager | Shane Kumpf | Shane Kumpf |
| [YARN-7246](https://issues.apache.org/jira/browse/YARN-7246) | Fix the default docker binary path | Blocker | nodemanager | Shane Kumpf | Shane Kumpf |
| [YARN-7333](https://issues.apache.org/jira/browse/YARN-7333) | container-executor fails to remove entries from a directory that is not writable or executable | Critical | . | Jason Lowe | Jason Lowe |
| [YARN-7230](https://issues.apache.org/jira/browse/YARN-7230) | Document DockerContainerRuntime for branch-2.8 with proper scope and claim as an experimental feature | Blocker | documentation | Junping Du | Shane Kumpf |
| [HADOOP-14958](https://issues.apache.org/jira/browse/HADOOP-14958) | CLONE - Fix source-level compatibility after HADOOP-11252 | Blocker | . | Junping Du | Junping Du |
+| [YARN-7647](https://issues.apache.org/jira/browse/YARN-7647) | NM print inappropriate error log when node-labels is enabled | Minor | . | Yang Wang | Yang Wang |
+| [YARN-7249](https://issues.apache.org/jira/browse/YARN-7249) | Fix CapacityScheduler NPE issue when a container preempted while the node is being removed | Blocker | . | Wangda Tan | Wangda Tan |
### TESTS:
@@ -350,7 +339,6 @@
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
-| [HDFS-9754](https://issues.apache.org/jira/browse/HDFS-9754) | Avoid unnecessary getBlockCollection calls in BlockManager | Major | namenode | Jing Zhao | Jing Zhao |
| [HADOOP-14032](https://issues.apache.org/jira/browse/HADOOP-14032) | Reduce fair call queue priority inversion | Major | ipc | Daryn Sharp | Daryn Sharp |
| [HADOOP-14034](https://issues.apache.org/jira/browse/HADOOP-14034) | Allow ipc layer exceptions to selectively close connections | Major | ipc | Daryn Sharp | Daryn Sharp |
| [HADOOP-14033](https://issues.apache.org/jira/browse/HADOOP-14033) | Reduce fair call queue lock contention | Major | ipc | Daryn Sharp | Daryn Sharp |
@@ -369,10 +357,11 @@
| [YARN-6682](https://issues.apache.org/jira/browse/YARN-6682) | Improve performance of AssignmentInformation datastructures | Major | . | Daryn Sharp | Daryn Sharp |
| [YARN-6680](https://issues.apache.org/jira/browse/YARN-6680) | Avoid locking overhead for NO\_LABEL lookups | Major | resourcemanager | Daryn Sharp | Daryn Sharp |
| [YARN-6681](https://issues.apache.org/jira/browse/YARN-6681) | Eliminate double-copy of child queues in canAssignToThisQueue | Major | resourcemanager | Daryn Sharp | Daryn Sharp |
-| [YARN-2113](https://issues.apache.org/jira/browse/YARN-2113) | Add cross-user preemption within CapacityScheduler's leaf-queue | Major | capacity scheduler | Vinod Kumar Vavilapalli | Sunil G |
+| [YARN-2113](https://issues.apache.org/jira/browse/YARN-2113) | Add cross-user preemption within CapacityScheduler's leaf-queue | Major | capacity scheduler | Vinod Kumar Vavilapalli | Sunil Govindan |
| [YARN-6775](https://issues.apache.org/jira/browse/YARN-6775) | CapacityScheduler: Improvements to assignContainers, avoid unnecessary canAssignToUser/Queue calls | Major | capacityscheduler | Nathan Roberts | Nathan Roberts |
| [YARN-6988](https://issues.apache.org/jira/browse/YARN-6988) | container-executor fails for docker when command length \> 4096 B | Major | yarn | Eric Badger | Eric Badger |
| [HDFS-12473](https://issues.apache.org/jira/browse/HDFS-12473) | Change hosts JSON file format | Major | . | Ming Ma | Ming Ma |
+| [HDFS-9754](https://issues.apache.org/jira/browse/HDFS-9754) | Avoid unnecessary getBlockCollection calls in BlockManager | Major | namenode | Jing Zhao | Jing Zhao |
### OTHER:
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/RELEASENOTES.2.8.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/RELEASENOTES.2.8.2.md
index 528fbd65869..0f88b1bb72c 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/RELEASENOTES.2.8.2.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/RELEASENOTES.2.8.2.md
@@ -16,7 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-->
-# "Apache Hadoop" 2.8.2 Release Notes
+# Apache Hadoop 2.8.2 Release Notes
These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements.
@@ -28,6 +28,13 @@ These release notes cover new developer and user-facing incompatibilities, impor
Permissions are now checked when moving a file to Trash.
+--- + +* [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | *Major* | **Add ability to secure log servlet using proxy users** + +**WARNING: No release note provided for this change.** + + --- * [HDFS-11499](https://issues.apache.org/jira/browse/HDFS-11499) | *Major* | **Decommissioning stuck because of failing recovery** diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/CHANGELOG.2.8.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/CHANGELOG.2.8.3.md new file mode 100644 index 00000000000..5cdfd9c432d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/CHANGELOG.2.8.3.md @@ -0,0 +1,129 @@ + + +# Apache Hadoop Changelog + +## Release 2.8.3 - 2017-12-12 + +### INCOMPATIBLE CHANGES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-13588](https://issues.apache.org/jira/browse/HADOOP-13588) | ConfServlet should respect Accept request header | Major | conf | Weiwei Yang | Weiwei Yang | +| [HADOOP-14260](https://issues.apache.org/jira/browse/HADOOP-14260) | Configuration.dumpConfiguration should redact sensitive information | Major | conf, security | Vihang Karajgaonkar | John Zhuge | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-13933](https://issues.apache.org/jira/browse/HADOOP-13933) | Add haadmin -getAllServiceState option to get the HA state of all the NameNodes/ResourceManagers | Major | tools | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-10480](https://issues.apache.org/jira/browse/HDFS-10480) | Add an admin command to list currently open files | Major | . 
| Kihwal Lee | Manoj Govindassamy | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-13628](https://issues.apache.org/jira/browse/HADOOP-13628) | Support to retrieve specific property from configuration via REST API | Major | conf | Weiwei Yang | Weiwei Yang | +| [HDFS-12143](https://issues.apache.org/jira/browse/HDFS-12143) | Improve performance of getting and removing inode features | Major | namenode | Daryn Sharp | Daryn Sharp | +| [HDFS-12171](https://issues.apache.org/jira/browse/HDFS-12171) | Reduce IIP object allocations for inode lookup | Major | namenode | Daryn Sharp | Daryn Sharp | +| [HADOOP-14455](https://issues.apache.org/jira/browse/HADOOP-14455) | ViewFileSystem#rename should support be supported within same nameservice with different mountpoints | Major | viewfs | Brahma Reddy Battula | Brahma Reddy Battula | +| [HADOOP-14627](https://issues.apache.org/jira/browse/HADOOP-14627) | Support MSI and DeviceCode token provider in ADLS | Major | fs/adl | Atul Sikaria | Atul Sikaria | +| [YARN-7037](https://issues.apache.org/jira/browse/YARN-7037) | Optimize data transfer with zero-copy approach for containerlogs REST API in NMWebServices | Major | nodemanager | Tao Yang | Tao Yang | +| [HADOOP-14827](https://issues.apache.org/jira/browse/HADOOP-14827) | Allow StopWatch to accept a Timer parameter for tests | Minor | common, test | Erik Krogen | Erik Krogen | +| [HDFS-12131](https://issues.apache.org/jira/browse/HDFS-12131) | Add some of the FSNamesystem JMX values as metrics | Minor | hdfs, namenode | Erik Krogen | Erik Krogen | +| [HADOOP-14844](https://issues.apache.org/jira/browse/HADOOP-14844) | Remove requirement to specify TenantGuid for MSI Token Provider | Major | fs/adl | Atul Sikaria | Atul Sikaria | +| [HADOOP-14864](https://issues.apache.org/jira/browse/HADOOP-14864) | FSDataInputStream#unbuffer UOE should include stream class name | Minor | fs | John Zhuge | Bharat Viswanadham | +| [HDFS-12441](https://issues.apache.org/jira/browse/HDFS-12441) | Suppress UnresolvedPathException in namenode log | Minor | . | Kihwal Lee | Kihwal Lee | +| [MAPREDUCE-6958](https://issues.apache.org/jira/browse/MAPREDUCE-6958) | Shuffle audit logger should log size of shuffle transfer | Minor | . | Jason Lowe | Jason Lowe | +| [HDFS-12420](https://issues.apache.org/jira/browse/HDFS-12420) | Add an option to disallow 'namenode format -force' | Major | . | Ajay Kumar | Ajay Kumar | +| [HADOOP-14521](https://issues.apache.org/jira/browse/HADOOP-14521) | KMS client needs retry logic | Major | . | Rushabh S Shah | Rushabh S Shah | +| [HDFS-12603](https://issues.apache.org/jira/browse/HDFS-12603) | Enable async edit logging by default | Major | namenode | Andrew Wang | Andrew Wang | +| [HDFS-12642](https://issues.apache.org/jira/browse/HDFS-12642) | Log block and datanode details in BlockRecoveryWorker | Major | datanode | Xiao Chen | Xiao Chen | +| [HADOOP-14880](https://issues.apache.org/jira/browse/HADOOP-14880) | [KMS] Document&test missing KMS client side configs | Minor | . | Wei-Chiu Chuang | Gabor Bota | +| [HDFS-12619](https://issues.apache.org/jira/browse/HDFS-12619) | Do not catch and throw unchecked exceptions if IBRs fail to process | Minor | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-4163](https://issues.apache.org/jira/browse/YARN-4163) | Audit getQueueInfo and getApplications calls | Major | . 
| Chang Li | Chang Li | +| [MAPREDUCE-6975](https://issues.apache.org/jira/browse/MAPREDUCE-6975) | Logging task counters | Major | task | Prabhu Joseph | Prabhu Joseph | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-5444](https://issues.apache.org/jira/browse/YARN-5444) | Fix failing unit tests in TestLinuxContainerExecutorWithMocks | Major | nodemanager | Yufei Gu | Yufei Gu | +| [MAPREDUCE-6808](https://issues.apache.org/jira/browse/MAPREDUCE-6808) | Log map attempts as part of shuffle handler audit log | Major | . | Jonathan Eagles | Gergő Pásztor | +| [HADOOP-14578](https://issues.apache.org/jira/browse/HADOOP-14578) | Bind IPC connections to kerberos UPN host for proxy users | Major | ipc | Daryn Sharp | Daryn Sharp | +| [HADOOP-14677](https://issues.apache.org/jira/browse/HADOOP-14677) | mvn clean compile fails | Major | build | Andras Bokor | Andras Bokor | +| [HADOOP-14702](https://issues.apache.org/jira/browse/HADOOP-14702) | Fix formatting issue and regression caused by conversion from APT to Markdown | Minor | documentation | Doris Gu | Doris Gu | +| [YARN-6965](https://issues.apache.org/jira/browse/YARN-6965) | Duplicate instantiation in FairSchedulerQueueInfo | Minor | fairscheduler | Masahiro Tanaka | Masahiro Tanaka | +| [HDFS-11738](https://issues.apache.org/jira/browse/HDFS-11738) | Hedged pread takes more time when block moved from initial locations | Major | hdfs-client | Vinayakumar B | Vinayakumar B | +| [HDFS-12318](https://issues.apache.org/jira/browse/HDFS-12318) | Fix IOException condition for openInfo in DFSInputStream | Major | . | legend | legend | +| [HDFS-12336](https://issues.apache.org/jira/browse/HDFS-12336) | Listing encryption zones still fails when deleted EZ is not a direct child of snapshottable directory | Minor | encryption, hdfs | Wellington Chevreuil | Wellington Chevreuil | +| [MAPREDUCE-6931](https://issues.apache.org/jira/browse/MAPREDUCE-6931) | Remove TestDFSIO "Total Throughput" calculation | Critical | benchmarks, test | Dennis Huo | Dennis Huo | +| [YARN-7116](https://issues.apache.org/jira/browse/YARN-7116) | CapacityScheduler Web UI: Queue's AM usage is always show on per-user's AM usage. | Major | capacity scheduler, webapp | Wangda Tan | Wangda Tan | +| [HADOOP-14824](https://issues.apache.org/jira/browse/HADOOP-14824) | Update ADLS SDK to 2.2.2 for MSI fix | Major | fs/adl | Atul Sikaria | Atul Sikaria | +| [YARN-7120](https://issues.apache.org/jira/browse/YARN-7120) | CapacitySchedulerPage NPE in "Aggregate scheduler counts" section | Minor | . | Eric Payne | Eric Payne | +| [YARN-7164](https://issues.apache.org/jira/browse/YARN-7164) | TestAMRMClientOnRMRestart fails sporadically with bind address in use | Major | test | Jason Lowe | Jason Lowe | +| [HDFS-12369](https://issues.apache.org/jira/browse/HDFS-12369) | Edit log corruption due to hard lease recovery of not-closed file which has snapshots | Major | namenode | Xiao Chen | Xiao Chen | +| [HADOOP-14867](https://issues.apache.org/jira/browse/HADOOP-14867) | Update HDFS Federation setup document, for incorrect property name for secondary name node http address | Major | . 
| Bharat Viswanadham | Bharat Viswanadham | +| [YARN-4727](https://issues.apache.org/jira/browse/YARN-4727) | Unable to override the $HADOOP\_CONF\_DIR env variable for container | Major | nodemanager | Terence Yim | Jason Lowe | +| [MAPREDUCE-6957](https://issues.apache.org/jira/browse/MAPREDUCE-6957) | shuffle hangs after a node manager connection timeout | Major | mrv2 | Jooseong Kim | Jooseong Kim | +| [HDFS-12424](https://issues.apache.org/jira/browse/HDFS-12424) | Datatable sorting on the Datanode Information page in the Namenode UI is broken | Major | . | Shawna Martell | Shawna Martell | +| [HDFS-12323](https://issues.apache.org/jira/browse/HDFS-12323) | NameNode terminates after full GC thinking QJM unresponsive if full GC is much longer than timeout | Major | namenode, qjm | Erik Krogen | Erik Krogen | +| [MAPREDUCE-6960](https://issues.apache.org/jira/browse/MAPREDUCE-6960) | Shuffle Handler prints disk error stack traces for every read failure. | Major | . | Kuhu Shukla | Kuhu Shukla | +| [HDFS-11799](https://issues.apache.org/jira/browse/HDFS-11799) | Introduce a config to allow setting up write pipeline with fewer nodes than replication factor | Major | . | Yongjun Zhang | Brahma Reddy Battula | +| [YARN-6771](https://issues.apache.org/jira/browse/YARN-6771) | Use classloader inside configuration class to make new classes | Major | . | Jongyoul Lee | Jongyoul Lee | +| [HDFS-12526](https://issues.apache.org/jira/browse/HDFS-12526) | FSDirectory should use Time.monotonicNow for durations | Minor | . | Chetna Chaudhari | Bharat Viswanadham | +| [HDFS-12371](https://issues.apache.org/jira/browse/HDFS-12371) | "BlockVerificationFailures" and "BlocksVerified" show up as 0 in Datanode JMX | Major | metrics | Sai Nukavarapu | Hanisha Koneru | +| [MAPREDUCE-6966](https://issues.apache.org/jira/browse/MAPREDUCE-6966) | DistSum should use Time.monotonicNow for measuring durations | Minor | . | Chetna Chaudhari | Chetna Chaudhari | +| [HDFS-12531](https://issues.apache.org/jira/browse/HDFS-12531) | Fix conflict in the javadoc of UnderReplicatedBlocks.java in branch-2 | Minor | documentation | Akira Ajisaka | Bharat Viswanadham | +| [HDFS-12495](https://issues.apache.org/jira/browse/HDFS-12495) | TestPendingInvalidateBlock#testPendingDeleteUnknownBlocks fails intermittently | Major | . | Eric Badger | Eric Badger | +| [HADOOP-14891](https://issues.apache.org/jira/browse/HADOOP-14891) | Remove references to Guava Objects.toStringHelper | Major | . | Jonathan Eagles | Jonathan Eagles | +| [HADOOP-14902](https://issues.apache.org/jira/browse/HADOOP-14902) | LoadGenerator#genFile write close timing is incorrectly calculated | Major | fs | Jason Lowe | Hanisha Koneru | +| [YARN-7084](https://issues.apache.org/jira/browse/YARN-7084) | TestSchedulingMonitor#testRMStarts fails sporadically | Major | . 
| Jason Lowe | Jason Lowe | +| [YARN-7226](https://issues.apache.org/jira/browse/YARN-7226) | Whitelisted variables do not support delayed variable expansion | Major | nodemanager | Jason Lowe | Jason Lowe | +| [YARN-7285](https://issues.apache.org/jira/browse/YARN-7285) | ContainerExecutor always launches with priorities due to yarn-default property | Minor | nodemanager | Jason Lowe | Jason Lowe | +| [YARN-7245](https://issues.apache.org/jira/browse/YARN-7245) | Max AM Resource column in Active Users Info section of Capacity Scheduler UI page should be updated per-user | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | +| [HADOOP-14912](https://issues.apache.org/jira/browse/HADOOP-14912) | FairCallQueue may defer servicing calls | Major | ipc | Daryn Sharp | Daryn Sharp | +| [HDFS-12659](https://issues.apache.org/jira/browse/HDFS-12659) | Update TestDeadDatanode#testNonDFSUsedONDeadNodeReReg to increase heartbeat recheck interval | Minor | . | Ajay Kumar | Ajay Kumar | +| [HDFS-12485](https://issues.apache.org/jira/browse/HDFS-12485) | expunge may fail to remove trash from encryption zone | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-7333](https://issues.apache.org/jira/browse/YARN-7333) | container-executor fails to remove entries from a directory that is not writable or executable | Critical | . | Jason Lowe | Jason Lowe | +| [HADOOP-14966](https://issues.apache.org/jira/browse/HADOOP-14966) | Handle JDK-8071638 for hadoop-common | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [HDFS-9914](https://issues.apache.org/jira/browse/HDFS-9914) | Fix configurable WebhDFS connect/read timeout | Blocker | hdfs-client, webhdfs | Xiaoyu Yao | Xiaoyu Yao | +| [YARN-7244](https://issues.apache.org/jira/browse/YARN-7244) | ShuffleHandler is not aware of disks that are added | Major | . | Kuhu Shukla | Kuhu Shukla | +| [HADOOP-14990](https://issues.apache.org/jira/browse/HADOOP-14990) | Clean up jdiff xml files added for 2.8.2 release | Blocker | . | Subru Krishnan | Junping Du | +| [HADOOP-14919](https://issues.apache.org/jira/browse/HADOOP-14919) | BZip2 drops records when reading data in splits | Critical | . | Aki Tanaka | Jason Lowe | +| [YARN-7370](https://issues.apache.org/jira/browse/YARN-7370) | Preemption properties should be refreshable | Major | capacity scheduler, scheduler preemption | Eric Payne | Gergely Novák | +| [YARN-7361](https://issues.apache.org/jira/browse/YARN-7361) | Improve the docker container runtime documentation | Major | . | Shane Kumpf | Shane Kumpf | +| [YARN-7469](https://issues.apache.org/jira/browse/YARN-7469) | Capacity Scheduler Intra-queue preemption: User can starve if newest app is exactly at user limit | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | +| [HADOOP-15047](https://issues.apache.org/jira/browse/HADOOP-15047) | Python is required for -Preleasedoc but not documented in branch-2.8 | Major | build, documentation | Akira Ajisaka | Bharat Viswanadham | +| [YARN-7496](https://issues.apache.org/jira/browse/YARN-7496) | CS Intra-queue preemption user-limit calculations are not in line with LeafQueue user-limit calculations | Major | . 
| Eric Payne | Eric Payne | +| [HDFS-12832](https://issues.apache.org/jira/browse/HDFS-12832) | INode.getFullPathName may throw ArrayIndexOutOfBoundsException lead to NameNode exit | Critical | namenode | DENG FEI | Konstantin Shvachko | +| [HDFS-12638](https://issues.apache.org/jira/browse/HDFS-12638) | Delete copy-on-truncate block along with the original block, when deleting a file being truncated | Blocker | hdfs | Jiandan Yang | Konstantin Shvachko | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-14678](https://issues.apache.org/jira/browse/HADOOP-14678) | AdlFilesystem#initialize swallows exception when getting user name | Minor | fs/adl | John Zhuge | John Zhuge | +| [HADOOP-14892](https://issues.apache.org/jira/browse/HADOOP-14892) | MetricsSystemImpl should use Time.monotonicNow for measuring durations | Minor | . | Chetna Chaudhari | Chetna Chaudhari | +| [HADOOP-14881](https://issues.apache.org/jira/browse/HADOOP-14881) | LoadGenerator should use Time.monotonicNow() to measure durations | Major | . | Chetna Chaudhari | Bharat Viswanadham | +| [HADOOP-14893](https://issues.apache.org/jira/browse/HADOOP-14893) | WritableRpcEngine should use Time.monotonicNow | Minor | . | Chetna Chaudhari | Chetna Chaudhari | +| [HDFS-12386](https://issues.apache.org/jira/browse/HDFS-12386) | Add fsserver defaults call to WebhdfsFileSystem. | Minor | webhdfs | Rushabh S Shah | Rushabh S Shah | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/CHANGES.2.8.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/CHANGES.2.8.3.md deleted file mode 100644 index cac46d37ebc..00000000000 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/CHANGES.2.8.3.md +++ /dev/null @@ -1,69 +0,0 @@ - - -# Apache Hadoop Changelog - -## Release 2.8.3 - Unreleased (as of 2017-08-28) - -### INCOMPATIBLE CHANGES: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-13588](https://issues.apache.org/jira/browse/HADOOP-13588) | ConfServlet should respect Accept request header | Major | conf | Weiwei Yang | Weiwei Yang | -| [HADOOP-14260](https://issues.apache.org/jira/browse/HADOOP-14260) | Configuration.dumpConfiguration should redact sensitive information | Major | conf, security | Vihang Karajgaonkar | John Zhuge | - - -### NEW FEATURES: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-13933](https://issues.apache.org/jira/browse/HADOOP-13933) | Add haadmin -getAllServiceState option to get the HA state of all the NameNodes/ResourceManagers | Major | tools | Surendra Singh Lilhore | Surendra Singh Lilhore | -| [HDFS-10480](https://issues.apache.org/jira/browse/HDFS-10480) | Add an admin command to list currently open files | Major | . 
| Kihwal Lee | Manoj Govindassamy | - - -### IMPROVEMENTS: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-13628](https://issues.apache.org/jira/browse/HADOOP-13628) | Support to retrieve specific property from configuration via REST API | Major | conf | Weiwei Yang | Weiwei Yang | -| [HDFS-12143](https://issues.apache.org/jira/browse/HDFS-12143) | Improve performance of getting and removing inode features | Major | namenode | Daryn Sharp | Daryn Sharp | -| [HDFS-12171](https://issues.apache.org/jira/browse/HDFS-12171) | Reduce IIP object allocations for inode lookup | Major | namenode | Daryn Sharp | Daryn Sharp | -| [HADOOP-14455](https://issues.apache.org/jira/browse/HADOOP-14455) | ViewFileSystem#rename should support be supported within same nameservice with different mountpoints | Major | viewfs | Brahma Reddy Battula | Brahma Reddy Battula | -| [HDFS-12131](https://issues.apache.org/jira/browse/HDFS-12131) | Add some of the FSNamesystem JMX values as metrics | Minor | hdfs, namenode | Erik Krogen | Erik Krogen | -| [HADOOP-14627](https://issues.apache.org/jira/browse/HADOOP-14627) | Support MSI and DeviceCode token provider in ADLS | Major | fs/adl | Atul Sikaria | Atul Sikaria | -| [HADOOP-14251](https://issues.apache.org/jira/browse/HADOOP-14251) | Credential provider should handle property key deprecation | Critical | security | John Zhuge | John Zhuge | - - -### BUG FIXES: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-10829](https://issues.apache.org/jira/browse/HADOOP-10829) | Iteration on CredentialProviderFactory.serviceLoader is thread-unsafe | Major | security | Benoy Antony | Benoy Antony | -| [HADOOP-14578](https://issues.apache.org/jira/browse/HADOOP-14578) | Bind IPC connections to kerberos UPN host for proxy users | Major | ipc | Daryn Sharp | Daryn Sharp | -| [HDFS-11896](https://issues.apache.org/jira/browse/HDFS-11896) | Non-dfsUsed will be doubled on dead node re-registration | Blocker | . 
| Brahma Reddy Battula | Brahma Reddy Battula | -| [HADOOP-14677](https://issues.apache.org/jira/browse/HADOOP-14677) | mvn clean compile fails | Major | build | Andras Bokor | Andras Bokor | -| [HADOOP-14702](https://issues.apache.org/jira/browse/HADOOP-14702) | Fix formatting issue and regression caused by conversion from APT to Markdown | Minor | documentation | Doris Gu | Doris Gu | -| [YARN-6965](https://issues.apache.org/jira/browse/YARN-6965) | Duplicate instantiation in FairSchedulerQueueInfo | Minor | fairscheduler | Masahiro Tanaka | Masahiro Tanaka | -| [HDFS-11738](https://issues.apache.org/jira/browse/HDFS-11738) | Hedged pread takes more time when block moved from initial locations | Major | hdfs-client | Vinayakumar B | Vinayakumar B | - - -### SUB-TASKS: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-14678](https://issues.apache.org/jira/browse/HADOOP-14678) | AdlFilesystem#initialize swallows exception when getting user name | Minor | fs/adl | John Zhuge | John Zhuge | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/RELEASENOTES.2.8.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/RELEASENOTES.2.8.3.md index 3e21d9ec12a..9fb6d1a3a85 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/RELEASENOTES.2.8.3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.3/RELEASENOTES.2.8.3.md @@ -34,3 +34,20 @@ Conf HTTP service should set response's content type according to the Accept hea Configuration.dumpConfiguration no longer prints out the clear text values for the sensitive keys listed in `hadoop.security.sensitive-config-keys`. Callers can override the default list of sensitive keys either to redact more keys or print the clear text values for a few extra keys for debugging purpose. + + +--- + +* [HDFS-11799](https://issues.apache.org/jira/browse/HDFS-11799) | *Major* | **Introduce a config to allow setting up write pipeline with fewer nodes than replication factor** + +Added new configuration "dfs.client.block.write.replace-datanode-on-failure.min-replication". + + The minimum number of replications that are needed to not to fail + the write pipeline if new datanodes can not be found to replace + failed datanodes (could be due to network failure) in the write pipeline. + If the number of the remaining datanodes in the write pipeline is greater + than or equal to this property value, continue writing to the remaining nodes. + Otherwise throw exception. + + If this is set to 0, an exception will be thrown, when a replacement + can not be found. 
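
The HDFS-11799 note above introduces the client-side property `dfs.client.block.write.replace-datanode-on-failure.min-replication`. As a minimal illustrative sketch (assuming hadoop-common is on the classpath; the value `2` is only an arbitrary example, not a recommended setting), the property can be supplied programmatically through `org.apache.hadoop.conf.Configuration` like any other HDFS client key:

```java
import org.apache.hadoop.conf.Configuration;

public class MinReplicationExample {
    public static void main(String[] args) {
        // Illustrative only: the property name comes from the HDFS-11799 release
        // note; the value 2 is an arbitrary example chosen for this sketch.
        Configuration conf = new Configuration();
        conf.set("dfs.client.block.write.replace-datanode-on-failure.min-replication", "2");

        // Per the release note, a write pipeline then keeps going as long as at
        // least 2 datanodes remain after a failed node could not be replaced;
        // with fewer remaining nodes the client throws an exception.
        System.out.println(conf.get(
            "dfs.client.block.write.replace-datanode-on-failure.min-replication"));
    }
}
```

The same key can equally be placed in the client's `hdfs-site.xml`; with the value `0`, the client fails as soon as a replacement datanode cannot be found, as described in the last paragraph of the note.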
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.4/CHANGELOG.2.8.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.4/CHANGELOG.2.8.4.md new file mode 100644 index 00000000000..4e42e457118 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.4/CHANGELOG.2.8.4.md @@ -0,0 +1,123 @@ + + +# Apache Hadoop Changelog + +## Release 2.8.4 - 2018-05-15 + + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-12817](https://issues.apache.org/jira/browse/HADOOP-12817) | Enable TLS v1.1 and 1.2 | Major | security | Robert Kanter | Robert Kanter | +| [HDFS-11409](https://issues.apache.org/jira/browse/HDFS-11409) | DatanodeInfo getNetworkLocation and setNetworkLocation shoud use volatile instead of synchronized | Minor | performance | Chen Liang | Chen Liang | +| [HADOOP-14246](https://issues.apache.org/jira/browse/HADOOP-14246) | Authentication Tokens should use SecureRandom instead of Random and 256 bit secrets | Major | security | Robert Kanter | Robert Kanter | +| [HADOOP-14920](https://issues.apache.org/jira/browse/HADOOP-14920) | KMSClientProvider won't work with KMS delegation token retrieved from non-Java client. | Major | kms | Xiaoyu Yao | Xiaoyu Yao | +| [HADOOP-14987](https://issues.apache.org/jira/browse/HADOOP-14987) | Improve KMSClientProvider log around delegation token checking | Major | . | Xiaoyu Yao | Xiaoyu Yao | +| [YARN-7495](https://issues.apache.org/jira/browse/YARN-7495) | Improve robustness of the AggregatedLogDeletionService | Major | log-aggregation | Jonathan Eagles | Jonathan Eagles | +| [YARN-7678](https://issues.apache.org/jira/browse/YARN-7678) | Ability to enable logging of container memory stats | Major | nodemanager | Jim Brennan | Jim Brennan | +| [YARN-7590](https://issues.apache.org/jira/browse/YARN-7590) | Improve container-executor validation check | Major | security, yarn | Eric Yang | Eric Yang | +| [HADOOP-15212](https://issues.apache.org/jira/browse/HADOOP-15212) | Add independent secret manager method for logging expired tokens | Major | security | Daryn Sharp | Daryn Sharp | +| [YARN-7728](https://issues.apache.org/jira/browse/YARN-7728) | Expose container preemptions related information in Capacity Scheduler queue metrics | Major | . 
| Eric Payne | Eric Payne | +| [MAPREDUCE-7048](https://issues.apache.org/jira/browse/MAPREDUCE-7048) | Uber AM can crash due to unknown task in statusUpdate | Major | mr-am | Peter Bacsko | Peter Bacsko | +| [HADOOP-13972](https://issues.apache.org/jira/browse/HADOOP-13972) | ADLS to support per-store configuration | Major | fs/adl | John Zhuge | Sharad Sonker | +| [YARN-7813](https://issues.apache.org/jira/browse/YARN-7813) | Capacity Scheduler Intra-queue Preemption should be configurable for each queue | Major | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne | +| [HDFS-11187](https://issues.apache.org/jira/browse/HDFS-11187) | Optimize disk access for last partial chunk checksum of Finalized replica | Major | datanode | Wei-Chiu Chuang | Gabor Bota | +| [HADOOP-15279](https://issues.apache.org/jira/browse/HADOOP-15279) | increase maven heap size recommendations | Minor | build, documentation, test | Allen Wittenauer | Allen Wittenauer | +| [HDFS-12884](https://issues.apache.org/jira/browse/HDFS-12884) | BlockUnderConstructionFeature.truncateBlock should be of type BlockInfo | Major | namenode | Konstantin Shvachko | chencan | +| [HADOOP-14841](https://issues.apache.org/jira/browse/HADOOP-14841) | Kms client should disconnect if unable to get output stream from connection. | Major | kms | Xiao Chen | Rushabh S Shah | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-13988](https://issues.apache.org/jira/browse/HADOOP-13988) | KMSClientProvider does not work with WebHDFS and Apache Knox w/ProxyUser | Major | common, kms | Greg Senia | Xiaoyu Yao | +| [HADOOP-14029](https://issues.apache.org/jira/browse/HADOOP-14029) | Fix KMSClientProvider for non-secure proxyuser use case | Major | kms | Xiaoyu Yao | Xiaoyu Yao | +| [HDFS-12614](https://issues.apache.org/jira/browse/HDFS-12614) | FSPermissionChecker#getINodeAttrs() throws NPE when INodeAttributesProvider configured | Major | . | Manoj Govindassamy | Manoj Govindassamy | +| [YARN-7489](https://issues.apache.org/jira/browse/YARN-7489) | ConcurrentModificationException in RMAppImpl#getRMAppMetrics | Major | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-7525](https://issues.apache.org/jira/browse/YARN-7525) | Incorrect query parameters in cluster nodes REST API document | Minor | documentation | Tao Yang | Tao Yang | +| [HDFS-12754](https://issues.apache.org/jira/browse/HDFS-12754) | Lease renewal can hit a deadlock | Major | . | Kuhu Shukla | Kuhu Shukla | +| [HDFS-11754](https://issues.apache.org/jira/browse/HDFS-11754) | Make FsServerDefaults cache configurable. | Minor | . | Rushabh S Shah | Mikhail Erofeev | +| [MAPREDUCE-5124](https://issues.apache.org/jira/browse/MAPREDUCE-5124) | AM lacks flow control for task events | Major | mr-am | Jason Lowe | Peter Bacsko | +| [HDFS-12833](https://issues.apache.org/jira/browse/HDFS-12833) | Distcp : Update the usage of delete option for dependency with update and overwrite option | Minor | distcp, hdfs | Harshakiran Reddy | usharani | +| [HDFS-12907](https://issues.apache.org/jira/browse/HDFS-12907) | Allow read-only access to reserved raw for non-superusers | Major | namenode | Daryn Sharp | Rushabh S Shah | +| [HDFS-12881](https://issues.apache.org/jira/browse/HDFS-12881) | Output streams closed with IOUtils suppressing write errors | Major | . 
| Jason Lowe | Ajay Kumar | +| [YARN-7595](https://issues.apache.org/jira/browse/YARN-7595) | Container launching code suppresses close exceptions after writes | Major | nodemanager | Jason Lowe | Jim Brennan | +| [YARN-7661](https://issues.apache.org/jira/browse/YARN-7661) | NodeManager metrics return wrong value after update node resource | Major | . | Yang Wang | Yang Wang | +| [HDFS-12347](https://issues.apache.org/jira/browse/HDFS-12347) | TestBalancerRPCDelay#testBalancerRPCDelay fails very frequently | Critical | test | Xiao Chen | Bharat Viswanadham | +| [HADOOP-15143](https://issues.apache.org/jira/browse/HADOOP-15143) | NPE due to Invalid KerberosTicket in UGI | Major | . | Jitendra Nath Pandey | Mukul Kumar Singh | +| [MAPREDUCE-7028](https://issues.apache.org/jira/browse/MAPREDUCE-7028) | Concurrent task progress updates causing NPE in Application Master | Blocker | mr-am | Gergo Repas | Gergo Repas | +| [YARN-7619](https://issues.apache.org/jira/browse/YARN-7619) | Max AM Resource value in Capacity Scheduler UI has to be refreshed for every user | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | +| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Major | resourcemanager | lujie | lujie | +| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Major | yarn | lujie | lujie | +| [HDFS-6804](https://issues.apache.org/jira/browse/HDFS-6804) | Add test for race condition between transferring block and appending block causes "Unexpected checksum mismatch exception" | Major | datanode | Gordon Wang | Brahma Reddy Battula | +| [HDFS-9049](https://issues.apache.org/jira/browse/HDFS-9049) | Make Datanode Netty reverse proxy port to be configurable | Major | datanode | Vinayakumar B | Vinayakumar B | +| [HADOOP-15150](https://issues.apache.org/jira/browse/HADOOP-15150) | in FsShell, UGI params should be overidden through env vars(-D arg) | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HADOOP-15181](https://issues.apache.org/jira/browse/HADOOP-15181) | Typo in SecureMode.md | Trivial | documentation | Masahiro Tanaka | Masahiro Tanaka | +| [YARN-7102](https://issues.apache.org/jira/browse/YARN-7102) | NM heartbeat stuck when responseId overflows MAX\_INT | Critical | . | Botong Huang | Botong Huang | +| [MAPREDUCE-7020](https://issues.apache.org/jira/browse/MAPREDUCE-7020) | Task timeout in uber mode can crash AM | Major | mr-am | Akira Ajisaka | Peter Bacsko | +| [HDFS-13100](https://issues.apache.org/jira/browse/HDFS-13100) | Handle IllegalArgumentException when GETSERVERDEFAULTS is not implemented in webhdfs. | Critical | hdfs, webhdfs | Yongjun Zhang | Yongjun Zhang | +| [YARN-7849](https://issues.apache.org/jira/browse/YARN-7849) | TestMiniYarnClusterNodeUtilization#testUpdateNodeUtilization fails due to heartbeat sync error | Major | test | Jason Lowe | Botong Huang | +| [HDFS-13120](https://issues.apache.org/jira/browse/HDFS-13120) | Snapshot diff could be corrupted after concat | Major | namenode, snapshots | Xiaoyu Yao | Xiaoyu Yao | +| [HDFS-10453](https://issues.apache.org/jira/browse/HDFS-10453) | ReplicationMonitor thread could stuck for long time due to the race between replication and delete of same file in a large cluster. 
| Major | namenode | He Xiaoqiao | He Xiaoqiao | +| [HDFS-8693](https://issues.apache.org/jira/browse/HDFS-8693) | refreshNamenodes does not support adding a new standby to a running DN | Critical | datanode, ha | Jian Fang | Ajith S | +| [MAPREDUCE-7052](https://issues.apache.org/jira/browse/MAPREDUCE-7052) | TestFixedLengthInputFormat#testFormatCompressedIn is flaky | Major | client, test | Peter Bacsko | Peter Bacsko | +| [HDFS-13112](https://issues.apache.org/jira/browse/HDFS-13112) | Token expiration edits may cause log corruption or deadlock | Critical | namenode | Daryn Sharp | Daryn Sharp | +| [MAPREDUCE-7053](https://issues.apache.org/jira/browse/MAPREDUCE-7053) | Timed out tasks can fail to produce thread dump | Major | . | Jason Lowe | Jason Lowe | +| [HADOOP-15206](https://issues.apache.org/jira/browse/HADOOP-15206) | BZip2 drops and duplicates records when input split size is small | Major | . | Aki Tanaka | Aki Tanaka | +| [YARN-7947](https://issues.apache.org/jira/browse/YARN-7947) | Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps | Major | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne | +| [HADOOP-14903](https://issues.apache.org/jira/browse/HADOOP-14903) | Add json-smart explicitly to pom.xml | Major | common | Ray Chiang | Ray Chiang | +| [HDFS-12070](https://issues.apache.org/jira/browse/HDFS-12070) | Failed block recovery leaves files open indefinitely and at risk for data loss | Major | . | Daryn Sharp | Kihwal Lee | +| [HADOOP-15283](https://issues.apache.org/jira/browse/HADOOP-15283) | Upgrade from findbugs 3.0.1 to spotbugs 3.1.2 in branch-2 to fix docker image build | Major | . | Xiao Chen | Akira Ajisaka | +| [HDFS-13164](https://issues.apache.org/jira/browse/HDFS-13164) | File not closed if streamer fail with DSQuotaExceededException | Major | hdfs-client | Xiao Chen | Xiao Chen | +| [HDFS-13109](https://issues.apache.org/jira/browse/HDFS-13109) | Support fully qualified hdfs path in EZ commands | Major | hdfs | Hanisha Koneru | Hanisha Koneru | +| [MAPREDUCE-6930](https://issues.apache.org/jira/browse/MAPREDUCE-6930) | mapreduce.map.cpu.vcores and mapreduce.reduce.cpu.vcores are both present twice in mapred-default.xml | Major | mrv2 | Daniel Templeton | Sen Zhao | +| [HDFS-12156](https://issues.apache.org/jira/browse/HDFS-12156) | TestFSImage fails without -Pnative | Major | test | Akira Ajisaka | Akira Ajisaka | +| [YARN-8054](https://issues.apache.org/jira/browse/YARN-8054) | Improve robustness of the LocalDirsHandlerService MonitoringTimerTask thread | Major | . | Jonathan Eagles | Jonathan Eagles | +| [HDFS-13195](https://issues.apache.org/jira/browse/HDFS-13195) | DataNode conf page cannot display the current value after reconfig | Minor | datanode | maobaolong | maobaolong | +| [HADOOP-12862](https://issues.apache.org/jira/browse/HADOOP-12862) | LDAP Group Mapping over SSL can not specify trust store | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-15317](https://issues.apache.org/jira/browse/HADOOP-15317) | Improve NetworkTopology chooseRandom's loop | Major | . 
| Xiao Chen | Xiao Chen | +| [HDFS-13427](https://issues.apache.org/jira/browse/HDFS-13427) | Fix the section titles of transparent encryption document | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [YARN-8120](https://issues.apache.org/jira/browse/YARN-8120) | JVM can crash with SIGSEGV when exiting due to custom leveldb logger | Major | nodemanager, resourcemanager | Jason Lowe | Jason Lowe | +| [YARN-8147](https://issues.apache.org/jira/browse/YARN-8147) | TestClientRMService#testGetApplications sporadically fails | Major | test | Jason Lowe | Jason Lowe | +| [HADOOP-14970](https://issues.apache.org/jira/browse/HADOOP-14970) | MiniHadoopClusterManager doesn't respect lack of format option | Minor | . | Erik Krogen | Erik Krogen | +| [HADOOP-15180](https://issues.apache.org/jira/browse/HADOOP-15180) | branch-2 : daemon processes' sysout overwrites 'ulimit -a' in daemon's out file | Minor | scripts | Ranith Sardar | Ranith Sardar | +| [HDFS-10183](https://issues.apache.org/jira/browse/HDFS-10183) | Prevent race condition during class initialization | Minor | fs | Pavel Avgustinov | Pavel Avgustinov | +| [HADOOP-15390](https://issues.apache.org/jira/browse/HADOOP-15390) | Yarn RM logs flooded by DelegationTokenRenewer trying to renew KMS tokens | Critical | . | Xiao Chen | Xiao Chen | +| [HADOOP-15385](https://issues.apache.org/jira/browse/HADOOP-15385) | Many tests are failing in hadoop-distcp project in branch-2 | Critical | tools/distcp | Rushabh S Shah | Jason Lowe | +| [MAPREDUCE-7073](https://issues.apache.org/jira/browse/MAPREDUCE-7073) | Optimize TokenCache#obtainTokensForNamenodesInternal | Major | . | Bibin A Chundatt | Bibin A Chundatt | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-13174](https://issues.apache.org/jira/browse/HADOOP-13174) | Add more debug logs for delegation tokens and authentication | Minor | security | Xiao Chen | Xiao Chen | +| [HADOOP-14799](https://issues.apache.org/jira/browse/HADOOP-14799) | Update nimbus-jose-jwt to 4.41.1 | Major | . | Ray Chiang | Ray Chiang | +| [HDFS-12396](https://issues.apache.org/jira/browse/HDFS-12396) | Webhdfs file system should get delegation token from kms provider. | Major | encryption, kms, webhdfs | Rushabh S Shah | Rushabh S Shah | +| [HDFS-12574](https://issues.apache.org/jira/browse/HDFS-12574) | Add CryptoInputStream to WebHdfsFileSystem read call. | Major | encryption, kms, webhdfs | Rushabh S Shah | Rushabh S Shah | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15177](https://issues.apache.org/jira/browse/HADOOP-15177) | Update the release year to 2018 | Blocker | build | Akira Ajisaka | Bharat Viswanadham | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.4/RELEASENOTES.2.8.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.4/RELEASENOTES.2.8.4.md new file mode 100644 index 00000000000..3b50339bd8b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.4/RELEASENOTES.2.8.4.md @@ -0,0 +1,21 @@ + + +# Apache Hadoop 2.8.4 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. 
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.5/CHANGELOG.2.8.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.5/CHANGELOG.2.8.5.md new file mode 100644 index 00000000000..144dca3891a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.5/CHANGELOG.2.8.5.md @@ -0,0 +1,70 @@ + + +# Apache Hadoop Changelog + +## Release 2.8.5 - Unreleased (as of 2018-09-02) + + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-13738](https://issues.apache.org/jira/browse/HADOOP-13738) | DiskChecker should perform some disk IO | Major | . | Arpit Agarwal | Arpit Agarwal | +| [HADOOP-15394](https://issues.apache.org/jira/browse/HADOOP-15394) | Backport PowerShell NodeFencer HADOOP-14309 to branch-2 | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-15441](https://issues.apache.org/jira/browse/HADOOP-15441) | Log kms url and token service at debug level. | Minor | . | Wei-Chiu Chuang | Gabor Bota | +| [HADOOP-15486](https://issues.apache.org/jira/browse/HADOOP-15486) | Make NetworkTopology#netLock fair | Major | net | Nanda kumar | Nanda kumar | +| [HADOOP-15449](https://issues.apache.org/jira/browse/HADOOP-15449) | Increase default timeout of ZK session to avoid frequent NameNode failover | Critical | common | Karthik Palanisamy | Karthik Palanisamy | +| [HDFS-13602](https://issues.apache.org/jira/browse/HDFS-13602) | Add checkOperation(WRITE) checks in FSNamesystem | Major | ha, namenode | Erik Krogen | Chao Sun | +| [HADOOP-15689](https://issues.apache.org/jira/browse/HADOOP-15689) | Add "\*.patch" into .gitignore file of branch-2 | Major | . | Rui Gao | Rui Gao | +| [YARN-8051](https://issues.apache.org/jira/browse/YARN-8051) | TestRMEmbeddedElector#testCallbackSynchronization is flakey | Major | test | Robert Kanter | Robert Kanter | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15121](https://issues.apache.org/jira/browse/HADOOP-15121) | Encounter NullPointerException when using DecayRpcScheduler | Major | . | Tao Jie | Tao Jie | +| [HDFS-10803](https://issues.apache.org/jira/browse/HDFS-10803) | TestBalancerWithMultipleNameNodes#testBalancing2OutOf3Blockpools fails intermittently due to no free space available | Major | . | Yiqun Lin | Yiqun Lin | +| [HDFS-12828](https://issues.apache.org/jira/browse/HDFS-12828) | OIV ReverseXML Processor fails with escaped characters | Critical | hdfs | Erik Krogen | Erik Krogen | +| [HADOOP-15396](https://issues.apache.org/jira/browse/HADOOP-15396) | Some java source files are executable | Minor | . | Akira Ajisaka | Shashikant Banerjee | +| [YARN-8232](https://issues.apache.org/jira/browse/YARN-8232) | RMContainer lost queue name when RM HA happens | Major | resourcemanager | Hu Ziqian | Hu Ziqian | +| [HDFS-13581](https://issues.apache.org/jira/browse/HDFS-13581) | DN UI logs link is broken when https is enabled | Minor | datanode | Namit Maheshwari | Shashikant Banerjee | +| [HADOOP-15450](https://issues.apache.org/jira/browse/HADOOP-15450) | Avoid fsync storm triggered by DiskChecker and handle disk full situation | Blocker | . 
| Kihwal Lee | Arpit Agarwal | +| [HADOOP-15473](https://issues.apache.org/jira/browse/HADOOP-15473) | Configure serialFilter in KeyProvider to avoid UnrecoverableKeyException caused by JDK-8189997 | Critical | kms | Gabor Bota | Gabor Bota | +| [YARN-8444](https://issues.apache.org/jira/browse/YARN-8444) | NodeResourceMonitor crashes on bad swapFree value | Major | . | Jim Brennan | Jim Brennan | +| [HADOOP-15548](https://issues.apache.org/jira/browse/HADOOP-15548) | Randomize local dirs | Minor | . | Jim Brennan | Jim Brennan | +| [YARN-8383](https://issues.apache.org/jira/browse/YARN-8383) | TimelineServer 1.5 start fails with NoClassDefFoundError | Blocker | . | Rohith Sharma K S | Jason Lowe | +| [YARN-8473](https://issues.apache.org/jira/browse/YARN-8473) | Containers being launched as app tears down can leave containers in NEW state | Major | nodemanager | Jason Lowe | Jason Lowe | +| [YARN-8515](https://issues.apache.org/jira/browse/YARN-8515) | container-executor can crash with SIGPIPE after nodemanager restart | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8421](https://issues.apache.org/jira/browse/YARN-8421) | when moving app, activeUsers is increased, even though app does not have outstanding request | Major | . | kyungwan nam | | +| [HADOOP-15614](https://issues.apache.org/jira/browse/HADOOP-15614) | TestGroupsCaching.testExceptionOnBackgroundRefreshHandled reliably fails | Major | . | Kihwal Lee | Weiwei Yang | +| [HADOOP-15637](https://issues.apache.org/jira/browse/HADOOP-15637) | LocalFs#listLocatedStatus does not filter out hidden .crc files | Minor | fs | Erik Krogen | Erik Krogen | +| [HADOOP-15674](https://issues.apache.org/jira/browse/HADOOP-15674) | Test failure TestSSLHttpServer.testExcludedCiphers with TLS\_ECDHE\_RSA\_WITH\_AES\_128\_CBC\_SHA256 cipher suite | Major | common | Gabor Bota | Szilard Nemeth | +| [YARN-8640](https://issues.apache.org/jira/browse/YARN-8640) | Restore previous state in container-executor after failure | Major | . | Jim Brennan | Jim Brennan | +| [HADOOP-14314](https://issues.apache.org/jira/browse/HADOOP-14314) | The OpenSolaris taxonomy link is dead in InterfaceClassification.md | Major | documentation | Daniel Templeton | Rui Gao | +| [YARN-8649](https://issues.apache.org/jira/browse/YARN-8649) | NPE in localizer hearbeat processing if a container is killed while localizing | Major | . | lujie | lujie | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-4781](https://issues.apache.org/jira/browse/YARN-4781) | Support intra-queue preemption for fairness ordering policy. | Major | scheduler | Wangda Tan | Eric Payne | +| [HDFS-13281](https://issues.apache.org/jira/browse/HDFS-13281) | Namenode#createFile should be /.reserved/raw/ aware. | Critical | encryption | Rushabh S Shah | Rushabh S Shah | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.5/RELEASENOTES.2.8.5.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.5/RELEASENOTES.2.8.5.md new file mode 100644 index 00000000000..f2437d55602 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.5/RELEASENOTES.2.8.5.md @@ -0,0 +1,21 @@ + + +# Apache Hadoop 2.8.5 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. 
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/CHANGES.2.9.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/CHANGELOG.2.9.0.md similarity index 70% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/CHANGES.2.9.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/CHANGELOG.2.9.0.md index ff74e34ac56..cbe932e2823 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/CHANGES.2.9.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/CHANGELOG.2.9.0.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 2.9.0 - Unreleased (as of 2017-08-28) +## Release 2.9.0 - 2017-11-17 ### INCOMPATIBLE CHANGES: @@ -30,23 +30,28 @@ | [YARN-5388](https://issues.apache.org/jira/browse/YARN-5388) | Deprecate and remove DockerContainerExecutor | Critical | nodemanager | Daniel Templeton | Daniel Templeton | | [HADOOP-12705](https://issues.apache.org/jira/browse/HADOOP-12705) | Upgrade Jackson 2.2.3 to 2.7.8 | Major | build | Steve Loughran | Sean Mackrory | | [HADOOP-13050](https://issues.apache.org/jira/browse/HADOOP-13050) | Upgrade to AWS SDK 1.11.45 | Blocker | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . | Jeffrey E Rodriguez | Yuanbo Liu | | [HADOOP-14174](https://issues.apache.org/jira/browse/HADOOP-14174) | Set default ADLS access token provider type to ClientCredential | Major | fs/adl | John Zhuge | John Zhuge | | [HDFS-11402](https://issues.apache.org/jira/browse/HDFS-11402) | HDFS Snapshots should capture point-in-time copies of OPEN files | Major | hdfs | Manoj Govindassamy | Manoj Govindassamy | | [YARN-2962](https://issues.apache.org/jira/browse/YARN-2962) | ZKRMStateStore: Limit the number of znodes under a znode | Critical | resourcemanager | Karthik Kambatla | Varun Saxena | | [HADOOP-14419](https://issues.apache.org/jira/browse/HADOOP-14419) | Remove findbugs report from docs profile | Minor | . | Andras Bokor | Andras Bokor | | [YARN-6127](https://issues.apache.org/jira/browse/YARN-6127) | Add support for work preserving NM restart when AMRMProxy is enabled | Major | amrmproxy, nodemanager | Subru Krishnan | Botong Huang | -| [YARN-5049](https://issues.apache.org/jira/browse/YARN-5049) | Extend NMStateStore to save queued container information | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | +| [HADOOP-14539](https://issues.apache.org/jira/browse/HADOOP-14539) | Move commons logging APIs over to slf4j in hadoop-common | Major | . 
| Akira Ajisaka | Wenxin He | | [HADOOP-14260](https://issues.apache.org/jira/browse/HADOOP-14260) | Configuration.dumpConfiguration should redact sensitive information | Major | conf, security | Vihang Karajgaonkar | John Zhuge | +| [YARN-6623](https://issues.apache.org/jira/browse/YARN-6623) | Add support to turn off launching privileged containers in the container-executor | Blocker | nodemanager | Varun Vasudev | Varun Vasudev | +| [YARN-5049](https://issues.apache.org/jira/browse/YARN-5049) | Extend NMStateStore to save queued container information | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Arun Suresh | ### NEW FEATURES: | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-12321](https://issues.apache.org/jira/browse/HADOOP-12321) | Make JvmPauseMonitor an AbstractService | Major | . | Steve Loughran | Sunil G | -| [HDFS-9525](https://issues.apache.org/jira/browse/HDFS-9525) | hadoop utilities need to support provided delegation tokens | Blocker | security | Allen Wittenauer | HeeSoo Kim | +| [HADOOP-12321](https://issues.apache.org/jira/browse/HADOOP-12321) | Make JvmPauseMonitor an AbstractService | Major | . | Steve Loughran | Sunil Govindan | +| [HDFS-9525](https://issues.apache.org/jira/browse/HDFS-9525) | hadoop utilities need to support provided delegation tokens | Blocker | security | Allen Wittenauer | Heesoo Kim | | [HADOOP-12702](https://issues.apache.org/jira/browse/HADOOP-12702) | Add an HDFS metrics sink | Major | metrics | Daniel Templeton | Daniel Templeton | | [HADOOP-12847](https://issues.apache.org/jira/browse/HADOOP-12847) | hadoop daemonlog should support https and SPNEGO for Kerberized cluster | Major | security | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-2928](https://issues.apache.org/jira/browse/YARN-2928) | YARN Timeline Service v.2: alpha 1 | Critical | timelineserver | Sangjin Lee | Sangjin Lee | +| [MAPREDUCE-6331](https://issues.apache.org/jira/browse/MAPREDUCE-6331) | [Umbrella] Make MapReduce work with Timeline Service Nextgen (YARN-2928) | Major | . | Vinod Kumar Vavilapalli | Sangjin Lee | | [HADOOP-12747](https://issues.apache.org/jira/browse/HADOOP-12747) | support wildcard in libjars argument | Major | util | Sangjin Lee | Sangjin Lee | | [MAPREDUCE-6690](https://issues.apache.org/jira/browse/MAPREDUCE-6690) | Limit the number of resources a single map reduce job can submit for localization | Major | . | Chris Trezzo | Chris Trezzo | | [HADOOP-13396](https://issues.apache.org/jira/browse/HADOOP-13396) | Allow pluggable audit loggers in KMS | Major | kms | Xiao Chen | Xiao Chen | @@ -62,13 +67,27 @@ | [YARN-679](https://issues.apache.org/jira/browse/YARN-679) | add an entry point that can start any Yarn service | Major | api | Steve Loughran | Steve Loughran | | [HDFS-10480](https://issues.apache.org/jira/browse/HDFS-10480) | Add an admin command to list currently open files | Major | . 
| Kihwal Lee | Manoj Govindassamy | | [YARN-4161](https://issues.apache.org/jira/browse/YARN-4161) | Capacity Scheduler : Assign single or multiple containers per heart beat driven by configuration | Major | capacity scheduler | Mayank Bansal | Wei Yan | -| [HDFS-12117](https://issues.apache.org/jira/browse/HDFS-12117) | HttpFS does not seem to support SNAPSHOT related methods for WebHDFS REST Interface | Major | httpfs | Wellington Chevreuil | Wellington Chevreuil | +| [YARN-5355](https://issues.apache.org/jira/browse/YARN-5355) | YARN Timeline Service v.2: alpha 2 | Critical | timelineserver | Sangjin Lee | Vrushali C | +| [HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345) | S3Guard: Improved Consistency for S3A | Major | fs/s3 | Chris Nauroth | Chris Nauroth | +| [HDFS-7877](https://issues.apache.org/jira/browse/HDFS-7877) | [Umbrella] Support maintenance state for datanodes | Major | datanode, namenode | Ming Ma | Ming Ma | +| [YARN-2915](https://issues.apache.org/jira/browse/YARN-2915) | Enable YARN RM scale out via federation using multiple RM's | Major | nodemanager, resourcemanager | Sriram Rao | Subru Krishnan | +| [YARN-1492](https://issues.apache.org/jira/browse/YARN-1492) | truly shared cache for jars (jobjar/libjar) | Major | . | Sangjin Lee | Chris Trezzo | +| [HDFS-10467](https://issues.apache.org/jira/browse/HDFS-10467) | Router-based HDFS federation | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [YARN-5734](https://issues.apache.org/jira/browse/YARN-5734) | OrgQueue for easy CapacityScheduler queue configuration management | Major | . | Min Shen | Min Shen | +| [MAPREDUCE-5951](https://issues.apache.org/jira/browse/MAPREDUCE-5951) | Add support for the YARN Shared Cache | Major | . | Chris Trezzo | Chris Trezzo | +| [YARN-6871](https://issues.apache.org/jira/browse/YARN-6871) | Add additional deSelects params in RMWebServices#getAppReport | Major | resourcemanager, router | Giovanni Matteo Fumarola | Tanuj Nayak | +| [MAPREDUCE-6732](https://issues.apache.org/jira/browse/MAPREDUCE-6732) | mapreduce tasks for YARN Timeline Service v.2: alpha 2 | Major | . | Sangjin Lee | Vrushali C | +| [HADOOP-14840](https://issues.apache.org/jira/browse/HADOOP-14840) | Tool to estimate resource requirements of an application pipeline based on prior executions | Major | tools | Subru Krishnan | Rui Li | +| [YARN-3813](https://issues.apache.org/jira/browse/YARN-3813) | Support Application timeout feature in YARN. | Major | scheduler | nijel | Rohith Sharma K S | +| [YARN-2877](https://issues.apache.org/jira/browse/YARN-2877) | Extend YARN to support distributed scheduling | Major | nodemanager, resourcemanager | Sriram Rao | Konstantinos Karanasos | +| [YARN-5220](https://issues.apache.org/jira/browse/YARN-5220) | Scheduling of OPPORTUNISTIC containers through YARN RM | Major | resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | ### IMPROVEMENTS: | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-2280](https://issues.apache.org/jira/browse/YARN-2280) | Resource manager web service fields are not accessible | Trivial | resourcemanager | Krisztian Horvath | Krisztian Horvath | | [HDFS-9267](https://issues.apache.org/jira/browse/HDFS-9267) | TestDiskError should get stored replicas through FsDatasetTestUtils. 
| Minor | test | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HDFS-9491](https://issues.apache.org/jira/browse/HDFS-9491) | Tests should get the number of pending async delets via FsDatasetTestUtils | Minor | test | Tony Wu | Tony Wu | | [HADOOP-12625](https://issues.apache.org/jira/browse/HADOOP-12625) | Add a config to disable the /logs endpoints | Major | security | Robert Kanter | Robert Kanter | @@ -192,11 +211,11 @@ | [MAPREDUCE-6792](https://issues.apache.org/jira/browse/MAPREDUCE-6792) | Allow user's full principal name as owner of MapReduce staging directory in JobSubmissionFiles#JobStagingDir() | Major | client | Santhosh G Nayak | Santhosh G Nayak | | [YARN-5575](https://issues.apache.org/jira/browse/YARN-5575) | Many classes use bare yarn. properties instead of the defined constants | Major | . | Daniel Templeton | Daniel Templeton | | [HDFS-11049](https://issues.apache.org/jira/browse/HDFS-11049) | The description of dfs.block.replicator.classname is not clear | Minor | documentation | Yiqun Lin | Yiqun Lin | +| [HDFS-11069](https://issues.apache.org/jira/browse/HDFS-11069) | Tighten the authorization of datanode RPC | Major | datanode, security | Kihwal Lee | Kihwal Lee | | [YARN-4456](https://issues.apache.org/jira/browse/YARN-4456) | Clean up Lint warnings in nodemanager | Minor | nodemanager | Daniel Templeton | Daniel Templeton | | [YARN-4668](https://issues.apache.org/jira/browse/YARN-4668) | Reuse objectMapper instance in Yarn | Major | timelineclient | Yiqun Lin | Yiqun Lin | | [YARN-4907](https://issues.apache.org/jira/browse/YARN-4907) | Make all MockRM#waitForState consistent. | Major | resourcemanager | Yufei Gu | Yufei Gu | | [YARN-4396](https://issues.apache.org/jira/browse/YARN-4396) | Log the trace information on FSAppAttempt#assignContainer | Major | applications, fairscheduler | Yiqun Lin | Yiqun Lin | -| [HADOOP-13738](https://issues.apache.org/jira/browse/HADOOP-13738) | DiskChecker should perform some disk IO | Major | . | Arpit Agarwal | Arpit Agarwal | | [HDFS-11088](https://issues.apache.org/jira/browse/HDFS-11088) | Quash unnecessary safemode WARN message during NameNode startup | Trivial | . | Andrew Wang | Yiqun Lin | | [YARN-4998](https://issues.apache.org/jira/browse/YARN-4998) | Minor cleanup to UGI use in AdminService | Trivial | resourcemanager | Daniel Templeton | Daniel Templeton | | [HDFS-10756](https://issues.apache.org/jira/browse/HDFS-10756) | Expose getTrashRoot to HTTPFS and WebHDFS | Major | encryption, httpfs, webhdfs | Xiao Chen | Yuanbo Liu | @@ -217,7 +236,7 @@ | [YARN-4997](https://issues.apache.org/jira/browse/YARN-4997) | Update fair scheduler to use pluggable auth provider | Major | fairscheduler | Daniel Templeton | Tao Jie | | [MAPREDUCE-6787](https://issues.apache.org/jira/browse/MAPREDUCE-6787) | Allow job\_conf.xml to be downloadable on the job overview page in JHS | Major | jobhistoryserver | Haibo Chen | Haibo Chen | | [HDFS-11211](https://issues.apache.org/jira/browse/HDFS-11211) | Add a time unit to the DataNode client trace format | Minor | datanode | Akira Ajisaka | Jagadesh Kiran N | -| [HDFS-10206](https://issues.apache.org/jira/browse/HDFS-10206) | Datanodes not sorted properly by distance when the reader isn't a datanode | Major | . | Ming Ma | Nandakumar | +| [HDFS-10206](https://issues.apache.org/jira/browse/HDFS-10206) | Datanodes not sorted properly by distance when the reader isn't a datanode | Major | . 
| Ming Ma | Nanda kumar | | [HADOOP-13709](https://issues.apache.org/jira/browse/HADOOP-13709) | Ability to clean up subprocesses spawned by Shell when the process exits | Major | . | Eric Badger | Eric Badger | | [HDFS-10930](https://issues.apache.org/jira/browse/HDFS-10930) | Refactor: Wrap Datanode IO related operations | Major | datanode | Xiaoyu Yao | Xiaoyu Yao | | [HDFS-10959](https://issues.apache.org/jira/browse/HDFS-10959) | Adding per disk IO statistics and metrics in DataNode. | Major | datanode | Xiaoyu Yao | Xiaoyu Yao | @@ -227,7 +246,6 @@ | [HDFS-10917](https://issues.apache.org/jira/browse/HDFS-10917) | Collect peer performance statistics on DataNode. | Major | datanode | Xiaobing Zhou | Xiaobing Zhou | | [YARN-5969](https://issues.apache.org/jira/browse/YARN-5969) | FairShareComparator: Cache value of getResourceUsage for better performance | Major | fairscheduler | zhangshilong | zhangshilong | | [HDFS-11279](https://issues.apache.org/jira/browse/HDFS-11279) | Cleanup unused DataNode#checkDiskErrorAsync() | Minor | . | Xiaoyu Yao | Hanisha Koneru | -| [HDFS-11156](https://issues.apache.org/jira/browse/HDFS-11156) | Add new op GETFILEBLOCKLOCATIONS to WebHDFS REST API | Major | webhdfs | Weiwei Yang | Weiwei Yang | | [YARN-6015](https://issues.apache.org/jira/browse/YARN-6015) | AsyncDispatcher thread name can be set to improved debugging | Major | . | Ajith S | Ajith S | | [HADOOP-13953](https://issues.apache.org/jira/browse/HADOOP-13953) | Make FTPFileSystem's data connection mode and transfer mode configurable | Major | fs | Xiao Chen | Xiao Chen | | [HDFS-11299](https://issues.apache.org/jira/browse/HDFS-11299) | Support multiple Datanode File IO hooks | Major | hdfs | Hanisha Koneru | Hanisha Koneru | @@ -238,7 +256,6 @@ | [HADOOP-13496](https://issues.apache.org/jira/browse/HADOOP-13496) | Include file lengths in Mismatch in length error for distcp | Minor | . | Ted Yu | Ted Yu | | [YARN-6028](https://issues.apache.org/jira/browse/YARN-6028) | Add document for container metrics | Major | documentation, nodemanager | Weiwei Yang | Weiwei Yang | | [MAPREDUCE-6728](https://issues.apache.org/jira/browse/MAPREDUCE-6728) | Give fetchers hint when ShuffleHandler rejects a shuffling connection | Major | mrv2 | Haibo Chen | Haibo Chen | -| [YARN-5547](https://issues.apache.org/jira/browse/YARN-5547) | NMLeveldbStateStore should be more tolerant of unknown keys | Major | nodemanager | Jason Lowe | Ajith S | | [HDFS-10534](https://issues.apache.org/jira/browse/HDFS-10534) | NameNode WebUI should display DataNode usage histogram | Major | namenode, ui | Zhe Zhang | Kai Sasaki | | [HADOOP-14003](https://issues.apache.org/jira/browse/HADOOP-14003) | Make additional KMS tomcat settings configurable | Major | kms | Andrew Wang | Andrew Wang | | [HDFS-11374](https://issues.apache.org/jira/browse/HDFS-11374) | Skip FSync in Test util CreateEditsLog to speed up edit log generation | Minor | hdfs | Hanisha Koneru | Hanisha Koneru | @@ -266,7 +283,7 @@ | [YARN-6194](https://issues.apache.org/jira/browse/YARN-6194) | Cluster capacity in SchedulingPolicy is updated only on allocation file reload | Major | fairscheduler | Karthik Kambatla | Yufei Gu | | [HADOOP-14097](https://issues.apache.org/jira/browse/HADOOP-14097) | Remove Java6 specific code from GzipCodec.java | Minor | . 
| Akira Ajisaka | Elek, Marton | | [HADOOP-13817](https://issues.apache.org/jira/browse/HADOOP-13817) | Add a finite shell command timeout to ShellBasedUnixGroupsMapping | Minor | security | Harsh J | Harsh J | -| [HDFS-11295](https://issues.apache.org/jira/browse/HDFS-11295) | Check storage remaining instead of node remaining in BlockPlacementPolicyDefault.chooseReplicaToDelete() | Major | namenode | Xiao Liang | Elek, Marton | +| [HDFS-11295](https://issues.apache.org/jira/browse/HDFS-11295) | Check storage remaining instead of node remaining in BlockPlacementPolicyDefault.chooseReplicaToDelete() | Major | namenode | X. Liang | Elek, Marton | | [HADOOP-14083](https://issues.apache.org/jira/browse/HADOOP-14083) | KMS should support old SSL clients | Minor | kms | John Zhuge | John Zhuge | | [HADOOP-14127](https://issues.apache.org/jira/browse/HADOOP-14127) | Add log4j configuration to enable logging in hadoop-distcp's tests | Minor | test | Xiao Chen | Xiao Chen | | [HDFS-11466](https://issues.apache.org/jira/browse/HDFS-11466) | Change dfs.namenode.write-lock-reporting-threshold-ms default from 1000ms to 5000ms | Major | namenode | Andrew Wang | Andrew Wang | @@ -300,9 +317,9 @@ | [HADOOP-14233](https://issues.apache.org/jira/browse/HADOOP-14233) | Delay construction of PreCondition.check failure message in Configuration#set | Major | . | Jonathan Eagles | Jonathan Eagles | | [HADOOP-14240](https://issues.apache.org/jira/browse/HADOOP-14240) | Configuration#get return value optimization | Major | . | Jonathan Eagles | Jonathan Eagles | | [YARN-6339](https://issues.apache.org/jira/browse/YARN-6339) | Improve performance for createAndGetApplicationReport | Major | . | yunjiong zhao | yunjiong zhao | -| [HDFS-11170](https://issues.apache.org/jira/browse/HDFS-11170) | Add builder-based create API to FileSystem | Major | . | SammiChen | SammiChen | +| [HDFS-11170](https://issues.apache.org/jira/browse/HDFS-11170) | Add builder-based create API to FileSystem | Major | . | Sammi Chen | Sammi Chen | | [YARN-6329](https://issues.apache.org/jira/browse/YARN-6329) | Remove unnecessary TODO comment from AppLogAggregatorImpl.java | Minor | . | Akira Ajisaka | victor bertschinger | -| [HDFS-9705](https://issues.apache.org/jira/browse/HDFS-9705) | Refine the behaviour of getFileChecksum when length = 0 | Minor | . | Kai Zheng | SammiChen | +| [HDFS-9705](https://issues.apache.org/jira/browse/HDFS-9705) | Refine the behaviour of getFileChecksum when length = 0 | Minor | . 
| Kai Zheng | Sammi Chen | | [HDFS-11551](https://issues.apache.org/jira/browse/HDFS-11551) | Handle SlowDiskReport from DataNode at the NameNode | Major | hdfs | Hanisha Koneru | Hanisha Koneru | | [HDFS-11603](https://issues.apache.org/jira/browse/HDFS-11603) | Improve slow mirror/disk warnings in BlockReceiver | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [HDFS-11560](https://issues.apache.org/jira/browse/HDFS-11560) | Expose slow disks via NameNode JMX | Major | namenode | Hanisha Koneru | Hanisha Koneru | @@ -380,11 +397,9 @@ | [HADOOP-14443](https://issues.apache.org/jira/browse/HADOOP-14443) | Azure: Support retry and client side failover for authorization, SASKey and delegation token generation | Major | fs/azure | Santhosh G Nayak | Santhosh G Nayak | | [HADOOP-14535](https://issues.apache.org/jira/browse/HADOOP-14535) | wasb: implement high-performance random access and seek of block blobs | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | | [HADOOP-14629](https://issues.apache.org/jira/browse/HADOOP-14629) | Improve exception checking in FileContext related JUnit tests | Major | fs, test | Andras Bokor | Andras Bokor | -| [HDFS-6874](https://issues.apache.org/jira/browse/HDFS-6874) | Add GETFILEBLOCKLOCATIONS operation to HttpFS | Major | httpfs | Gao Zhong Liang | Weiwei Yang | | [YARN-6689](https://issues.apache.org/jira/browse/YARN-6689) | PlacementRule should be configurable | Major | . | Jonathan Hung | Jonathan Hung | | [HDFS-12130](https://issues.apache.org/jira/browse/HDFS-12130) | Optimizing permission check for getContentSummary | Major | namenode | Chen Liang | Chen Liang | | [HDFS-12137](https://issues.apache.org/jira/browse/HDFS-12137) | DN dataset lock should be fair | Critical | datanode | Daryn Sharp | Daryn Sharp | -| [HADOOP-14521](https://issues.apache.org/jira/browse/HADOOP-14521) | KMS client needs retry logic | Major | . 
| Rushabh S Shah | Rushabh S Shah | | [YARN-6280](https://issues.apache.org/jira/browse/YARN-6280) | Introduce deselect query param to skip ResourceRequest from getApp/getApps REST API | Major | resourcemanager, restapi | Lantao Jin | Lantao Jin | | [HDFS-12138](https://issues.apache.org/jira/browse/HDFS-12138) | Remove redundant 'public' modifiers from BlockCollection | Trivial | namenode | Chen Liang | Chen Liang | | [HADOOP-14640](https://issues.apache.org/jira/browse/HADOOP-14640) | Azure: Support affinity for service running on localhost and reuse SPNEGO hadoop.auth cookie for authorization, SASKey and delegation token generation | Major | fs/azure | Santhosh G Nayak | Santhosh G Nayak | @@ -406,12 +421,12 @@ | [HADOOP-14709](https://issues.apache.org/jira/browse/HADOOP-14709) | Fix checkstyle warnings in ContractTestUtils | Minor | test | Steve Loughran | Thomas Marquardt | | [MAPREDUCE-6914](https://issues.apache.org/jira/browse/MAPREDUCE-6914) | Tests use assertTrue(....equals(...)) instead of assertEquals() | Minor | test | Daniel Templeton | Daniel Templeton | | [YARN-6832](https://issues.apache.org/jira/browse/YARN-6832) | Tests use assertTrue(....equals(...)) instead of assertEquals() | Minor | test | Daniel Templeton | Daniel Templeton | -| [HDFS-12131](https://issues.apache.org/jira/browse/HDFS-12131) | Add some of the FSNamesystem JMX values as metrics | Minor | hdfs, namenode | Erik Krogen | Erik Krogen | | [HADOOP-14706](https://issues.apache.org/jira/browse/HADOOP-14706) | Adding a helper method to determine whether a log is Log4j implement | Minor | util | Wenxin He | Wenxin He | | [HDFS-12251](https://issues.apache.org/jira/browse/HDFS-12251) | Add document for StreamCapabilities | Major | . | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-6634](https://issues.apache.org/jira/browse/YARN-6634) | [API] Refactor ResourceManager WebServices to make API explicit | Critical | resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | | [YARN-6802](https://issues.apache.org/jira/browse/YARN-6802) | Add Max AM Resource and AM Resource Usage to Leaf Queue View in FairScheduler WebUI | Major | fairscheduler | YunFan Zhou | YunFan Zhou | | [HDFS-12264](https://issues.apache.org/jira/browse/HDFS-12264) | DataNode uses a deprecated method IoUtils#cleanup. | Major | . | Ajay Kumar | Ajay Kumar | +| [YARN-6757](https://issues.apache.org/jira/browse/YARN-6757) | Refactor the usage of yarn.nodemanager.linux-container-executor.cgroups.mount-path | Minor | nodemanager | Miklos Szegedi | Miklos Szegedi | | [YARN-6811](https://issues.apache.org/jira/browse/YARN-6811) | [ATS1.5] All history logs should be kept under its own User Directory. | Major | timelineclient, timelineserver | Rohith Sharma K S | Rohith Sharma K S | | [YARN-6879](https://issues.apache.org/jira/browse/YARN-6879) | TestLeafQueue.testDRFUserLimits() has commented out code | Trivial | capacity scheduler, test | Daniel Templeton | Angela Wang | | [MAPREDUCE-6923](https://issues.apache.org/jira/browse/MAPREDUCE-6923) | Optimize MapReduce Shuffle I/O for small partitions | Major | . 
| Robert Schmidtke | Robert Schmidtke | @@ -421,10 +436,67 @@ | [YARN-6917](https://issues.apache.org/jira/browse/YARN-6917) | Queue path is recomputed from scratch on every allocation | Minor | capacityscheduler | Jason Lowe | Eric Payne | | [HADOOP-14662](https://issues.apache.org/jira/browse/HADOOP-14662) | Update azure-storage sdk to version 5.4.0 | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | | [YARN-3254](https://issues.apache.org/jira/browse/YARN-3254) | HealthReport should include disk full information | Major | nodemanager | Akira Ajisaka | Suma Shivaprasad | +| [HDFS-12117](https://issues.apache.org/jira/browse/HDFS-12117) | HttpFS does not seem to support SNAPSHOT related methods for WebHDFS REST Interface | Major | httpfs | Wellington Chevreuil | Wellington Chevreuil | | [YARN-7053](https://issues.apache.org/jira/browse/YARN-7053) | Move curator transaction support to ZKCuratorManager | Major | . | Jonathan Hung | Jonathan Hung | | [HADOOP-14251](https://issues.apache.org/jira/browse/HADOOP-14251) | Credential provider should handle property key deprecation | Critical | security | John Zhuge | John Zhuge | | [YARN-7049](https://issues.apache.org/jira/browse/YARN-7049) | FSAppAttempt preemption related fields have confusing names | Major | fairscheduler | Karthik Kambatla | Karthik Kambatla | | [YARN-6999](https://issues.apache.org/jira/browse/YARN-6999) | Add log about how to solve Error: Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster | Minor | documentation, security | Linlin Zhou | Linlin Zhou | +| [YARN-7037](https://issues.apache.org/jira/browse/YARN-7037) | Optimize data transfer with zero-copy approach for containerlogs REST API in NMWebServices | Major | nodemanager | Tao Yang | Tao Yang | +| [MAPREDUCE-6937](https://issues.apache.org/jira/browse/MAPREDUCE-6937) | Backport MAPREDUCE-6870 to branch-2 while preserving compatibility | Major | . 
| Zhe Zhang | Peter Bacsko | +| [YARN-6780](https://issues.apache.org/jira/browse/YARN-6780) | ResourceWeights.toString() cleanup | Minor | scheduler | Daniel Templeton | weiyuan | +| [YARN-5547](https://issues.apache.org/jira/browse/YARN-5547) | NMLeveldbStateStore should be more tolerant of unknown keys | Major | nodemanager | Jason Lowe | Ajith S | +| [HDFS-12182](https://issues.apache.org/jira/browse/HDFS-12182) | BlockManager.metaSave does not distinguish between "under replicated" and "missing" blocks | Trivial | hdfs | Wellington Chevreuil | Wellington Chevreuil | +| [HADOOP-14688](https://issues.apache.org/jira/browse/HADOOP-14688) | Intern strings in KeyVersion and EncryptedKeyVersion | Major | kms | Xiao Chen | Xiao Chen | +| [HADOOP-14827](https://issues.apache.org/jira/browse/HADOOP-14827) | Allow StopWatch to accept a Timer parameter for tests | Minor | common, test | Erik Krogen | Erik Krogen | +| [HDFS-12131](https://issues.apache.org/jira/browse/HDFS-12131) | Add some of the FSNamesystem JMX values as metrics | Minor | hdfs, namenode | Erik Krogen | Erik Krogen | +| [HADOOP-14844](https://issues.apache.org/jira/browse/HADOOP-14844) | Remove requirement to specify TenantGuid for MSI Token Provider | Major | fs/adl | Atul Sikaria | Atul Sikaria | +| [YARN-6799](https://issues.apache.org/jira/browse/YARN-6799) | Remove the duplicated code in CGroupsHandlerImp.java | Trivial | nodemanager | Yufei Gu | weiyuan | +| [HADOOP-14520](https://issues.apache.org/jira/browse/HADOOP-14520) | WASB: Block compaction for Azure Block Blobs | Major | fs/azure | Georgi Chalakov | Georgi Chalakov | +| [HADOOP-14839](https://issues.apache.org/jira/browse/HADOOP-14839) | DistCp log output should contain copied and deleted files and directories | Major | tools/distcp | Konstantin Shaposhnikov | Yiqun Lin | +| [HADOOP-14843](https://issues.apache.org/jira/browse/HADOOP-14843) | Improve FsPermission symbolic parsing unit test coverage | Minor | fs | Jason Lowe | Bharat Viswanadham | +| [YARN-7057](https://issues.apache.org/jira/browse/YARN-7057) | FSAppAttempt#getResourceUsage doesn't need to consider resources queued for preemption | Major | fairscheduler | Karthik Kambatla | Karthik Kambatla | +| [HADOOP-14864](https://issues.apache.org/jira/browse/HADOOP-14864) | FSDataInputStream#unbuffer UOE should include stream class name | Minor | fs | John Zhuge | Bharat Viswanadham | +| [HDFS-12441](https://issues.apache.org/jira/browse/HDFS-12441) | Suppress UnresolvedPathException in namenode log | Minor | . | Kihwal Lee | Kihwal Lee | +| [HDFS-12472](https://issues.apache.org/jira/browse/HDFS-12472) | Add JUNIT timeout to TestBlockStatsMXBean | Minor | . | Lei (Eddy) Xu | Bharat Viswanadham | +| [MAPREDUCE-6958](https://issues.apache.org/jira/browse/MAPREDUCE-6958) | Shuffle audit logger should log size of shuffle transfer | Minor | . | Jason Lowe | Jason Lowe | +| [HADOOP-7308](https://issues.apache.org/jira/browse/HADOOP-7308) | Remove unused TaskLogAppender configurations from log4j.properties | Major | conf | Todd Lipcon | Todd Lipcon | +| [HDFS-12496](https://issues.apache.org/jira/browse/HDFS-12496) | Make QuorumJournalManager timeout properties configurable | Major | . 
| Ajay Kumar | Ajay Kumar | +| [YARN-7045](https://issues.apache.org/jira/browse/YARN-7045) | Remove FSLeafQueue#addAppSchedulable | Major | fairscheduler | Yufei Gu | Sen Zhao | +| [HDFS-12530](https://issues.apache.org/jira/browse/HDFS-12530) | Processor argument in Offline Image Viewer should be case insensitive | Minor | tools | Hanisha Koneru | Hanisha Koneru | +| [YARN-7240](https://issues.apache.org/jira/browse/YARN-7240) | Add more states and transitions to stabilize the NM Container state machine | Major | . | Arun Suresh | kartheek muthyala | +| [YARN-65](https://issues.apache.org/jira/browse/YARN-65) | Reduce RM app memory footprint once app has completed | Major | resourcemanager | Jason Lowe | Manikandan R | +| [YARN-6333](https://issues.apache.org/jira/browse/YARN-6333) | Improve doc for minSharePreemptionTimeout, fairSharePreemptionTimeout and fairSharePreemptionThreshold | Major | fairscheduler | Yufei Gu | Chetna Chaudhari | +| [HADOOP-14095](https://issues.apache.org/jira/browse/HADOOP-14095) | Document caveats about the default JavaKeyStoreProvider in KMS | Major | documentation, kms | Xiao Chen | Xiao Chen | +| [YARN-4879](https://issues.apache.org/jira/browse/YARN-4879) | Enhance Allocate Protocol to Identify Requests Explicitly | Major | applications, resourcemanager | Subru Krishnan | Subru Krishnan | +| [HDFS-12420](https://issues.apache.org/jira/browse/HDFS-12420) | Add an option to disallow 'namenode format -force' | Major | . | Ajay Kumar | Ajay Kumar | +| [HADOOP-14521](https://issues.apache.org/jira/browse/HADOOP-14521) | KMS client needs retry logic | Major | . | Rushabh S Shah | Rushabh S Shah | +| [YARN-2162](https://issues.apache.org/jira/browse/YARN-2162) | add ability in Fair Scheduler to optionally configure maxResources in terms of percentage | Major | fairscheduler, scheduler | Ashwin Shankar | Yufei Gu | +| [YARN-7207](https://issues.apache.org/jira/browse/YARN-7207) | Cache the RM proxy server address | Major | RM | Yufei Gu | Yufei Gu | +| [HADOOP-14920](https://issues.apache.org/jira/browse/HADOOP-14920) | KMSClientProvider won't work with KMS delegation token retrieved from non-Java client. | Major | kms | Xiaoyu Yao | Xiaoyu Yao | +| [YARN-6930](https://issues.apache.org/jira/browse/YARN-6930) | Admins should be able to explicitly enable specific LinuxContainerRuntime in the NodeManager | Major | nodemanager | Vinod Kumar Vavilapalli | Shane Kumpf | +| [HDFS-12603](https://issues.apache.org/jira/browse/HDFS-12603) | Enable async edit logging by default | Major | namenode | Andrew Wang | Andrew Wang | +| [HDFS-12642](https://issues.apache.org/jira/browse/HDFS-12642) | Log block and datanode details in BlockRecoveryWorker | Major | datanode | Xiao Chen | Xiao Chen | +| [HADOOP-14938](https://issues.apache.org/jira/browse/HADOOP-14938) | Configuration.updatingResource map should be initialized lazily | Major | . | Misha Dmitriev | Misha Dmitriev | +| [YARN-6608](https://issues.apache.org/jira/browse/YARN-6608) | Backport all SLS improvements from trunk to branch-2 | Major | . | Carlo Curino | Carlo Curino | +| [HADOOP-14880](https://issues.apache.org/jira/browse/HADOOP-14880) | [KMS] Document&test missing KMS client side configs | Minor | . 
| Wei-Chiu Chuang | Gabor Bota | +| [HDFS-12619](https://issues.apache.org/jira/browse/HDFS-12619) | Do not catch and throw unchecked exceptions if IBRs fail to process | Minor | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-7359](https://issues.apache.org/jira/browse/YARN-7359) | TestAppManager.testQueueSubmitWithNoPermission() should be scheduler agnostic | Minor | . | Haibo Chen | Haibo Chen | +| [HADOOP-14944](https://issues.apache.org/jira/browse/HADOOP-14944) | Add JvmMetrics to KMS | Major | kms | Xiao Chen | Xiao Chen | +| [YARN-7261](https://issues.apache.org/jira/browse/YARN-7261) | Add debug message for better download latency monitoring | Major | nodemanager | Yufei Gu | Yufei Gu | +| [YARN-7357](https://issues.apache.org/jira/browse/YARN-7357) | Several methods in TestZKRMStateStore.TestZKRMStateStoreTester.TestZKRMStateStoreInternal should have @Override annotations | Trivial | resourcemanager | Daniel Templeton | Sen Zhao | +| [YARN-4163](https://issues.apache.org/jira/browse/YARN-4163) | Audit getQueueInfo and getApplications calls | Major | . | Chang Li | Chang Li | +| [HADOOP-9657](https://issues.apache.org/jira/browse/HADOOP-9657) | NetUtils.wrapException to have special handling for 0.0.0.0 addresses and :0 ports | Minor | net | Steve Loughran | Varun Saxena | +| [YARN-7389](https://issues.apache.org/jira/browse/YARN-7389) | Make TestResourceManager Scheduler agnostic | Major | test | Robert Kanter | Robert Kanter | +| [YARN-7358](https://issues.apache.org/jira/browse/YARN-7358) | TestZKConfigurationStore and TestLeveldbConfigurationStore should explicitly set capacity scheduler | Minor | resourcemanager | Haibo Chen | Haibo Chen | +| [YARN-7262](https://issues.apache.org/jira/browse/YARN-7262) | Add a hierarchy into the ZKRMStateStore for delegation token znodes to prevent jute buffer overflow | Major | . | Robert Kanter | Robert Kanter | +| [HADOOP-14870](https://issues.apache.org/jira/browse/HADOOP-14870) | backport HADOOP-14553 parallel tests to branch-2 | Major | fs/azure, test | Steve Loughran | Steve Loughran | +| [HADOOP-14992](https://issues.apache.org/jira/browse/HADOOP-14992) | Upgrade Avro patch version | Major | build | Chris Douglas | Bharat Viswanadham | +| [YARN-6413](https://issues.apache.org/jira/browse/YARN-6413) | FileSystem based Yarn Registry implementation | Major | amrmproxy, api, resourcemanager | Ellen Hui | Ellen Hui | +| [HDFS-12744](https://issues.apache.org/jira/browse/HDFS-12744) | More logs when short-circuit read is failed and disabled | Major | datanode | Weiwei Yang | Weiwei Yang | +| [MAPREDUCE-6975](https://issues.apache.org/jira/browse/MAPREDUCE-6975) | Logging task counters | Major | task | Prabhu Joseph | Prabhu Joseph | +| [HADOOP-14112](https://issues.apache.org/jira/browse/HADOOP-14112) | Über-jira adl:// Azure Data Lake Phase I: Stabilization | Major | fs/adl | Steve Loughran | John Zhuge | +| [HADOOP-14552](https://issues.apache.org/jira/browse/HADOOP-14552) | Über-jira: WASB client phase II: performance and testing | Major | fs/azure | Steve Loughran | Thomas Marquardt | +| [HADOOP-13738](https://issues.apache.org/jira/browse/HADOOP-13738) | DiskChecker should perform some disk IO | Major | . 
| Arpit Agarwal | Arpit Agarwal | ### BUG FIXES: @@ -435,6 +507,9 @@ | [YARN-524](https://issues.apache.org/jira/browse/YARN-524) | TestYarnVersionInfo failing if generated properties doesn't include an SVN URL | Minor | api | Steve Loughran | Steve Loughran | | [YARN-1471](https://issues.apache.org/jira/browse/YARN-1471) | The SLS simulator is not running the preemption policy for CapacityScheduler | Minor | . | Carlo Curino | Carlo Curino | | [HADOOP-11703](https://issues.apache.org/jira/browse/HADOOP-11703) | git should ignore .DS\_Store files on Mac OS X | Major | . | Abin Shahab | Abin Shahab | +| [HDFS-8797](https://issues.apache.org/jira/browse/HDFS-8797) | WebHdfsFileSystem creates too many connections for pread | Major | webhdfs | Jing Zhao | Jing Zhao | +| [HDFS-9107](https://issues.apache.org/jira/browse/HDFS-9107) | Prevent NN's unrecoverable death spiral after full GC | Critical | namenode | Daryn Sharp | Daryn Sharp | +| [HDFS-9034](https://issues.apache.org/jira/browse/HDFS-9034) | "StorageTypeStats" Metric should not count failed storage. | Major | namenode | Archana T | Surendra Singh Lilhore | | [YARN-4156](https://issues.apache.org/jira/browse/YARN-4156) | TestAMRestart#testAMBlacklistPreventsRestartOnSameNode assumes CapacityScheduler | Major | . | Anubhav Dhoot | Anubhav Dhoot | | [YARN-4417](https://issues.apache.org/jira/browse/YARN-4417) | Make RM and Timeline-server REST APIs more consistent | Major | . | Wangda Tan | Wangda Tan | | [YARN-4522](https://issues.apache.org/jira/browse/YARN-4522) | Queue acl can be checked at app submission | Major | . | Jian He | Jian He | @@ -443,7 +518,7 @@ | [HADOOP-12573](https://issues.apache.org/jira/browse/HADOOP-12573) | TestRPC.testClientBackOff failing | Major | test | Steve Loughran | Xiao Chen | | [HADOOP-12653](https://issues.apache.org/jira/browse/HADOOP-12653) | Use SO\_REUSEADDR to avoid getting "Address already in use" when using kerberos and attempting to bind to any port on the local IP address | Major | net | Colin P. McCabe | Colin P. McCabe | | [YARN-4571](https://issues.apache.org/jira/browse/YARN-4571) | Make app id/name available to the yarn authorizer provider for better auditing | Major | . | Jian He | Jian He | -| [YARN-4551](https://issues.apache.org/jira/browse/YARN-4551) | Address the duplication between StatusUpdateWhenHealthy and StatusUpdateWhenUnhealthy transitions | Minor | nodemanager | Karthik Kambatla | Sunil G | +| [YARN-4551](https://issues.apache.org/jira/browse/YARN-4551) | Address the duplication between StatusUpdateWhenHealthy and StatusUpdateWhenUnhealthy transitions | Minor | nodemanager | Karthik Kambatla | Sunil Govindan | | [HDFS-9517](https://issues.apache.org/jira/browse/HDFS-9517) | Fix missing @Test annotation on TestDistCpUtils.testUnpackAttributes | Trivial | distcp | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-9624](https://issues.apache.org/jira/browse/HDFS-9624) | DataNode start slowly due to the initial DU command operations | Major | . | Yiqun Lin | Yiqun Lin | | [YARN-4611](https://issues.apache.org/jira/browse/YARN-4611) | Fix scheduler load simulator to support multi-layer network location | Major | . 
| Ming Ma | Ming Ma | @@ -482,7 +557,7 @@ | [MAPREDUCE-6655](https://issues.apache.org/jira/browse/MAPREDUCE-6655) | Fix a typo (STRICT\_IE6) in Encrypted Shuffle | Trivial | documentation | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HADOOP-12954](https://issues.apache.org/jira/browse/HADOOP-12954) | Add a way to change hadoop.security.token.service.use\_ip | Major | security | Robert Kanter | Robert Kanter | | [YARN-4657](https://issues.apache.org/jira/browse/YARN-4657) | Javadoc comment is broken for Resources.multiplyByAndAddTo() | Trivial | . | Daniel Templeton | Daniel Templeton | -| [YARN-4880](https://issues.apache.org/jira/browse/YARN-4880) | Running TestZKRMStateStorePerf with real zookeeper cluster throws NPE | Major | . | Rohith Sharma K S | Sunil G | +| [YARN-4880](https://issues.apache.org/jira/browse/YARN-4880) | Running TestZKRMStateStorePerf with real zookeeper cluster throws NPE | Major | . | Rohith Sharma K S | Sunil Govindan | | [YARN-4609](https://issues.apache.org/jira/browse/YARN-4609) | RM Nodes list page takes too much time to load | Major | webapp | Bibin A Chundatt | Bibin A Chundatt | | [YARN-4906](https://issues.apache.org/jira/browse/YARN-4906) | Capture container start/finish time in container metrics | Major | . | Jian He | Jian He | | [HDFS-10192](https://issues.apache.org/jira/browse/HDFS-10192) | Namenode safemode not coming out during failover | Major | namenode | Brahma Reddy Battula | Brahma Reddy Battula | @@ -525,7 +600,7 @@ | [HDFS-10449](https://issues.apache.org/jira/browse/HDFS-10449) | TestRollingFileSystemSinkWithHdfs#testFailedClose() fails on branch-2 | Major | test | Takanobu Asanuma | Takanobu Asanuma | | [HDFS-10468](https://issues.apache.org/jira/browse/HDFS-10468) | HDFS read ends up ignoring an interrupt | Major | . | Siddharth Seth | Jing Zhao | | [MAPREDUCE-6240](https://issues.apache.org/jira/browse/MAPREDUCE-6240) | Hadoop client displays confusing error message | Major | client | Mohammad Kamrul Islam | Gera Shegalov | -| [YARN-4308](https://issues.apache.org/jira/browse/YARN-4308) | ContainersAggregated CPU resource utilization reports negative usage in first few heartbeats | Major | nodemanager | Sunil G | Sunil G | +| [YARN-4308](https://issues.apache.org/jira/browse/YARN-4308) | ContainersAggregated CPU resource utilization reports negative usage in first few heartbeats | Major | nodemanager | Sunil Govindan | Sunil Govindan | | [HDFS-10508](https://issues.apache.org/jira/browse/HDFS-10508) | DFSInputStream should set thread's interrupt status after catching InterruptException from sleep | Major | . | Jing Zhao | Jing Zhao | | [HADOOP-13243](https://issues.apache.org/jira/browse/HADOOP-13243) | TestRollingFileSystemSink.testSetInitialFlushTime() fails intermittently | Minor | test | Daniel Templeton | Daniel Templeton | | [YARN-5077](https://issues.apache.org/jira/browse/YARN-5077) | Fix FSLeafQueue#getFairShare() for queues with zero fairshare | Major | . 
| Yufei Gu | Yufei Gu | @@ -548,7 +623,6 @@ | [HADOOP-13254](https://issues.apache.org/jira/browse/HADOOP-13254) | Create framework for configurable disk checkers | Major | util | Yufei Gu | Yufei Gu | | [YARN-5272](https://issues.apache.org/jira/browse/YARN-5272) | Handle queue names consistently in FairScheduler | Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [YARN-5213](https://issues.apache.org/jira/browse/YARN-5213) | Fix a bug in LogCLIHelpers which cause TestLogsCLI#testFetchApplictionLogs fails intermittently | Major | test | Rohith Sharma K S | Xuan Gong | -| [YARN-5195](https://issues.apache.org/jira/browse/YARN-5195) | RM intermittently crashed with NPE while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | resourcemanager | Karam Singh | sandflee | | [YARN-5441](https://issues.apache.org/jira/browse/YARN-5441) | Fixing minor Scheduler test case failures | Major | . | Subru Krishnan | Subru Krishnan | | [YARN-5440](https://issues.apache.org/jira/browse/YARN-5440) | Use AHSClient in YarnClient when TimelineServer is running | Major | . | Xuan Gong | Xuan Gong | | [YARN-5203](https://issues.apache.org/jira/browse/YARN-5203) | Return ResourceRequest JAXB object in ResourceManager Cluster Applications REST API | Major | . | Subru Krishnan | Ellen Hui | @@ -565,12 +639,14 @@ | [HADOOP-13461](https://issues.apache.org/jira/browse/HADOOP-13461) | NPE in KeyProvider.rollNewVersion | Minor | . | Colm O hEigeartaigh | Colm O hEigeartaigh | | [HADOOP-13441](https://issues.apache.org/jira/browse/HADOOP-13441) | Document LdapGroupsMapping keystore password properties | Minor | security | Wei-Chiu Chuang | Yuanbo Liu | | [YARN-4833](https://issues.apache.org/jira/browse/YARN-4833) | For Queue AccessControlException client retries multiple times on both RM | Major | . | Bibin A Chundatt | Bibin A Chundatt | +| [HDFS-9696](https://issues.apache.org/jira/browse/HDFS-9696) | Garbage snapshot records lingering forever | Critical | . | Kihwal Lee | Kihwal Lee | | [YARN-5521](https://issues.apache.org/jira/browse/YARN-5521) | TestCapacityScheduler#testKillAllAppsInQueue fails randomly | Major | . | Varun Saxena | sandflee | | [HADOOP-13437](https://issues.apache.org/jira/browse/HADOOP-13437) | KMS should reload whitelist and default key ACLs when hot-reloading | Major | kms | Xiao Chen | Xiao Chen | | [YARN-5475](https://issues.apache.org/jira/browse/YARN-5475) | Test failed for TestAggregatedLogFormat on trunk | Major | . | Junping Du | Jun Gong | | [YARN-5523](https://issues.apache.org/jira/browse/YARN-5523) | Yarn running container log fetching causes OutOfMemoryError | Major | log-aggregation | Prasanth Jayachandran | Xuan Gong | | [HADOOP-11786](https://issues.apache.org/jira/browse/HADOOP-11786) | Fix Javadoc typos in org.apache.hadoop.fs.FileSystem | Trivial | documentation | Chen He | Andras Bokor | | [YARN-5526](https://issues.apache.org/jira/browse/YARN-5526) | DrainDispacher#ServiceStop blocked if setDrainEventsOnStop invoked | Major | . | sandflee | sandflee | +| [HDFS-10763](https://issues.apache.org/jira/browse/HDFS-10763) | Open files can leak permanently due to inconsistent lease update | Critical | . | Kihwal Lee | Kihwal Lee | | [YARN-5533](https://issues.apache.org/jira/browse/YARN-5533) | JMX AM Used metrics for queue wrong when app submited to nodelabel partition | Major | . 
| Bibin A Chundatt | Bibin A Chundatt | | [MAPREDUCE-6762](https://issues.apache.org/jira/browse/MAPREDUCE-6762) | ControlledJob#toString failed with NPE when job status is not successfully updated | Major | . | Weiwei Yang | Weiwei Yang | | [HADOOP-13526](https://issues.apache.org/jira/browse/HADOOP-13526) | Add detailed logging in KMS for the authentication failure of proxy user | Minor | kms | Suraj Acharya | Suraj Acharya | @@ -585,6 +661,7 @@ | [HADOOP-13602](https://issues.apache.org/jira/browse/HADOOP-13602) | Fix some warnings by findbugs in hadoop-maven-plugin | Major | . | Tsuyoshi Ozawa | Tsuyoshi Ozawa | | [YARN-4973](https://issues.apache.org/jira/browse/YARN-4973) | YarnWebParams next.fresh.interval should be next.refresh.interval | Minor | webapp | Daniel Templeton | Daniel Templeton | | [YARN-5662](https://issues.apache.org/jira/browse/YARN-5662) | Provide an option to enable ContainerMonitor | Major | . | Jian He | Jian He | +| [HADOOP-11780](https://issues.apache.org/jira/browse/HADOOP-11780) | Prevent IPC reader thread death | Critical | ipc | Daryn Sharp | Daryn Sharp | | [HADOOP-13164](https://issues.apache.org/jira/browse/HADOOP-13164) | Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories | Minor | fs/s3 | Rajesh Balamohan | Rajesh Balamohan | | [YARN-5693](https://issues.apache.org/jira/browse/YARN-5693) | Reduce loglevel to Debug in ContainerManagementProtocolProxy and AMRMClientImpl | Major | yarn | Yufei Gu | Yufei Gu | | [YARN-5678](https://issues.apache.org/jira/browse/YARN-5678) | Log demand as demand in FSLeafQueue and FSParentQueue | Major | fairscheduler | Yufei Gu | Yufei Gu | @@ -601,18 +678,18 @@ | [YARN-5711](https://issues.apache.org/jira/browse/YARN-5711) | Propogate exceptions back to client when using hedging RM failover provider | Critical | applications, resourcemanager | Subru Krishnan | Subru Krishnan | | [YARN-5754](https://issues.apache.org/jira/browse/YARN-5754) | Null check missing for earliest in FifoPolicy | Major | fairscheduler | Yufei Gu | Yufei Gu | | [HDFS-11040](https://issues.apache.org/jira/browse/HDFS-11040) | Add documentation for HDFS-9820 distcp improvement | Major | distcp | Yongjun Zhang | Yongjun Zhang | +| [YARN-5753](https://issues.apache.org/jira/browse/YARN-5753) | fix NPE in AMRMClientImpl.getMatchingRequests() | Major | yarn | Haibo Chen | Haibo Chen | | [HDFS-9929](https://issues.apache.org/jira/browse/HDFS-9929) | Duplicate keys in NAMENODE\_SPECIFIC\_KEYS | Minor | namenode | Akira Ajisaka | Akira Ajisaka | | [YARN-5752](https://issues.apache.org/jira/browse/YARN-5752) | TestLocalResourcesTrackerImpl#testLocalResourceCache times out | Major | . | Eric Badger | Eric Badger | | [YARN-5710](https://issues.apache.org/jira/browse/YARN-5710) | Fix inconsistent naming in class ResourceRequest | Trivial | yarn | Yufei Gu | Yufei Gu | | [YARN-5686](https://issues.apache.org/jira/browse/YARN-5686) | DefaultContainerExecutor random working dir algorigthm skews results | Minor | . 
| Miklos Szegedi | Vrushali C | | [MAPREDUCE-6798](https://issues.apache.org/jira/browse/MAPREDUCE-6798) | Fix intermittent failure of TestJobHistoryParsing.testJobHistoryMethods() | Major | jobhistoryserver | Haibo Chen | Haibo Chen | | [YARN-5757](https://issues.apache.org/jira/browse/YARN-5757) | RM REST API documentation is not up to date | Trivial | resourcemanager, yarn | Miklos Szegedi | Miklos Szegedi | -| [YARN-5420](https://issues.apache.org/jira/browse/YARN-5420) | Delete org.apache.hadoop.yarn.server.resourcemanager.resource.Priority as its not necessary | Minor | resourcemanager | Sunil G | Sunil G | +| [YARN-5420](https://issues.apache.org/jira/browse/YARN-5420) | Delete org.apache.hadoop.yarn.server.resourcemanager.resource.Priority as its not necessary | Minor | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-5172](https://issues.apache.org/jira/browse/YARN-5172) | Update yarn daemonlog documentation due to HADOOP-12847 | Trivial | documentation | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4388](https://issues.apache.org/jira/browse/YARN-4388) | Cleanup "mapreduce.job.hdfs-servers" from yarn-default.xml | Minor | yarn | Junping Du | Junping Du | | [YARN-2306](https://issues.apache.org/jira/browse/YARN-2306) | Add test for leakage of reservation metrics in fair scheduler | Minor | fairscheduler | Hong Zhiguo | Hong Zhiguo | | [YARN-4743](https://issues.apache.org/jira/browse/YARN-4743) | FairSharePolicy breaks TimSort assumption | Major | fairscheduler | Zephyr Guo | Zephyr Guo | -| [YARN-5793](https://issues.apache.org/jira/browse/YARN-5793) | Trim configuration values in DockerLinuxContainerRuntime | Minor | nodemanager | Tianyin Xu | Tianyin Xu | | [YARN-5809](https://issues.apache.org/jira/browse/YARN-5809) | AsyncDispatcher possibly invokes multiple shutdown thread when handling exception | Major | . | Jian He | Jian He | | [YARN-5805](https://issues.apache.org/jira/browse/YARN-5805) | Add isDebugEnabled check for debug logs in nodemanager | Minor | . | Bibin A Chundatt | Bibin A Chundatt | | [YARN-5788](https://issues.apache.org/jira/browse/YARN-5788) | Apps not activiated and AM limit resource in UI and REST not updated after -replaceLabelsOnNode | Major | . | Bibin A Chundatt | Bibin A Chundatt | @@ -628,7 +705,7 @@ | [YARN-5843](https://issues.apache.org/jira/browse/YARN-5843) | Incorrect documentation for timeline service entityType/events REST end points | Minor | . | Bibin A Chundatt | Bibin A Chundatt | | [YARN-5834](https://issues.apache.org/jira/browse/YARN-5834) | TestNodeStatusUpdater.testNMRMConnectionConf compares nodemanager wait time to the incorrect value | Trivial | . 
| Miklos Szegedi | Chang Li | | [YARN-5545](https://issues.apache.org/jira/browse/YARN-5545) | Fix issues related to Max App in capacity scheduler | Major | capacity scheduler | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-5825](https://issues.apache.org/jira/browse/YARN-5825) | ProportionalPreemptionalPolicy could use readLock over LeafQueue instead of synchronized block | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-5825](https://issues.apache.org/jira/browse/YARN-5825) | ProportionalPreemptionalPolicy could use readLock over LeafQueue instead of synchronized block | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-5874](https://issues.apache.org/jira/browse/YARN-5874) | RM -format-state-store and -remove-application-from-state-store commands fail with NPE | Critical | resourcemanager | Varun Saxena | Varun Saxena | | [YARN-5873](https://issues.apache.org/jira/browse/YARN-5873) | RM crashes with NPE if generic application history is enabled | Critical | resourcemanager | Varun Saxena | Varun Saxena | | [MAPREDUCE-6811](https://issues.apache.org/jira/browse/MAPREDUCE-6811) | TestPipeApplication#testSubmitter fails after HADOOP-13802 | Major | test | Brahma Reddy Battula | Brahma Reddy Battula | @@ -649,7 +726,7 @@ | [HADOOP-13675](https://issues.apache.org/jira/browse/HADOOP-13675) | Bug in return value for delete() calls in WASB | Major | fs/azure | Dushyanth | Dushyanth | | [MAPREDUCE-6571](https://issues.apache.org/jira/browse/MAPREDUCE-6571) | JobEndNotification info logs are missing in AM container syslog | Minor | applicationmaster | Prabhu Joseph | Haibo Chen | | [HADOOP-13859](https://issues.apache.org/jira/browse/HADOOP-13859) | TestConfigurationFieldsBase fails for fields that are DEFAULT values of skipped properties. | Major | common | Haibo Chen | Haibo Chen | -| [YARN-5932](https://issues.apache.org/jira/browse/YARN-5932) | Retrospect moveApplicationToQueue in align with YARN-5611 | Major | capacity scheduler, resourcemanager | Sunil G | Sunil G | +| [YARN-5932](https://issues.apache.org/jira/browse/YARN-5932) | Retrospect moveApplicationToQueue in align with YARN-5611 | Major | capacity scheduler, resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-5136](https://issues.apache.org/jira/browse/YARN-5136) | Error in handling event type APP\_ATTEMPT\_REMOVED to the scheduler | Major | . | tangshangwen | Wilfred Spiegelenburg | | [MAPREDUCE-6817](https://issues.apache.org/jira/browse/MAPREDUCE-6817) | The format of job start time in JHS is different from those of submit and finish time | Major | jobhistoryserver | Haibo Chen | Haibo Chen | | [YARN-5963](https://issues.apache.org/jira/browse/YARN-5963) | Spelling errors in logging and exceptions for node manager, client, web-proxy, common, and app history code | Trivial | client, nodemanager | Grant Sohn | Grant Sohn | @@ -670,7 +747,7 @@ | [HADOOP-13943](https://issues.apache.org/jira/browse/HADOOP-13943) | TestCommonConfigurationFields#testCompareXmlAgainstConfigurationClass fails after HADOOP-13863 | Major | test | Brahma Reddy Battula | Brahma Reddy Battula | | [YARN-5962](https://issues.apache.org/jira/browse/YARN-5962) | Spelling errors in logging and exceptions for resource manager code | Trivial | resourcemanager | Grant Sohn | Grant Sohn | | [YARN-5257](https://issues.apache.org/jira/browse/YARN-5257) | Fix unreleased resources and null dereferences | Major | . 
| Yufei Gu | Yufei Gu | -| [YARN-6001](https://issues.apache.org/jira/browse/YARN-6001) | Improve moveApplicationQueues command line | Major | client | Sunil G | Sunil G | +| [YARN-6001](https://issues.apache.org/jira/browse/YARN-6001) | Improve moveApplicationQueues command line | Major | client | Sunil Govindan | Sunil Govindan | | [YARN-4882](https://issues.apache.org/jira/browse/YARN-4882) | Change the log level to DEBUG for recovering completed applications | Major | resourcemanager | Rohith Sharma K S | Daniel Templeton | | [HDFS-11251](https://issues.apache.org/jira/browse/HDFS-11251) | ConcurrentModificationException during DataNode#refreshVolumes | Major | . | Jason Lowe | Manoj Govindassamy | | [HDFS-11267](https://issues.apache.org/jira/browse/HDFS-11267) | Avoid redefinition of storageDirs in NNStorage and cleanup its accessors in Storage | Major | . | Manoj Govindassamy | Manoj Govindassamy | @@ -702,13 +779,14 @@ | [YARN-6104](https://issues.apache.org/jira/browse/YARN-6104) | RegistrySecurity overrides zookeeper sasl system properties | Major | . | Billie Rinaldi | Billie Rinaldi | | [HADOOP-13858](https://issues.apache.org/jira/browse/HADOOP-13858) | TestGridmixMemoryEmulation and TestResourceUsageEmulators fail on the environment other than Linux or Windows | Major | test | Akira Ajisaka | Akira Ajisaka | | [YARN-6117](https://issues.apache.org/jira/browse/YARN-6117) | SharedCacheManager does not start up | Major | . | Chris Trezzo | Chris Trezzo | -| [YARN-6082](https://issues.apache.org/jira/browse/YARN-6082) | Invalid REST api response for getApps since queueUsagePercentage is coming as INF | Critical | . | Sunil G | Sunil G | +| [YARN-6082](https://issues.apache.org/jira/browse/YARN-6082) | Invalid REST api response for getApps since queueUsagePercentage is coming as INF | Critical | . | Sunil Govindan | Sunil Govindan | | [HDFS-11365](https://issues.apache.org/jira/browse/HDFS-11365) | Log portnumber in PrivilegedNfsGatewayStarter | Minor | nfs | Mukul Kumar Singh | Mukul Kumar Singh | +| [MAPREDUCE-6808](https://issues.apache.org/jira/browse/MAPREDUCE-6808) | Log map attempts as part of shuffle handler audit log | Major | . | Jonathan Eagles | Gergő Pásztor | | [HDFS-11369](https://issues.apache.org/jira/browse/HDFS-11369) | Change exception message in StorageLocationChecker | Minor | datanode | Arpit Agarwal | Arpit Agarwal | | [YARN-4975](https://issues.apache.org/jira/browse/YARN-4975) | Fair Scheduler: exception thrown when a parent queue marked 'parent' has configured child queues | Major | fairscheduler | Ashwin Shankar | Yufei Gu | | [HDFS-11364](https://issues.apache.org/jira/browse/HDFS-11364) | Add a test to verify Audit log entries for setfacl/getfacl commands over FS shell | Major | hdfs, test | Manoj Govindassamy | Manoj Govindassamy | | [HADOOP-13988](https://issues.apache.org/jira/browse/HADOOP-13988) | KMSClientProvider does not work with WebHDFS and Apache Knox w/ProxyUser | Major | common, kms | Greg Senia | Xiaoyu Yao | -| [HADOOP-14029](https://issues.apache.org/jira/browse/HADOOP-14029) | Fix KMSClientProvider for non-secure proxyuser use case | Major | common,kms | Xiaoyu Yao | Xiaoyu Yao | +| [HADOOP-14029](https://issues.apache.org/jira/browse/HADOOP-14029) | Fix KMSClientProvider for non-secure proxyuser use case | Major | kms | Xiaoyu Yao | Xiaoyu Yao | | [YARN-5641](https://issues.apache.org/jira/browse/YARN-5641) | Localizer leaves behind tarballs after container is complete | Major | . 
| Eric Badger | Eric Badger | | [HDFS-11378](https://issues.apache.org/jira/browse/HDFS-11378) | Verify multiple DataNodes can be decommissioned/maintenance at the same time | Major | hdfs | Manoj Govindassamy | Manoj Govindassamy | | [YARN-6103](https://issues.apache.org/jira/browse/YARN-6103) | Log updates for ZKRMStateStore | Trivial | . | Bibin A Chundatt | Daniel Sturman | @@ -725,12 +803,11 @@ | [YARN-6112](https://issues.apache.org/jira/browse/YARN-6112) | UpdateCallDuration is calculated only when debug logging is enabled | Major | fairscheduler | Yufei Gu | Yufei Gu | | [YARN-6144](https://issues.apache.org/jira/browse/YARN-6144) | FairScheduler: preempted resources can become negative | Blocker | fairscheduler, resourcemanager | Miklos Szegedi | Miklos Szegedi | | [YARN-6118](https://issues.apache.org/jira/browse/YARN-6118) | Add javadoc for Resources.isNone | Minor | scheduler | Karthik Kambatla | Andres Perez | -| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . | Jeffrey E Rodriguez | Yuanbo Liu | | [YARN-6166](https://issues.apache.org/jira/browse/YARN-6166) | Unnecessary INFO logs in AMRMClientAsyncImpl$CallbackHandlerThread.run | Trivial | . | Grant W | Grant W | | [HADOOP-14055](https://issues.apache.org/jira/browse/HADOOP-14055) | SwiftRestClient includes pass length in exception if auth fails | Minor | security | Marcell Hegedus | Marcell Hegedus | | [HDFS-11403](https://issues.apache.org/jira/browse/HDFS-11403) | Zookeper ACLs on NN HA enabled clusters to be handled consistently | Major | hdfs | Laszlo Puskas | Hanisha Koneru | | [HADOOP-13233](https://issues.apache.org/jira/browse/HADOOP-13233) | help of stat is confusing | Trivial | documentation, fs | Xiaohe Lan | Attila Bukor | -| [HADOOP-14058](https://issues.apache.org/jira/browse/HADOOP-14058) | Fix NativeS3FileSystemContractBaseTest#testDirWithDifferentMarkersWorks | Major | fs/s3, test | Akira Ajisaka | Yiqun Lin | +| [HADOOP-14058](https://issues.apache.org/jira/browse/HADOOP-14058) | Fix NativeS3FileSystemContractBaseTest#testDirWithDifferentMarkersWorks | Minor | fs/s3, test | Akira Ajisaka | Yiqun Lin | | [HDFS-11084](https://issues.apache.org/jira/browse/HDFS-11084) | Add a regression test for sticky bit support of OIV ReverseXML processor | Major | tools | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-11391](https://issues.apache.org/jira/browse/HDFS-11391) | Numeric usernames do no work with WebHDFS FS (write access) | Major | webhdfs | Pierre Villard | Pierre Villard | | [YARN-4212](https://issues.apache.org/jira/browse/YARN-4212) | FairScheduler: Can't create a DRF queue under a FAIR policy queue | Major | . | Arun Suresh | Yufei Gu | @@ -762,6 +839,7 @@ | [YARN-6218](https://issues.apache.org/jira/browse/YARN-6218) | Fix TestAMRMClient when using FairScheduler | Minor | . | Miklos Szegedi | Miklos Szegedi | | [HDFS-11476](https://issues.apache.org/jira/browse/HDFS-11476) | Fix NPE in FsDatasetImpl#checkAndUpdate | Major | datanode | Xiaobing Zhou | Xiaobing Zhou | | [YARN-6271](https://issues.apache.org/jira/browse/YARN-6271) | yarn rmadin -getGroups returns information from standby RM | Critical | yarn | Sumana Sathish | Jian He | +| [YARN-6248](https://issues.apache.org/jira/browse/YARN-6248) | user is not removed from UsersManager’s when app is killed with pending container requests. | Major | . 
| Eric Payne | Eric Payne | | [HADOOP-14026](https://issues.apache.org/jira/browse/HADOOP-14026) | start-build-env.sh: invalid docker image name | Major | build | Gergő Pásztor | Gergő Pásztor | | [HDFS-11441](https://issues.apache.org/jira/browse/HDFS-11441) | Add escaping to error message in KMS web UI | Minor | security | Aaron T. Myers | Aaron T. Myers | | [YARN-5665](https://issues.apache.org/jira/browse/YARN-5665) | Enhance documentation for yarn.resourcemanager.scheduler.class property | Trivial | documentation | Miklos Szegedi | Yufei Gu | @@ -777,7 +855,7 @@ | [HDFS-11340](https://issues.apache.org/jira/browse/HDFS-11340) | DataNode reconfigure for disks doesn't remove the failed volumes | Major | . | Manoj Govindassamy | Manoj Govindassamy | | [HDFS-11512](https://issues.apache.org/jira/browse/HDFS-11512) | Increase timeout on TestShortCircuitLocalRead#testSkipWithVerifyChecksum | Minor | . | Eric Badger | Eric Badger | | [HDFS-11499](https://issues.apache.org/jira/browse/HDFS-11499) | Decommissioning stuck because of failing recovery | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | -| [HDFS-11395](https://issues.apache.org/jira/browse/HDFS-11395) | RequestHedgingProxyProvider#RequestHedgingInvocationHandler hides the Exception thrown from NameNode | Major | ha | Nandakumar | Nandakumar | +| [HDFS-11395](https://issues.apache.org/jira/browse/HDFS-11395) | RequestHedgingProxyProvider#RequestHedgingInvocationHandler hides the Exception thrown from NameNode | Major | ha | Nanda kumar | Nanda kumar | | [HDFS-11526](https://issues.apache.org/jira/browse/HDFS-11526) | Fix confusing block recovery message | Minor | datanode | Wei-Chiu Chuang | Yiqun Lin | | [HADOOP-14170](https://issues.apache.org/jira/browse/HADOOP-14170) | FileSystemContractBaseTest is not cleaning up test directory clearly | Major | fs | Mingliang Liu | Mingliang Liu | | [YARN-6328](https://issues.apache.org/jira/browse/YARN-6328) | Fix a spelling mistake in CapacityScheduler | Trivial | capacity scheduler | Jin Yibo | Jin Yibo | @@ -815,7 +893,7 @@ | [YARN-6352](https://issues.apache.org/jira/browse/YARN-6352) | Header injections are possible in application proxy servlet | Major | resourcemanager, security | Naganarasimha G R | Naganarasimha G R | | [MAPREDUCE-6862](https://issues.apache.org/jira/browse/MAPREDUCE-6862) | Fragments are not handled correctly by resource limit checking | Minor | . | Chris Trezzo | Chris Trezzo | | [MAPREDUCE-6873](https://issues.apache.org/jira/browse/MAPREDUCE-6873) | MR Job Submission Fails if MR framework application path not on defaultFS | Minor | mrv2 | Erik Krogen | Erik Krogen | -| [HADOOP-14256](https://issues.apache.org/jira/browse/HADOOP-14256) | [S3A DOC] Correct the format for "Seoul" example | Minor | documentation, s3 | Brahma Reddy Battula | Brahma Reddy Battula | +| [HADOOP-14256](https://issues.apache.org/jira/browse/HADOOP-14256) | [S3A DOC] Correct the format for "Seoul" example | Minor | documentation, fs/s3 | Brahma Reddy Battula | Brahma Reddy Battula | | [MAPREDUCE-6850](https://issues.apache.org/jira/browse/MAPREDUCE-6850) | Shuffle Handler keep-alive connections are closed from the server side | Major | . 
| Jonathan Eagles | Jonathan Eagles | | [MAPREDUCE-6836](https://issues.apache.org/jira/browse/MAPREDUCE-6836) | exception thrown when accessing the job configuration web UI | Minor | webapps | Sangjin Lee | Haibo Chen | | [HDFS-11592](https://issues.apache.org/jira/browse/HDFS-11592) | Closing a file has a wasteful preconditions in NameNode | Major | namenode | Eric Badger | Eric Badger | @@ -867,7 +945,7 @@ | [YARN-6453](https://issues.apache.org/jira/browse/YARN-6453) | fairscheduler-statedump.log gets generated regardless of service | Blocker | fairscheduler, scheduler | Allen Wittenauer | Yufei Gu | | [YARN-6153](https://issues.apache.org/jira/browse/YARN-6153) | keepContainer does not work when AM retry window is set | Major | resourcemanager | kyungwan nam | kyungwan nam | | [HDFS-11689](https://issues.apache.org/jira/browse/HDFS-11689) | New exception thrown by DFSClient#isHDFSEncryptionEnabled broke hacky hive code | Major | . | Yongjun Zhang | Yongjun Zhang | -| [YARN-5889](https://issues.apache.org/jira/browse/YARN-5889) | Improve and refactor user-limit calculation in capacity scheduler | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-5889](https://issues.apache.org/jira/browse/YARN-5889) | Improve and refactor user-limit calculation in capacity scheduler | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-6500](https://issues.apache.org/jira/browse/YARN-6500) | Do not mount inaccessible cgroups directories in CgroupsLCEResourcesHandler | Major | nodemanager | Miklos Szegedi | Miklos Szegedi | | [HDFS-11691](https://issues.apache.org/jira/browse/HDFS-11691) | Add a proper scheme to the datanode links in NN web UI | Major | . | Kihwal Lee | Kihwal Lee | | [HADOOP-14341](https://issues.apache.org/jira/browse/HADOOP-14341) | Support multi-line value for ssl.server.exclude.cipher.list | Major | . | John Zhuge | John Zhuge | @@ -938,7 +1016,7 @@ | [YARN-6141](https://issues.apache.org/jira/browse/YARN-6141) | ppc64le on Linux doesn't trigger \_\_linux get\_executable codepath | Major | nodemanager | Sonia Garudi | Ayappan | | [HADOOP-14399](https://issues.apache.org/jira/browse/HADOOP-14399) | Configuration does not correctly XInclude absolute file URIs | Blocker | conf | Andrew Wang | Jonathan Eagles | | [HADOOP-14430](https://issues.apache.org/jira/browse/HADOOP-14430) | the accessTime of FileStatus returned by SFTPFileSystem's getFileStatus method is always 0 | Trivial | fs | Hongyuan Li | Hongyuan Li | -| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health stauts as corrupt even one replica is corrupt | Critical | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health status as corrupt even one replica is corrupt | Critical | . | Brahma Reddy Battula | Brahma Reddy Battula | | [YARN-6643](https://issues.apache.org/jira/browse/YARN-6643) | TestRMFailover fails rarely due to port conflict | Major | test | Robert Kanter | Robert Kanter | | [HDFS-11817](https://issues.apache.org/jira/browse/HDFS-11817) | A faulty node can cause a lease leak and NPE on accessing data | Critical | . | Kihwal Lee | Kihwal Lee | | [YARN-6641](https://issues.apache.org/jira/browse/YARN-6641) | Non-public resource localization on a bad disk causes subsequent containers failure | Major | . 
| Kuhu Shukla | Kuhu Shukla | @@ -971,7 +1049,7 @@ | [HADOOP-14500](https://issues.apache.org/jira/browse/HADOOP-14500) | Azure: TestFileSystemOperationExceptionHandling{,MultiThreaded} fails | Major | fs/azure, test | Mingliang Liu | Rajesh Balamohan | | [HADOOP-14283](https://issues.apache.org/jira/browse/HADOOP-14283) | Upgrade AWS SDK to 1.11.134 | Critical | fs/s3 | Aaron Fabbri | Aaron Fabbri | | [HADOOP-14511](https://issues.apache.org/jira/browse/HADOOP-14511) | WritableRpcEngine.Invocation#toString NPE on null parameters | Minor | ipc | John Zhuge | John Zhuge | -| [YARN-6585](https://issues.apache.org/jira/browse/YARN-6585) | RM fails to start when upgrading from 2.7 to 2.8 for clusters with node labels. | Blocker | . | Eric Payne | Sunil G | +| [YARN-6585](https://issues.apache.org/jira/browse/YARN-6585) | RM fails to start when upgrading from 2.7 to 2.8 for clusters with node labels. | Blocker | . | Eric Payne | Sunil Govindan | | [YARN-6703](https://issues.apache.org/jira/browse/YARN-6703) | RM startup failure with old state store due to version mismatch | Critical | . | Bibin A Chundatt | Varun Saxena | | [HADOOP-14501](https://issues.apache.org/jira/browse/HADOOP-14501) | Switch from aalto-xml to woodstox to handle odd XML features | Blocker | conf | Andrew Wang | Jonathan Eagles | | [HDFS-11967](https://issues.apache.org/jira/browse/HDFS-11967) | TestJMXGet fails occasionally | Major | . | Arpit Agarwal | Arpit Agarwal | @@ -982,7 +1060,7 @@ | [YARN-6517](https://issues.apache.org/jira/browse/YARN-6517) | Fix warnings from Spotbugs in hadoop-yarn-common | Major | . | Weiwei Yang | Weiwei Yang | | [YARN-6719](https://issues.apache.org/jira/browse/YARN-6719) | Fix findbugs warnings in SLSCapacityScheduler.java | Major | . | Akira Ajisaka | Akira Ajisaka | | [HADOOP-14540](https://issues.apache.org/jira/browse/HADOOP-14540) | Replace MRv1 specific terms in HostsFileReader | Minor | documentation | Akira Ajisaka | hu xiaodong | -| [HDFS-11995](https://issues.apache.org/jira/browse/HDFS-11995) | HDFS Architecture documentation incorrectly describes writing to a local temporary file. | Minor | documentation | Chris Nauroth | Nandakumar | +| [HDFS-11995](https://issues.apache.org/jira/browse/HDFS-11995) | HDFS Architecture documentation incorrectly describes writing to a local temporary file. | Minor | documentation | Chris Nauroth | Nanda kumar | | [HDFS-11736](https://issues.apache.org/jira/browse/HDFS-11736) | OIV tests should not write outside 'target' directory. | Major | . 
| Konstantin Shvachko | Yiqun Lin | | [YARN-6713](https://issues.apache.org/jira/browse/YARN-6713) | Fix dead link in the Javadoc of FairSchedulerEventLog.java | Minor | documentation | Akira Ajisaka | Weiwei Yang | | [HADOOP-14533](https://issues.apache.org/jira/browse/HADOOP-14533) | Size of args cannot be less than zero in TraceAdmin#run as its linkedlist | Trivial | common, tracing | Weisen Han | Weisen Han | @@ -991,6 +1069,7 @@ | [HDFS-12010](https://issues.apache.org/jira/browse/HDFS-12010) | TestCopyPreserveFlag fails consistently because of mismatch in access time | Major | hdfs, test | Mukul Kumar Singh | Mukul Kumar Singh | | [HADOOP-14568](https://issues.apache.org/jira/browse/HADOOP-14568) | GenericTestUtils#waitFor missing parameter verification | Major | test | Yiqun Lin | Yiqun Lin | | [HADOOP-14146](https://issues.apache.org/jira/browse/HADOOP-14146) | KerberosAuthenticationHandler should authenticate with SPN in AP-REQ | Major | security | Daryn Sharp | Daryn Sharp | +| [YARN-5876](https://issues.apache.org/jira/browse/YARN-5876) | TestResourceTrackerService#testGracefulDecommissionWithApp fails intermittently on trunk | Major | . | Varun Saxena | Robert Kanter | | [HADOOP-14543](https://issues.apache.org/jira/browse/HADOOP-14543) | ZKFC should use getAversion() while setting the zkacl | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | | [YARN-5006](https://issues.apache.org/jira/browse/YARN-5006) | ResourceManager quit due to ApplicationStateData exceed the limit size of znode in zk | Critical | resourcemanager | dongtingting | Bibin A Chundatt | | [HADOOP-14461](https://issues.apache.org/jira/browse/HADOOP-14461) | Azure: handle failure gracefully in case of missing account access key | Major | fs/azure | Mingliang Liu | Mingliang Liu | @@ -1015,7 +1094,7 @@ | [YARN-6797](https://issues.apache.org/jira/browse/YARN-6797) | TimelineWriter does not fully consume the POST response | Major | timelineclient | Jason Lowe | Jason Lowe | | [HDFS-11502](https://issues.apache.org/jira/browse/HDFS-11502) | Datanode UI should display hostname based on JMX bean instead of window.location.hostname | Major | hdfs | Jeffrey E Rodriguez | Jeffrey E Rodriguez | | [HADOOP-14646](https://issues.apache.org/jira/browse/HADOOP-14646) | FileContextMainOperationsBaseTest#testListStatusFilterWithSomeMatches never runs | Minor | test | Andras Bokor | Andras Bokor | -| [HADOOP-14658](https://issues.apache.org/jira/browse/HADOOP-14658) | branch-2 compilation is broken in hadoop-azure | Blocker | build, fs/azure | Sunil G | Sunil G | +| [HADOOP-14658](https://issues.apache.org/jira/browse/HADOOP-14658) | branch-2 compilation is broken in hadoop-azure | Blocker | build, fs/azure | Sunil Govindan | Sunil Govindan | | [MAPREDUCE-6697](https://issues.apache.org/jira/browse/MAPREDUCE-6697) | Concurrent task limits should only be applied when necessary | Major | mrv2 | Jason Lowe | Nathan Roberts | | [YARN-6654](https://issues.apache.org/jira/browse/YARN-6654) | RollingLevelDBTimelineStore backwards incompatible after fst upgrade | Blocker | . | Jonathan Eagles | Jonathan Eagles | | [YARN-6805](https://issues.apache.org/jira/browse/YARN-6805) | NPE in LinuxContainerExecutor due to null PrivilegedOperationException exit code | Major | nodemanager | Jason Lowe | Jason Lowe | @@ -1028,7 +1107,7 @@ | [YARN-6714](https://issues.apache.org/jira/browse/YARN-6714) | IllegalStateException while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | . 
| Tao Yang | Tao Yang | | [MAPREDUCE-6889](https://issues.apache.org/jira/browse/MAPREDUCE-6889) | Add Job#close API to shutdown MR client services. | Major | . | Rohith Sharma K S | Rohith Sharma K S | | [MAPREDUCE-6910](https://issues.apache.org/jira/browse/MAPREDUCE-6910) | MapReduceTrackingUriPlugin can not return the right URI of history server with HTTPS | Major | jobhistoryserver | Lantao Jin | Lantao Jin | -| [HDFS-12154](https://issues.apache.org/jira/browse/HDFS-12154) | Incorrect javadoc description in StorageLocationChecker#check | Major | . | Nandakumar | Nandakumar | +| [HDFS-12154](https://issues.apache.org/jira/browse/HDFS-12154) | Incorrect javadoc description in StorageLocationChecker#check | Major | . | Nanda kumar | Nanda kumar | | [YARN-6798](https://issues.apache.org/jira/browse/YARN-6798) | Fix NM startup failure with old state store due to version mismatch | Major | nodemanager | Ray Chiang | Botong Huang | | [HADOOP-14637](https://issues.apache.org/jira/browse/HADOOP-14637) | GenericTestUtils.waitFor needs to check condition again after max wait time | Major | . | Daniel Templeton | Daniel Templeton | | [YARN-6819](https://issues.apache.org/jira/browse/YARN-6819) | Application report fails if app rejected due to nodesize | Major | . | Bibin A Chundatt | Bibin A Chundatt | @@ -1042,7 +1121,6 @@ | [YARN-6844](https://issues.apache.org/jira/browse/YARN-6844) | AMRMClientImpl.checkNodeLabelExpression() has wrong error message | Minor | . | Daniel Templeton | Manikandan R | | [YARN-6150](https://issues.apache.org/jira/browse/YARN-6150) | TestContainerManagerSecurity tests for Yarn Server are flakey | Major | test | Daniel Sturman | Daniel Sturman | | [YARN-6307](https://issues.apache.org/jira/browse/YARN-6307) | Refactor FairShareComparator#compare | Major | fairscheduler | Yufei Gu | Yufei Gu | -| [YARN-6102](https://issues.apache.org/jira/browse/YARN-6102) | RMActiveService context to be updated with new RMContext on failover | Critical | . | Ajith S | Rohith Sharma K S | | [HADOOP-14578](https://issues.apache.org/jira/browse/HADOOP-14578) | Bind IPC connections to kerberos UPN host for proxy users | Major | ipc | Daryn Sharp | Daryn Sharp | | [HDFS-11896](https://issues.apache.org/jira/browse/HDFS-11896) | Non-dfsUsed will be doubled on dead node re-registration | Blocker | . | Brahma Reddy Battula | Brahma Reddy Battula | | [YARN-6870](https://issues.apache.org/jira/browse/YARN-6870) | Fix floating point inaccuracies in resource availability check in AllocationBasedResourceUtilizationTracker | Major | api, nodemanager | Brook Zhou | Brook Zhou | @@ -1052,11 +1130,12 @@ | [YARN-6628](https://issues.apache.org/jira/browse/YARN-6628) | Unexpected jackson-core-2.2.3 dependency introduced | Blocker | timelineserver | Jason Lowe | Jonathan Eagles | | [HADOOP-14644](https://issues.apache.org/jira/browse/HADOOP-14644) | Increase max heap size of Maven javadoc plugin | Major | test | Andras Bokor | Andras Bokor | | [MAPREDUCE-6921](https://issues.apache.org/jira/browse/MAPREDUCE-6921) | TestUmbilicalProtocolWithJobToken#testJobTokenRpc fails | Major | . 
| Sonia Garudi | Sonia Garudi | +| [HADOOP-14676](https://issues.apache.org/jira/browse/HADOOP-14676) | Wrong default value for "fs.df.interval" | Major | common, conf, fs | Konstantin Shvachko | Sherwood Zheng | | [HADOOP-14701](https://issues.apache.org/jira/browse/HADOOP-14701) | Configuration can log misleading warnings about an attempt to override final parameter | Major | conf | Andrew Sherman | Andrew Sherman | -| [YARN-5731](https://issues.apache.org/jira/browse/YARN-5731) | Preemption calculation is not accurate when reserved containers are present in queue. | Major | capacity scheduler | Sunil G | Wangda Tan | +| [YARN-5731](https://issues.apache.org/jira/browse/YARN-5731) | Preemption calculation is not accurate when reserved containers are present in queue. | Major | capacity scheduler | Sunil Govindan | Wangda Tan | | [HADOOP-14683](https://issues.apache.org/jira/browse/HADOOP-14683) | FileStatus.compareTo binary compatible issue | Blocker | . | Sergey Shelukhin | Akira Ajisaka | | [HADOOP-14702](https://issues.apache.org/jira/browse/HADOOP-14702) | Fix formatting issue and regression caused by conversion from APT to Markdown | Minor | documentation | Doris Gu | Doris Gu | -| [YARN-6872](https://issues.apache.org/jira/browse/YARN-6872) | Ensure apps could run given NodeLabels are disabled post RM switchover/restart | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-6872](https://issues.apache.org/jira/browse/YARN-6872) | Ensure apps could run given NodeLabels are disabled post RM switchover/restart | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-12217](https://issues.apache.org/jira/browse/HDFS-12217) | HDFS snapshots doesn't capture all open files when one of the open files is deleted | Major | snapshots | Manoj Govindassamy | Manoj Govindassamy | | [YARN-6846](https://issues.apache.org/jira/browse/YARN-6846) | Nodemanager can fail to fully delete application local directories when applications are killed | Critical | nodemanager | Jason Lowe | Jason Lowe | | [YARN-6678](https://issues.apache.org/jira/browse/YARN-6678) | Handle IllegalStateException in Async Scheduling mode of CapacityScheduler | Major | capacityscheduler | Tao Yang | Tao Yang | @@ -1067,8 +1146,8 @@ | [YARN-6920](https://issues.apache.org/jira/browse/YARN-6920) | Fix resource leak that happens during container re-initialization. | Major | nodemanager | Arun Suresh | Arun Suresh | | [HDFS-12198](https://issues.apache.org/jira/browse/HDFS-12198) | Document missing namenode metrics that were added recently | Minor | documentation | Yiqun Lin | Yiqun Lin | | [HADOOP-14680](https://issues.apache.org/jira/browse/HADOOP-14680) | Azure: IndexOutOfBoundsException in BlockBlobInputStream | Minor | fs/azure | Rajesh Balamohan | Thomas Marquardt | -| [YARN-6757](https://issues.apache.org/jira/browse/YARN-6757) | Refactor the usage of yarn.nodemanager.linux-container-executor.cgroups.mount-path | Minor | nodemanager | Miklos Szegedi | Miklos Szegedi | | [MAPREDUCE-6927](https://issues.apache.org/jira/browse/MAPREDUCE-6927) | MR job should only set tracking url if history was successfully written | Major | . 
| Eric Badger | Eric Badger | +| [HDFS-10326](https://issues.apache.org/jira/browse/HDFS-10326) | Disable setting tcp socket send/receive buffers for write pipelines | Major | datanode, hdfs | Daryn Sharp | Daryn Sharp | | [HADOOP-14598](https://issues.apache.org/jira/browse/HADOOP-14598) | Blacklist Http/HttpsFileSystem in FsUrlStreamHandlerFactory | Major | fs/azure, test | Steve Loughran | Steve Loughran | | [HDFS-12157](https://issues.apache.org/jira/browse/HDFS-12157) | Do fsyncDirectory(..) outside of FSDataset lock | Critical | datanode | Vinayakumar B | Vinayakumar B | | [HDFS-12278](https://issues.apache.org/jira/browse/HDFS-12278) | LeaseManager operations are inefficient in 2.8. | Blocker | namenode | Rushabh S Shah | Rushabh S Shah | @@ -1077,6 +1156,7 @@ | [YARN-5927](https://issues.apache.org/jira/browse/YARN-5927) | BaseContainerManagerTest::waitForNMContainerState timeout accounting is not accurate | Trivial | . | Miklos Szegedi | Kai Sasaki | | [YARN-6967](https://issues.apache.org/jira/browse/YARN-6967) | Limit application attempt's diagnostic message size thoroughly | Major | resourcemanager | Chengbing Liu | Chengbing Liu | | [HDFS-11303](https://issues.apache.org/jira/browse/HDFS-11303) | Hedged read might hang infinitely if read data from all DN failed | Major | hdfs-client | Chen Zhang | Chen Zhang | +| [YARN-6996](https://issues.apache.org/jira/browse/YARN-6996) | Change javax.cache library implementation from JSR107 to Apache Geronimo | Blocker | . | Ray Chiang | Ray Chiang | | [YARN-6987](https://issues.apache.org/jira/browse/YARN-6987) | Log app attempt during InvalidStateTransition | Major | . | Jonathan Eagles | Jonathan Eagles | | [HADOOP-14773](https://issues.apache.org/jira/browse/HADOOP-14773) | Extend ZKCuratorManager API for more reusability | Major | . | Íñigo Goiri | Íñigo Goiri | | [YARN-6965](https://issues.apache.org/jira/browse/YARN-6965) | Duplicate instantiation in FairSchedulerQueueInfo | Minor | fairscheduler | Masahiro Tanaka | Masahiro Tanaka | @@ -1100,12 +1180,135 @@ | [HDFS-12215](https://issues.apache.org/jira/browse/HDFS-12215) | DataNode#transferBlock does not create its daemon in the xceiver thread group | Major | datanode | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-7051](https://issues.apache.org/jira/browse/YARN-7051) | Avoid concurrent modification exception in FifoIntraQueuePreemptionPlugin | Critical | capacity scheduler, scheduler preemption, yarn | Eric Payne | Eric Payne | | [YARN-7099](https://issues.apache.org/jira/browse/YARN-7099) | ResourceHandlerModule.parseConfiguredCGroupPath only works for privileged yarn users. | Minor | nodemanager | Miklos Szegedi | Miklos Szegedi | +| [YARN-7112](https://issues.apache.org/jira/browse/YARN-7112) | TestAMRMProxy is failing with invalid request | Major | . | Jason Lowe | Jason Lowe | +| [HDFS-12293](https://issues.apache.org/jira/browse/HDFS-12293) | DataNode should log file name on disk error | Major | datanode | Wei-Chiu Chuang | Ajay Kumar | +| [YARN-7076](https://issues.apache.org/jira/browse/YARN-7076) | yarn application -list -appTypes \ is not working | Blocker | . 
| Jian He | Jian He | +| [YARN-5816](https://issues.apache.org/jira/browse/YARN-5816) | TestDelegationTokenRenewer#testCancelWithMultipleAppSubmissions is still flakey | Minor | resourcemanager, test | Daniel Templeton | Robert Kanter | +| [MAPREDUCE-6641](https://issues.apache.org/jira/browse/MAPREDUCE-6641) | TestTaskAttempt fails in trunk | Major | test | Tsuyoshi Ozawa | Haibo Chen | +| [YARN-6756](https://issues.apache.org/jira/browse/YARN-6756) | ContainerRequest#executionTypeRequest causes NPE | Critical | . | Jian He | Jian He | +| [HDFS-12191](https://issues.apache.org/jira/browse/HDFS-12191) | Provide option to not capture the accessTime change of a file to snapshot if no other modification has been done to this file | Major | hdfs, namenode | Yongjun Zhang | Yongjun Zhang | +| [YARN-6982](https://issues.apache.org/jira/browse/YARN-6982) | Potential issue on setting AMContainerSpec#tokenConf to null before app is completed | Major | . | Rohith Sharma K S | Manikandan R | +| [HDFS-12336](https://issues.apache.org/jira/browse/HDFS-12336) | Listing encryption zones still fails when deleted EZ is not a direct child of snapshottable directory | Minor | encryption, hdfs | Wellington Chevreuil | Wellington Chevreuil | +| [HADOOP-14814](https://issues.apache.org/jira/browse/HADOOP-14814) | Fix incompatible API change on FsServerDefaults to HADOOP-14104 | Blocker | . | Junping Du | Junping Du | +| [MAPREDUCE-6931](https://issues.apache.org/jira/browse/MAPREDUCE-6931) | Remove TestDFSIO "Total Throughput" calculation | Critical | benchmarks, test | Dennis Huo | Dennis Huo | +| [YARN-7077](https://issues.apache.org/jira/browse/YARN-7077) | TestAMSimulator and TestNMSimulator fail | Major | test | Akira Ajisaka | Akira Ajisaka | +| [YARN-7116](https://issues.apache.org/jira/browse/YARN-7116) | CapacityScheduler Web UI: Queue's AM usage is always show on per-user's AM usage. | Major | capacity scheduler, webapp | Wangda Tan | Wangda Tan | +| [HDFS-12317](https://issues.apache.org/jira/browse/HDFS-12317) | HDFS metrics render error in the page of Github | Minor | documentation, metrics | Yiqun Lin | Yiqun Lin | +| [HADOOP-14824](https://issues.apache.org/jira/browse/HADOOP-14824) | Update ADLS SDK to 2.2.2 for MSI fix | Major | fs/adl | Atul Sikaria | Atul Sikaria | +| [YARN-7120](https://issues.apache.org/jira/browse/YARN-7120) | CapacitySchedulerPage NPE in "Aggregate scheduler counts" section | Minor | . | Eric Payne | Eric Payne | +| [YARN-7023](https://issues.apache.org/jira/browse/YARN-7023) | Incorrect ReservationId.compareTo() implementation | Minor | reservation system | Oleg Danilov | Oleg Danilov | +| [HADOOP-14820](https://issues.apache.org/jira/browse/HADOOP-14820) | Wasb mkdirs security checks inconsistent with HDFS | Major | fs/azure | Sivaguru Sankaridurg | Sivaguru Sankaridurg | +| [YARN-7164](https://issues.apache.org/jira/browse/YARN-7164) | TestAMRMClientOnRMRestart fails sporadically with bind address in use | Major | test | Jason Lowe | Jason Lowe | +| [YARN-6992](https://issues.apache.org/jira/browse/YARN-6992) | Kill application button is visible even if the application is FINISHED in RM UI | Major | . | Sumana Sathish | Suma Shivaprasad | +| [HDFS-12357](https://issues.apache.org/jira/browse/HDFS-12357) | Let NameNode to bypass external attribute provider for special user | Major | . 
| Yongjun Zhang | Yongjun Zhang | +| [HDFS-12369](https://issues.apache.org/jira/browse/HDFS-12369) | Edit log corruption due to hard lease recovery of not-closed file which has snapshots | Major | namenode | Xiao Chen | Xiao Chen | +| [HADOOP-14842](https://issues.apache.org/jira/browse/HADOOP-14842) | Hadoop 2.8.2 release build process get stuck due to java issue | Blocker | build | Junping Du | Junping Du | +| [YARN-6219](https://issues.apache.org/jira/browse/YARN-6219) | NM web server related UT fails with "NMWebapps failed to start." | Major | . | Yesha Vora | Jason Lowe | +| [HDFS-12407](https://issues.apache.org/jira/browse/HDFS-12407) | Journal nodes fails to shutdown cleanly if JournalNodeHttpServer or JournalNodeRpcServer fails to start | Major | . | Ajay Kumar | Ajay Kumar | +| [YARN-7185](https://issues.apache.org/jira/browse/YARN-7185) | ContainerScheduler should only look at availableResource for GUARANTEED containers when OPPORTUNISTIC container queuing is enabled. | Blocker | yarn | Sumana Sathish | Tan, Wangda | +| [HADOOP-14867](https://issues.apache.org/jira/browse/HADOOP-14867) | Update HDFS Federation setup document, for incorrect property name for secondary name node http address | Major | . | Bharat Viswanadham | Bharat Viswanadham | +| [YARN-4727](https://issues.apache.org/jira/browse/YARN-4727) | Unable to override the $HADOOP\_CONF\_DIR env variable for container | Major | nodemanager | Terence Yim | Jason Lowe | +| [MAPREDUCE-6957](https://issues.apache.org/jira/browse/MAPREDUCE-6957) | shuffle hangs after a node manager connection timeout | Major | mrv2 | Jooseong Kim | Jooseong Kim | +| [YARN-7163](https://issues.apache.org/jira/browse/YARN-7163) | RMContext need not to be injected to webapp and other Always Running services. | Blocker | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-12424](https://issues.apache.org/jira/browse/HDFS-12424) | Datatable sorting on the Datanode Information page in the Namenode UI is broken | Major | . | Shawna Martell | Shawna Martell | +| [HDFS-12323](https://issues.apache.org/jira/browse/HDFS-12323) | NameNode terminates after full GC thinking QJM unresponsive if full GC is much longer than timeout | Major | namenode, qjm | Erik Krogen | Erik Krogen | +| [YARN-7149](https://issues.apache.org/jira/browse/YARN-7149) | Cross-queue preemption sometimes starves an underserved queue | Major | capacity scheduler | Eric Payne | Eric Payne | +| [YARN-7192](https://issues.apache.org/jira/browse/YARN-7192) | Add a pluggable StateMachine Listener that is notified of NM Container State changes | Major | . | Arun Suresh | Arun Suresh | +| [MAPREDUCE-6960](https://issues.apache.org/jira/browse/MAPREDUCE-6960) | Shuffle Handler prints disk error stack traces for every read failure. | Major | . | Kuhu Shukla | Kuhu Shukla | +| [HDFS-11799](https://issues.apache.org/jira/browse/HDFS-11799) | Introduce a config to allow setting up write pipeline with fewer nodes than replication factor | Major | . | Yongjun Zhang | Brahma Reddy Battula | +| [YARN-7196](https://issues.apache.org/jira/browse/YARN-7196) | Fix finicky TestContainerManager tests | Major | . | Arun Suresh | Arun Suresh | +| [YARN-6771](https://issues.apache.org/jira/browse/YARN-6771) | Use classloader inside configuration class to make new classes | Major | . | Jongyoul Lee | Jongyoul Lee | +| [HDFS-12526](https://issues.apache.org/jira/browse/HDFS-12526) | FSDirectory should use Time.monotonicNow for durations | Minor | . 
| Chetna Chaudhari | Bharat Viswanadham | +| [YARN-6968](https://issues.apache.org/jira/browse/YARN-6968) | Hardcoded absolute pathname in DockerLinuxContainerRuntime | Major | nodemanager | Miklos Szegedi | Eric Badger | +| [HDFS-12371](https://issues.apache.org/jira/browse/HDFS-12371) | "BlockVerificationFailures" and "BlocksVerified" show up as 0 in Datanode JMX | Major | metrics | Sai Nukavarapu | Hanisha Koneru | +| [MAPREDUCE-6964](https://issues.apache.org/jira/browse/MAPREDUCE-6964) | BaileyBorweinPlouffe should use Time.monotonicNow for measuring durations | Minor | examples | Chetna Chaudhari | Chetna Chaudhari | +| [YARN-5195](https://issues.apache.org/jira/browse/YARN-5195) | RM intermittently crashed with NPE while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | resourcemanager | Karam Singh | sandflee | +| [YARN-7034](https://issues.apache.org/jira/browse/YARN-7034) | DefaultLinuxContainerRuntime and DockerLinuxContainerRuntime sends client environment variables to container-executor | Blocker | nodemanager | Miklos Szegedi | Miklos Szegedi | +| [MAPREDUCE-6966](https://issues.apache.org/jira/browse/MAPREDUCE-6966) | DistSum should use Time.monotonicNow for measuring durations | Minor | . | Chetna Chaudhari | Chetna Chaudhari | +| [MAPREDUCE-6965](https://issues.apache.org/jira/browse/MAPREDUCE-6965) | QuasiMonteCarlo should use Time.monotonicNow for measuring durations | Minor | examples | Chetna Chaudhari | Chetna Chaudhari | +| [MAPREDUCE-6967](https://issues.apache.org/jira/browse/MAPREDUCE-6967) | gridmix/SleepReducer should use Time.monotonicNow for measuring durations | Minor | . | Chetna Chaudhari | Chetna Chaudhari | +| [HDFS-12531](https://issues.apache.org/jira/browse/HDFS-12531) | Fix conflict in the javadoc of UnderReplicatedBlocks.java in branch-2 | Minor | documentation | Akira Ajisaka | Bharat Viswanadham | +| [YARN-7153](https://issues.apache.org/jira/browse/YARN-7153) | Remove duplicated code in AMRMClientAsyncImpl.java | Minor | client | Sen Zhao | Sen Zhao | +| [HADOOP-14901](https://issues.apache.org/jira/browse/HADOOP-14901) | ReuseObjectMapper in Hadoop Common | Minor | . | Hanisha Koneru | Hanisha Koneru | +| [YARN-7118](https://issues.apache.org/jira/browse/YARN-7118) | AHS REST API can return NullPointerException | Major | . | Prabhu Joseph | Billie Rinaldi | +| [HDFS-12495](https://issues.apache.org/jira/browse/HDFS-12495) | TestPendingInvalidateBlock#testPendingDeleteUnknownBlocks fails intermittently | Major | . | Eric Badger | Eric Badger | +| [HADOOP-14891](https://issues.apache.org/jira/browse/HADOOP-14891) | Remove references to Guava Objects.toStringHelper | Major | . | Jonathan Eagles | Jonathan Eagles | +| [HADOOP-14822](https://issues.apache.org/jira/browse/HADOOP-14822) | hadoop-project/pom.xml is executable | Minor | . 
| Akira Ajisaka | Ajay Kumar | +| [YARN-7157](https://issues.apache.org/jira/browse/YARN-7157) | Add admin configuration to filter per-user's apps in secure cluster | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-7257](https://issues.apache.org/jira/browse/YARN-7257) | AggregatedLogsBlock reports a bad 'end' value as a bad 'start' value | Major | log-aggregation | Jason Lowe | Jason Lowe | +| [YARN-7248](https://issues.apache.org/jira/browse/YARN-7248) | NM returns new SCHEDULED container status to older clients | Blocker | nodemanager | Jason Lowe | Arun Suresh | +| [HADOOP-14902](https://issues.apache.org/jira/browse/HADOOP-14902) | LoadGenerator#genFile write close timing is incorrectly calculated | Major | fs | Jason Lowe | Hanisha Koneru | +| [YARN-7260](https://issues.apache.org/jira/browse/YARN-7260) | yarn.router.pipeline.cache-max-size is missing in yarn-default.xml | Major | . | Rohith Sharma K S | Jason Lowe | +| [YARN-7084](https://issues.apache.org/jira/browse/YARN-7084) | TestSchedulingMonitor#testRMStarts fails sporadically | Major | . | Jason Lowe | Jason Lowe | +| [YARN-6625](https://issues.apache.org/jira/browse/YARN-6625) | yarn application -list returns a tracking URL for AM that doesn't work in secured and HA environment | Major | amrmproxy | Yufei Gu | Yufei Gu | +| [YARN-7044](https://issues.apache.org/jira/browse/YARN-7044) | TestContainerAllocation#testAMContainerAllocationWhenDNSUnavailable fails | Major | capacity scheduler, test | Wangda Tan | Akira Ajisaka | +| [YARN-7226](https://issues.apache.org/jira/browse/YARN-7226) | Whitelisted variables do not support delayed variable expansion | Major | nodemanager | Jason Lowe | Jason Lowe | +| [YARN-7258](https://issues.apache.org/jira/browse/YARN-7258) | Add Node and Rack Hints to Opportunistic Scheduler | Major | . | Arun Suresh | kartheek muthyala | +| [YARN-7285](https://issues.apache.org/jira/browse/YARN-7285) | ContainerExecutor always launches with priorities due to yarn-default property | Minor | nodemanager | Jason Lowe | Jason Lowe | +| [YARN-7245](https://issues.apache.org/jira/browse/YARN-7245) | Max AM Resource column in Active Users Info section of Capacity Scheduler UI page should be updated per-user | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | +| [YARN-7309](https://issues.apache.org/jira/browse/YARN-7309) | TestClientRMService#testUpdateApplicationPriorityRequest and TestClientRMService#testUpdatePriorityAndKillAppWithZeroClusterResource test functionality not supported by FairScheduler | Major | test | Robert Kanter | Robert Kanter | +| [HADOOP-14912](https://issues.apache.org/jira/browse/HADOOP-14912) | FairCallQueue may defer servicing calls | Major | ipc | Daryn Sharp | Daryn Sharp | +| [MAPREDUCE-6951](https://issues.apache.org/jira/browse/MAPREDUCE-6951) | Improve exception message when mapreduce.jobhistory.webapp.address is in wrong format | Major | applicationmaster | Prabhu Joseph | Prabhu Joseph | +| [HADOOP-13556](https://issues.apache.org/jira/browse/HADOOP-13556) | Change Configuration.getPropsWithPrefix to use getProps instead of iterator | Major | . 
| Larry McCay | Larry McCay | +| [HADOOP-13102](https://issues.apache.org/jira/browse/HADOOP-13102) | Update GroupsMapping documentation to reflect the new changes | Major | documentation | Anu Engineer | Esther Kundin | +| [YARN-7270](https://issues.apache.org/jira/browse/YARN-7270) | Fix unsafe casting from long to int for class Resource and its sub-classes | Major | resourcemanager | Yufei Gu | Yufei Gu | +| [YARN-7269](https://issues.apache.org/jira/browse/YARN-7269) | Tracking URL in the app state does not get redirected to ApplicationMaster for Running applications | Critical | . | Sumana Sathish | Tan, Wangda | +| [YARN-7321](https://issues.apache.org/jira/browse/YARN-7321) | Backport container-executor changes from YARN-6852 to branch-2 | Major | . | Varun Vasudev | Varun Vasudev | +| [HDFS-12659](https://issues.apache.org/jira/browse/HDFS-12659) | Update TestDeadDatanode#testNonDFSUsedONDeadNodeReReg to increase heartbeat recheck interval | Minor | . | Ajay Kumar | Ajay Kumar | +| [HDFS-12485](https://issues.apache.org/jira/browse/HDFS-12485) | expunge may fail to remove trash from encryption zone | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-14949](https://issues.apache.org/jira/browse/HADOOP-14949) | TestKMS#testACLs fails intermittently | Major | kms, test | Xiao Chen | Xiao Chen | +| [YARN-7124](https://issues.apache.org/jira/browse/YARN-7124) | LogAggregationTFileController deletes/renames while file is open | Critical | nodemanager | Daryn Sharp | Jason Lowe | +| [YARN-7333](https://issues.apache.org/jira/browse/YARN-7333) | container-executor fails to remove entries from a directory that is not writable or executable | Critical | . | Jason Lowe | Jason Lowe | +| [YARN-7341](https://issues.apache.org/jira/browse/YARN-7341) | TestRouterWebServiceUtil#testMergeMetrics is flakey | Major | federation | Robert Kanter | Robert Kanter | +| [HADOOP-14958](https://issues.apache.org/jira/browse/HADOOP-14958) | CLONE - Fix source-level compatibility after HADOOP-11252 | Blocker | . | Junping Du | Junping Du | +| [YARN-7294](https://issues.apache.org/jira/browse/YARN-7294) | TestSignalContainer#testSignalRequestDeliveryToNM fails intermittently with Fair scheduler | Major | . | Miklos Szegedi | Miklos Szegedi | +| [YARN-7170](https://issues.apache.org/jira/browse/YARN-7170) | Improve bower dependencies for YARN UI v2 | Critical | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-7355](https://issues.apache.org/jira/browse/YARN-7355) | TestDistributedShell should be scheduler agnostic | Major | . | Haibo Chen | Haibo Chen | +| [HADOOP-14942](https://issues.apache.org/jira/browse/HADOOP-14942) | DistCp#cleanup() should check whether jobFS is null | Minor | . | Ted Yu | Andras Bokor | +| [HDFS-12683](https://issues.apache.org/jira/browse/HDFS-12683) | DFSZKFailOverController re-order logic for logging Exception | Major | . | Bharat Viswanadham | Bharat Viswanadham | +| [HADOOP-14966](https://issues.apache.org/jira/browse/HADOOP-14966) | Handle JDK-8071638 for hadoop-common | Blocker | . 
| Bibin A Chundatt | Bibin A Chundatt | +| [HDFS-12695](https://issues.apache.org/jira/browse/HDFS-12695) | Add a link to HDFS router federation document in site.xml | Minor | documentation | Yiqun Lin | Yiqun Lin | +| [YARN-7382](https://issues.apache.org/jira/browse/YARN-7382) | NoSuchElementException in FairScheduler after failover causes RM crash | Blocker | fairscheduler | Robert Kanter | Robert Kanter | +| [YARN-7385](https://issues.apache.org/jira/browse/YARN-7385) | TestFairScheduler#testUpdateDemand and TestFSLeafQueue#testUpdateDemand are failing with NPE | Major | test | Robert Kanter | Yufei Gu | +| [YARN-7339](https://issues.apache.org/jira/browse/YARN-7339) | LocalityMulticastAMRMProxyPolicy should handle cancel request properly | Minor | . | Botong Huang | Botong Huang | +| [HDFS-12710](https://issues.apache.org/jira/browse/HDFS-12710) | HTTPFS HTTP max header size env variable is not respected in branch-2 | Minor | . | Istvan Fajth | Istvan Fajth | +| [HDFS-9914](https://issues.apache.org/jira/browse/HDFS-9914) | Fix configurable WebhDFS connect/read timeout | Blocker | hdfs-client, webhdfs | Xiaoyu Yao | Xiaoyu Yao | +| [YARN-7375](https://issues.apache.org/jira/browse/YARN-7375) | Possible NPE in RMWebapp when HA is enabled and the active RM fails | Major | . | Chandni Singh | Chandni Singh | +| [HADOOP-14986](https://issues.apache.org/jira/browse/HADOOP-14986) | Enforce JDK limitations | Major | build | Chris Douglas | Chris Douglas | +| [HADOOP-14991](https://issues.apache.org/jira/browse/HADOOP-14991) | Add missing figures to Resource Estimator tool | Major | . | Subru Krishnan | Rui Li | +| [YARN-7299](https://issues.apache.org/jira/browse/YARN-7299) | Fix TestDistributedScheduler | Major | . | Jason Lowe | Arun Suresh | +| [YARN-6747](https://issues.apache.org/jira/browse/YARN-6747) | TestFSAppStarvation.testPreemptionEnable fails intermittently | Major | . | Sunil Govindan | Miklos Szegedi | +| [YARN-7244](https://issues.apache.org/jira/browse/YARN-7244) | ShuffleHandler is not aware of disks that are added | Major | . | Kuhu Shukla | Kuhu Shukla | +| [HADOOP-14990](https://issues.apache.org/jira/browse/HADOOP-14990) | Clean up jdiff xml files added for 2.8.2 release | Blocker | . | Subru Krishnan | Junping Du | +| [HADOOP-14919](https://issues.apache.org/jira/browse/HADOOP-14919) | BZip2 drops records when reading data in splits | Critical | . | Aki Tanaka | Jason Lowe | +| [HDFS-12699](https://issues.apache.org/jira/browse/HDFS-12699) | TestMountTable fails with Java 7 | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-7146](https://issues.apache.org/jira/browse/YARN-7146) | Many RM unit tests failing with FairScheduler | Major | test | Robert Kanter | Robert Kanter | +| [MAPREDUCE-6999](https://issues.apache.org/jira/browse/MAPREDUCE-6999) | Fix typo "onf" in DynamicInputChunk.java | Trivial | . 
| fang zhenyi | fang zhenyi |
+| [YARN-7364](https://issues.apache.org/jira/browse/YARN-7364) | Queue dash board in new YARN UI has incorrect values | Critical | webapp | Sunil Govindan | Sunil Govindan |
+| [YARN-7370](https://issues.apache.org/jira/browse/YARN-7370) | Preemption properties should be refreshable | Major | capacity scheduler, scheduler preemption | Eric Payne | Gergely Novák |
+| [YARN-7400](https://issues.apache.org/jira/browse/YARN-7400) | incorrect log preview displayed in jobhistory server ui | Major | yarn | Santhosh B Gowda | Xuan Gong |
+| [HADOOP-15013](https://issues.apache.org/jira/browse/HADOOP-15013) | Fix ResourceEstimator findbugs issues | Blocker | . | Allen Wittenauer | Arun Suresh |
+| [YARN-7432](https://issues.apache.org/jira/browse/YARN-7432) | Fix DominantResourceFairnessPolicy serializable findbugs issues | Blocker | . | Allen Wittenauer | Daniel Templeton |
+| [YARN-7434](https://issues.apache.org/jira/browse/YARN-7434) | Router getApps REST invocation fails with multiple RMs | Critical | . | Subru Krishnan | Íñigo Goiri |
+| [YARN-4742](https://issues.apache.org/jira/browse/YARN-4742) | [Umbrella] Enhancements to Distributed Scheduling | Major | . | Arun Suresh | Arun Suresh |
+| [YARN-5085](https://issues.apache.org/jira/browse/YARN-5085) | Add support for change of container ExecutionType | Major | . | Arun Suresh | Arun Suresh |
+| [HDFS-12783](https://issues.apache.org/jira/browse/HDFS-12783) | [branch-2] "dfsrouter" should use hdfsScript | Major | . | Brahma Reddy Battula | Brahma Reddy Battula |
+| [YARN-7453](https://issues.apache.org/jira/browse/YARN-7453) | Fix issue where RM fails to switch to active after first successful start | Blocker | resourcemanager | Rohith Sharma K S | Rohith Sharma K S |
+| [HADOOP-15025](https://issues.apache.org/jira/browse/HADOOP-15025) | Ensure singleton for ResourceEstimatorService | Major | . | Subru Krishnan | Rui Li |
+| [HADOOP-15026](https://issues.apache.org/jira/browse/HADOOP-15026) | Rebase ResourceEstimator start/stop scripts for branch-2 | Major | . | Subru Krishnan | Rui Li |
+| [YARN-6102](https://issues.apache.org/jira/browse/YARN-6102) | RMActiveService context to be updated with new RMContext on failover | Critical | . | Ajith S | Rohith Sharma K S |
+| [HADOOP-15030](https://issues.apache.org/jira/browse/HADOOP-15030) | [branch-2] Include hadoop-cloud-storage-project in the main hadoop pom modules | Critical | . | Subru Krishnan | Subru Krishnan |
+| [YARN-7475](https://issues.apache.org/jira/browse/YARN-7475) | Fix Container log link in new YARN UI | Major | . | Sunil Govindan | Sunil Govindan |
+| [YARN-7476](https://issues.apache.org/jira/browse/YARN-7476) | Fix miscellaneous issues in ATSv2 after merge to branch-2 | Major | . | Varun Saxena | Varun Saxena |
+| [HADOOP-15036](https://issues.apache.org/jira/browse/HADOOP-15036) | Update LICENSE.txt for HADOOP-14840 | Major | . | Arun Suresh | Arun Suresh |
+| [HDFS-13054](https://issues.apache.org/jira/browse/HDFS-13054) | Handling PathIsNotEmptyDirectoryException in DFSClient delete call | Major | hdfs-client | Nanda kumar | Nanda kumar |
### TESTS:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-9153](https://issues.apache.org/jira/browse/HDFS-9153) | Pretty-format the output for DFSIO | Major | . | Kai Zheng | Kai Zheng |
| [HDFS-9300](https://issues.apache.org/jira/browse/HDFS-9300) | TestDirectoryScanner.testThrottle() is still a little flakey | Major | balancer & mover, test | Daniel Templeton | Daniel Templeton |
| [YARN-4704](https://issues.apache.org/jira/browse/YARN-4704) | TestResourceManager#testResourceAllocation() fails when using FairScheduler | Major | fairscheduler, test | Ray Chiang | Yufei Gu |
| [HADOOP-12701](https://issues.apache.org/jira/browse/HADOOP-12701) | Run checkstyle on test source files | Minor | . | John Zhuge | John Zhuge |
@@ -1127,57 +1330,157 @@
| [YARN-5548](https://issues.apache.org/jira/browse/YARN-5548) | Use MockRMMemoryStateStore to reduce test failures | Major | . | Bibin A Chundatt | Bibin A Chundatt |
| [HADOOP-14245](https://issues.apache.org/jira/browse/HADOOP-14245) | Use Mockito.when instead of Mockito.stub | Minor | test | Akira Ajisaka | Andras Bokor |
| [YARN-5349](https://issues.apache.org/jira/browse/YARN-5349) | TestWorkPreservingRMRestart#testUAMRecoveryOnRMWorkPreservingRestart fail intermittently | Minor | . | sandflee | Jason Lowe |
-| [YARN-6240](https://issues.apache.org/jira/browse/YARN-6240) | TestCapacityScheduler.testRefreshQueuesWithQueueDelete fails randomly | Major | test | Sunil G | Naganarasimha G R |
+| [YARN-6240](https://issues.apache.org/jira/browse/YARN-6240) | TestCapacityScheduler.testRefreshQueuesWithQueueDelete fails randomly | Major | test | Sunil Govindan | Naganarasimha G R |
| [HDFS-11988](https://issues.apache.org/jira/browse/HDFS-11988) | Verify HDFS Snapshots with open files captured are safe across truncates and appends on current version file | Major | hdfs, snapshots | Manoj Govindassamy | Manoj Govindassamy |
+| [MAPREDUCE-6953](https://issues.apache.org/jira/browse/MAPREDUCE-6953) | Skip the testcase testJobWithChangePriority if FairScheduler is used | Major | client | Peter Bacsko | Peter Bacsko |
### SUB-TASKS:
| JIRA | Summary | Priority | Component | Reporter | Contributor |
|:---- |:---- | :--- |:---- |:---- |:---- |
+| [YARN-3063](https://issues.apache.org/jira/browse/YARN-3063) | Bootstrap TimelineServer Next Gen Module | Major | timelineserver | Zhijie Shen | Zhijie Shen |
+| [YARN-3030](https://issues.apache.org/jira/browse/YARN-3030) | [Collector wireup] Set up timeline collector with basic request serving structure and lifecycle | Major | timelineserver | Sangjin Lee | Sangjin Lee |
+| [YARN-3041](https://issues.apache.org/jira/browse/YARN-3041) | [Data Model] create overall data objects of TS next gen | Major | timelineserver | Sangjin Lee | Zhijie Shen |
+| [YARN-3240](https://issues.apache.org/jira/browse/YARN-3240) | [Data Mode] Implement client API to put generic entities | Major | timelineserver | Zhijie Shen | Zhijie Shen |
+| [YARN-3087](https://issues.apache.org/jira/browse/YARN-3087) | [Collector implementation] the REST server (web server) for per-node collector does not work if it runs inside node manager | Major | timelineserver | Sangjin Lee | Li Lu |
+| [YARN-3125](https://issues.apache.org/jira/browse/YARN-3125) | [Event producers] Change distributed shell to use new timeline service | Major | timelineserver | Zhijie Shen | Junping Du |
+| [YARN-3210](https://issues.apache.org/jira/browse/YARN-3210) | [Source organization] Refactor timeline collector according to new code organization | Major | timelineserver | Li Lu | Li Lu |
+| [YARN-3264](https://issues.apache.org/jira/browse/YARN-3264) | [Storage implementation] Create backing storage write interface and a POC
only file based storage implementation | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-3039](https://issues.apache.org/jira/browse/YARN-3039) | [Collector wireup] Implement timeline app-level collector service discovery | Major | timelineserver | Sangjin Lee | Junping Du | +| [YARN-3333](https://issues.apache.org/jira/browse/YARN-3333) | rename TimelineAggregator etc. to TimelineCollector | Major | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-3377](https://issues.apache.org/jira/browse/YARN-3377) | TestTimelineServiceClientIntegration fails | Minor | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-3034](https://issues.apache.org/jira/browse/YARN-3034) | [Collector wireup] Implement RM starting its timeline collector | Major | timelineserver | Sangjin Lee | Naganarasimha G R | +| [YARN-3040](https://issues.apache.org/jira/browse/YARN-3040) | [Data Model] Make putEntities operation be aware of the app's context | Major | timelineserver | Sangjin Lee | Zhijie Shen | +| [YARN-3374](https://issues.apache.org/jira/browse/YARN-3374) | Collector's web server should randomly bind an available port | Major | timelineserver | Zhijie Shen | Zhijie Shen | +| [YARN-3334](https://issues.apache.org/jira/browse/YARN-3334) | [Event Producers] NM TimelineClient container metrics posting to new timeline service. | Major | nodemanager | Junping Du | Junping Du | +| [YARN-3391](https://issues.apache.org/jira/browse/YARN-3391) | Clearly define flow ID/ flow run / flow version in API and storage | Major | timelineserver | Zhijie Shen | Zhijie Shen | +| [MAPREDUCE-6327](https://issues.apache.org/jira/browse/MAPREDUCE-6327) | [Event producers] Implement MapReduce AM writing MR events/counters to v2 ATS | Major | . | Sangjin Lee | Junping Du | +| [YARN-3390](https://issues.apache.org/jira/browse/YARN-3390) | Reuse TimelineCollectorManager for RM | Major | timelineserver | Zhijie Shen | Zhijie Shen | +| [YARN-3431](https://issues.apache.org/jira/browse/YARN-3431) | Sub resources of timeline entity needs to be passed to a separate endpoint. | Major | timelineserver | Zhijie Shen | Zhijie Shen | +| [MAPREDUCE-6335](https://issues.apache.org/jira/browse/MAPREDUCE-6335) | convert load test driver to timeline service v.2 | Major | . | Sangjin Lee | Sangjin Lee | +| [YARN-3551](https://issues.apache.org/jira/browse/YARN-3551) | Consolidate data model change according to the backend implementation | Major | timelineserver | Zhijie Shen | Zhijie Shen | +| [YARN-3562](https://issues.apache.org/jira/browse/YARN-3562) | unit tests failures and issues found from findbug from earlier ATS checkins | Minor | timelineserver | Sangjin Lee | Naganarasimha G R | +| [YARN-3134](https://issues.apache.org/jira/browse/YARN-3134) | [Storage implementation] Exploiting the option of using Phoenix to access HBase backend | Major | timelineserver | Zhijie Shen | Li Lu | +| [YARN-3529](https://issues.apache.org/jira/browse/YARN-3529) | Add miniHBase cluster and Phoenix support to ATS v2 unit tests | Major | timelineserver | Li Lu | Li Lu | +| [YARN-3634](https://issues.apache.org/jira/browse/YARN-3634) | TestMRTimelineEventHandling and TestApplication are broken | Major | timelineserver | Sangjin Lee | Sangjin Lee | +| [MAPREDUCE-6337](https://issues.apache.org/jira/browse/MAPREDUCE-6337) | add a mode to replay MR job history files to the timeline service | Major | . 
| Sangjin Lee | Sangjin Lee | +| [YARN-3411](https://issues.apache.org/jira/browse/YARN-3411) | [Storage implementation] explore & create the native HBase schema for writes | Critical | timelineserver | Sangjin Lee | Vrushali C | +| [MAPREDUCE-6370](https://issues.apache.org/jira/browse/MAPREDUCE-6370) | Timeline service v2 load generator needs to write event id | Major | . | Li Lu | Li Lu | +| [YARN-3726](https://issues.apache.org/jira/browse/YARN-3726) | Fix TestHBaseTimelineWriterImpl unit test failure by fixing it's test data | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-3721](https://issues.apache.org/jira/browse/YARN-3721) | build is broken on YARN-2928 branch due to possible dependency cycle | Blocker | timelineserver | Sangjin Lee | Li Lu | +| [YARN-3276](https://issues.apache.org/jira/browse/YARN-3276) | Refactor and fix null casting in some map cast for TimelineEntity (old and new) and fix findbug warnings | Major | timelineserver | Junping Du | Junping Du | +| [YARN-3044](https://issues.apache.org/jira/browse/YARN-3044) | [Event producers] Implement RM writing app lifecycle events to ATS | Major | timelineserver | Sangjin Lee | Naganarasimha G R | +| [YARN-3801](https://issues.apache.org/jira/browse/YARN-3801) | [JDK-8][YARN-2928] Exclude jdk.tools from hbase-client and hbase-testing-util | Major | timelineserver | Tsuyoshi Ozawa | Tsuyoshi Ozawa | +| [YARN-3706](https://issues.apache.org/jira/browse/YARN-3706) | Generalize native HBase writer for additional tables | Minor | timelineserver | Joep Rottinghuis | Joep Rottinghuis | +| [YARN-3792](https://issues.apache.org/jira/browse/YARN-3792) | Test case failures in TestDistributedShell and some issue fixes related to ATSV2 | Major | timelineserver | Naganarasimha G R | Naganarasimha G R | +| [YARN-3051](https://issues.apache.org/jira/browse/YARN-3051) | [Storage abstraction] Create backing storage read interface for ATS readers | Major | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-3047](https://issues.apache.org/jira/browse/YARN-3047) | [Data Serving] Set up ATS reader with basic request serving structure and lifecycle | Major | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-3836](https://issues.apache.org/jira/browse/YARN-3836) | add equals and hashCode to TimelineEntity and other classes in the data model | Major | timelineserver | Sangjin Lee | Li Lu | +| [YARN-3445](https://issues.apache.org/jira/browse/YARN-3445) | Cache runningApps in RMNode for getting running apps on given NodeId | Major | nodemanager, resourcemanager | Junping Du | Junping Du | +| [YARN-3116](https://issues.apache.org/jira/browse/YARN-3116) | [Collector wireup] We need an assured way to determine if a container is an AM container on NM | Major | nodemanager, timelineserver | Zhijie Shen | Giovanni Matteo Fumarola | +| [YARN-3949](https://issues.apache.org/jira/browse/YARN-3949) | ensure timely flush of timeline writes | Major | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-3908](https://issues.apache.org/jira/browse/YARN-3908) | Bugs in HBaseTimelineWriterImpl | Major | timelineserver | Zhijie Shen | Vrushali C | +| [YARN-3993](https://issues.apache.org/jira/browse/YARN-3993) | Change to use the AM flag in ContainerContext determine AM container | Major | timelineserver | Zhijie Shen | Sunil Govindan | +| [YARN-3984](https://issues.apache.org/jira/browse/YARN-3984) | Rethink event column key issue | Major | timelineserver | Zhijie Shen | Vrushali C | +| 
[YARN-3049](https://issues.apache.org/jira/browse/YARN-3049) | [Storage Implementation] Implement storage reader interface to fetch raw data from HBase backend | Major | timelineserver | Sangjin Lee | Zhijie Shen | +| [YARN-3906](https://issues.apache.org/jira/browse/YARN-3906) | split the application table from the entity table | Major | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-3904](https://issues.apache.org/jira/browse/YARN-3904) | Refactor timelineservice.storage to add support to online and offline aggregation writers | Major | timelineserver | Li Lu | Li Lu | +| [YARN-3045](https://issues.apache.org/jira/browse/YARN-3045) | [Event producers] Implement NM writing container lifecycle events to ATS | Major | timelineserver | Sangjin Lee | Naganarasimha G R | +| [YARN-4025](https://issues.apache.org/jira/browse/YARN-4025) | Deal with byte representations of Longs in writer code | Major | timelineserver | Vrushali C | Sangjin Lee | +| [YARN-4064](https://issues.apache.org/jira/browse/YARN-4064) | build is broken at TestHBaseTimelineWriterImpl.java | Blocker | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-3814](https://issues.apache.org/jira/browse/YARN-3814) | REST API implementation for getting raw entities in TimelineReader | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-4058](https://issues.apache.org/jira/browse/YARN-4058) | Miscellaneous issues in NodeManager project | Minor | timelineserver | Naganarasimha G R | Naganarasimha G R | +| [YARN-2884](https://issues.apache.org/jira/browse/YARN-2884) | Proxying all AM-RM communications | Major | nodemanager, resourcemanager | Carlo Curino | Kishore Chaliparambil | +| [YARN-4102](https://issues.apache.org/jira/browse/YARN-4102) | Add a "skip existing table" mode for timeline schema creator | Major | timelineserver | Li Lu | Li Lu | +| [YARN-3901](https://issues.apache.org/jira/browse/YARN-3901) | Populate flow run data in the flow\_run & flow activity tables | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-4074](https://issues.apache.org/jira/browse/YARN-4074) | [timeline reader] implement support for querying for flows and flow runs | Major | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-4075](https://issues.apache.org/jira/browse/YARN-4075) | [reader REST API] implement support for querying for flows and flow runs | Major | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-4203](https://issues.apache.org/jira/browse/YARN-4203) | Add request/response logging & timing for each REST endpoint call | Minor | timelineserver | Vrushali C | Varun Saxena | +| [YARN-4210](https://issues.apache.org/jira/browse/YARN-4210) | HBase reader throws NPE if Get returns no rows | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-3864](https://issues.apache.org/jira/browse/YARN-3864) | Implement support for querying single app and all apps for a flow run | Blocker | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-4178](https://issues.apache.org/jira/browse/YARN-4178) | [storage implementation] app id as string in row keys can cause incorrect ordering | Major | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-4237](https://issues.apache.org/jira/browse/YARN-4237) | Support additional queries for ATSv2 Web UI | Major | . 
| Varun Saxena | Varun Saxena | +| [YARN-4179](https://issues.apache.org/jira/browse/YARN-4179) | [reader implementation] support flow activity queries based on time | Minor | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-4129](https://issues.apache.org/jira/browse/YARN-4129) | Refactor the SystemMetricPublisher in RM to better support newer events | Major | resourcemanager | Naganarasimha G R | Naganarasimha G R | +| [YARN-4221](https://issues.apache.org/jira/browse/YARN-4221) | Store user in app to flow table | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-4053](https://issues.apache.org/jira/browse/YARN-4053) | Change the way metric values are stored in HBase Storage | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-4297](https://issues.apache.org/jira/browse/YARN-4297) | TestJobHistoryEventHandler and TestRMContainerAllocator failing on YARN-2928 branch | Major | . | Varun Saxena | Varun Saxena | +| [YARN-3862](https://issues.apache.org/jira/browse/YARN-3862) | Support for fetching specific configs and metrics based on prefixes | Major | timelineserver | Varun Saxena | Varun Saxena | | [HDFS-9129](https://issues.apache.org/jira/browse/HDFS-9129) | Move the safemode block count into BlockManager | Major | namenode | Haohui Mai | Mingliang Liu | | [HDFS-9414](https://issues.apache.org/jira/browse/HDFS-9414) | Refactor reconfiguration of ClientDatanodeProtocol for reusability | Major | . | Xiaobing Zhou | Xiaobing Zhou | +| [YARN-4356](https://issues.apache.org/jira/browse/YARN-4356) | ensure the timeline service v.2 is disabled cleanly and has no impact when it's turned off | Critical | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-4450](https://issues.apache.org/jira/browse/YARN-4450) | TestTimelineAuthenticationFilter and TestYarnConfigurationFields fail | Major | timelineserver | Sangjin Lee | Sangjin Lee | | [HDFS-9371](https://issues.apache.org/jira/browse/HDFS-9371) | Code cleanup for DatanodeManager | Major | namenode | Jing Zhao | Jing Zhao | +| [YARN-4460](https://issues.apache.org/jira/browse/YARN-4460) | [Bug fix] RM fails to start when SMP is enabled | Major | timelineserver | Li Lu | Li Lu | +| [YARN-4445](https://issues.apache.org/jira/browse/YARN-4445) | Unify the term flowId and flowName in timeline v2 codebase | Major | timelineserver | Li Lu | Zhan Zhang | | [YARN-1856](https://issues.apache.org/jira/browse/YARN-1856) | cgroups based memory monitoring for containers | Major | nodemanager | Karthik Kambatla | Varun Vasudev | +| [YARN-4350](https://issues.apache.org/jira/browse/YARN-4350) | TestDistributedShell fails for V2 scenarios | Major | timelineserver | Sangjin Lee | Naganarasimha G R | +| [YARN-3586](https://issues.apache.org/jira/browse/YARN-3586) | RM only get back addresses of Collectors that NM needs to know. 
| Critical | resourcemanager, timelineserver | Junping Du | Junping Du | | [YARN-2882](https://issues.apache.org/jira/browse/YARN-2882) | Add an OPPORTUNISTIC ExecutionType | Major | nodemanager | Konstantinos Karanasos | Konstantinos Karanasos | | [YARN-3480](https://issues.apache.org/jira/browse/YARN-3480) | Recovery may get very slow with lots of services with lots of app-attempts | Major | resourcemanager | Jun Gong | Jun Gong | | [HDFS-9498](https://issues.apache.org/jira/browse/HDFS-9498) | Move code that tracks blocks with future generation stamps to BlockManagerSafeMode | Major | namenode | Mingliang Liu | Mingliang Liu | | [YARN-4550](https://issues.apache.org/jira/browse/YARN-4550) | some tests in TestContainerLanch fails on non-english locale environment | Minor | nodemanager, test | Takashi Ohnishi | Takashi Ohnishi | | [YARN-4335](https://issues.apache.org/jira/browse/YARN-4335) | Allow ResourceRequests to specify ExecutionType of a request ask | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | +| [YARN-3995](https://issues.apache.org/jira/browse/YARN-3995) | Some of the NM events are not getting published due race condition when AM container finishes in NM | Major | nodemanager, timelineserver | Naganarasimha G R | Naganarasimha G R | +| [YARN-4200](https://issues.apache.org/jira/browse/YARN-4200) | Refactor reader classes in storage to nest under hbase specific package name | Minor | timelineserver | Vrushali C | Li Lu | | [YARN-4553](https://issues.apache.org/jira/browse/YARN-4553) | Add cgroups support for docker containers | Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-4526](https://issues.apache.org/jira/browse/YARN-4526) | Make SystemClock singleton so AppSchedulingInfo could use it | Major | scheduler | Karthik Kambatla | Karthik Kambatla | | [HDFS-9542](https://issues.apache.org/jira/browse/HDFS-9542) | Move BlockIdManager from FSNamesystem to BlockManager | Major | namenode | Jing Zhao | Jing Zhao | +| [YARN-4622](https://issues.apache.org/jira/browse/YARN-4622) | TestDistributedShell fails for v2 test cases after modifications for 1.5 | Major | timelineserver | Naganarasimha G R | Naganarasimha G R | | [YARN-4578](https://issues.apache.org/jira/browse/YARN-4578) | Directories that are mounted in docker containers need to be more restrictive/container-specific | Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-4574](https://issues.apache.org/jira/browse/YARN-4574) | TestAMRMClientOnRMRestart fails on trunk | Major | client, test | Takashi Ohnishi | Takashi Ohnishi | | [YARN-4613](https://issues.apache.org/jira/browse/YARN-4613) | TestClientRMService#testGetClusterNodes fails occasionally | Major | test | Jason Lowe | Takashi Ohnishi | | [HDFS-9094](https://issues.apache.org/jira/browse/HDFS-9094) | Add command line option to ask NameNode reload configuration. 
| Major | namenode | Xiaobing Zhou | Xiaobing Zhou | +| [YARN-4238](https://issues.apache.org/jira/browse/YARN-4238) | correctly set createdTime and remove modifiedTime when publishing entities | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-4224](https://issues.apache.org/jira/browse/YARN-4224) | Support fetching entities by UID and change the REST interface to conform to current REST APIs' in YARN | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-4644](https://issues.apache.org/jira/browse/YARN-4644) | TestRMRestart fails and findbugs issue in YARN-2928 branch | Major | timelineserver | Varun Saxena | Varun Saxena | | [YARN-4633](https://issues.apache.org/jira/browse/YARN-4633) | TestRMRestart.testRMRestartAfterPreemption fails intermittently in trunk | Major | test | Rohith Sharma K S | Bibin A Chundatt | -| [YARN-4615](https://issues.apache.org/jira/browse/YARN-4615) | TestAbstractYarnScheduler#testResourceRequestRecoveryToTheRightAppAttempt fails occasionally | Major | test | Jason Lowe | Sunil G | +| [YARN-4615](https://issues.apache.org/jira/browse/YARN-4615) | TestAbstractYarnScheduler#testResourceRequestRecoveryToTheRightAppAttempt fails occasionally | Major | test | Jason Lowe | Sunil Govindan | +| [YARN-4446](https://issues.apache.org/jira/browse/YARN-4446) | Refactor reader API for better extensibility | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-4409](https://issues.apache.org/jira/browse/YARN-4409) | Fix javadoc and checkstyle issues in timelineservice code | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-3367](https://issues.apache.org/jira/browse/YARN-3367) | Replace starting a separate thread for post entity with event loop in TimelineClient | Major | timelineserver | Junping Du | Naganarasimha G R | | [YARN-4684](https://issues.apache.org/jira/browse/YARN-4684) | TestYarnCLI#testGetContainers failing in CN locale | Major | yarn | Bibin A Chundatt | Bibin A Chundatt | -| [HDFS-9754](https://issues.apache.org/jira/browse/HDFS-9754) | Avoid unnecessary getBlockCollection calls in BlockManager | Major | namenode | Jing Zhao | Jing Zhao | | [HDFS-9691](https://issues.apache.org/jira/browse/HDFS-9691) | TestBlockManagerSafeMode#testCheckSafeMode fails intermittently | Major | test | Mingliang Liu | Mingliang Liu | | [HADOOP-12710](https://issues.apache.org/jira/browse/HADOOP-12710) | Remove dependency on commons-httpclient for TestHttpServerLogs | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-4700](https://issues.apache.org/jira/browse/YARN-4700) | ATS storage has one extra record each time the RM got restarted | Major | timelineserver | Li Lu | Naganarasimha G R | +| [MAPREDUCE-6546](https://issues.apache.org/jira/browse/MAPREDUCE-6546) | reconcile the two versions of the timeline service performance tests | Minor | . 
| Sangjin Lee | Sangjin Lee | | [HDFS-1477](https://issues.apache.org/jira/browse/HDFS-1477) | Support reconfiguring dfs.heartbeat.interval and dfs.namenode.heartbeat.recheck-interval without NN restart | Major | namenode | Patrick Kling | Xiaobing Zhou | +| [YARN-4062](https://issues.apache.org/jira/browse/YARN-4062) | Add the flush and compaction functionality via coprocessors and scanners for flow run table | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-4712](https://issues.apache.org/jira/browse/YARN-4712) | CPU Usage Metric is not captured properly in YARN-2928 | Major | timelineserver | Naganarasimha G R | Naganarasimha G R | | [HADOOP-12926](https://issues.apache.org/jira/browse/HADOOP-12926) | lz4.c does not detect 64-bit mode properly | Major | native | Alan Burlison | Alan Burlison | +| [YARN-4687](https://issues.apache.org/jira/browse/YARN-4687) | Document Reservation ACLs | Minor | . | Sean Po | Sean Po | | [YARN-4805](https://issues.apache.org/jira/browse/YARN-4805) | Don't go through all schedulers in ParameterizedTestBase | Major | . | Karthik Kambatla | Karthik Kambatla | +| [YARN-4711](https://issues.apache.org/jira/browse/YARN-4711) | NM is going down with NPE's due to single thread processing of events by Timeline client | Critical | timelineserver | Naganarasimha G R | Naganarasimha G R | | [HDFS-9349](https://issues.apache.org/jira/browse/HDFS-9349) | Support reconfiguring fs.protected.directories without NN restart | Major | namenode | Xiaobing Zhou | Xiaobing Zhou | | [YARN-4811](https://issues.apache.org/jira/browse/YARN-4811) | Generate histograms in ContainerMetrics for actual container resource usage | Major | . | Varun Vasudev | Varun Vasudev | +| [YARN-3461](https://issues.apache.org/jira/browse/YARN-3461) | Consolidate flow name/version/run defaults | Major | timelineserver | Zhijie Shen | Sangjin Lee | | [HADOOP-12753](https://issues.apache.org/jira/browse/HADOOP-12753) | S3A JUnit tests failing if using HTTP proxy | Minor | fs/s3 | Zoran Rajic | Zoran Rajic | +| [YARN-3863](https://issues.apache.org/jira/browse/YARN-3863) | Support complex filters in TimelineReader | Major | . | Varun Saxena | Varun Saxena | | [HDFS-10209](https://issues.apache.org/jira/browse/HDFS-10209) | Support enable caller context in HDFS namenode audit log without restart namenode | Major | . | Xiaoyu Yao | Xiaobing Zhou | | [HDFS-10286](https://issues.apache.org/jira/browse/HDFS-10286) | Fix TestDFSAdmin#testNameNodeGetReconfigurableProperties | Major | . | Xiaoyu Yao | Xiaobing Zhou | | [HDFS-10284](https://issues.apache.org/jira/browse/HDFS-10284) | o.a.h.hdfs.server.blockmanagement.TestBlockManagerSafeMode.testCheckSafeMode fails intermittently | Major | test | Mingliang Liu | Mingliang Liu | | [YARN-2883](https://issues.apache.org/jira/browse/YARN-2883) | Queuing of container requests in the NM | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | -| [YARN-4890](https://issues.apache.org/jira/browse/YARN-4890) | Unit test intermittent failure: TestNodeLabelContainerAllocation#testQueueUsedCapacitiesUpdate | Major | . | Wangda Tan | Sunil G | +| [YARN-4890](https://issues.apache.org/jira/browse/YARN-4890) | Unit test intermittent failure: TestNodeLabelContainerAllocation#testQueueUsedCapacitiesUpdate | Major | . | Wangda Tan | Sunil Govindan | | [HDFS-10207](https://issues.apache.org/jira/browse/HDFS-10207) | Support enable Hadoop IPC backoff without namenode restart | Major | . 
| Xiaoyu Yao | Xiaobing Zhou | | [YARN-4968](https://issues.apache.org/jira/browse/YARN-4968) | A couple of AM retry unit tests need to wait SchedulerApplicationAttempt stopped. | Major | . | Wangda Tan | Wangda Tan | +| [YARN-3816](https://issues.apache.org/jira/browse/YARN-3816) | [Aggregation] App-level aggregation and accumulation for YARN system metrics | Major | timelineserver | Junping Du | Li Lu | | [YARN-4846](https://issues.apache.org/jira/browse/YARN-4846) | Random failures for TestCapacitySchedulerPreemption#testPreemptionPolicyShouldRespectAlreadyMarkedKillableContainers | Major | . | Bibin A Chundatt | Bibin A Chundatt | | [YARN-2885](https://issues.apache.org/jira/browse/YARN-2885) | Create AMRMProxy request interceptor for distributed scheduling decisions for queueable containers | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Arun Suresh | +| [YARN-4991](https://issues.apache.org/jira/browse/YARN-4991) | Fix ContainerRequest Constructor to set nodelabelExpression correctly | Major | test | Bibin A Chundatt | Bibin A Chundatt | | [YARN-4966](https://issues.apache.org/jira/browse/YARN-4966) | Improve yarn logs to fetch container logs without specifying nodeId | Major | . | Xuan Gong | Xuan Gong | | [YARN-4807](https://issues.apache.org/jira/browse/YARN-4807) | MockAM#waitForState sleep duration is too long | Major | . | Karthik Kambatla | Yufei Gu | | [YARN-3998](https://issues.apache.org/jira/browse/YARN-3998) | Add support in the NodeManager to re-launch containers | Major | . | Jun Gong | Jun Gong | +| [YARN-4986](https://issues.apache.org/jira/browse/YARN-4986) | Add a check in the coprocessor for table to operated on | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-5014](https://issues.apache.org/jira/browse/YARN-5014) | Ensure non-metric values are returned as is for flow run table from the coprocessor | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-3150](https://issues.apache.org/jira/browse/YARN-3150) | [Documentation] Documenting the timeline service v2 | Major | . | Zhijie Shen | Sangjin Lee | +| [MAPREDUCE-6424](https://issues.apache.org/jira/browse/MAPREDUCE-6424) | Store MR counters as timeline metrics instead of event | Major | . | Junping Du | Naganarasimha G R | +| [YARN-4447](https://issues.apache.org/jira/browse/YARN-4447) | Provide a mechanism to represent complex filters and parse them at the REST layer | Major | timelineserver | Varun Saxena | Varun Saxena | +| [MAPREDUCE-6688](https://issues.apache.org/jira/browse/MAPREDUCE-6688) | Store job configurations in Timeline Service v2 | Major | applicationmaster | Junping Du | Varun Saxena | | [YARN-4920](https://issues.apache.org/jira/browse/YARN-4920) | ATS/NM should support a link to dowload/get the logs in text format | Major | yarn | Xuan Gong | Xuan Gong | | [YARN-4905](https://issues.apache.org/jira/browse/YARN-4905) | Improve "yarn logs" command-line to optionally show log metadata also | Major | . | Xuan Gong | Xuan Gong | | [YARN-4595](https://issues.apache.org/jira/browse/YARN-4595) | Add support for configurable read-only mounts when launching Docker containers | Major | yarn | Billie Rinaldi | Billie Rinaldi | +| [YARN-5045](https://issues.apache.org/jira/browse/YARN-5045) | hbase unit tests fail due to dependency issues | Blocker | timelineserver | Sangjin Lee | Sangjin Lee | | [YARN-4778](https://issues.apache.org/jira/browse/YARN-4778) | Support specifying resources for task containers in SLS | Major | . 
| Wangda Tan | Wangda Tan | | [YARN-4842](https://issues.apache.org/jira/browse/YARN-4842) | "yarn logs" command should not require the appOwner argument | Major | . | Ram Venkatesh | Xuan Gong | | [YARN-5073](https://issues.apache.org/jira/browse/YARN-5073) | Refactor startContainerInternal() in ContainerManager to remove unused parameter | Minor | nodemanager, resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | @@ -1186,37 +1489,60 @@ | [YARN-4738](https://issues.apache.org/jira/browse/YARN-4738) | Notify the RM about the status of OPPORTUNISTIC containers | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | | [YARN-5075](https://issues.apache.org/jira/browse/YARN-5075) | Fix findbugs warning in hadoop-yarn-common module | Major | . | Akira Ajisaka | Arun Suresh | | [YARN-4412](https://issues.apache.org/jira/browse/YARN-4412) | Create ClusterMonitor to compute ordered list of preferred NMs for OPPORTUNITIC containers | Major | nodemanager, resourcemanager | Arun Suresh | Arun Suresh | +| [YARN-5102](https://issues.apache.org/jira/browse/YARN-5102) | timeline service build fails with java 8 | Blocker | timelineserver | Sangjin Lee | Sangjin Lee | | [YARN-5090](https://issues.apache.org/jira/browse/YARN-5090) | Add End-to-End test-cases for DistributedScheduling using MiniYarnCluster | Major | . | Arun Suresh | Arun Suresh | +| [YARN-5096](https://issues.apache.org/jira/browse/YARN-5096) | timelinereader has a lot of logging that's not useful | Minor | timelineserver | Sangjin Lee | Sangjin Lee | | [YARN-4913](https://issues.apache.org/jira/browse/YARN-4913) | Yarn logs should take a -out option to write to a directory | Major | . | Xuan Gong | Xuan Gong | | [HDFS-2173](https://issues.apache.org/jira/browse/HDFS-2173) | saveNamespace should not throw IOE when only one storage directory fails to write VERSION file | Major | . | Todd Lipcon | Andras Bokor | | [YARN-5110](https://issues.apache.org/jira/browse/YARN-5110) | Fix OpportunisticContainerAllocator to insert complete HostAddress in issued ContainerTokenIds | Major | . | Arun Suresh | Konstantinos Karanasos | +| [YARN-5050](https://issues.apache.org/jira/browse/YARN-5050) | Code cleanup for TestDistributedShell | Major | timelineserver | Li Lu | Li Lu | | [YARN-5016](https://issues.apache.org/jira/browse/YARN-5016) | Add support for a minimum retry interval for container retries | Major | . | Varun Vasudev | Jun Gong | | [HDFS-7766](https://issues.apache.org/jira/browse/HDFS-7766) | Add a flag to WebHDFS op=CREATE to not respond with a 307 redirect | Major | ui, webhdfs | Ravi Prakash | Ravi Prakash | +| [YARN-5093](https://issues.apache.org/jira/browse/YARN-5093) | created time shows 0 in most REST output | Critical | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-5018](https://issues.apache.org/jira/browse/YARN-5018) | Online aggregation logic should not run immediately after collectors got started | Major | timelineserver | Li Lu | Li Lu | | [YARN-5115](https://issues.apache.org/jira/browse/YARN-5115) | Avoid setting CONTENT-DISPOSITION header in the container-logs web-service | Major | . 
| Xuan Gong | Xuan Gong | +| [YARN-5097](https://issues.apache.org/jira/browse/YARN-5097) | NPE in Separator.joinEncoded() | Critical | timelineserver | Sangjin Lee | Vrushali C | +| [YARN-5095](https://issues.apache.org/jira/browse/YARN-5095) | flow activities and flow runs are populated with wrong timestamp when RM restarts w/ recovery enabled | Critical | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-5138](https://issues.apache.org/jira/browse/YARN-5138) | fix "no findbugs output file" error for hadoop-yarn-server-timelineservice-hbase-tests | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-5109](https://issues.apache.org/jira/browse/YARN-5109) | timestamps are stored unencoded causing parse errors | Blocker | timelineserver | Sangjin Lee | Varun Saxena | | [YARN-5089](https://issues.apache.org/jira/browse/YARN-5089) | Improve "yarn log" command-line "logFiles" option to support regex | Major | . | Xuan Gong | Xuan Gong | | [YARN-5127](https://issues.apache.org/jira/browse/YARN-5127) | Expose ExecutionType in Container api record | Major | . | Arun Suresh | Hitesh Sharma | | [YARN-5117](https://issues.apache.org/jira/browse/YARN-5117) | QueuingContainerManager does not start GUARANTEED Container even if Resources are available | Major | . | Arun Suresh | Konstantinos Karanasos | +| [YARN-5111](https://issues.apache.org/jira/browse/YARN-5111) | YARN container system metrics are not aggregated to application | Critical | timelineserver | Sangjin Lee | Naganarasimha G R | | [YARN-4007](https://issues.apache.org/jira/browse/YARN-4007) | Add support for different network setups when launching the docker container | Major | nodemanager | Varun Vasudev | Sidharta Seethana | | [YARN-5141](https://issues.apache.org/jira/browse/YARN-5141) | Get Container logs for the Running application from Yarn Logs CommandLine | Major | . | Xuan Gong | Xuan Gong | +| [YARN-5105](https://issues.apache.org/jira/browse/YARN-5105) | entire time series is returned for YARN container system metrics (CPU and memory) | Major | timelineserver | Sangjin Lee | Varun Saxena | | [YARN-5088](https://issues.apache.org/jira/browse/YARN-5088) | Improve "yarn log" command-line to read the last K bytes for the log files | Major | . | Xuan Gong | Xuan Gong | | [YARN-5162](https://issues.apache.org/jira/browse/YARN-5162) | Fix Exceptions thrown during registerAM call when Distributed Scheduling is Enabled | Major | . 
| Arun Suresh | Hitesh Sharma | +| [YARN-5189](https://issues.apache.org/jira/browse/YARN-5189) | Make HBaseTimeline[Reader\|Writer]Impl default and move FileSystemTimeline\*Impl | Major | timelineserver | Joep Rottinghuis | Joep Rottinghuis | | [HDFS-9877](https://issues.apache.org/jira/browse/HDFS-9877) | HDFS Namenode UI: Fix browsing directories that need to be encoded | Major | ui | Ravi Prakash | Ravi Prakash | | [HDFS-7767](https://issues.apache.org/jira/browse/HDFS-7767) | Use the noredirect flag in WebHDFS to allow web browsers to upload files via the NN UI | Major | ui, webhdfs | Ravi Prakash | Ravi Prakash | +| [YARN-5142](https://issues.apache.org/jira/browse/YARN-5142) | fix findbugs warnings/errors for hadoop-yarn-server-timelineservice-hbase-tests | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-5167](https://issues.apache.org/jira/browse/YARN-5167) | Escaping occurences of encodedValues | Critical | timelineserver | Joep Rottinghuis | Sangjin Lee | | [YARN-5180](https://issues.apache.org/jira/browse/YARN-5180) | Allow ResourceRequest to specify an enforceExecutionType flag | Major | . | Arun Suresh | Arun Suresh | | [YARN-5176](https://issues.apache.org/jira/browse/YARN-5176) | More test cases for queuing of containers at the NM | Major | . | Konstantinos Karanasos | Konstantinos Karanasos | | [YARN-5204](https://issues.apache.org/jira/browse/YARN-5204) | Properly report status of killed/stopped queued containers | Major | . | Konstantinos Karanasos | Konstantinos Karanasos | +| [YARN-5210](https://issues.apache.org/jira/browse/YARN-5210) | NPE in Distributed Shell while publishing DS\_CONTAINER\_START event and other miscellaneous issues | Major | timelineserver | Varun Saxena | Varun Saxena | | [YARN-5191](https://issues.apache.org/jira/browse/YARN-5191) | Rename the “download=true” option for getLogs in NMWebServices and AHSWebServices | Major | . | Xuan Gong | Xuan Gong | +| [YARN-5170](https://issues.apache.org/jira/browse/YARN-5170) | Eliminate singleton converters and static method access | Major | timelineserver | Joep Rottinghuis | Joep Rottinghuis | | [HDFS-7987](https://issues.apache.org/jira/browse/HDFS-7987) | Allow files / directories to be moved | Major | ui | Ravi Prakash | Ravi Prakash | +| [YARN-5052](https://issues.apache.org/jira/browse/YARN-5052) | [Documentation] Update timeline service v2 documentation to capture information about filters | Major | timelineserver | Varun Saxena | Varun Saxena | | [YARN-5124](https://issues.apache.org/jira/browse/YARN-5124) | Modify AMRMClient to set the ExecutionType in the ResourceRequest | Major | . | Arun Suresh | Arun Suresh | | [YARN-5212](https://issues.apache.org/jira/browse/YARN-5212) | Run existing ContainerManager tests using QueuingContainerManagerImpl | Major | . | Konstantinos Karanasos | Konstantinos Karanasos | | [YARN-4887](https://issues.apache.org/jira/browse/YARN-4887) | AM-RM protocol changes for identifying resource-requests explicitly | Major | applications, resourcemanager | Subru Krishnan | Subru Krishnan | | [YARN-5223](https://issues.apache.org/jira/browse/YARN-5223) | Container line in yarn logs output for a live application should include the hostname for the container | Major | . | Siddharth Seth | Xuan Gong | | [YARN-5122](https://issues.apache.org/jira/browse/YARN-5122) | "yarn logs" for running containers should print an explicit footer saying that the log may be incomplete | Major | . 
| Vinod Kumar Vavilapalli | Jian He | +| [YARN-5243](https://issues.apache.org/jira/browse/YARN-5243) | fix several rebase and other miscellaneous issues before merge | Major | timelineserver | Sangjin Lee | Sangjin Lee | +| [YARN-5070](https://issues.apache.org/jira/browse/YARN-5070) | upgrade HBase version for first merge | Critical | timelineserver | Sangjin Lee | Vrushali C | | [YARN-5251](https://issues.apache.org/jira/browse/YARN-5251) | Yarn CLI to obtain App logs for last 'n' bytes fails with 'java.io.IOException' and for 'n' bytes fails with NumberFormatException | Blocker | . | Sumana Sathish | Xuan Gong | | [HDFS-10328](https://issues.apache.org/jira/browse/HDFS-10328) | Add per-cache-pool default replication num configuration | Minor | caching | xupeng | xupeng | | [YARN-5171](https://issues.apache.org/jira/browse/YARN-5171) | Extend DistributedSchedulerProtocol to notify RM of containers allocated by the Node | Major | . | Arun Suresh | Íñigo Goiri | +| [MAPREDUCE-6720](https://issues.apache.org/jira/browse/MAPREDUCE-6720) | Inconsistent values of counters across tasks and job reported to timeline service. | Major | applicationmaster | Varun Saxena | Varun Saxena | +| [YARN-5300](https://issues.apache.org/jira/browse/YARN-5300) | Exclude generated federation protobuf sources from YARN Javadoc/findbugs build | Minor | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-5174](https://issues.apache.org/jira/browse/YARN-5174) | [documentation] several updates/corrections to timeline service documentation | Major | timelineserver | Sangjin Lee | Sangjin Lee | | [YARN-5227](https://issues.apache.org/jira/browse/YARN-5227) | yarn logs command: no need to specify -applicationId when specifying containerId | Major | . | Jian He | Gergely Novák | | [YARN-5224](https://issues.apache.org/jira/browse/YARN-5224) | Logs for a completed container are not available in the yarn logs output for a live application | Major | . | Siddharth Seth | Xuan Gong | +| [YARN-5316](https://issues.apache.org/jira/browse/YARN-5316) | fix hadoop-aws pom not to do the exclusion | Major | timelineserver | Sangjin Lee | Sangjin Lee | | [YARN-5233](https://issues.apache.org/jira/browse/YARN-5233) | Support for specifying a path for ATS plugin jars | Major | timelineserver | Li Lu | Li Lu | | [YARN-5200](https://issues.apache.org/jira/browse/YARN-5200) | Improve yarn logs to get Container List | Major | . | Xuan Gong | Xuan Gong | | [YARN-5299](https://issues.apache.org/jira/browse/YARN-5299) | Log Docker run command when container fails | Major | yarn | Varun Vasudev | Varun Vasudev | @@ -1224,36 +1550,56 @@ | [YARN-5363](https://issues.apache.org/jira/browse/YARN-5363) | For AM containers, or for containers of running-apps, "yarn logs" incorrectly only (tries to) shows syslog file-type by default | Major | log-aggregation | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | | [YARN-5298](https://issues.apache.org/jira/browse/YARN-5298) | Mount usercache and NM filecache directories into Docker container | Major | yarn | Varun Vasudev | Sidharta Seethana | | [YARN-5361](https://issues.apache.org/jira/browse/YARN-5361) | Obtaining logs for completed container says 'file belongs to a running container ' at the end | Critical | . 
| Sumana Sathish | Xuan Gong | +| [YARN-5156](https://issues.apache.org/jira/browse/YARN-5156) | YARN\_CONTAINER\_FINISHED of YARN\_CONTAINERs will always have running state | Major | timelineserver | Li Lu | Vrushali C | | [YARN-5350](https://issues.apache.org/jira/browse/YARN-5350) | Distributed Scheduling: Ensure sort order of allocatable nodes returned by the RM is not lost | Major | . | Arun Suresh | Arun Suresh | | [YARN-5392](https://issues.apache.org/jira/browse/YARN-5392) | Replace use of Priority in the Scheduling infrastructure with an opaque ShedulerRequestKey | Major | . | Arun Suresh | Arun Suresh | | [YARN-5351](https://issues.apache.org/jira/browse/YARN-5351) | ResourceRequest should take ExecutionType into account during comparison | Major | . | Konstantinos Karanasos | Konstantinos Karanasos | +| [YARN-3662](https://issues.apache.org/jira/browse/YARN-3662) | Federation Membership State Store internal APIs | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-5229](https://issues.apache.org/jira/browse/YARN-5229) | Refactor #isApplicationEntity and #getApplicationEvent from HBaseTimelineWriterImpl | Minor | timelineserver | Joep Rottinghuis | Vrushali C | | [YARN-5113](https://issues.apache.org/jira/browse/YARN-5113) | Refactoring and other clean-up for distributed scheduling | Major | . | Arun Suresh | Konstantinos Karanasos | | [YARN-5458](https://issues.apache.org/jira/browse/YARN-5458) | Rename DockerStopCommandTest to TestDockerStopCommand | Trivial | . | Shane Kumpf | Shane Kumpf | | [YARN-5443](https://issues.apache.org/jira/browse/YARN-5443) | Add support for docker inspect command | Major | yarn | Shane Kumpf | Shane Kumpf | | [YARN-5226](https://issues.apache.org/jira/browse/YARN-5226) | remove AHS enable check from LogsCLI#fetchAMContainerLogs | Major | . | Xuan Gong | Xuan Gong | | [YARN-5459](https://issues.apache.org/jira/browse/YARN-5459) | Add support for docker rm | Minor | yarn | Shane Kumpf | Shane Kumpf | +| [YARN-5406](https://issues.apache.org/jira/browse/YARN-5406) | In-memory based implementation of the FederationMembershipStateStore | Major | nodemanager, resourcemanager | Subru Krishnan | Ellen Hui | +| [YARN-5390](https://issues.apache.org/jira/browse/YARN-5390) | Federation Subcluster Resolver | Major | nodemanager, resourcemanager | Carlo Curino | Ellen Hui | | [YARN-5429](https://issues.apache.org/jira/browse/YARN-5429) | Fix @return related javadoc warnings in yarn-api | Major | . | Vrushali C | Vrushali C | | [YARN-4888](https://issues.apache.org/jira/browse/YARN-4888) | Changes in scheduler to identify resource-requests explicitly by allocation-id | Major | resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-5307](https://issues.apache.org/jira/browse/YARN-5307) | Federation Application State Store internal APIs | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-3664](https://issues.apache.org/jira/browse/YARN-3664) | Federation PolicyStore internal APIs | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | | [YARN-5470](https://issues.apache.org/jira/browse/YARN-5470) | Differentiate exactly match with regex in yarn log CLI | Major | . 
| Xuan Gong | Xuan Gong | +| [YARN-5408](https://issues.apache.org/jira/browse/YARN-5408) | Compose Federation membership/application/policy APIs into an uber FederationStateStore API | Major | nodemanager, resourcemanager | Subru Krishnan | Ellen Hui | | [YARN-5394](https://issues.apache.org/jira/browse/YARN-5394) | Remove bind-mount /etc/passwd for Docker containers | Major | yarn | Zhankun Tang | Zhankun Tang | +| [YARN-5407](https://issues.apache.org/jira/browse/YARN-5407) | In-memory based implementation of the FederationApplicationStateStore, FederationPolicyStateStore | Major | nodemanager, resourcemanager | Subru Krishnan | Ellen Hui | | [YARN-5137](https://issues.apache.org/jira/browse/YARN-5137) | Make DiskChecker pluggable in NodeManager | Major | nodemanager | Ray Chiang | Yufei Gu | +| [YARN-5519](https://issues.apache.org/jira/browse/YARN-5519) | Add SubClusterId in AddApplicationHomeSubClusterResponse for Router Failover | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Ellen Hui | | [YARN-5514](https://issues.apache.org/jira/browse/YARN-5514) | Clarify DecommissionType.FORCEFUL comment | Minor | documentation | Robert Kanter | Vrushali C | +| [YARN-3672](https://issues.apache.org/jira/browse/YARN-3672) | Create Facade for Federation State and Policy Store | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-5467](https://issues.apache.org/jira/browse/YARN-5467) | InputValidator for the FederationStateStore internal APIs | Major | nodemanager, resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | | [YARN-4676](https://issues.apache.org/jira/browse/YARN-4676) | Automatic and Asynchronous Decommissioning Nodes Status Tracking | Major | resourcemanager | Daniel Zhi | Daniel Zhi | +| [YARN-3673](https://issues.apache.org/jira/browse/YARN-3673) | Create a FailoverProxy for Federation services | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-5457](https://issues.apache.org/jira/browse/YARN-5457) | Refactor DistributedScheduling framework to pull out common functionality | Major | resourcemanager | Arun Suresh | Arun Suresh | | [YARN-5042](https://issues.apache.org/jira/browse/YARN-5042) | Mount /sys/fs/cgroup into Docker containers as read only mount | Major | yarn | Varun Vasudev | luhuichun | | [YARN-5564](https://issues.apache.org/jira/browse/YARN-5564) | Fix typo in RM\_SCHEDULER\_RESERVATION\_THRESHOLD\_INCREMENT\_MULTIPLE | Trivial | fairscheduler | Ray Chiang | Ray Chiang | | [YARN-5557](https://issues.apache.org/jira/browse/YARN-5557) | Add localize API to the ContainerManagementProtocol | Major | . | Jian He | Jian He | | [YARN-5327](https://issues.apache.org/jira/browse/YARN-5327) | API changes required to support recurring reservations in the YARN ReservationSystem | Major | resourcemanager | Subru Krishnan | Sangeetha Abdu Jyothi | | [YARN-4889](https://issues.apache.org/jira/browse/YARN-4889) | Changes in AMRMClient for identifying resource-requests explicitly | Major | resourcemanager | Subru Krishnan | Arun Suresh | +| [YARN-3671](https://issues.apache.org/jira/browse/YARN-3671) | Integrate Federation services with ResourceManager | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | | [HDFS-9392](https://issues.apache.org/jira/browse/HDFS-9392) | Admins support for maintenance state | Major | . 
| Ming Ma | Ming Ma | | [HDFS-10813](https://issues.apache.org/jira/browse/HDFS-10813) | DiskBalancer: Add the getNodeList method in Command | Minor | balancer & mover | Yiqun Lin | Yiqun Lin | +| [YARN-5221](https://issues.apache.org/jira/browse/YARN-5221) | Expose UpdateResourceRequest API to allow AM to request for change in container properties | Major | . | Arun Suresh | Arun Suresh | | [YARN-5596](https://issues.apache.org/jira/browse/YARN-5596) | Fix failing unit test in TestDockerContainerRuntime | Minor | nodemanager, yarn | Sidharta Seethana | Sidharta Seethana | +| [YARN-5612](https://issues.apache.org/jira/browse/YARN-5612) | Return SubClusterId in FederationStateStoreFacade#addApplicationHomeSubCluster for Router Failover | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-5601](https://issues.apache.org/jira/browse/YARN-5601) | Make the RM epoch base value configurable | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | | [YARN-5264](https://issues.apache.org/jira/browse/YARN-5264) | Store all queue-specific information in FSQueue | Major | fairscheduler | Yufei Gu | Yufei Gu | | [YARN-5576](https://issues.apache.org/jira/browse/YARN-5576) | Allow resource localization while container is running | Major | . | Jian He | Jian He | +| [YARN-5323](https://issues.apache.org/jira/browse/YARN-5323) | Policies APIs (for Router and AMRMProxy policies) | Major | nodemanager, resourcemanager | Carlo Curino | Carlo Curino | | [YARN-5620](https://issues.apache.org/jira/browse/YARN-5620) | Core changes in NodeManager to support re-initialization of Containers with new launchContext | Major | . | Arun Suresh | Arun Suresh | -| [YARN-4091](https://issues.apache.org/jira/browse/YARN-4091) | Add REST API to retrieve scheduler activity | Major | capacity scheduler, resourcemanager | Sunil G | Chen Ge | +| [YARN-4091](https://issues.apache.org/jira/browse/YARN-4091) | Add REST API to retrieve scheduler activity | Major | capacity scheduler, resourcemanager | Sunil Govindan | Chen Ge | | [YARN-5637](https://issues.apache.org/jira/browse/YARN-5637) | Changes in NodeManager to support Container rollback and commit | Major | . | Arun Suresh | Arun Suresh | | [YARN-3140](https://issues.apache.org/jira/browse/YARN-3140) | Improve locks in AbstractCSQueue/LeafQueue/ParentQueue | Major | resourcemanager, scheduler | Wangda Tan | Wangda Tan | | [YARN-3141](https://issues.apache.org/jira/browse/YARN-3141) | Improve locks in SchedulerApplicationAttempt/FSAppAttempt/FiCaSchedulerApp | Major | resourcemanager, scheduler | Wangda Tan | Wangda Tan | +| [YARN-5324](https://issues.apache.org/jira/browse/YARN-5324) | Stateless Federation router policies implementation | Major | nodemanager, resourcemanager | Carlo Curino | Carlo Curino | | [YARN-5609](https://issues.apache.org/jira/browse/YARN-5609) | Expose upgrade and restart API in ContainerManagementProtocol | Major | . 
| Arun Suresh | Arun Suresh | | [HDFS-9895](https://issues.apache.org/jira/browse/HDFS-9895) | Remove unnecessary conf cache from DataNode | Major | datanode | Xiaobing Zhou | Xiaobing Zhou | | [YARN-3142](https://issues.apache.org/jira/browse/YARN-3142) | Improve locks in AppSchedulingInfo | Major | resourcemanager, scheduler | Wangda Tan | Varun Saxena | @@ -1264,19 +1610,36 @@ | [YARN-3139](https://issues.apache.org/jira/browse/YARN-3139) | Improve locks in AbstractYarnScheduler/CapacityScheduler/FairScheduler | Major | resourcemanager, scheduler | Wangda Tan | Wangda Tan | | [HDFS-10893](https://issues.apache.org/jira/browse/HDFS-10893) | Refactor TestDFSShell by setting up MiniDFSCluser once for all commands test | Major | test | Mingliang Liu | Mingliang Liu | | [HADOOP-13627](https://issues.apache.org/jira/browse/HADOOP-13627) | Have an explicit KerberosAuthException for UGI to throw, text from public constants | Major | security | Steve Loughran | Xiao Chen | +| [YARN-5325](https://issues.apache.org/jira/browse/YARN-5325) | Stateless ARMRMProxy policies implementation | Major | nodemanager, resourcemanager | Carlo Curino | Carlo Curino | +| [YARN-5638](https://issues.apache.org/jira/browse/YARN-5638) | Introduce a collector timestamp to uniquely identify collectors creation order in collector discovery | Major | timelineserver | Li Lu | Li Lu | +| [YARN-5699](https://issues.apache.org/jira/browse/YARN-5699) | Retrospect yarn entity fields which are publishing in events info fields. | Major | . | Rohith Sharma K S | Rohith Sharma K S | | [HDFS-9390](https://issues.apache.org/jira/browse/HDFS-9390) | Block management for maintenance states | Major | . | Ming Ma | Ming Ma | +| [YARN-5561](https://issues.apache.org/jira/browse/YARN-5561) | [Atsv2] : Support for ability to retrieve apps/app-attempt/containers and entities via REST | Major | timelinereader | Rohith Sharma K S | Rohith Sharma K S | | [YARN-4911](https://issues.apache.org/jira/browse/YARN-4911) | Bad placement policy in FairScheduler causes the RM to crash | Major | fairscheduler | Ray Chiang | Ray Chiang | | [YARN-5047](https://issues.apache.org/jira/browse/YARN-5047) | Refactor nodeUpdate across schedulers | Major | capacityscheduler, fairscheduler, scheduler | Ray Chiang | Ray Chiang | +| [YARN-3649](https://issues.apache.org/jira/browse/YARN-3649) | Allow configurable prefix for hbase table names (like prod, exp, test etc) | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-5715](https://issues.apache.org/jira/browse/YARN-5715) | introduce entity prefix for return and sort order | Critical | timelineserver | Sangjin Lee | Rohith Sharma K S | +| [YARN-4097](https://issues.apache.org/jira/browse/YARN-4097) | Create POC timeline web UI with new YARN web UI framework | Major | timelineserver | Li Lu | Li Lu | | [YARN-5799](https://issues.apache.org/jira/browse/YARN-5799) | Fix Opportunistic Allocation to set the correct value of Node Http Address | Major | resourcemanager | Arun Suresh | Arun Suresh | +| [YARN-4765](https://issues.apache.org/jira/browse/YARN-4765) | Split TestHBaseTimelineStorage into multiple test classes | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-5793](https://issues.apache.org/jira/browse/YARN-5793) | Trim configuration values in DockerLinuxContainerRuntime | Minor | nodemanager | Tianyin Xu | Tianyin Xu | +| [YARN-5391](https://issues.apache.org/jira/browse/YARN-5391) | PolicyManager to tie together Router/AMRM Federation policies | Major | nodemanager, resourcemanager 
| Carlo Curino | Carlo Curino | +| [YARN-5265](https://issues.apache.org/jira/browse/YARN-5265) | Make HBase configuration for the timeline service configurable | Major | timelineserver | Joep Rottinghuis | Joep Rottinghuis | | [YARN-2995](https://issues.apache.org/jira/browse/YARN-2995) | Enhance UI to show cluster resource utilization of various container Execution types | Blocker | resourcemanager | Sriram Rao | Konstantinos Karanasos | +| [YARN-3359](https://issues.apache.org/jira/browse/YARN-3359) | Recover collector list when RM fails over | Major | resourcemanager | Junping Du | Li Lu | | [YARN-5716](https://issues.apache.org/jira/browse/YARN-5716) | Add global scheduler interface definition and update CapacityScheduler to use it. | Major | capacityscheduler | Wangda Tan | Wangda Tan | | [YARN-5833](https://issues.apache.org/jira/browse/YARN-5833) | Add validation to ensure default ports are unique in Configuration | Major | yarn | Konstantinos Karanasos | Konstantinos Karanasos | | [YARN-4329](https://issues.apache.org/jira/browse/YARN-4329) | Allow fetching exact reason as to why a submitted app is in ACCEPTED state in Fair Scheduler | Major | fairscheduler, resourcemanager | Naganarasimha G R | Yufei Gu | | [YARN-5611](https://issues.apache.org/jira/browse/YARN-5611) | Provide an API to update lifetime of an application. | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | | [HDFS-11119](https://issues.apache.org/jira/browse/HDFS-11119) | Support for parallel checking of StorageLocations on DataNode startup | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [HDFS-11114](https://issues.apache.org/jira/browse/HDFS-11114) | Support for running async disk checks in DataNode | Major | datanode | Arpit Agarwal | Arpit Agarwal | +| [YARN-5634](https://issues.apache.org/jira/browse/YARN-5634) | Simplify initialization/use of RouterPolicy via a RouterPolicyFacade | Major | nodemanager, resourcemanager | Carlo Curino | Carlo Curino | +| [YARN-5792](https://issues.apache.org/jira/browse/YARN-5792) | adopt the id prefix for YARN, MR, and DS entities | Major | timelineserver | Sangjin Lee | Varun Saxena | | [HDFS-11148](https://issues.apache.org/jira/browse/HDFS-11148) | Update DataNode to use StorageLocationChecker at startup | Major | datanode | Arpit Agarwal | Arpit Agarwal | -| [YARN-5865](https://issues.apache.org/jira/browse/YARN-5865) | Retrospect updateApplicationPriority api to handle state store exception in align with YARN-5611 | Major | . | Sunil G | Sunil G | +| [YARN-5676](https://issues.apache.org/jira/browse/YARN-5676) | Add a HashBasedRouterPolicy, and small policies and test refactoring. | Major | nodemanager, resourcemanager | Carlo Curino | Carlo Curino | +| [YARN-5872](https://issues.apache.org/jira/browse/YARN-5872) | Add AlwayReject policies for router and amrmproxy. | Major | nodemanager, resourcemanager | Carlo Curino | Carlo Curino | +| [YARN-5905](https://issues.apache.org/jira/browse/YARN-5905) | Update the RM webapp host that is reported as part of Federation membership to current primary RM's IP | Minor | federation, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-5865](https://issues.apache.org/jira/browse/YARN-5865) | Retrospect updateApplicationPriority api to handle state store exception in align with YARN-5611 | Major | . 
| Sunil Govindan | Sunil Govindan | | [YARN-5649](https://issues.apache.org/jira/browse/YARN-5649) | Add REST endpoints for updating application timeouts | Major | scheduler | Rohith Sharma K S | Rohith Sharma K S | | [YARN-4206](https://issues.apache.org/jira/browse/YARN-4206) | Add Application timeouts in Application report and CLI | Major | scheduler | nijel | Rohith Sharma K S | | [HDFS-11149](https://issues.apache.org/jira/browse/HDFS-11149) | Support for parallel checking of FsVolumes | Major | datanode | Arpit Agarwal | Arpit Agarwal | @@ -1284,11 +1647,15 @@ | [YARN-5761](https://issues.apache.org/jira/browse/YARN-5761) | Separate QueueManager from Scheduler | Major | capacityscheduler | Xuan Gong | Xuan Gong | | [YARN-5746](https://issues.apache.org/jira/browse/YARN-5746) | The state of the parentQueue and its childQueues should be synchronized. | Major | capacity scheduler, resourcemanager | Xuan Gong | Xuan Gong | | [YARN-5965](https://issues.apache.org/jira/browse/YARN-5965) | Retrospect ApplicationReport#getApplicationTimeouts | Major | scheduler | Jian He | Rohith Sharma K S | +| [YARN-5739](https://issues.apache.org/jira/browse/YARN-5739) | Provide timeline reader API to list available timeline entity types for one application | Major | timelinereader | Li Lu | Li Lu | +| [MAPREDUCE-6818](https://issues.apache.org/jira/browse/MAPREDUCE-6818) | Remove direct reference to TimelineClientImpl | Major | . | Li Lu | Li Lu | | [YARN-5982](https://issues.apache.org/jira/browse/YARN-5982) | Simplify opportunistic container parameters and metrics | Major | . | Konstantinos Karanasos | Konstantinos Karanasos | | [YARN-5970](https://issues.apache.org/jira/browse/YARN-5970) | Validate application update timeout request parameters | Major | scheduler | Rohith Sharma K S | Rohith Sharma K S | | [YARN-5524](https://issues.apache.org/jira/browse/YARN-5524) | Yarn live log aggregation does not throw if command line arg is wrong | Major | log-aggregation | Prasanth Jayachandran | Xuan Gong | | [YARN-5650](https://issues.apache.org/jira/browse/YARN-5650) | Render Application Timeout value in web UI | Major | scheduler | Rohith Sharma K S | Akhil PB | | [HDFS-11182](https://issues.apache.org/jira/browse/HDFS-11182) | Update DataNode to use DatasetVolumeChecker | Major | datanode | Arpit Agarwal | Arpit Agarwal | +| [YARN-5976](https://issues.apache.org/jira/browse/YARN-5976) | Update hbase version to 1.2 (removes phoenix dependencies) | Major | . | Vrushali C | Vrushali C | +| [YARN-5216](https://issues.apache.org/jira/browse/YARN-5216) | Expose configurable preemption policy for OPPORTUNISTIC containers running on the NM | Major | distributed-scheduling | Arun Suresh | Hitesh Sharma | | [YARN-5938](https://issues.apache.org/jira/browse/YARN-5938) | Refactoring OpportunisticContainerAllocator to use SchedulerRequestKey instead of Priority and other misc fixes | Major | . | Arun Suresh | Arun Suresh | | [YARN-5756](https://issues.apache.org/jira/browse/YARN-5756) | Add state-machine implementation for scheduler queues | Major | . | Xuan Gong | Xuan Gong | | [YARN-5931](https://issues.apache.org/jira/browse/YARN-5931) | Document timeout interfaces CLI and REST APIs | Major | scheduler | Rohith Sharma K S | Rohith Sharma K S | @@ -1297,13 +1664,19 @@ | [YARN-5923](https://issues.apache.org/jira/browse/YARN-5923) | Unable to access logs for a running application if YARN\_ACL\_ENABLE is enabled | Major | . 
| Xuan Gong | Xuan Gong | | [YARN-5906](https://issues.apache.org/jira/browse/YARN-5906) | Update AppSchedulingInfo to use SchedulingPlacementSet | Major | . | Wangda Tan | Wangda Tan | | [YARN-6009](https://issues.apache.org/jira/browse/YARN-6009) | RM fails to start during an upgrade - Failed to load/recover state (YarnException: Invalid application timeout, value=0 for type=LIFETIME) | Critical | resourcemanager | Gour Saha | Rohith Sharma K S | -| [YARN-3955](https://issues.apache.org/jira/browse/YARN-3955) | Support for application priority ACLs in queues of CapacityScheduler | Major | capacityscheduler | Sunil G | Sunil G | +| [YARN-5585](https://issues.apache.org/jira/browse/YARN-5585) | [Atsv2] Reader side changes for entity prefix and support for pagination via additional filters | Critical | timelinereader | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-6074](https://issues.apache.org/jira/browse/YARN-6074) | FlowRunEntity does not deserialize long values correctly | Major | timelinereader | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-3955](https://issues.apache.org/jira/browse/YARN-3955) | Support for application priority ACLs in queues of CapacityScheduler | Major | capacityscheduler | Sunil Govindan | Sunil Govindan | | [HDFS-9391](https://issues.apache.org/jira/browse/HDFS-9391) | Update webUI/JMX to display maintenance state info | Major | . | Ming Ma | Manoj Govindassamy | | [YARN-5416](https://issues.apache.org/jira/browse/YARN-5416) | TestRMRestart#testRMRestartWaitForPreviousAMToFinish failed intermittently due to not wait SchedulerApplicationAttempt to be stopped | Minor | test, yarn | Junping Du | Junping Du | +| [YARN-5980](https://issues.apache.org/jira/browse/YARN-5980) | Update documentation for single node hbase deploy | Major | timelineserver | Vrushali C | Vrushali C | | [YARN-6011](https://issues.apache.org/jira/browse/YARN-6011) | Add a new web service to list the files on a container in AHSWebService | Major | . | Xuan Gong | Xuan Gong | +| [YARN-5378](https://issues.apache.org/jira/browse/YARN-5378) | Accommodate app-id-\>cluster mapping | Major | timelineserver | Joep Rottinghuis | Sangjin Lee | | [YARN-6016](https://issues.apache.org/jira/browse/YARN-6016) | Fix minor bugs in handling of local AMRMToken in AMRMProxy | Minor | federation | Botong Huang | Botong Huang | | [YARN-5556](https://issues.apache.org/jira/browse/YARN-5556) | CapacityScheduler: Support deleting queues without requiring a RM restart | Major | capacity scheduler | Xuan Gong | Naganarasimha G R | +| [YARN-6064](https://issues.apache.org/jira/browse/YARN-6064) | Support fromId for flowRuns and flow/flowRun apps REST API's | Major | timelinereader | Rohith Sharma K S | Rohith Sharma K S | | [HDFS-11259](https://issues.apache.org/jira/browse/HDFS-11259) | Update fsck to display maintenance state info | Major | datanode, namenode | Manoj Govindassamy | Manoj Govindassamy | +| [YARN-6094](https://issues.apache.org/jira/browse/YARN-6094) | Update the coprocessor to be a dynamically loaded one | Major | timelineserver | Vrushali C | Vrushali C | | [YARN-5831](https://issues.apache.org/jira/browse/YARN-5831) | Propagate allowPreemptionFrom flag all the way down to the app | Major | fairscheduler | Karthik Kambatla | Yufei Gu | | [HDFS-11296](https://issues.apache.org/jira/browse/HDFS-11296) | Maintenance state expiry should be an epoch time and not jvm monotonic | Major | . 
| Manoj Govindassamy | Manoj Govindassamy | | [YARN-6099](https://issues.apache.org/jira/browse/YARN-6099) | Improve webservice to list aggregated log files | Major | . | Xuan Gong | Xuan Gong | @@ -1311,11 +1684,13 @@ | [YARN-3637](https://issues.apache.org/jira/browse/YARN-3637) | Handle localization sym-linking correctly at the YARN level | Major | . | Chris Trezzo | Chris Trezzo | | [YARN-6126](https://issues.apache.org/jira/browse/YARN-6126) | Obtaining app logs for Running application fails with "Unable to parse json from webservice. Error:" | Major | . | Sumana Sathish | Xuan Gong | | [YARN-6100](https://issues.apache.org/jira/browse/YARN-6100) | improve YARN webservice to output aggregated container logs | Major | . | Xuan Gong | Xuan Gong | +| [YARN-5951](https://issues.apache.org/jira/browse/YARN-5951) | Changes to allow CapacityScheduler to use configuration store | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-6108](https://issues.apache.org/jira/browse/YARN-6108) | Improve AHS webservice to accept NM address as a parameter to get container logs | Major | . | Xuan Gong | Xuan Gong | | [YARN-5258](https://issues.apache.org/jira/browse/YARN-5258) | Document Use of Docker with LinuxContainerExecutor | Critical | documentation | Daniel Templeton | Daniel Templeton | | [HADOOP-14032](https://issues.apache.org/jira/browse/HADOOP-14032) | Reduce fair call queue priority inversion | Major | ipc | Daryn Sharp | Daryn Sharp | | [HADOOP-14034](https://issues.apache.org/jira/browse/HADOOP-14034) | Allow ipc layer exceptions to selectively close connections | Major | ipc | Daryn Sharp | Daryn Sharp | | [HADOOP-14033](https://issues.apache.org/jira/browse/HADOOP-14033) | Reduce fair call queue lock contention | Major | ipc | Daryn Sharp | Daryn Sharp | +| [YARN-6170](https://issues.apache.org/jira/browse/YARN-6170) | TimelineReaderServer should wait to join with HttpServer2 | Minor | timelinereader | Sangjin Lee | Sangjin Lee | | [HADOOP-13075](https://issues.apache.org/jira/browse/HADOOP-13075) | Add support for SSE-KMS and SSE-C in s3a filesystem | Major | fs/s3 | Andrew Olson | Steve Moist | | [YARN-6113](https://issues.apache.org/jira/browse/YARN-6113) | re-direct NM Web Service to get container logs for finished applications | Major | . | Xuan Gong | Xuan Gong | | [YARN-5966](https://issues.apache.org/jira/browse/YARN-5966) | AMRMClient changes to support ExecutionType update | Major | . 
| Arun Suresh | Arun Suresh | @@ -1323,26 +1698,40 @@ | [HDFS-11265](https://issues.apache.org/jira/browse/HDFS-11265) | Extend visualization for Maintenance Mode under Datanode tab in the NameNode UI | Major | datanode, namenode | Manoj Govindassamy | Elek, Marton | | [YARN-6163](https://issues.apache.org/jira/browse/YARN-6163) | FS Preemption is a trickle for severely starved applications | Major | fairscheduler | Karthik Kambatla | Karthik Kambatla | | [YARN-5798](https://issues.apache.org/jira/browse/YARN-5798) | Set UncaughtExceptionHandler for all FairScheduler threads | Major | fairscheduler | Karthik Kambatla | Yufei Gu | +| [YARN-4675](https://issues.apache.org/jira/browse/YARN-4675) | Reorganize TimelineClient and TimelineClientImpl into separate classes for ATSv1.x and ATSv2 | Major | timelineserver | Naganarasimha G R | Naganarasimha G R | | [HADOOP-14040](https://issues.apache.org/jira/browse/HADOOP-14040) | Use shaded aws-sdk uber-JAR 1.11.86 | Major | build, fs/s3 | Steve Loughran | Steve Loughran | | [YARN-6193](https://issues.apache.org/jira/browse/YARN-6193) | FairScheduler might not trigger preemption when using DRF | Major | fairscheduler | Karthik Kambatla | Karthik Kambatla | +| [YARN-6159](https://issues.apache.org/jira/browse/YARN-6159) | Documentation changes for TimelineV2Client | Minor | documentation | Varun Saxena | Naganarasimha G R | | [HDFS-11430](https://issues.apache.org/jira/browse/HDFS-11430) | Separate class InnerNode from class NetworkTopology and make it extendable | Major | namenode | Chen Liang | Tsz Wo Nicholas Sze | | [HADOOP-14099](https://issues.apache.org/jira/browse/HADOOP-14099) | Split S3 testing documentation out into its own file | Minor | documentation, fs/s3 | Steve Loughran | Steve Loughran | | [HDFS-11411](https://issues.apache.org/jira/browse/HDFS-11411) | Avoid OutOfMemoryError in TestMaintenanceState test runs | Major | datanode, namenode | Manoj Govindassamy | Manoj Govindassamy | | [HADOOP-14102](https://issues.apache.org/jira/browse/HADOOP-14102) | Relax error message assertion in S3A test ITestS3AEncryptionSSEC | Minor | fs/s3 | Mingliang Liu | Mingliang Liu | +| [YARN-6069](https://issues.apache.org/jira/browse/YARN-6069) | CORS support in timeline v2 | Major | timelinereader | Sreenath Somarajapuram | Rohith Sharma K S | | [YARN-5959](https://issues.apache.org/jira/browse/YARN-5959) | RM changes to support change of container ExecutionType | Major | . | Arun Suresh | Arun Suresh | | [YARN-6228](https://issues.apache.org/jira/browse/YARN-6228) | EntityGroupFSTimelineStore should allow configurable cache stores. | Major | timelineserver | Li Lu | Li Lu | -| [YARN-6215](https://issues.apache.org/jira/browse/YARN-6215) | FairScheduler preemption and update should not run concurrently | Major | fairscheduler, test | Sunil G | Tao Jie | +| [YARN-5410](https://issues.apache.org/jira/browse/YARN-5410) | Bootstrap Router server module | Major | nodemanager, resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | +| [YARN-5946](https://issues.apache.org/jira/browse/YARN-5946) | Create YarnConfigurationStore interface and InMemoryConfigurationStore class | Major | . 
| Jonathan Hung | Jonathan Hung | +| [YARN-6215](https://issues.apache.org/jira/browse/YARN-6215) | FairScheduler preemption and update should not run concurrently | Major | fairscheduler, test | Sunil Govindan | Tao Jie | | [YARN-6123](https://issues.apache.org/jira/browse/YARN-6123) | [YARN-5864] Add a test to make sure queues of orderingPolicy will be updated when childQueues is added or removed. | Major | . | Wangda Tan | Wangda Tan | +| [YARN-6253](https://issues.apache.org/jira/browse/YARN-6253) | FlowAcitivityColumnPrefix.store(byte[] rowKey, ...) drops timestamp | Major | . | Haibo Chen | Haibo Chen | +| [YARN-6190](https://issues.apache.org/jira/browse/YARN-6190) | Validation and synchronization fixes in LocalityMulticastAMRMProxyPolicy | Minor | federation | Botong Huang | Botong Huang | +| [YARN-6027](https://issues.apache.org/jira/browse/YARN-6027) | Support fromid(offset) filter for /flows API | Major | timelineserver | Rohith Sharma K S | Rohith Sharma K S | | [HDFS-11450](https://issues.apache.org/jira/browse/HDFS-11450) | HDFS specific network topology classes with storage type info included | Major | namenode | Chen Liang | Chen Liang | | [HDFS-11412](https://issues.apache.org/jira/browse/HDFS-11412) | Maintenance minimum replication config value allowable range should be [0, DefaultReplication] | Major | datanode, namenode | Manoj Govindassamy | Manoj Govindassamy | +| [YARN-6247](https://issues.apache.org/jira/browse/YARN-6247) | Share a single instance of SubClusterResolver instead of instantiating one per AM | Minor | . | Botong Huang | Botong Huang | +| [YARN-6256](https://issues.apache.org/jira/browse/YARN-6256) | Add FROM\_ID info key for timeline entities in reader response. | Major | timelineserver | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-6237](https://issues.apache.org/jira/browse/YARN-6237) | Move UID constant to TimelineReaderUtils | Major | timelinereader | Rohith Sharma K S | Rohith Sharma K S | | [YARN-5669](https://issues.apache.org/jira/browse/YARN-5669) | Add support for Docker pull | Major | yarn | Zhankun Tang | luhuichun | +| [YARN-5948](https://issues.apache.org/jira/browse/YARN-5948) | Implement MutableConfigurationManager for handling storage into configuration store | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-1047](https://issues.apache.org/jira/browse/YARN-1047) | Expose # of pre-emptions as a queue counter | Major | fairscheduler | Philip Zeyliger | Karthik Kambatla | | [YARN-6281](https://issues.apache.org/jira/browse/YARN-6281) | Cleanup when AMRMProxy fails to initialize a new interceptor chain | Minor | . | Botong Huang | Botong Huang | +| [YARN-6318](https://issues.apache.org/jira/browse/YARN-6318) | timeline service schema creator fails if executed from a remote machine | Minor | timelineserver | Sangjin Lee | Sangjin Lee | | [HDFS-11482](https://issues.apache.org/jira/browse/HDFS-11482) | Add storage type demand to into DFSNetworkTopology#chooseRandom | Major | namenode | Chen Liang | Chen Liang | | [YARN-6314](https://issues.apache.org/jira/browse/YARN-6314) | Potential infinite redirection on YARN log redirection web service | Major | . | Xuan Gong | Xuan Gong | | [YARN-6313](https://issues.apache.org/jira/browse/YARN-6313) | yarn logs cli should provide logs for a completed container even when application is still running | Major | . 
| Siddharth Seth | Xuan Gong | | [HDFS-11514](https://issues.apache.org/jira/browse/HDFS-11514) | DFSTopologyNodeImpl#chooseRandom optimizations | Major | namenode | Chen Liang | Chen Liang | | [YARN-6367](https://issues.apache.org/jira/browse/YARN-6367) | YARN logs CLI needs alway check containerLogsInfo/containerLogInfo before parse the JSON object from NMWebService | Major | . | Siddharth Seth | Xuan Gong | +| [YARN-6146](https://issues.apache.org/jira/browse/YARN-6146) | Add Builder methods for TimelineEntityFilters | Major | timelineserver | Rohith Sharma K S | Haibo Chen | | [HADOOP-14120](https://issues.apache.org/jira/browse/HADOOP-14120) | needless S3AFileSystem.setOptionalPutRequestParameters in S3ABlockOutputStream putObject() | Minor | fs/s3 | Steve Loughran | Yuanbo Liu | | [HADOOP-14135](https://issues.apache.org/jira/browse/HADOOP-14135) | Remove URI parameter in AWSCredentialProvider constructors | Major | fs/s3 | Mingliang Liu | Mingliang Liu | | [HADOOP-14196](https://issues.apache.org/jira/browse/HADOOP-14196) | Azure Data Lake doc is missing required config entry | Major | fs/adl | Atul Sikaria | Atul Sikaria | @@ -1350,18 +1739,30 @@ | [HADOOP-14230](https://issues.apache.org/jira/browse/HADOOP-14230) | TestAdlFileSystemContractLive fails to clean up | Minor | fs/adl, test | John Zhuge | John Zhuge | | [HADOOP-14038](https://issues.apache.org/jira/browse/HADOOP-14038) | Rename ADLS credential properties | Minor | fs/adl | John Zhuge | John Zhuge | | [HDFS-11577](https://issues.apache.org/jira/browse/HDFS-11577) | Combine the old and the new chooseRandom for better performance | Major | namenode | Chen Liang | Chen Liang | +| [YARN-6357](https://issues.apache.org/jira/browse/YARN-6357) | Implement putEntitiesAsync API in TimelineCollector | Major | ATSv2, timelineserver | Joep Rottinghuis | Haibo Chen | +| [YARN-6342](https://issues.apache.org/jira/browse/YARN-6342) | Make TimelineV2Client's drain timeout after stop configurable | Major | . | Jian He | Haibo Chen | +| [YARN-6376](https://issues.apache.org/jira/browse/YARN-6376) | Exceptions caused by synchronous putEntities requests can be swallowed | Critical | ATSv2 | Haibo Chen | Haibo Chen | +| [YARN-6414](https://issues.apache.org/jira/browse/YARN-6414) | ATSv2 HBase related tests fail due to guava version upgrade | Major | timelineserver | Sonia Garudi | Haibo Chen | +| [YARN-6377](https://issues.apache.org/jira/browse/YARN-6377) | NMTimelinePublisher#serviceStop does not stop timeline clients | Major | yarn | Haibo Chen | Haibo Chen | +| [YARN-5952](https://issues.apache.org/jira/browse/YARN-5952) | Create REST API for changing YARN scheduler configurations | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-6109](https://issues.apache.org/jira/browse/YARN-6109) | Add an ability to convert ChildQueue to ParentQueue | Major | capacity scheduler | Xuan Gong | Xuan Gong | +| [YARN-6424](https://issues.apache.org/jira/browse/YARN-6424) | TimelineCollector is not stopped when an app finishes in RM | Critical | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-5602](https://issues.apache.org/jira/browse/YARN-5602) | Utils for Federation State and Policy Store | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-10882](https://issues.apache.org/jira/browse/HDFS-10882) | Federation State Store Interface API | Major | fs | Jason Kace | Jason Kace | | [HADOOP-14290](https://issues.apache.org/jira/browse/HADOOP-14290) | Update SLF4J from 1.7.10 to 1.7.25 | Major | . 
| Akira Ajisaka | Akira Ajisaka | | [YARN-6372](https://issues.apache.org/jira/browse/YARN-6372) | Add default value for NM disk validator | Major | nodemanager | Yufei Gu | Yufei Gu | | [HADOOP-14301](https://issues.apache.org/jira/browse/HADOOP-14301) | Deprecate SharedInstanceProfileCredentialsProvider in branch-2. | Major | fs/s3 | Mingliang Liu | Mingliang Liu | | [HADOOP-14255](https://issues.apache.org/jira/browse/HADOOP-14255) | S3A to delete unnecessary fake directory objects in mkdirs() | Major | fs/s3 | Mingliang Liu | Mingliang Liu | | [YARN-6040](https://issues.apache.org/jira/browse/YARN-6040) | Introduce api independent PendingAsk to replace usage of ResourceRequest within Scheduler classes | Major | . | Wangda Tan | Wangda Tan | +| [YARN-6203](https://issues.apache.org/jira/browse/YARN-6203) | Occasional test failure in TestWeightedRandomRouterPolicy | Minor | federation | Botong Huang | Carlo Curino | | [YARN-6432](https://issues.apache.org/jira/browse/YARN-6432) | FairScheduler: Reserve preempted resources for corresponding applications | Major | . | Miklos Szegedi | Miklos Szegedi | | [HADOOP-14321](https://issues.apache.org/jira/browse/HADOOP-14321) | Explicitly exclude S3A root dir ITests from parallel runs | Minor | fs/s3, test | Steve Loughran | Steve Loughran | | [HADOOP-14241](https://issues.apache.org/jira/browse/HADOOP-14241) | Add ADLS sensitive config keys to default list | Minor | fs, fs/adl, security | John Zhuge | John Zhuge | | [HADOOP-14324](https://issues.apache.org/jira/browse/HADOOP-14324) | Refine S3 server-side-encryption key as encryption secret; improve error reporting and diagnostics | Blocker | fs/s3 | Steve Loughran | Steve Loughran | | [HADOOP-14305](https://issues.apache.org/jira/browse/HADOOP-14305) | S3A SSE tests won't run in parallel: Bad request in directory GetFileStatus | Minor | fs/s3, test | Steve Loughran | Steve Moist | +| [YARN-3663](https://issues.apache.org/jira/browse/YARN-3663) | Federation State and Policy Store (DBMS implementation) | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [HADOOP-14349](https://issues.apache.org/jira/browse/HADOOP-14349) | Rename ADLS CONTRACT\_ENABLE\_KEY | Minor | fs/adl | Mingliang Liu | Mingliang Liu | +| [YARN-6455](https://issues.apache.org/jira/browse/YARN-6455) | Enhance the timelinewriter.flush() race condition fix | Major | yarn | Haibo Chen | Haibo Chen | | [HDFS-7964](https://issues.apache.org/jira/browse/HDFS-7964) | Add support for async edit logging | Major | namenode | Daryn Sharp | Daryn Sharp | | [YARN-4359](https://issues.apache.org/jira/browse/YARN-4359) | Update LowCost agents logic to take advantage of YARN-4358 | Major | capacityscheduler, fairscheduler, resourcemanager | Carlo Curino | Ishai Menache | | [YARN-6542](https://issues.apache.org/jira/browse/YARN-6542) | Fix the logger in TestAlignedPlanner and TestGreedyReservationAgent | Major | reservation system | Subru Krishnan | Subru Krishnan | @@ -1369,32 +1770,57 @@ | [HDFS-9005](https://issues.apache.org/jira/browse/HDFS-9005) | Provide configuration support for upgrade domain | Major | . | Ming Ma | Ming Ma | | [HDFS-9016](https://issues.apache.org/jira/browse/HDFS-9016) | Display upgrade domain information in fsck | Major | . | Ming Ma | Ming Ma | | [HDFS-9922](https://issues.apache.org/jira/browse/HDFS-9922) | Upgrade Domain placement policy status marks a good block in violation when there are decommissioned nodes | Minor | . 
| Chris Trezzo | Chris Trezzo | +| [HDFS-10630](https://issues.apache.org/jira/browse/HDFS-10630) | Federation State Store FS Implementation | Major | hdfs | Íñigo Goiri | Jason Kace | +| [YARN-5411](https://issues.apache.org/jira/browse/YARN-5411) | Create a proxy chain for ApplicationClientProtocol in the Router | Major | nodemanager, resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | | [YARN-6374](https://issues.apache.org/jira/browse/YARN-6374) | Improve test coverage and add utility classes for common Docker operations | Major | nodemanager, yarn | Shane Kumpf | Shane Kumpf | +| [YARN-6375](https://issues.apache.org/jira/browse/YARN-6375) | App level aggregation should not consider metric values reported in the previous aggregation cycle | Major | timelineserver | Varun Saxena | Varun Saxena | | [HDFS-11530](https://issues.apache.org/jira/browse/HDFS-11530) | Use HDFS specific network topology to choose datanode in BlockPlacementPolicyDefault | Major | namenode | Yiqun Lin | Yiqun Lin | | [YARN-6565](https://issues.apache.org/jira/browse/YARN-6565) | Fix memory leak and finish app trigger in AMRMProxy | Critical | . | Botong Huang | Botong Huang | | [YARN-6234](https://issues.apache.org/jira/browse/YARN-6234) | Support multiple attempts on the node when AMRMProxy is enabled | Major | amrmproxy, federation, nodemanager | Subru Krishnan | Giovanni Matteo Fumarola | | [HADOOP-14384](https://issues.apache.org/jira/browse/HADOOP-14384) | Reduce the visibility of FileSystem#newFSDataOutputStreamBuilder before the API becomes stable | Blocker | fs | Lei (Eddy) Xu | Lei (Eddy) Xu | +| [YARN-6563](https://issues.apache.org/jira/browse/YARN-6563) | ConcurrentModificationException in TimelineCollectorManager while stopping RM | Major | resourcemanager | Rohith Sharma K S | Haibo Chen | +| [YARN-5413](https://issues.apache.org/jira/browse/YARN-5413) | Create a proxy chain for ResourceManager Admin API in the Router | Major | nodemanager, resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | +| [YARN-6435](https://issues.apache.org/jira/browse/YARN-6435) | [ATSv2] Can't retrieve more than 1000 versions of metrics in time series | Critical | timelineserver | Rohith Sharma K S | Vrushali C | +| [YARN-6561](https://issues.apache.org/jira/browse/YARN-6561) | Update exception message during timeline collector aux service initialization | Minor | timelineserver | Vrushali C | Vrushali C | | [YARN-6306](https://issues.apache.org/jira/browse/YARN-6306) | NMClient API change for container upgrade | Major | . | Jian He | Arun Suresh | | [HADOOP-11572](https://issues.apache.org/jira/browse/HADOOP-11572) | s3a delete() operation fails during a concurrent delete of child entries | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-5949](https://issues.apache.org/jira/browse/YARN-5949) | Add pluggable configuration ACL policy interface and implementation | Major | . 
| Jonathan Hung | Jonathan Hung | | [YARN-6587](https://issues.apache.org/jira/browse/YARN-6587) | Refactor of ResourceManager#startWebApp in a Util class | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-6555](https://issues.apache.org/jira/browse/YARN-6555) | Store application flow context in NM state store for work-preserving restart | Major | timelineserver | Vrushali C | Rohith Sharma K S | +| [YARN-5531](https://issues.apache.org/jira/browse/YARN-5531) | UnmanagedAM pool manager for federating application across clusters | Major | nodemanager, resourcemanager | Subru Krishnan | Botong Huang | | [HDFS-11446](https://issues.apache.org/jira/browse/HDFS-11446) | TestMaintenanceState#testWithNNAndDNRestart fails intermittently | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-6666](https://issues.apache.org/jira/browse/YARN-6666) | Fix unit test failure in TestRouterClientRMService | Minor | . | Botong Huang | Botong Huang | +| [YARN-6484](https://issues.apache.org/jira/browse/YARN-6484) | [Documentation] Documenting the YARN Federation feature | Major | nodemanager, resourcemanager | Subru Krishnan | Carlo Curino | +| [YARN-6658](https://issues.apache.org/jira/browse/YARN-6658) | Remove columnFor() methods of Columns in HBaseTimeline backend | Major | . | Haibo Chen | Haibo Chen | +| [YARN-3666](https://issues.apache.org/jira/browse/YARN-3666) | Federation Intercepting and propagating AM- home RM communications | Major | nodemanager, resourcemanager | Kishore Chaliparambil | Botong Huang | | [YARN-6246](https://issues.apache.org/jira/browse/YARN-6246) | Identifying starved apps does not need the scheduler writelock | Major | fairscheduler | Karthik Kambatla | Karthik Kambatla | | [HDFS-11904](https://issues.apache.org/jira/browse/HDFS-11904) | Reuse iip in unprotectedRemoveXAttrs calls | Major | . | Xiao Chen | Xiao Chen | | [HDFS-11359](https://issues.apache.org/jira/browse/HDFS-11359) | DFSAdmin report command supports displaying maintenance state datanodes | Major | datanode, namenode | Yiqun Lin | Yiqun Lin | +| [YARN-6316](https://issues.apache.org/jira/browse/YARN-6316) | Provide help information and documentation for TimelineSchemaCreator | Major | timelineserver | Li Lu | Haibo Chen | +| [YARN-6575](https://issues.apache.org/jira/browse/YARN-6575) | Support global configuration mutation in MutableConfProvider | Major | . | Jonathan Hung | Jonathan Hung | | [HADOOP-14035](https://issues.apache.org/jira/browse/HADOOP-14035) | Reduce fair call queue backoff's impact on clients | Major | ipc | Daryn Sharp | Daryn Sharp | +| [YARN-6604](https://issues.apache.org/jira/browse/YARN-6604) | Allow metric TTL for Application table to be specified through cmd | Major | ATSv2 | Haibo Chen | Haibo Chen | +| [YARN-5647](https://issues.apache.org/jira/browse/YARN-5647) | [ATSv2 Security] Collector side changes for loading auth filters and principals | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-6511](https://issues.apache.org/jira/browse/YARN-6511) | Federation: transparently spanning application across multiple sub-clusters | Major | . 
| Botong Huang | Botong Huang | | [YARN-6679](https://issues.apache.org/jira/browse/YARN-6679) | Reduce Resource instance overhead via non-PBImpl | Major | resourcemanager | Daryn Sharp | Daryn Sharp | | [HADOOP-13174](https://issues.apache.org/jira/browse/HADOOP-13174) | Add more debug logs for delegation tokens and authentication | Minor | security | Xiao Chen | Xiao Chen | | [HADOOP-13854](https://issues.apache.org/jira/browse/HADOOP-13854) | KMS should log error details in KMSExceptionsProvider | Major | kms | Xiao Chen | Xiao Chen | | [YARN-6682](https://issues.apache.org/jira/browse/YARN-6682) | Improve performance of AssignmentInformation datastructures | Major | . | Daryn Sharp | Daryn Sharp | +| [YARN-6638](https://issues.apache.org/jira/browse/YARN-6638) | [ATSv2 Security] Timeline reader side changes for loading auth filters and principals | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-6707](https://issues.apache.org/jira/browse/YARN-6707) | [ATSv2] Update HBase version to 1.2.6 | Major | timelineserver | Varun Saxena | Vrushali C | | [HADOOP-14394](https://issues.apache.org/jira/browse/HADOOP-14394) | Provide Builder pattern for DistributedFileSystem.create | Major | fs | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HADOOP-14289](https://issues.apache.org/jira/browse/HADOOP-14289) | Move log4j APIs over to slf4j in hadoop-common | Major | . | Akira Ajisaka | Akira Ajisaka | | [HADOOP-14395](https://issues.apache.org/jira/browse/HADOOP-14395) | Provide Builder pattern for DistributedFileSystem.append | Major | fs | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HADOOP-14538](https://issues.apache.org/jira/browse/HADOOP-14538) | Fix TestFilterFileSystem and TestHarFileSystem failures after DistributedFileSystem.append API | Major | fs | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-6680](https://issues.apache.org/jira/browse/YARN-6680) | Avoid locking overhead for NO\_LABEL lookups | Major | resourcemanager | Daryn Sharp | Daryn Sharp | | [HADOOP-14296](https://issues.apache.org/jira/browse/HADOOP-14296) | Move logging APIs over to slf4j in hadoop-tools | Major | . | Akira Ajisaka | Akira Ajisaka | +| [YARN-6724](https://issues.apache.org/jira/browse/YARN-6724) | Add ability to blacklist sub-clusters when invoking Routing policies | Major | router | Subru Krishnan | Giovanni Matteo Fumarola | +| [HDFS-11998](https://issues.apache.org/jira/browse/HDFS-11998) | Enable DFSNetworkTopology as default | Major | namenode | Chen Liang | Chen Liang | | [HADOOP-14542](https://issues.apache.org/jira/browse/HADOOP-14542) | Add IOUtils.cleanupWithLogger that accepts slf4j logger API | Major | . | Akira Ajisaka | Chen Liang | | [HADOOP-14547](https://issues.apache.org/jira/browse/HADOOP-14547) | [WASB] the configured retry policy is not used for all storage operations. | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [YARN-5648](https://issues.apache.org/jira/browse/YARN-5648) | [ATSv2 Security] Client side changes for authentication | Major | timelineserver | Varun Saxena | Varun Saxena | | [HADOOP-14549](https://issues.apache.org/jira/browse/HADOOP-14549) | Use GenericTestUtils.setLogLevel when available in hadoop-tools | Major | . 
| Akira Ajisaka | Wenxin He | +| [YARN-3659](https://issues.apache.org/jira/browse/YARN-3659) | Federation: routing client invocations transparently to multiple RMs | Major | client, resourcemanager, router | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [HADOOP-14573](https://issues.apache.org/jira/browse/HADOOP-14573) | regression: Azure tests which capture logs failing with move to SLF4J | Major | fs/azure, test | Steve Loughran | Steve Loughran | | [HADOOP-14546](https://issues.apache.org/jira/browse/HADOOP-14546) | Azure: Concurrent I/O does not work when secure.mode is enabled | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | | [HADOOP-14190](https://issues.apache.org/jira/browse/HADOOP-14190) | add more on s3 regions to the s3a documentation | Minor | documentation, fs/s3 | Steve Loughran | Steve Loughran | @@ -1406,35 +1832,164 @@ | [HADOOP-14617](https://issues.apache.org/jira/browse/HADOOP-14617) | Add ReflectionUtils.logThreadInfo that accept slf4j logger API | Major | . | Wenxin He | Wenxin He | | [HADOOP-14571](https://issues.apache.org/jira/browse/HADOOP-14571) | Deprecate public APIs relate to log4j1 | Major | . | Akira Ajisaka | Wenxin He | | [HADOOP-14587](https://issues.apache.org/jira/browse/HADOOP-14587) | Use GenericTestUtils.setLogLevel when available in hadoop-common | Major | . | Wenxin He | Wenxin He | +| [YARN-5953](https://issues.apache.org/jira/browse/YARN-5953) | Create CLI for changing YARN configurations | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-6776](https://issues.apache.org/jira/browse/YARN-6776) | Refactor ApplicaitonMasterService to move actual processing logic to a separate class | Minor | . | Arun Suresh | Arun Suresh | | [HADOOP-14638](https://issues.apache.org/jira/browse/HADOOP-14638) | Replace commons-logging APIs with slf4j in StreamPumper | Major | . | Wenxin He | Wenxin He | -| [YARN-2113](https://issues.apache.org/jira/browse/YARN-2113) | Add cross-user preemption within CapacityScheduler's leaf-queue | Major | capacity scheduler | Vinod Kumar Vavilapalli | Sunil G | +| [YARN-6801](https://issues.apache.org/jira/browse/YARN-6801) | NPE in RM while setting collectors map in NodeHeartbeatResponse | Major | timelineserver | Vrushali C | Vrushali C | +| [YARN-6807](https://issues.apache.org/jira/browse/YARN-6807) | Adding required missing configs to Federation configuration guide based on e2e testing | Major | documentation, federation | Subru Krishnan | Tanuj Nayak | +| [YARN-6815](https://issues.apache.org/jira/browse/YARN-6815) | [Bug] FederationStateStoreFacade return behavior should be consistent irrespective of whether caching is enabled or not | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-6821](https://issues.apache.org/jira/browse/YARN-6821) | Move FederationStateStore SQL DDL files from test resource to sbin | Major | nodemanager, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-2113](https://issues.apache.org/jira/browse/YARN-2113) | Add cross-user preemption within CapacityScheduler's leaf-queue | Major | capacity scheduler | Vinod Kumar Vavilapalli | Sunil Govindan | +| [YARN-5292](https://issues.apache.org/jira/browse/YARN-5292) | NM Container lifecycle and state transitions to support for PAUSED container state. | Major | . 
| Hitesh Sharma | Hitesh Sharma | | [HADOOP-14642](https://issues.apache.org/jira/browse/HADOOP-14642) | wasb: add support for caching Authorization and SASKeys | Major | fs/azure | Sivaguru Sankaridurg | Sivaguru Sankaridurg | | [YARN-6777](https://issues.apache.org/jira/browse/YARN-6777) | Support for ApplicationMasterService processing chain of interceptors | Major | . | Arun Suresh | Arun Suresh | | [YARN-6775](https://issues.apache.org/jira/browse/YARN-6775) | CapacityScheduler: Improvements to assignContainers, avoid unnecessary canAssignToUser/Queue calls | Major | capacityscheduler | Nathan Roberts | Nathan Roberts | -| [HADOOP-14539](https://issues.apache.org/jira/browse/HADOOP-14539) | Move commons logging APIs over to slf4j in hadoop-common | Major | . | Akira Ajisaka | Wenxin He | +| [YARN-4455](https://issues.apache.org/jira/browse/YARN-4455) | Support fetching metrics by time range | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-6850](https://issues.apache.org/jira/browse/YARN-6850) | Ensure that supplemented timestamp is stored only for flow run metrics | Major | timelineserver | Vrushali C | Varun Saxena | +| [YARN-6733](https://issues.apache.org/jira/browse/YARN-6733) | Add table for storing sub-application entities | Major | timelineserver | Vrushali C | Vrushali C | | [HADOOP-14518](https://issues.apache.org/jira/browse/HADOOP-14518) | Customize User-Agent header sent in HTTP/HTTPS requests by WASB. | Minor | fs/azure | Georgi Chalakov | Georgi Chalakov | | [YARN-6804](https://issues.apache.org/jira/browse/YARN-6804) | Allow custom hostname for docker containers in native services | Major | yarn-native-services | Billie Rinaldi | Billie Rinaldi | +| [YARN-6866](https://issues.apache.org/jira/browse/YARN-6866) | Minor clean-up and fixes in anticipation of YARN-2915 merge with trunk | Major | federation | Subru Krishnan | Botong Huang | +| [YARN-5412](https://issues.apache.org/jira/browse/YARN-5412) | Create a proxy chain for ResourceManager REST API in the Router | Major | nodemanager, resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | +| [YARN-6888](https://issues.apache.org/jira/browse/YARN-6888) | Refactor AppLevelTimelineCollector such that RM does not have aggregator threads created | Major | timelineserver | Vrushali C | Vrushali C | | [HADOOP-14678](https://issues.apache.org/jira/browse/HADOOP-14678) | AdlFilesystem#initialize swallows exception when getting user name | Minor | fs/adl | John Zhuge | John Zhuge | +| [YARN-6734](https://issues.apache.org/jira/browse/YARN-6734) | Ensure sub-application user is extracted & sent to timeline service | Major | timelineserver | Vrushali C | Rohith Sharma K S | +| [HDFS-12223](https://issues.apache.org/jira/browse/HDFS-12223) | Rebasing HDFS-10467 | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [YARN-6902](https://issues.apache.org/jira/browse/YARN-6902) | Update Microsoft JDBC Driver for SQL Server version in License.txt | Minor | federation | Botong Huang | Botong Huang | +| [HDFS-10687](https://issues.apache.org/jira/browse/HDFS-10687) | Federation Membership State Store internal API | Major | hdfs | Íñigo Goiri | Jason Kace | +| [YARN-5947](https://issues.apache.org/jira/browse/YARN-5947) | Create LeveldbConfigurationStore class using Leveldb as backing store | Major | . 
| Jonathan Hung | Jonathan Hung | | [HADOOP-14397](https://issues.apache.org/jira/browse/HADOOP-14397) | Pull up the builder pattern to FileSystem and add AbstractContractCreateTest for it | Major | common, fs, hdfs-client | Lei (Eddy) Xu | Lei (Eddy) Xu | +| [YARN-6322](https://issues.apache.org/jira/browse/YARN-6322) | Disable queue refresh when configuration mutation is enabled | Major | . | Jonathan Hung | Jonathan Hung | +| [HDFS-11826](https://issues.apache.org/jira/browse/HDFS-11826) | Federation Namenode Heartbeat | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [YARN-6853](https://issues.apache.org/jira/browse/YARN-6853) | Add MySql Scripts for FederationStateStore | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [HDFS-9388](https://issues.apache.org/jira/browse/HDFS-9388) | Refactor decommission related code to support maintenance state for datanodes | Major | . | Ming Ma | Manoj Govindassamy | | [YARN-5977](https://issues.apache.org/jira/browse/YARN-5977) | ContainerManagementProtocol changes to support change of container ExecutionType | Major | . | Arun Suresh | kartheek muthyala | | [HADOOP-14126](https://issues.apache.org/jira/browse/HADOOP-14126) | remove jackson, joda and other transient aws SDK dependencies from hadoop-aws | Minor | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-10880](https://issues.apache.org/jira/browse/HDFS-10880) | Federation Mount Table State Store internal API | Major | fs | Jason Kace | Íñigo Goiri | | [YARN-6957](https://issues.apache.org/jira/browse/YARN-6957) | Moving logging APIs over to slf4j in hadoop-yarn-server-sharedcachemanager | Major | . | Yeliang Cang | Yeliang Cang | +| [YARN-6955](https://issues.apache.org/jira/browse/YARN-6955) | Handle concurrent register AM requests in FederationInterceptor | Minor | . | Botong Huang | Botong Huang | | [YARN-6873](https://issues.apache.org/jira/browse/YARN-6873) | Moving logging APIs over to slf4j in hadoop-yarn-server-applicationhistoryservice | Major | . | Yeliang Cang | Yeliang Cang | | [YARN-6897](https://issues.apache.org/jira/browse/YARN-6897) | Refactoring RMWebServices by moving some util methods to RMWebAppUtil | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-10646](https://issues.apache.org/jira/browse/HDFS-10646) | Federation admin tool | Major | fs | Íñigo Goiri | Íñigo Goiri | | [HADOOP-14715](https://issues.apache.org/jira/browse/HADOOP-14715) | TestWasbRemoteCallHelper failing | Major | fs/azure, test | Steve Loughran | Esfandiar Manii | +| [YARN-6970](https://issues.apache.org/jira/browse/YARN-6970) | Add PoolInitializationException as retriable exception in FederationFacade | Major | federation | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-6874](https://issues.apache.org/jira/browse/YARN-6874) | Supplement timestamp for min start/max end time columns in flow run table to avoid overwrite | Major | timelineserver | Varun Saxena | Vrushali C | | [YARN-6958](https://issues.apache.org/jira/browse/YARN-6958) | Moving logging APIs over to slf4j in hadoop-yarn-server-timelineservice | Major | . 
| Yeliang Cang | Yeliang Cang | | [HADOOP-14183](https://issues.apache.org/jira/browse/HADOOP-14183) | Remove service loader config file for wasb fs | Minor | fs/azure | John Zhuge | Esfandiar Manii | +| [YARN-6130](https://issues.apache.org/jira/browse/YARN-6130) | [ATSv2 Security] Generate a delegation token for AM when app collector is created and pass it to AM via NM and RM | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-6133](https://issues.apache.org/jira/browse/YARN-6133) | [ATSv2 Security] Renew delegation token for app automatically if an app collector is active | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-6820](https://issues.apache.org/jira/browse/YARN-6820) | Restrict read access to timelineservice v2 data | Major | timelinereader | Vrushali C | Vrushali C | +| [YARN-6896](https://issues.apache.org/jira/browse/YARN-6896) | Federation: routing REST invocations transparently to multiple RMs (part 1 - basic execution) | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [YARN-6687](https://issues.apache.org/jira/browse/YARN-6687) | Validate that the duration of the periodic reservation is less than the periodicity | Major | reservation system | Subru Krishnan | Subru Krishnan | +| [YARN-6905](https://issues.apache.org/jira/browse/YARN-6905) | Multiple HBaseTimelineStorage test failures due to missing FastNumberFormat | Major | timelineserver | Sonia Garudi | Haibo Chen | | [YARN-5978](https://issues.apache.org/jira/browse/YARN-5978) | ContainerScheduler and ContainerManager changes to support ExecType update | Major | . | Arun Suresh | kartheek muthyala | | [YARN-6741](https://issues.apache.org/jira/browse/YARN-6741) | Deleting all children of a Parent Queue on refresh throws exception | Major | capacity scheduler | Naganarasimha G R | Naganarasimha G R | | [HADOOP-14660](https://issues.apache.org/jira/browse/HADOOP-14660) | wasb: improve throughput by 34% when account limit exceeded | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [YARN-7006](https://issues.apache.org/jira/browse/YARN-7006) | [ATSv2 Security] Changes for authentication for CollectorNodemanagerProtocol | Major | timelineserver | Varun Saxena | Varun Saxena | +| [YARN-6900](https://issues.apache.org/jira/browse/YARN-6900) | ZooKeeper based implementation of the FederationStateStore | Major | federation, nodemanager, resourcemanager | Subru Krishnan | Íñigo Goiri | +| [HDFS-11554](https://issues.apache.org/jira/browse/HDFS-11554) | [Documentation] Router-based federation documentation | Minor | fs | Íñigo Goiri | Íñigo Goiri | +| [HDFS-12312](https://issues.apache.org/jira/browse/HDFS-12312) | Rebasing HDFS-10467 (2) | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-7028](https://issues.apache.org/jira/browse/YARN-7028) | Backport HADOOP-14386 to YARN-5355 branch | Major | timelineserver | Vrushali C | Vrushali C | | [YARN-6988](https://issues.apache.org/jira/browse/YARN-6988) | container-executor fails for docker when command length \> 4096 B | Major | yarn | Eric Badger | Eric Badger | +| [YARN-7038](https://issues.apache.org/jira/browse/YARN-7038) | [Atsv2 Security] CollectorNodemanagerProtocol RPC interface doesn't work when service authorization is enabled | Major | . 
| Rohith Sharma K S | Varun Saxena | | [HADOOP-14769](https://issues.apache.org/jira/browse/HADOOP-14769) | WASB: delete recursive should not fail if a file is deleted | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [YARN-7041](https://issues.apache.org/jira/browse/YARN-7041) | Nodemanager NPE running jobs with security off | Major | timelineserver | Aaron Gresch | Varun Saxena | +| [YARN-6134](https://issues.apache.org/jira/browse/YARN-6134) | [ATSv2 Security] Regenerate delegation token for app just before token expires if app collector is active | Major | timelineserver | Varun Saxena | Varun Saxena | | [YARN-6979](https://issues.apache.org/jira/browse/YARN-6979) | Add flag to notify all types of container updates to NM via NodeHeartbeatResponse | Major | . | Arun Suresh | kartheek muthyala | +| [YARN-6861](https://issues.apache.org/jira/browse/YARN-6861) | Reader API for sub application entities | Major | timelinereader | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-6323](https://issues.apache.org/jira/browse/YARN-6323) | Rolling upgrade/config change is broken on timeline v2. | Major | timelineserver | Li Lu | Vrushali C | +| [HDFS-10631](https://issues.apache.org/jira/browse/HDFS-10631) | Federation State Store ZooKeeper implementation | Major | fs | Íñigo Goiri | Jason Kace | +| [YARN-6047](https://issues.apache.org/jira/browse/YARN-6047) | Documentation updates for TimelineService v2 | Major | documentation, timelineserver | Varun Saxena | Rohith Sharma K S | +| [MAPREDUCE-6838](https://issues.apache.org/jira/browse/MAPREDUCE-6838) | [ATSv2 Security] Add timeline delegation token received in allocate response to UGI | Major | . | Varun Saxena | Varun Saxena | +| [YARN-3053](https://issues.apache.org/jira/browse/YARN-3053) | [Security] Review and implement authentication in ATS v.2 | Major | timelineserver | Sangjin Lee | Varun Saxena | +| [YARN-5603](https://issues.apache.org/jira/browse/YARN-5603) | Metrics for Federation StateStore | Major | . | Subru Krishnan | Ellen Hui | +| [YARN-6923](https://issues.apache.org/jira/browse/YARN-6923) | Metrics for Federation Router | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-7024](https://issues.apache.org/jira/browse/YARN-7024) | Fix issues on recovery in LevelDB store | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-7047](https://issues.apache.org/jira/browse/YARN-7047) | Moving logging APIs over to slf4j in hadoop-yarn-server-nodemanager | Major | . | Yeliang Cang | Yeliang Cang | | [YARN-6876](https://issues.apache.org/jira/browse/YARN-6876) | Create an abstract log writer for extendability | Major | . | Xuan Gong | Xuan Gong | +| [YARN-7010](https://issues.apache.org/jira/browse/YARN-7010) | Federation: routing REST invocations transparently to multiple RMs (part 2 - getApps) | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-5219](https://issues.apache.org/jira/browse/YARN-5219) | When an export var command fails in launch\_container.sh, the full container launch should fail | Major | . 
| Hitesh Shah | Sunil Govindan | +| [HADOOP-14802](https://issues.apache.org/jira/browse/HADOOP-14802) | Add support for using container saskeys for all accesses | Major | fs/azure | Sivaguru Sankaridurg | Sivaguru Sankaridurg | +| [YARN-7094](https://issues.apache.org/jira/browse/YARN-7094) | Document the current known issue with server-side NM graceful decom | Blocker | graceful | Robert Kanter | Robert Kanter | +| [YARN-7095](https://issues.apache.org/jira/browse/YARN-7095) | Federation: routing getNode/getNodes/getMetrics REST invocations transparently to multiple RMs | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-6726](https://issues.apache.org/jira/browse/YARN-6726) | Fix issues with docker commands executed by container-executor | Major | nodemanager | Shane Kumpf | Shane Kumpf | +| [YARN-6877](https://issues.apache.org/jira/browse/YARN-6877) | Create an abstract log reader for extendability | Major | . | Xuan Gong | Xuan Gong | +| [HADOOP-14103](https://issues.apache.org/jira/browse/HADOOP-14103) | Sort out hadoop-aws contract-test-options.xml | Minor | fs/s3, test | Steve Loughran | John Zhuge | +| [YARN-7148](https://issues.apache.org/jira/browse/YARN-7148) | TestLogsCLI fails in trunk and branch-2 and javadoc error | Major | . | Xuan Gong | Xuan Gong | +| [YARN-5328](https://issues.apache.org/jira/browse/YARN-5328) | Plan/ResourceAllocation data structure enhancements required to support recurring reservations in ReservationSystem | Major | resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-6978](https://issues.apache.org/jira/browse/YARN-6978) | Add updateContainer API to NMClient. | Major | . | Arun Suresh | kartheek muthyala | +| [HADOOP-14774](https://issues.apache.org/jira/browse/HADOOP-14774) | S3A case "testRandomReadOverBuffer" failed due to improper range parameter | Minor | fs/s3 | Yonger | Yonger | +| [YARN-7144](https://issues.apache.org/jira/browse/YARN-7144) | Log Aggregation controller should not swallow the exceptions when it calls closeWriter and closeReader. | Major | . | Xuan Gong | Xuan Gong | +| [HDFS-12384](https://issues.apache.org/jira/browse/HDFS-12384) | Fixing compilation issue with BanDuplicateClasses | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [YARN-6600](https://issues.apache.org/jira/browse/YARN-6600) | Introduce default and max lifetime of application at LeafQueue level | Major | capacity scheduler | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-6849](https://issues.apache.org/jira/browse/YARN-6849) | NMContainerStatus should have the Container ExecutionType. | Major | . | Arun Suresh | kartheek muthyala | +| [HDFS-12335](https://issues.apache.org/jira/browse/HDFS-12335) | Federation Metrics | Major | fs | Giovanni Matteo Fumarola | Íñigo Goiri | +| [YARN-5330](https://issues.apache.org/jira/browse/YARN-5330) | SharingPolicy enhancements required to support recurring reservations in ReservationSystem | Major | resourcemanager | Subru Krishnan | Carlo Curino | +| [YARN-7072](https://issues.apache.org/jira/browse/YARN-7072) | Add a new log aggregation file format controller | Major | . | Xuan Gong | Xuan Gong | +| [YARN-7173](https://issues.apache.org/jira/browse/YARN-7173) | Container update RM-NM communication fix for backward compatibility | Major | . 
| Arun Suresh | Arun Suresh | +| [YARN-7063](https://issues.apache.org/jira/browse/YARN-7063) | TestTimelineReaderWebServicesHBaseStorage fails with NoClassDefFoundError on TSv2 branch2 | Major | timelineserver | Vrushali C | Varun Saxena | +| [YARN-6059](https://issues.apache.org/jira/browse/YARN-6059) | Update paused container state in the NM state store | Blocker | . | Hitesh Sharma | Hitesh Sharma | +| [HDFS-12430](https://issues.apache.org/jira/browse/HDFS-12430) | Rebasing HDFS-10467 After HDFS-12269 and HDFS-12218 | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [YARN-7194](https://issues.apache.org/jira/browse/YARN-7194) | Log aggregation status is always Failed with the newly added log aggregation IndexedFileFormat | Major | . | Xuan Gong | Xuan Gong | +| [YARN-7162](https://issues.apache.org/jira/browse/YARN-7162) | Remove XML excludes file format | Blocker | graceful | Robert Kanter | Robert Kanter | +| [YARN-7174](https://issues.apache.org/jira/browse/YARN-7174) | Add retry logic in LogsCLI when fetch running application logs | Major | . | Xuan Gong | Xuan Gong | +| [HDFS-12450](https://issues.apache.org/jira/browse/HDFS-12450) | Fixing TestNamenodeHeartbeat and support non-HA | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [YARN-6840](https://issues.apache.org/jira/browse/YARN-6840) | Implement zookeeper based store for scheduler configuration updates | Major | . | Wangda Tan | Jonathan Hung | +| [HADOOP-14583](https://issues.apache.org/jira/browse/HADOOP-14583) | wasb throws an exception if you try to create a file and there's no parent directory | Minor | fs/azure | Steve Loughran | Esfandiar Manii | +| [HDFS-12473](https://issues.apache.org/jira/browse/HDFS-12473) | Change hosts JSON file format | Major | . | Ming Ma | Ming Ma | +| [HDFS-11035](https://issues.apache.org/jira/browse/HDFS-11035) | Better documentation for maintenace mode and upgrade domain | Major | datanode, documentation | Wei-Chiu Chuang | Ming Ma | +| [YARN-7046](https://issues.apache.org/jira/browse/YARN-7046) | Add closing logic to configuration store | Major | . | Jonathan Hung | Jonathan Hung | +| [YARN-4266](https://issues.apache.org/jira/browse/YARN-4266) | Allow users to enter containers as UID:GID pair instead of by username | Major | yarn | Sidharta Seethana | luhuichun | +| [MAPREDUCE-6947](https://issues.apache.org/jira/browse/MAPREDUCE-6947) | Moving logging APIs over to slf4j in hadoop-mapreduce-examples | Major | . | Gergely Novák | Gergely Novák | +| [HDFS-12381](https://issues.apache.org/jira/browse/HDFS-12381) | [Documentation] Adding configuration keys for the Router | Minor | fs | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-14892](https://issues.apache.org/jira/browse/HADOOP-14892) | MetricsSystemImpl should use Time.monotonicNow for measuring durations | Minor | . | Chetna Chaudhari | Chetna Chaudhari | +| [HADOOP-14881](https://issues.apache.org/jira/browse/HADOOP-14881) | LoadGenerator should use Time.monotonicNow() to measure durations | Major | . | Chetna Chaudhari | Bharat Viswanadham | +| [YARN-7238](https://issues.apache.org/jira/browse/YARN-7238) | Documentation for API based scheduler configuration management | Major | . | Jonathan Hung | Jonathan Hung | +| [HADOOP-14893](https://issues.apache.org/jira/browse/HADOOP-14893) | WritableRpcEngine should use Time.monotonicNow | Minor | . 
| Chetna Chaudhari | Chetna Chaudhari | +| [HADOOP-14890](https://issues.apache.org/jira/browse/HADOOP-14890) | Move up to AWS SDK 1.11.199 | Blocker | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-12386](https://issues.apache.org/jira/browse/HDFS-12386) | Add fsserver defaults call to WebhdfsFileSystem. | Minor | webhdfs | Rushabh S Shah | Rushabh S Shah | +| [YARN-7252](https://issues.apache.org/jira/browse/YARN-7252) | Removing queue then failing over results in exception | Critical | . | Jonathan Hung | Jonathan Hung | +| [YARN-7251](https://issues.apache.org/jira/browse/YARN-7251) | Misc changes to YARN-5734 | Major | . | Jonathan Hung | Jonathan Hung | +| [YARN-6691](https://issues.apache.org/jira/browse/YARN-6691) | Update YARN daemon startup/shutdown scripts to include Router service | Major | nodemanager, resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | +| [YARN-6962](https://issues.apache.org/jira/browse/YARN-6962) | Add support for updateContainers when allocating using FederationInterceptor | Minor | . | Botong Huang | Botong Huang | +| [YARN-7250](https://issues.apache.org/jira/browse/YARN-7250) | Update Shared cache client api to use URLs | Minor | . | Chris Trezzo | Chris Trezzo | +| [YARN-6509](https://issues.apache.org/jira/browse/YARN-6509) | Add a size threshold beyond which yarn logs will require a force option | Major | . | Siddharth Seth | Xuan Gong | +| [HADOOP-14768](https://issues.apache.org/jira/browse/HADOOP-14768) | Honoring sticky bit during Deletion when authorization is enabled in WASB | Major | fs/azure | Varada Hemeswari | Varada Hemeswari | +| [YARN-7259](https://issues.apache.org/jira/browse/YARN-7259) | Add size-based rolling policy to LogAggregationIndexedFileController | Major | . | Xuan Gong | Xuan Gong | +| [YARN-6550](https://issues.apache.org/jira/browse/YARN-6550) | Capture launch\_container.sh logs to a separate log file | Major | . | Wangda Tan | Suma Shivaprasad | +| [HDFS-12580](https://issues.apache.org/jira/browse/HDFS-12580) | Rebasing HDFS-10467 after HDFS-12447 | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-14220](https://issues.apache.org/jira/browse/HADOOP-14220) | Enhance S3GuardTool with bucket-info and set-capacity commands, tests | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-2037](https://issues.apache.org/jira/browse/YARN-2037) | Add work preserving restart support for Unmanaged AMs | Major | resourcemanager | Karthik Kambatla | Botong Huang | +| [YARN-5329](https://issues.apache.org/jira/browse/YARN-5329) | Placement Agent enhancements required to support recurring reservations in ReservationSystem | Blocker | resourcemanager | Subru Krishnan | Carlo Curino | +| [HADOOP-14845](https://issues.apache.org/jira/browse/HADOOP-14845) | Azure wasb: getFileStatus not making any auth checks | Major | fs/azure, security | Sivaguru Sankaridurg | Sivaguru Sankaridurg | +| [YARN-2960](https://issues.apache.org/jira/browse/YARN-2960) | Add documentation for the YARN shared cache | Major | . 
| Chris Trezzo | Chris Trezzo | +| [HDFS-12273](https://issues.apache.org/jira/browse/HDFS-12273) | Federation UI | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [HDFS-12577](https://issues.apache.org/jira/browse/HDFS-12577) | Rename Router tooling | Major | fs | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-14899](https://issues.apache.org/jira/browse/HADOOP-14899) | Restrict Access to setPermission operation when authorization is enabled in WASB | Major | fs/azure | Kannapiran Srinivasan | Kannapiran Srinivasan | +| [YARN-7305](https://issues.apache.org/jira/browse/YARN-7305) | Merge YARN-5734 branch to branch-2 | Major | . | Xuan Gong | Xuan Gong | +| [HADOOP-14913](https://issues.apache.org/jira/browse/HADOOP-14913) | Sticky bit implementation for rename() operation in Azure WASB | Major | fs, fs/azure | Varada Hemeswari | Varada Hemeswari | +| [YARN-6033](https://issues.apache.org/jira/browse/YARN-6033) | Add support for sections in container-executor configuration file | Major | nodemanager | Varun Vasudev | Varun Vasudev | +| [HADOOP-14935](https://issues.apache.org/jira/browse/HADOOP-14935) | Azure: POSIX permissions are taking effect in access() method even when authorization is enabled | Major | fs/azure | Santhosh G Nayak | Santhosh G Nayak | +| [YARN-7275](https://issues.apache.org/jira/browse/YARN-7275) | NM Statestore cleanup for Container updates | Blocker | . | Arun Suresh | kartheek muthyala | +| [YARN-7311](https://issues.apache.org/jira/browse/YARN-7311) | Fix TestRMWebServicesReservation parametrization for fair scheduler | Blocker | fairscheduler, reservation system | Yufei Gu | Yufei Gu | +| [YARN-7295](https://issues.apache.org/jira/browse/YARN-7295) | Backport ATSv2 to branch-2 | Critical | . | Varun Saxena | Varun Saxena | +| [YARN-7338](https://issues.apache.org/jira/browse/YARN-7338) | Support same origin policy for cross site scripting prevention. | Major | yarn-ui-v2 | Vrushali C | Sunil Govindan | +| [HDFS-12620](https://issues.apache.org/jira/browse/HDFS-12620) | Backporting HDFS-10467 to branch-2 | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-4090](https://issues.apache.org/jira/browse/YARN-4090) | Make Collections.sort() more efficient by caching resource usage | Major | fairscheduler | Xianyin Xin | Yufei Gu | +| [YARN-7353](https://issues.apache.org/jira/browse/YARN-7353) | Docker permitted volumes don't properly check for directories | Major | yarn | Eric Badger | Eric Badger | +| [YARN-7169](https://issues.apache.org/jira/browse/YARN-7169) | Backport new yarn-ui to branch2 code (starting with YARN-5355\_branch2) | Critical | timelineclient, timelinereader, timelineserver | Vrushali C | Vrushali C | +| [YARN-3661](https://issues.apache.org/jira/browse/YARN-3661) | Basic Federation UI | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Íñigo Goiri | +| [YARN-4827](https://issues.apache.org/jira/browse/YARN-4827) | Document configuration of ReservationSystem for FairScheduler | Blocker | capacity scheduler | Subru Krishnan | Yufei Gu | +| [YARN-5516](https://issues.apache.org/jira/browse/YARN-5516) | Add REST API for supporting recurring reservations | Major | resourcemanager | Sangeetha Abdu Jyothi | Sean Po | +| [YARN-7178](https://issues.apache.org/jira/browse/YARN-7178) | Add documentation for Container Update API | Blocker | . 
| Arun Suresh | Arun Suresh | +| [YARN-7380](https://issues.apache.org/jira/browse/YARN-7380) | Fix findbugs warning in timeline service on branch-2 | Major | timelineclient, timelinereader, timelineserver | Vrushali C | Vrushali C | +| [YARN-7378](https://issues.apache.org/jira/browse/YARN-7378) | Documentation changes post branch-2 merge | Major | timelineclient, timelinereader, timelineserver | Varun Saxena | Vrushali C | +| [YARN-7398](https://issues.apache.org/jira/browse/YARN-7398) | Update LICENSE.txt with respect to new YARN UI merge | Blocker | . | Subru Krishnan | Varun Saxena | +| [YARN-7276](https://issues.apache.org/jira/browse/YARN-7276) | Federation Router Web Service fixes | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-7289](https://issues.apache.org/jira/browse/YARN-7289) | Application lifetime does not work with FairScheduler | Major | resourcemanager | Miklos Szegedi | Miklos Szegedi | +| [YARN-7286](https://issues.apache.org/jira/browse/YARN-7286) | Add support for docker to have no capabilities | Major | yarn | Eric Badger | Eric Badger | +| [YARN-6627](https://issues.apache.org/jira/browse/YARN-6627) | Use deployed webapp folder to launch new YARN UI | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [HDFS-9754](https://issues.apache.org/jira/browse/HDFS-9754) | Avoid unnecessary getBlockCollection calls in BlockManager | Major | namenode | Jing Zhao | Jing Zhao | ### OTHER: @@ -1451,3 +2006,8 @@ | [HADOOP-14344](https://issues.apache.org/jira/browse/HADOOP-14344) | Revert HADOOP-13606 swift FS to add a service load metadata file | Major | . | John Zhuge | John Zhuge | | [HDFS-11717](https://issues.apache.org/jira/browse/HDFS-11717) | Add unit test for HDFS-11709 StandbyCheckpointer should handle non-existing legacyOivImageDir gracefully | Minor | ha, namenode | Erik Krogen | Erik Krogen | | [YARN-6969](https://issues.apache.org/jira/browse/YARN-6969) | Clean up unused code in class FairSchedulerQueueInfo | Trivial | fairscheduler | Yufei Gu | Larry Lo | +| [YARN-6622](https://issues.apache.org/jira/browse/YARN-6622) | Document Docker work as experimental | Blocker | documentation | Varun Vasudev | Varun Vasudev | +| [YARN-7203](https://issues.apache.org/jira/browse/YARN-7203) | Add container ExecutionType into ContainerReport | Minor | . | Botong Huang | Botong Huang | +| [YARN-6499](https://issues.apache.org/jira/browse/YARN-6499) | Remove the doc about Schedulable#redistributeShare() | Trivial | fairscheduler | Yufei Gu | Chetna Chaudhari | +| [HADOOP-13835](https://issues.apache.org/jira/browse/HADOOP-13835) | Move Google Test Framework code from mapreduce to hadoop-common | Major | test | Varun Vasudev | Varun Vasudev | +| [YARN-7317](https://issues.apache.org/jira/browse/YARN-7317) | Fix overallocation resulted from ceiling in LocalityMulticastAMRMProxyPolicy | Minor | . 
| Botong Huang | Botong Huang | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/RELEASENOTES.2.9.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/RELEASENOTES.2.9.0.md index 62c27129864..8df8c75e9c8 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/RELEASENOTES.2.9.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.0/RELEASENOTES.2.9.0.md @@ -105,6 +105,17 @@ If the user object returned by LDAP server has the user's group object DN (suppo Add per-cache-pool default replication num configuration +--- + +* [YARN-2928](https://issues.apache.org/jira/browse/YARN-2928) | *Critical* | **YARN Timeline Service v.2: alpha 1** + +We are introducing an early preview (alpha 1) of a major revision of YARN Timeline Service: v.2. YARN Timeline Service v.2 addresses two major challenges: improving scalability and reliability of Timeline Service, and enhancing usability by introducing flows and aggregation. + +YARN Timeline Service v.2 alpha 1 is provided so that users and developers can test it and provide feedback and suggestions for making it a ready replacement for Timeline Service v.1.x. It should be used only in a test capacity. Most importantly, security is not enabled. Do not set up or use Timeline Service v.2 until security is implemented if security is a critical requirement. + +More details are available in the [YARN Timeline Service v.2](./hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html) documentation. + + --- * [HADOOP-13354](https://issues.apache.org/jira/browse/HADOOP-13354) | *Major* | **Update WASB driver to use the latest version (4.2.0) of SDK for Microsoft Azure Storage Clients** @@ -212,6 +223,13 @@ The KMS can now be configured with the additional environment variables `KMS_PRO Add a new configuration - "yarn.app.mapreduce.am.webapp.port-range" to specify port-range for webapp launched by AM. +--- + +* [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | *Major* | **Add ability to secure log servlet using proxy users** + +**WARNING: No release note provided for this change.** + + --- * [HADOOP-13075](https://issues.apache.org/jira/browse/HADOOP-13075) | *Major* | **Add support for SSE-KMS and SSE-C in s3a filesystem** @@ -363,15 +381,6 @@ Fix to wasb:// (Azure) file system that allows the concurrent I/O feature to be Random access and seek improvements for the wasb:// (Azure) file system. ---- - -* [YARN-5049](https://issues.apache.org/jira/browse/YARN-5049) | *Major* | **Extend NMStateStore to save queued container information** - -This breaks rolling upgrades because it changes the major version of the NM state store schema. Therefore when a new NM comes up on an old state store it crashes. - -The state store versions for this change have been updated in YARN-6798. - - --- * [YARN-6798](https://issues.apache.org/jira/browse/YARN-6798) | *Major* | **Fix NM startup failure with old state store due to version mismatch** @@ -418,6 +427,13 @@ Bug fix to Azure Filesystem related to HADOOP-14535. Bug fix to Azure Filesystem related to HADOOP-14535 +--- + +* [HDFS-10326](https://issues.apache.org/jira/browse/HDFS-10326) | *Major* | **Disable setting tcp socket send/receive buffers for write pipelines** + +The size of the TCP socket buffers are no longer hardcoded by default. Instead the OS now will automatically tune the size for the buffer. 
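The HDFS-10326 note above changes a default rather than an API, but it may help to see where the related knob lives on the client side. The sketch below is illustrative only: the property name and the DataNode-side keys mentioned in the comments are assumptions drawn from hdfs-default.xml, not something stated in this changelog, so verify them against the release in use.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SocketBufferExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // 0 means "defer to OS auto-tuning", which matches the new default behaviour
    // described in the HDFS-10326 note. The property name is an assumption taken
    // from hdfs-default.xml; check it against your release.
    conf.setInt("dfs.client.socket.send.buffer.size", 0);

    // To pin an explicit size instead of relying on auto-tuning, set a positive
    // number of bytes, e.g.:
    // conf.setInt("dfs.client.socket.send.buffer.size", 128 * 1024);

    // The DataNode-side equivalents (dfs.datanode.transfer.socket.send.buffer.size
    // and dfs.datanode.transfer.socket.recv.buffer.size) are server settings and
    // belong in the DataNode's hdfs-site.xml, not in client code.

    try (FileSystem fs = FileSystem.get(conf);
         FSDataOutputStream out = fs.create(new Path("/tmp/socket-buffer-demo"))) {
      out.writeUTF("hello");
    }
  }
}
```

Leaving the value at 0 and letting the kernel size the buffers is usually the simpler choice; an explicit value only makes sense when the OS defaults are known to be too small for the deployment's bandwidth-delay product.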
+ + --- * [HADOOP-14260](https://issues.apache.org/jira/browse/HADOOP-14260) | *Major* | **Configuration.dumpConfiguration should redact sensitive information** @@ -438,3 +454,158 @@ Up to 34% throughput improvement for the wasb:// (Azure) file system when fs.azu * [HADOOP-14769](https://issues.apache.org/jira/browse/HADOOP-14769) | *Major* | **WASB: delete recursive should not fail if a file is deleted** Recursive directory delete improvement for the wasb filesystem. + + +--- + +* [YARN-5355](https://issues.apache.org/jira/browse/YARN-5355) | *Critical* | **YARN Timeline Service v.2: alpha 2** + +We are releasing the alpha2 version of a major revision of YARN Timeline Service: v.2. YARN Timeline Service v.2 addresses two major challenges: improving scalability and reliability of Timeline Service, and enhancing usability by introducing flows and aggregation. + +YARN Timeline Service v.2 alpha1 was introduced in 3.0.0-alpha1 via YARN-2928. + +YARN Timeline Service v.2 alpha2 is now being provided so that users and developers can test it and provide feedback and suggestions for making it a ready replacement for Timeline Service v.1.x. Security is provided via Kerberos Authentication and delegation tokens. There is also a simple read level authorization provided via whitelists. + +Some of the notable improvements since alpha-1 are: +- Security via Kerberos Authentication and delegation tokens +- Read side simple authorization via whitelist +- Client configurable entity sort ordering +- New REST APIs for apps, app attempts, containers, fetching metrics by timerange, pagination, sub-app entities +- Support for storing sub-application entities (entities that exist outside the scope of an application) +- Configurable TTLs (time-to-live) for tables, configurable table prefixes, configurable hbase cluster +- Flow level aggregations done as dynamic (table level) coprocessors +- Uses latest stable HBase release 1.2.6 + +More details are available in the [YARN Timeline Service v.2](./hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html) documentation. + + +--- + +* [HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345) | *Major* | **S3Guard: Improved Consistency for S3A** + +S3Guard (pronounced see-guard) is a new feature for the S3A connector to Amazon S3, which uses DynamoDB for a high performance and consistent metadata repository. Essentially: S3Guard caches directory information, so your S3A clients get faster lookups and resilience to inconsistency between S3 list operations and the status of objects. When files are created, with S3Guard, they'll always be found. + +S3Guard does not address update consistency: if a file is updated, while the directory information will be updated, calling open() on the path may still return the old data. Similarly, deleted objects may also potentially be opened. + +Please consult the S3Guard documentation in the Amazon S3 section of our documentation. + +Note: part of this update includes moving to a new version of the AWS SDK 1.11, one which includes the Dynamo DB client and its a shaded version of Jackson 2. The large aws-sdk-bundle JAR is needed to use the S3A client with or without S3Guard enabled. The good news: because Jackson is shaded, there will be no conflict between any Jackson version used in your application and that which the AWS SDK needs. + + +--- + +* [HADOOP-14520](https://issues.apache.org/jira/browse/HADOOP-14520) | *Major* | **WASB: Block compaction for Azure Block Blobs** + +Block Compaction for Azure Block Blobs. 
When the number of blocks in a block blob is above 32000, the process of compaction replaces a sequence of small blocks with one big block. + + --- + + * [HDFS-11799](https://issues.apache.org/jira/browse/HDFS-11799) | *Major* | **Introduce a config to allow setting up write pipeline with fewer nodes than replication factor** + +Added new configuration "dfs.client.block.write.replace-datanode-on-failure.min-replication". + + The minimum number of replications that are needed to not fail + the write pipeline if new datanodes cannot be found to replace + failed datanodes (could be due to network failure) in the write pipeline. + If the number of the remaining datanodes in the write pipeline is greater + than or equal to this property value, continue writing to the remaining nodes. + Otherwise throw an exception. + + If this is set to 0, an exception will be thrown when a replacement + cannot be found. + + --- + + * [YARN-2915](https://issues.apache.org/jira/browse/YARN-2915) | *Major* | **Enable YARN RM scale out via federation using multiple RM's** + +A federation-based approach to transparently scale a single YARN cluster to tens of thousands of nodes, by federating multiple YARN standalone clusters (sub-clusters). The applications running in this federated environment will see a single massive YARN cluster and will be able to schedule tasks on any node of the federated cluster. Under the hood, the federation system will negotiate with the sub-clusters' ResourceManagers and provide resources to the application. The goal is to allow an individual job to “span” sub-clusters seamlessly. + + --- + + * [YARN-1492](https://issues.apache.org/jira/browse/YARN-1492) | *Major* | **truly shared cache for jars (jobjar/libjar)** + +The YARN Shared Cache provides the facility to upload and manage shared application resources to HDFS in a safe and scalable manner. YARN applications can leverage resources uploaded by other applications or previous runs of the same application without having to re-upload and localize identical files multiple times. This will save network resources and reduce YARN application startup time. + + --- + + * [HDFS-10467](https://issues.apache.org/jira/browse/HDFS-10467) | *Major* | **Router-based HDFS federation** + +HDFS Router-based Federation adds an RPC routing layer that provides a federated view of multiple HDFS namespaces. +This is similar to the existing ViewFS and HDFS federation functionality, except the mount table is managed on the server side by the routing layer rather than on the client. +This simplifies access to a federated cluster for existing HDFS clients. + +See HDFS-10467 and the HDFS Router-based Federation documentation for more details. + + --- + + * [YARN-5734](https://issues.apache.org/jira/browse/YARN-5734) | *Major* | **OrgQueue for easy CapacityScheduler queue configuration management** + + + +The OrgQueue extension to the capacity scheduler provides a programmatic way to change configurations through a REST API that users can call to modify queue configurations. This enables automation of queue configuration management by administrators in the queue's `administer_queue` ACL. + + --- + + * [MAPREDUCE-5951](https://issues.apache.org/jira/browse/MAPREDUCE-5951) | *Major* | **Add support for the YARN Shared Cache** + +MapReduce support for the YARN shared cache allows MapReduce jobs to take advantage of additional resource caching. This saves network bandwidth between the job submission client and the YARN cluster, as well as within the YARN cluster itself. 
This will reduce job submission time and overall job runtime. + + +--- + +* [YARN-6623](https://issues.apache.org/jira/browse/YARN-6623) | *Blocker* | **Add support to turn off launching privileged containers in the container-executor** + +A change in configuration for launching Docker containers under YARN. Docker container capabilities, mounts, networks and whether privileged containers are allowed now have to be specified in the container-executor.cfg. By default, all of the above are turned off. This change will break existing setups launching Docker containers under YARN. Please refer to the Docker containers under YARN documentation for more information. + + +--- + +* [HADOOP-14840](https://issues.apache.org/jira/browse/HADOOP-14840) | *Major* | **Tool to estimate resource requirements of an application pipeline based on prior executions** + +The first version of the Resource Estimator service, a tool that captures the historical resource usage of an app and predicts its future resource requirement. + + +--- + +* [YARN-2877](https://issues.apache.org/jira/browse/YARN-2877) | *Major* | **Extend YARN to support distributed scheduling** + +With this JIRA we are introducing distributed scheduling in YARN. +In particular, we make the following contributions: +- Introduce the notion of container types. GUARANTEED containers follow the semantics of the existing YARN containers. OPPORTUNISTIC ones can be seen as lower priority containers, and can be preempted in order to make space for GUARANTEED containers to run. +- Queuing of tasks at the NMs. This enables us to send more containers to an NM than its available resources allow. At the moment we are allowing queuing of OPPORTUNISTIC containers. Once resources become available at the NM, such containers can immediately start their execution. +- Introduce the AMRMProxy. This is a service running at each node, intercepting the requests between the AM and the RM. It is instrumental for both distributed scheduling and YARN Federation (YARN-2915). +- Enable distributed scheduling. To minimize their allocation latency, OPPORTUNISTIC containers are dispatched immediately to NMs in a distributed fashion by using the AMRMProxy of the node where the corresponding AM resides, without needing to go through the ResourceManager. + +All the functionality introduced in this JIRA is disabled by default, so it will not affect the behavior of existing applications. +We have introduced parameters in YarnConfiguration to enable NM queuing (yarn.nodemanager.container-queuing-enabled), distributed scheduling (yarn.distributed-scheduling.enabled) and the AMRMProxy service (yarn.nodemanager.amrmproxy.enable); a minimal configuration sketch using these switches is shown after the YARN-5220 note below. +AMs currently need to specify the type of container to be requested for each task. We are in the process of adding to the MapReduce AM the ability to randomly request OPPORTUNISTIC containers for a specified percentage of a job's tasks, so that users can experiment with the new features. + + +--- + +* [YARN-5220](https://issues.apache.org/jira/browse/YARN-5220) | *Major* | **Scheduling of OPPORTUNISTIC containers through YARN RM** + +This extends the centralized YARN RM to enable the scheduling of OPPORTUNISTIC containers in a centralized fashion. +This way, users can use OPPORTUNISTIC containers to improve the cluster's utilization, without needing to enable distributed scheduling.
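+
+The following is a minimal sketch added for illustration, not part of the official release note: the property keys are quoted verbatim from the YARN-2877 note above, while the class name, helper method, and the idea of flipping the switches programmatically on an `org.apache.hadoop.conf.Configuration` object are assumptions; in practice these keys would normally be set in yarn-site.xml.
+
+```java
+// Illustrative only: enables the scheduling switches quoted in the YARN-2877
+// note above on a client-side Configuration object. Class and method names
+// here are hypothetical; the property keys are taken from the note verbatim.
+import org.apache.hadoop.conf.Configuration;
+
+public class OpportunisticSchedulingSketch {
+
+  public static Configuration enableDistributedScheduling(Configuration conf) {
+    // Let the NodeManager queue OPPORTUNISTIC containers locally.
+    conf.setBoolean("yarn.nodemanager.container-queuing-enabled", true);
+    // Dispatch OPPORTUNISTIC containers through the AMRMProxy path.
+    conf.setBoolean("yarn.distributed-scheduling.enabled", true);
+    // Run the AMRMProxy service on each node, as distributed scheduling requires.
+    conf.setBoolean("yarn.nodemanager.amrmproxy.enable", true);
+    return conf;
+  }
+
+  public static void main(String[] args) {
+    Configuration conf = enableDistributedScheduling(new Configuration());
+    System.out.println("distributed scheduling enabled: "
+        + conf.getBoolean("yarn.distributed-scheduling.enabled", false));
+  }
+}
+```
+
+As the YARN-5220 note above points out, OPPORTUNISTIC containers can also be scheduled centrally by the RM, in which case the distributed-scheduling switch would stay at its default (disabled).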
+ + +--- + +* [YARN-5085](https://issues.apache.org/jira/browse/YARN-5085) | *Major* | **Add support for change of container ExecutionType** + +This allows the Application Master to ask the Scheduler to change the ExecutionType of a running/allocated container. + + +--- + +* [YARN-5049](https://issues.apache.org/jira/browse/YARN-5049) | *Major* | **Extend NMStateStore to save queued container information** + +This breaks rolling upgrades because it changes the major version of the NM state store schema. Therefore when a new NM comes up on an old state store it crashes. + +The state store versions for this change have been updated in YARN-6798. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/CHANGES.2.9.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/CHANGELOG.2.9.1.md similarity index 97% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/CHANGES.2.9.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/CHANGELOG.2.9.1.md index c5e53f68878..de2ef572737 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/CHANGES.2.9.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/CHANGELOG.2.9.1.md @@ -16,9 +16,9 @@ # See the License for the specific language governing permissions and # limitations under the License. --> -# "Apache Hadoop" Changelog +# Apache Hadoop Changelog -## Release 2.9.1 - 2018-04-16 +## Release 2.9.1 - 2018-05-03 ### INCOMPATIBLE CHANGES: @@ -49,7 +49,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | | [HADOOP-14872](https://issues.apache.org/jira/browse/HADOOP-14872) | CryptoInputStream should implement unbuffer | Major | fs, security | John Zhuge | John Zhuge | -| [HADOOP-14964](https://issues.apache.org/jira/browse/HADOOP-14964) | AliyunOSS: backport Aliyun OSS module to branch-2 | Major | fs/oss | Genmao Yu | SammiChen | +| [HADOOP-14964](https://issues.apache.org/jira/browse/HADOOP-14964) | AliyunOSS: backport Aliyun OSS module to branch-2 | Major | fs/oss | Genmao Yu | Sammi Chen | | [YARN-6851](https://issues.apache.org/jira/browse/YARN-6851) | Capacity Scheduler: document configs for controlling # containers allowed to be allocated per node heartbeat | Minor | . | Wei Yan | Wei Yan | | [YARN-7495](https://issues.apache.org/jira/browse/YARN-7495) | Improve robustness of the AggregatedLogDeletionService | Major | log-aggregation | Jonathan Eagles | Jonathan Eagles | | [YARN-7611](https://issues.apache.org/jira/browse/YARN-7611) | Node manager web UI should display container type in containers page | Major | nodemanager, webapp | Weiwei Yang | Weiwei Yang | @@ -107,7 +107,7 @@ | [HDFS-12889](https://issues.apache.org/jira/browse/HDFS-12889) | Router UI is missing robots.txt file | Major | . 
| Bharat Viswanadham | Bharat Viswanadham | | [HDFS-11576](https://issues.apache.org/jira/browse/HDFS-11576) | Block recovery will fail indefinitely if recovery time \> heartbeat interval | Critical | datanode, hdfs, namenode | Lukas Majercak | Lukas Majercak | | [YARN-7607](https://issues.apache.org/jira/browse/YARN-7607) | Remove the trailing duplicated timestamp in container diagnostics message | Minor | nodemanager | Weiwei Yang | Weiwei Yang | -| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" | Blocker | fs/oss | Chris Douglas | SammiChen | +| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" | Blocker | fs/oss | Chris Douglas | Sammi Chen | | [YARN-7591](https://issues.apache.org/jira/browse/YARN-7591) | NPE in async-scheduling mode of CapacityScheduler | Critical | capacityscheduler | Tao Yang | Tao Yang | | [YARN-7608](https://issues.apache.org/jira/browse/YARN-7608) | Incorrect sTarget column causing DataTable warning on RM application and scheduler web page | Major | resourcemanager, webapp | Weiwei Yang | Gergely Novák | | [HDFS-12833](https://issues.apache.org/jira/browse/HDFS-12833) | Distcp : Update the usage of delete option for dependency with update and overwrite option | Minor | distcp, hdfs | Harshakiran Reddy | usharani | @@ -120,13 +120,13 @@ | [HDFS-12347](https://issues.apache.org/jira/browse/HDFS-12347) | TestBalancerRPCDelay#testBalancerRPCDelay fails very frequently | Critical | test | Xiao Chen | Bharat Viswanadham | | [YARN-7542](https://issues.apache.org/jira/browse/YARN-7542) | Fix issue that causes some Running Opportunistic Containers to be recovered as PAUSED | Major | . | Arun Suresh | Sampada Dehankar | | [HADOOP-15143](https://issues.apache.org/jira/browse/HADOOP-15143) | NPE due to Invalid KerberosTicket in UGI | Major | . 
| Jitendra Nath Pandey | Mukul Kumar Singh | -| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications | Blocker | resourcemanager | Charan Hebri | Sunil G | +| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications | Blocker | resourcemanager | Charan Hebri | Sunil Govindan | | [MAPREDUCE-7028](https://issues.apache.org/jira/browse/MAPREDUCE-7028) | Concurrent task progress updates causing NPE in Application Master | Blocker | mr-am | Gergo Repas | Gergo Repas | | [YARN-7619](https://issues.apache.org/jira/browse/YARN-7619) | Max AM Resource value in Capacity Scheduler UI has to be refreshed for every user | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | -| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call | Major | webapp | Sunil G | Sunil G | +| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call | Major | webapp | Sunil Govindan | Sunil Govindan | | [YARN-7508](https://issues.apache.org/jira/browse/YARN-7508) | NPE in FiCaSchedulerApp when debug log enabled in async-scheduling mode | Major | capacityscheduler | Tao Yang | Tao Yang | -| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Minor | resourcemanager | lujie | lujie | -| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Minor | yarn | lujie | lujie | +| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Major | resourcemanager | lujie | lujie | +| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Major | yarn | lujie | lujie | | [YARN-7735](https://issues.apache.org/jira/browse/YARN-7735) | Fix typo in YARN documentation | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | | [YARN-7727](https://issues.apache.org/jira/browse/YARN-7727) | Incorrect log levels in few logs with QueuePriorityContainerCandidateSelector | Minor | yarn | Prabhu Joseph | Prabhu Joseph | | [HDFS-11915](https://issues.apache.org/jira/browse/HDFS-11915) | Sync rbw dir on the first hsync() to avoid file lost on power failure | Critical | . | Kanaka Kumar Avvaru | Vinayakumar B | @@ -167,7 +167,6 @@ | [HDFS-13109](https://issues.apache.org/jira/browse/HDFS-13109) | Support fully qualified hdfs path in EZ commands | Major | hdfs | Hanisha Koneru | Hanisha Koneru | | [MAPREDUCE-6930](https://issues.apache.org/jira/browse/MAPREDUCE-6930) | mapreduce.map.cpu.vcores and mapreduce.reduce.cpu.vcores are both present twice in mapred-default.xml | Major | mrv2 | Daniel Templeton | Sen Zhao | | [HDFS-12156](https://issues.apache.org/jira/browse/HDFS-12156) | TestFSImage fails without -Pnative | Major | test | Akira Ajisaka | Akira Ajisaka | -| [HADOOP-15308](https://issues.apache.org/jira/browse/HADOOP-15308) | TestConfiguration fails on Windows because of paths | Major | . 
| Íñigo Goiri | Xiao Liang | | [YARN-7636](https://issues.apache.org/jira/browse/YARN-7636) | Re-reservation count may overflow when cluster resource exhausted for a long time | Major | capacityscheduler | Tao Yang | Tao Yang | | [HDFS-12886](https://issues.apache.org/jira/browse/HDFS-12886) | Ignore minReplication for block recovery | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | | [HDFS-13296](https://issues.apache.org/jira/browse/HDFS-13296) | GenericTestUtils generates paths with drive letter in Windows and fail webhdfs related test cases | Major | . | Xiao Liang | Xiao Liang | @@ -178,6 +177,7 @@ | [HADOOP-15320](https://issues.apache.org/jira/browse/HADOOP-15320) | Remove customized getFileBlockLocations for hadoop-azure and hadoop-azure-datalake | Major | fs/adl, fs/azure | shanyu zhao | shanyu zhao | | [HADOOP-12862](https://issues.apache.org/jira/browse/HADOOP-12862) | LDAP Group Mapping over SSL can not specify trust store | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-13427](https://issues.apache.org/jira/browse/HDFS-13427) | Fix the section titles of transparent encryption document | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [MAPREDUCE-7073](https://issues.apache.org/jira/browse/MAPREDUCE-7073) | Optimize TokenCache#obtainTokensForNamenodesInternal | Major | . | Bibin A Chundatt | Bibin A Chundatt | ### TESTS: @@ -195,20 +195,20 @@ | [HADOOP-13591](https://issues.apache.org/jira/browse/HADOOP-13591) | Unit test failure in TestOSSContractGetFileStatus and TestOSSContractRootDir | Major | fs, fs/oss | Genmao Yu | Genmao Yu | | [HADOOP-13624](https://issues.apache.org/jira/browse/HADOOP-13624) | Rename TestAliyunOSSContractDispCp | Major | fs, fs/oss | Kai Zheng | Genmao Yu | | [HADOOP-14065](https://issues.apache.org/jira/browse/HADOOP-14065) | AliyunOSS: oss directory filestatus should use meta time | Major | fs/oss | Fei Hui | Fei Hui | -| [HADOOP-13768](https://issues.apache.org/jira/browse/HADOOP-13768) | AliyunOSS: handle the failure in the batch delete operation `deleteDirs`. | Major | fs | Genmao Yu | Genmao Yu | +| [HADOOP-13768](https://issues.apache.org/jira/browse/HADOOP-13768) | AliyunOSS: handle the failure in the batch delete operation \`deleteDirs\`. 
| Major | fs | Genmao Yu | Genmao Yu | | [HADOOP-14069](https://issues.apache.org/jira/browse/HADOOP-14069) | AliyunOSS: listStatus returns wrong file info | Major | fs/oss | Fei Hui | Fei Hui | | [HADOOP-13769](https://issues.apache.org/jira/browse/HADOOP-13769) | AliyunOSS: update oss sdk version | Major | fs, fs/oss | Genmao Yu | Genmao Yu | | [HADOOP-14072](https://issues.apache.org/jira/browse/HADOOP-14072) | AliyunOSS: Failed to read from stream when seek beyond the download size | Major | fs/oss | Genmao Yu | Genmao Yu | | [HADOOP-14192](https://issues.apache.org/jira/browse/HADOOP-14192) | Aliyun OSS FileSystem contract test should implement getTestBaseDir() | Major | fs/oss | Mingliang Liu | Mingliang Liu | | [HADOOP-14194](https://issues.apache.org/jira/browse/HADOOP-14194) | Aliyun OSS should not use empty endpoint as default | Major | fs/oss | Mingliang Liu | Genmao Yu | -| [HADOOP-14787](https://issues.apache.org/jira/browse/HADOOP-14787) | AliyunOSS: Implement the `createNonRecursive` operator | Major | fs, fs/oss | Genmao Yu | Genmao Yu | +| [HADOOP-14787](https://issues.apache.org/jira/browse/HADOOP-14787) | AliyunOSS: Implement the \`createNonRecursive\` operator | Major | fs, fs/oss | Genmao Yu | Genmao Yu | | [HADOOP-14649](https://issues.apache.org/jira/browse/HADOOP-14649) | Update aliyun-sdk-oss version to 2.8.1 | Major | fs/oss | Ray Chiang | Genmao Yu | | [HADOOP-14799](https://issues.apache.org/jira/browse/HADOOP-14799) | Update nimbus-jose-jwt to 4.41.1 | Major | . | Ray Chiang | Ray Chiang | | [HADOOP-14997](https://issues.apache.org/jira/browse/HADOOP-14997) | Add hadoop-aliyun as dependency of hadoop-cloud-storage | Minor | fs/oss | Genmao Yu | Genmao Yu | | [HDFS-12801](https://issues.apache.org/jira/browse/HDFS-12801) | RBF: Set MountTableResolver as default file resolver | Minor | . | Íñigo Goiri | Íñigo Goiri | | [YARN-7430](https://issues.apache.org/jira/browse/YARN-7430) | Enable user re-mapping for Docker containers by default | Blocker | security, yarn | Eric Yang | Eric Yang | | [YARN-6128](https://issues.apache.org/jira/browse/YARN-6128) | Add support for AMRMProxy HA | Major | amrmproxy, nodemanager | Subru Krishnan | Botong Huang | -| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | SammiChen | SammiChen | +| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | Sammi Chen | Sammi Chen | | [HDFS-12858](https://issues.apache.org/jira/browse/HDFS-12858) | RBF: Add router admin commands usage in HDFS commands reference doc | Minor | documentation | Yiqun Lin | Yiqun Lin | | [HDFS-12835](https://issues.apache.org/jira/browse/HDFS-12835) | RBF: Fix Javadoc parameter errors | Minor | . | Wei Yan | Wei Yan | | [YARN-7587](https://issues.apache.org/jira/browse/YARN-7587) | Skip dispatching opportunistic containers to nodes whose queue is already full | Major | . | Weiwei Yang | Weiwei Yang | @@ -240,19 +240,20 @@ | [HDFS-13230](https://issues.apache.org/jira/browse/HDFS-13230) | RBF: ConnectionManager's cleanup task will compare each pool's own active conns with its total conns | Minor | . 
| Wei Yan | Chao Sun | | [HDFS-13233](https://issues.apache.org/jira/browse/HDFS-13233) | RBF: MountTableResolver doesn't return the correct mount point of the given path | Major | hdfs | wangzhiyuan | wangzhiyuan | | [HDFS-13212](https://issues.apache.org/jira/browse/HDFS-13212) | RBF: Fix router location cache issue | Major | federation, hdfs | Weiwei Wu | Weiwei Wu | -| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection | Minor | . | Wei Yan | Ekanth S | +| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection | Minor | . | Wei Yan | Ekanth Sethuramalingam | | [HDFS-13240](https://issues.apache.org/jira/browse/HDFS-13240) | RBF: Update some inaccurate document descriptions | Minor | . | Yiqun Lin | Yiqun Lin | | [HDFS-11399](https://issues.apache.org/jira/browse/HDFS-11399) | Many tests fails in Windows due to injecting disk failures | Major | . | Yiqun Lin | Yiqun Lin | | [HDFS-13241](https://issues.apache.org/jira/browse/HDFS-13241) | RBF: TestRouterSafemode failed if the port 8888 is in use | Major | hdfs, test | maobaolong | maobaolong | | [HDFS-13253](https://issues.apache.org/jira/browse/HDFS-13253) | RBF: Quota management incorrect parent-child relationship judgement | Major | . | Yiqun Lin | Yiqun Lin | | [HDFS-13226](https://issues.apache.org/jira/browse/HDFS-13226) | RBF: Throw the exception if mount table entry validated failed | Major | hdfs | maobaolong | maobaolong | +| [HADOOP-15308](https://issues.apache.org/jira/browse/HADOOP-15308) | TestConfiguration fails on Windows because of paths | Major | test | Íñigo Goiri | Xiao Liang | | [HDFS-12773](https://issues.apache.org/jira/browse/HDFS-12773) | RBF: Improve State Store FS implementation | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-13198](https://issues.apache.org/jira/browse/HDFS-13198) | RBF: RouterHeartbeatService throws out CachedStateStore related exceptions when starting router | Minor | . | Wei Yan | Wei Yan | | [HDFS-13224](https://issues.apache.org/jira/browse/HDFS-13224) | RBF: Resolvers to support mount points across multiple subclusters | Major | . | Íñigo Goiri | Íñigo Goiri | | [HADOOP-15262](https://issues.apache.org/jira/browse/HADOOP-15262) | AliyunOSS: move files under a directory in parallel when rename a directory | Major | fs/oss | wujinhu | wujinhu | | [HDFS-13215](https://issues.apache.org/jira/browse/HDFS-13215) | RBF: Move Router to its own module | Major | . | Íñigo Goiri | Wei Yan | | [HDFS-13250](https://issues.apache.org/jira/browse/HDFS-13250) | RBF: Router to manage requests across multiple subclusters | Major | . | Íñigo Goiri | Íñigo Goiri | -| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf | Minor | . | Íñigo Goiri | Ekanth S | +| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf | Minor | . | Íñigo Goiri | Ekanth Sethuramalingam | | [HDFS-12792](https://issues.apache.org/jira/browse/HDFS-12792) | RBF: Test Router-based federation using HDFSContract | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-12512](https://issues.apache.org/jira/browse/HDFS-12512) | RBF: Add WebHDFS | Major | fs | Íñigo Goiri | Wei Yan | | [HDFS-13291](https://issues.apache.org/jira/browse/HDFS-13291) | RBF: Implement available space based OrderResolver | Major | . 
| Yiqun Lin | Yiqun Lin | @@ -264,6 +265,7 @@ | [HDFS-13364](https://issues.apache.org/jira/browse/HDFS-13364) | RBF: Support NamenodeProtocol in the Router | Major | . | Íñigo Goiri | Íñigo Goiri | | [HADOOP-14651](https://issues.apache.org/jira/browse/HADOOP-14651) | Update okhttp version to 2.7.5 | Major | fs/adl | Ray Chiang | Ray Chiang | | [HADOOP-14999](https://issues.apache.org/jira/browse/HADOOP-14999) | AliyunOSS: provide one asynchronous multi-part based uploading mechanism | Major | fs/oss | Genmao Yu | Genmao Yu | +| [YARN-7810](https://issues.apache.org/jira/browse/YARN-7810) | TestDockerContainerRuntime test failures due to UID lookup of a non-existent user | Major | . | Shane Kumpf | Shane Kumpf | ### OTHER: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/RELEASENOTES.2.9.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/RELEASENOTES.2.9.1.md index bed70b14e15..ae8edfec525 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/RELEASENOTES.2.9.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.1/RELEASENOTES.2.9.1.md @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. --> -# "Apache Hadoop" 2.9.1 Release Notes +# Apache Hadoop 2.9.1 Release Notes These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.2/CHANGELOG.2.9.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.2/CHANGELOG.2.9.2.md new file mode 100644 index 00000000000..e0a167df159 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.2/CHANGELOG.2.9.2.md @@ -0,0 +1,202 @@ + + +# Apache Hadoop Changelog + +## Release 2.9.2 - Unreleased (as of 2018-09-02) + + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13283](https://issues.apache.org/jira/browse/HDFS-13283) | Percentage based Reserved Space Calculation for DataNode | Major | datanode, hdfs | Lukas Majercak | Lukas Majercak | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-14987](https://issues.apache.org/jira/browse/HADOOP-14987) | Improve KMSClientProvider log around delegation token checking | Major | . | Xiaoyu Yao | Xiaoyu Yao | +| [HADOOP-15394](https://issues.apache.org/jira/browse/HADOOP-15394) | Backport PowerShell NodeFencer HADOOP-14309 to branch-2 | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13462](https://issues.apache.org/jira/browse/HDFS-13462) | Add BIND\_HOST configuration for JournalNode's HTTP and RPC Servers | Major | hdfs, journal-node | Lukas Majercak | Lukas Majercak | +| [HADOOP-14841](https://issues.apache.org/jira/browse/HADOOP-14841) | Kms client should disconnect if unable to get output stream from connection. | Major | kms | Xiao Chen | Rushabh S Shah | +| [HDFS-13272](https://issues.apache.org/jira/browse/HDFS-13272) | DataNodeHttpServer to have configurable HttpServer2 threads | Major | datanode | Erik Krogen | Erik Krogen | +| [HADOOP-15441](https://issues.apache.org/jira/browse/HADOOP-15441) | Log kms url and token service at debug level. | Minor | . 
| Wei-Chiu Chuang | Gabor Bota | +| [HDFS-13544](https://issues.apache.org/jira/browse/HDFS-13544) | Improve logging for JournalNode in federated cluster | Major | federation, hdfs | Hanisha Koneru | Hanisha Koneru | +| [HADOOP-15486](https://issues.apache.org/jira/browse/HADOOP-15486) | Make NetworkTopology#netLock fair | Major | net | Nanda kumar | Nanda kumar | +| [HADOOP-15449](https://issues.apache.org/jira/browse/HADOOP-15449) | Increase default timeout of ZK session to avoid frequent NameNode failover | Critical | common | Karthik Palanisamy | Karthik Palanisamy | +| [HDFS-13602](https://issues.apache.org/jira/browse/HDFS-13602) | Add checkOperation(WRITE) checks in FSNamesystem | Major | ha, namenode | Erik Krogen | Chao Sun | +| [HDFS-13653](https://issues.apache.org/jira/browse/HDFS-13653) | Make dfs.client.failover.random.order a per nameservice configuration | Major | federation | Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [HDFS-13714](https://issues.apache.org/jira/browse/HDFS-13714) | Fix TestNameNodePrunesMissingStorages test failures on Windows | Major | hdfs, namenode, test | Lukas Majercak | Lukas Majercak | +| [HDFS-11060](https://issues.apache.org/jira/browse/HDFS-11060) | make DEFAULT\_MAX\_CORRUPT\_FILEBLOCKS\_RETURNED configurable | Minor | hdfs | Lantao Jin | Lantao Jin | +| [HDFS-13813](https://issues.apache.org/jira/browse/HDFS-13813) | Exit NameNode if dangling child inode is detected when saving FsImage | Major | hdfs, namenode | Siyao Meng | Siyao Meng | +| [HDFS-13821](https://issues.apache.org/jira/browse/HDFS-13821) | RBF: Add dfs.federation.router.mount-table.cache.enable so that users can disable cache | Major | hdfs | Fei Hui | Fei Hui | +| [HADOOP-15689](https://issues.apache.org/jira/browse/HADOOP-15689) | Add "\*.patch" into .gitignore file of branch-2 | Major | . | Rui Gao | Rui Gao | +| [HDFS-13854](https://issues.apache.org/jira/browse/HDFS-13854) | RBF: The ProcessingAvgTime and ProxyAvgTime should display by JMX with ms unit. | Major | federation, hdfs | yanghuafeng | yanghuafeng | +| [YARN-8051](https://issues.apache.org/jira/browse/YARN-8051) | TestRMEmbeddedElector#testCallbackSynchronization is flakey | Major | test | Robert Kanter | Robert Kanter | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15121](https://issues.apache.org/jira/browse/HADOOP-15121) | Encounter NullPointerException when using DecayRpcScheduler | Major | . | Tao Jie | Tao Jie | +| [HDFS-10803](https://issues.apache.org/jira/browse/HDFS-10803) | TestBalancerWithMultipleNameNodes#testBalancing2OutOf3Blockpools fails intermittently due to no free space available | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-8068](https://issues.apache.org/jira/browse/YARN-8068) | Application Priority field causes NPE in app timeline publish when Hadoop 2.7 based clients to 2.8+ | Blocker | yarn | Sunil Govindan | Sunil Govindan | +| [HADOOP-15317](https://issues.apache.org/jira/browse/HADOOP-15317) | Improve NetworkTopology chooseRandom's loop | Major | . | Xiao Chen | Xiao Chen | +| [HADOOP-15375](https://issues.apache.org/jira/browse/HADOOP-15375) | Branch-2 pre-commit failed to build docker image | Major | . | Xiao Chen | Xiao Chen | +| [HADOOP-15357](https://issues.apache.org/jira/browse/HADOOP-15357) | Configuration.getPropsWithPrefix no longer does variable substitution | Major | . 
| Jim Brennan | Jim Brennan | +| [HDFS-7101](https://issues.apache.org/jira/browse/HDFS-7101) | Potential null dereference in DFSck#doWork() | Minor | . | Ted Yu | skrho | +| [YARN-8120](https://issues.apache.org/jira/browse/YARN-8120) | JVM can crash with SIGSEGV when exiting due to custom leveldb logger | Major | nodemanager, resourcemanager | Jason Lowe | Jason Lowe | +| [YARN-8147](https://issues.apache.org/jira/browse/YARN-8147) | TestClientRMService#testGetApplications sporadically fails | Major | test | Jason Lowe | Jason Lowe | +| [HADOOP-14970](https://issues.apache.org/jira/browse/HADOOP-14970) | MiniHadoopClusterManager doesn't respect lack of format option | Minor | . | Erik Krogen | Erik Krogen | +| [HDFS-12828](https://issues.apache.org/jira/browse/HDFS-12828) | OIV ReverseXML Processor fails with escaped characters | Critical | hdfs | Erik Krogen | Erik Krogen | +| [HADOOP-15180](https://issues.apache.org/jira/browse/HADOOP-15180) | branch-2 : daemon processes' sysout overwrites 'ulimit -a' in daemon's out file | Minor | scripts | Ranith Sardar | Ranith Sardar | +| [HADOOP-15396](https://issues.apache.org/jira/browse/HADOOP-15396) | Some java source files are executable | Minor | . | Akira Ajisaka | Shashikant Banerjee | +| [YARN-7786](https://issues.apache.org/jira/browse/YARN-7786) | NullPointerException while launching ApplicationMaster | Major | . | lujie | lujie | +| [HDFS-10183](https://issues.apache.org/jira/browse/HDFS-10183) | Prevent race condition during class initialization | Minor | fs | Pavel Avgustinov | Pavel Avgustinov | +| [HDFS-13408](https://issues.apache.org/jira/browse/HDFS-13408) | MiniDFSCluster to support being built on randomized base directory | Major | test | Xiao Liang | Xiao Liang | +| [HADOOP-15390](https://issues.apache.org/jira/browse/HADOOP-15390) | Yarn RM logs flooded by DelegationTokenRenewer trying to renew KMS tokens | Critical | . | Xiao Chen | Xiao Chen | +| [HDFS-13336](https://issues.apache.org/jira/browse/HDFS-13336) | Test cases of TestWriteToReplica failed in windows | Major | . | Xiao Liang | Xiao Liang | +| [HADOOP-15385](https://issues.apache.org/jira/browse/HADOOP-15385) | Many tests are failing in hadoop-distcp project in branch-2 | Critical | tools/distcp | Rushabh S Shah | Jason Lowe | +| [HDFS-13509](https://issues.apache.org/jira/browse/HDFS-13509) | Bug fix for breakHardlinks() of ReplicaInfo/LocalReplica, and fix TestFileAppend failures on Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8232](https://issues.apache.org/jira/browse/YARN-8232) | RMContainer lost queue name when RM HA happens | Major | resourcemanager | Hu Ziqian | Hu Ziqian | +| [HDFS-13537](https://issues.apache.org/jira/browse/HDFS-13537) | TestHdfsHelper does not generate jceks path properly for relative path in Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-7003](https://issues.apache.org/jira/browse/YARN-7003) | DRAINING state of queues is not recovered after RM restart | Major | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-8244](https://issues.apache.org/jira/browse/YARN-8244) | TestContainerSchedulerQueuing.testStartMultipleContainers failed | Major | . 
| Miklos Szegedi | Jim Brennan | +| [HDFS-13581](https://issues.apache.org/jira/browse/HDFS-13581) | DN UI logs link is broken when https is enabled | Minor | datanode | Namit Maheshwari | Shashikant Banerjee | +| [HDFS-13586](https://issues.apache.org/jira/browse/HDFS-13586) | Fsync fails on directories on Windows | Critical | datanode, hdfs | Lukas Majercak | Lukas Majercak | +| [HDFS-13590](https://issues.apache.org/jira/browse/HDFS-13590) | Backport HDFS-12378 to branch-2 | Major | datanode, hdfs, test | Lukas Majercak | Lukas Majercak | +| [HADOOP-15450](https://issues.apache.org/jira/browse/HADOOP-15450) | Avoid fsync storm triggered by DiskChecker and handle disk full situation | Blocker | . | Kihwal Lee | Arpit Agarwal | +| [HDFS-13588](https://issues.apache.org/jira/browse/HDFS-13588) | Fix TestFsDatasetImpl test failures on Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8344](https://issues.apache.org/jira/browse/YARN-8344) | Missing nm.stop() in TestNodeManagerResync to fix testKillContainersOnResync | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8327](https://issues.apache.org/jira/browse/YARN-8327) | Fix TestAggregatedLogFormat#testReadAcontainerLogs1 on Windows | Major | log-aggregation | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8346](https://issues.apache.org/jira/browse/YARN-8346) | Upgrading to 3.1 kills running containers with error "Opportunistic container queue is full" | Blocker | . | Rohith Sharma K S | Jason Lowe | +| [HDFS-13618](https://issues.apache.org/jira/browse/HDFS-13618) | Fix TestDataNodeFaultInjector test failures on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HADOOP-15473](https://issues.apache.org/jira/browse/HADOOP-15473) | Configure serialFilter in KeyProvider to avoid UnrecoverableKeyException caused by JDK-8189997 | Critical | kms | Gabor Bota | Gabor Bota | +| [MAPREDUCE-7103](https://issues.apache.org/jira/browse/MAPREDUCE-7103) | Fix TestHistoryViewerPrinter on windows due to a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8359](https://issues.apache.org/jira/browse/YARN-8359) | Exclude containermanager.linux test classes on Windows | Major | . | Giovanni Matteo Fumarola | Jason Lowe | +| [HDFS-13664](https://issues.apache.org/jira/browse/HDFS-13664) | Refactor ConfiguredFailoverProxyProvider to make inheritance easier | Minor | hdfs-client | Chao Sun | Chao Sun | +| [HDFS-13667](https://issues.apache.org/jira/browse/HDFS-13667) | Typo: Marking all "datandoes" as stale | Trivial | namenode | Wei-Chiu Chuang | Nanda kumar | +| [YARN-8405](https://issues.apache.org/jira/browse/YARN-8405) | RM zk-state-store.parent-path ACLs has been changed since HADOOP-14773 | Major | . 
| Rohith Sharma K S | Íñigo Goiri | +| [MAPREDUCE-7108](https://issues.apache.org/jira/browse/MAPREDUCE-7108) | TestFileOutputCommitter fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13675](https://issues.apache.org/jira/browse/HDFS-13675) | Speed up TestDFSAdminWithHA | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [HDFS-13673](https://issues.apache.org/jira/browse/HDFS-13673) | TestNameNodeMetrics fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13676](https://issues.apache.org/jira/browse/HDFS-13676) | TestEditLogRace fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HADOOP-15523](https://issues.apache.org/jira/browse/HADOOP-15523) | Shell command timeout given is in seconds whereas it is taken as millisec while scheduling | Major | . | Bilwa S T | Bilwa S T | +| [YARN-8444](https://issues.apache.org/jira/browse/YARN-8444) | NodeResourceMonitor crashes on bad swapFree value | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8443](https://issues.apache.org/jira/browse/YARN-8443) | Total #VCores in cluster metrics is wrong when CapacityScheduler reserved some containers | Major | webapp | Tao Yang | Tao Yang | +| [YARN-8457](https://issues.apache.org/jira/browse/YARN-8457) | Compilation is broken with -Pyarn-ui | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-8401](https://issues.apache.org/jira/browse/YARN-8401) | [UI2] new ui is not accessible with out internet connection | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8451](https://issues.apache.org/jira/browse/YARN-8451) | Multiple NM heartbeat thread created when a slow NM resync with RM | Major | nodemanager | Botong Huang | Botong Huang | +| [HADOOP-15548](https://issues.apache.org/jira/browse/HADOOP-15548) | Randomize local dirs | Minor | . | Jim Brennan | Jim Brennan | +| [YARN-8473](https://issues.apache.org/jira/browse/YARN-8473) | Containers being launched as app tears down can leave containers in NEW state | Major | nodemanager | Jason Lowe | Jason Lowe | +| [HDFS-13729](https://issues.apache.org/jira/browse/HDFS-13729) | Fix broken links to RBF documentation | Minor | documentation | jwhitter | Gabor Bota | +| [YARN-8518](https://issues.apache.org/jira/browse/YARN-8518) | test-container-executor test\_is\_empty() is broken | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8515](https://issues.apache.org/jira/browse/YARN-8515) | container-executor can crash with SIGPIPE after nodemanager restart | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8421](https://issues.apache.org/jira/browse/YARN-8421) | when moving app, activeUsers is increased, even though app does not have outstanding request | Major | . | kyungwan nam | | +| [HADOOP-15614](https://issues.apache.org/jira/browse/HADOOP-15614) | TestGroupsCaching.testExceptionOnBackgroundRefreshHandled reliably fails | Major | . 
| Kihwal Lee | Weiwei Yang | +| [YARN-8577](https://issues.apache.org/jira/browse/YARN-8577) | Fix the broken anchor in SLS site-doc | Minor | documentation | Weiwei Yang | Weiwei Yang | +| [YARN-4606](https://issues.apache.org/jira/browse/YARN-4606) | CapacityScheduler: applications could get starved because computation of #activeUsers considers pending apps | Critical | capacity scheduler, capacityscheduler | Karam Singh | Manikandan R | +| [HADOOP-15637](https://issues.apache.org/jira/browse/HADOOP-15637) | LocalFs#listLocatedStatus does not filter out hidden .crc files | Minor | fs | Erik Krogen | Erik Krogen | +| [YARN-8331](https://issues.apache.org/jira/browse/YARN-8331) | Race condition in NM container launched after done | Major | . | Yang Wang | Pradeep Ambati | +| [HDFS-13758](https://issues.apache.org/jira/browse/HDFS-13758) | DatanodeManager should throw exception if it has BlockRecoveryCommand but the block is not under construction | Major | namenode | Wei-Chiu Chuang | chencan | +| [YARN-8612](https://issues.apache.org/jira/browse/YARN-8612) | Fix NM Collector Service Port issue in YarnConfiguration | Major | ATSv2 | Prabha Manepalli | Prabha Manepalli | +| [HADOOP-15674](https://issues.apache.org/jira/browse/HADOOP-15674) | Test failure TestSSLHttpServer.testExcludedCiphers with TLS\_ECDHE\_RSA\_WITH\_AES\_128\_CBC\_SHA256 cipher suite | Major | common | Gabor Bota | Szilard Nemeth | +| [YARN-8640](https://issues.apache.org/jira/browse/YARN-8640) | Restore previous state in container-executor after failure | Major | . | Jim Brennan | Jim Brennan | +| [HADOOP-14314](https://issues.apache.org/jira/browse/HADOOP-14314) | The OpenSolaris taxonomy link is dead in InterfaceClassification.md | Major | documentation | Daniel Templeton | Rui Gao | +| [YARN-8649](https://issues.apache.org/jira/browse/YARN-8649) | NPE in localizer hearbeat processing if a container is killed while localizing | Major | . | lujie | lujie | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13337](https://issues.apache.org/jira/browse/HDFS-13337) | Backport HDFS-4275 to branch-2.9 | Minor | . | Íñigo Goiri | Xiao Liang | +| [HDFS-13503](https://issues.apache.org/jira/browse/HDFS-13503) | Fix TestFsck test failures on Windows | Major | hdfs | Xiao Liang | Xiao Liang | +| [HDFS-13542](https://issues.apache.org/jira/browse/HDFS-13542) | TestBlockManager#testNeededReplicationWhileAppending fails due to improper cluster shutdown in TestBlockManager#testBlockManagerMachinesArray on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13551](https://issues.apache.org/jira/browse/HDFS-13551) | TestMiniDFSCluster#testClusterSetStorageCapacity does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-11700](https://issues.apache.org/jira/browse/HDFS-11700) | TestHDFSServerPorts#testBackupNodePorts doesn't pass on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13548](https://issues.apache.org/jira/browse/HDFS-13548) | TestResolveHdfsSymlink#testFcResolveAfs fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13567](https://issues.apache.org/jira/browse/HDFS-13567) | TestNameNodeMetrics#testGenerateEDEKTime,TestNameNodeMetrics#testResourceCheck should use a different cluster basedir | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13557](https://issues.apache.org/jira/browse/HDFS-13557) | TestDFSAdmin#testListOpenFiles fails on Windows | Minor | . 
| Anbang Hu | Anbang Hu | +| [HDFS-13550](https://issues.apache.org/jira/browse/HDFS-13550) | TestDebugAdmin#testComputeMetaCommand fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13559](https://issues.apache.org/jira/browse/HDFS-13559) | TestBlockScanner does not close TestContext properly | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13570](https://issues.apache.org/jira/browse/HDFS-13570) | TestQuotaByStorageType,TestQuota,TestDFSOutputStream fail on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13558](https://issues.apache.org/jira/browse/HDFS-13558) | TestDatanodeHttpXFrame does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13554](https://issues.apache.org/jira/browse/HDFS-13554) | TestDatanodeRegistration#testForcedRegistration does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13556](https://issues.apache.org/jira/browse/HDFS-13556) | TestNestedEncryptionZones does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13560](https://issues.apache.org/jira/browse/HDFS-13560) | Insufficient system resources exist to complete the requested service for some tests on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13592](https://issues.apache.org/jira/browse/HDFS-13592) | TestNameNodePrunesMissingStorages#testNameNodePrunesUnreportedStorages does not shut down cluster properly | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13593](https://issues.apache.org/jira/browse/HDFS-13593) | TestBlockReaderLocalLegacy#testBlockReaderLocalLegacyWithAppend fails on Windows | Minor | test | Anbang Hu | Anbang Hu | +| [HDFS-13587](https://issues.apache.org/jira/browse/HDFS-13587) | TestQuorumJournalManager fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13620](https://issues.apache.org/jira/browse/HDFS-13620) | Randomize the test directory path for TestHDFSFileSystemContract | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13591](https://issues.apache.org/jira/browse/HDFS-13591) | TestDFSShell#testSetrepLow fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13632](https://issues.apache.org/jira/browse/HDFS-13632) | Randomize baseDir for MiniJournalCluster in MiniQJMHACluster for TestDFSAdminWithHA | Minor | . | Anbang Hu | Anbang Hu | +| [MAPREDUCE-7102](https://issues.apache.org/jira/browse/MAPREDUCE-7102) | Fix TestJavaSerialization for Windows due a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-13652](https://issues.apache.org/jira/browse/HDFS-13652) | Randomize baseDir for MiniDFSCluster in TestBlockScanner | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8370](https://issues.apache.org/jira/browse/YARN-8370) | Some Node Manager tests fail on Windows due to improper path/file separator | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8422](https://issues.apache.org/jira/browse/YARN-8422) | TestAMSimulator failing with NPE | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15532](https://issues.apache.org/jira/browse/HADOOP-15532) | TestBasicDiskValidator fails with NoSuchFileException | Minor | . | Íñigo Goiri | Giovanni Matteo Fumarola | +| [HDFS-13563](https://issues.apache.org/jira/browse/HDFS-13563) | TestDFSAdminWithHA times out on Windows | Minor | . 
| Anbang Hu | Lukas Majercak | +| [HDFS-13681](https://issues.apache.org/jira/browse/HDFS-13681) | Fix TestStartup.testNNFailToStartOnReadOnlyNNDir test failure on Windows | Major | test | Xiao Liang | Xiao Liang | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13353](https://issues.apache.org/jira/browse/HDFS-13353) | RBF: TestRouterWebHDFSContractCreate failed | Major | test | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8110](https://issues.apache.org/jira/browse/YARN-8110) | AMRMProxy recover should catch for all throwable to avoid premature exit | Major | . | Botong Huang | Botong Huang | +| [HDFS-13402](https://issues.apache.org/jira/browse/HDFS-13402) | RBF: Fix java doc for StateStoreFileSystemImpl | Minor | hdfs | Yiran Wu | Yiran Wu | +| [HDFS-13410](https://issues.apache.org/jira/browse/HDFS-13410) | RBF: Support federation with no subclusters | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13384](https://issues.apache.org/jira/browse/HDFS-13384) | RBF: Improve timeout RPC call mechanism | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13045](https://issues.apache.org/jira/browse/HDFS-13045) | RBF: Improve error message returned from subcluster | Minor | . | Wei Yan | Íñigo Goiri | +| [HDFS-13428](https://issues.apache.org/jira/browse/HDFS-13428) | RBF: Remove LinkedList From StateStoreFileImpl.java | Trivial | federation | BELUGA BEHR | BELUGA BEHR | +| [HDFS-13386](https://issues.apache.org/jira/browse/HDFS-13386) | RBF: Wrong date information in list file(-ls) result | Minor | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HADOOP-14999](https://issues.apache.org/jira/browse/HADOOP-14999) | AliyunOSS: provide one asynchronous multi-part based uploading mechanism | Major | fs/oss | Genmao Yu | Genmao Yu | +| [HDFS-13435](https://issues.apache.org/jira/browse/HDFS-13435) | RBF: Improve the error loggings for printing the stack trace | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-7189](https://issues.apache.org/jira/browse/YARN-7189) | Container-executor doesn't remove Docker containers that error out early | Major | yarn | Eric Badger | Eric Badger | +| [HDFS-13466](https://issues.apache.org/jira/browse/HDFS-13466) | RBF: Add more router-related information to the UI | Minor | . | Wei Yan | Wei Yan | +| [HDFS-13453](https://issues.apache.org/jira/browse/HDFS-13453) | RBF: getMountPointDates should fetch latest subdir time/date when parent dir is not present but /parent/child dirs are present in mount table | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HDFS-13478](https://issues.apache.org/jira/browse/HDFS-13478) | RBF: Disabled Nameservice store API | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13490](https://issues.apache.org/jira/browse/HDFS-13490) | RBF: Fix setSafeMode in the Router | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13484](https://issues.apache.org/jira/browse/HDFS-13484) | RBF: Disable Nameservices from the federation | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13326](https://issues.apache.org/jira/browse/HDFS-13326) | RBF: Improve the interfaces to modify and view mount tables | Minor | . | Wei Yan | Gang Li | +| [HDFS-13499](https://issues.apache.org/jira/browse/HDFS-13499) | RBF: Show disabled name services in the UI | Minor | . 
| Íñigo Goiri | Íñigo Goiri | +| [HDFS-13508](https://issues.apache.org/jira/browse/HDFS-13508) | RBF: Normalize paths (automatically) when adding, updating, removing or listing mount table entries | Minor | . | Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [HDFS-13434](https://issues.apache.org/jira/browse/HDFS-13434) | RBF: Fix dead links in RBF document | Major | documentation | Akira Ajisaka | Chetna Chaudhari | +| [HDFS-13488](https://issues.apache.org/jira/browse/HDFS-13488) | RBF: Reject requests when a Router is overloaded | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13525](https://issues.apache.org/jira/browse/HDFS-13525) | RBF: Add unit test TestStateStoreDisabledNameservice | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-8253](https://issues.apache.org/jira/browse/YARN-8253) | HTTPS Ats v2 api call fails with "bad HTTP parsed" | Critical | ATSv2 | Yesha Vora | Charan Hebri | +| [HADOOP-15454](https://issues.apache.org/jira/browse/HADOOP-15454) | TestRollingFileSystemSinkWithLocal fails on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HADOOP-15498](https://issues.apache.org/jira/browse/HADOOP-15498) | TestHadoopArchiveLogs (#testGenerateScript, #testPrepareWorkingDir) fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13637](https://issues.apache.org/jira/browse/HDFS-13637) | RBF: Router fails when threadIndex (in ConnectionPool) wraps around Integer.MIN\_VALUE | Critical | federation | CR Hota | CR Hota | +| [YARN-4781](https://issues.apache.org/jira/browse/YARN-4781) | Support intra-queue preemption for fairness ordering policy. | Major | scheduler | Wangda Tan | Eric Payne | +| [HDFS-13281](https://issues.apache.org/jira/browse/HDFS-13281) | Namenode#createFile should be /.reserved/raw/ aware. | Critical | encryption | Rushabh S Shah | Rushabh S Shah | +| [YARN-4677](https://issues.apache.org/jira/browse/YARN-4677) | RMNodeResourceUpdateEvent update from scheduler can lead to race condition | Major | graceful, resourcemanager, scheduler | Brook Zhou | Wilfred Spiegelenburg | +| [HADOOP-15529](https://issues.apache.org/jira/browse/HADOOP-15529) | ContainerLaunch#testInvalidEnvVariableSubstitutionType is not supported in Windows | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15458](https://issues.apache.org/jira/browse/HADOOP-15458) | TestLocalFileSystem#testFSOutputStreamBuilder fails on Windows | Minor | test | Xiao Liang | Xiao Liang | +| [HDFS-13475](https://issues.apache.org/jira/browse/HDFS-13475) | RBF: Admin cannot enforce Router enter SafeMode | Major | . | Wei Yan | Chao Sun | +| [HDFS-13733](https://issues.apache.org/jira/browse/HDFS-13733) | RBF: Add Web UI configurations and descriptions to RBF document | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13743](https://issues.apache.org/jira/browse/HDFS-13743) | RBF: Router throws NullPointerException due to the invalid initialization of MountTableResolver | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13750](https://issues.apache.org/jira/browse/HDFS-13750) | RBF: Router ID in RouterRpcClient is always null | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13848](https://issues.apache.org/jira/browse/HDFS-13848) | Refactor NameNode failover proxy providers | Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | +| [HADOOP-15699](https://issues.apache.org/jira/browse/HADOOP-15699) | Fix some of testContainerManager failures in Windows | Major | . 
| Botong Huang | Botong Huang | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.2/RELEASENOTES.2.9.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.2/RELEASENOTES.2.9.2.md new file mode 100644 index 00000000000..439933e3088 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/2.9.2/RELEASENOTES.2.9.2.md @@ -0,0 +1,21 @@ + + +# Apache Hadoop 2.9.2 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/CHANGES.3.0.0-alpha1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/CHANGELOG.3.0.0-alpha1.md similarity index 99% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/CHANGES.3.0.0-alpha1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/CHANGELOG.3.0.0-alpha1.md index 325ff617381..92a3f1c7772 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/CHANGES.3.0.0-alpha1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/CHANGELOG.3.0.0-alpha1.md @@ -54,7 +54,7 @@ | [HADOOP-11627](https://issues.apache.org/jira/browse/HADOOP-11627) | Remove io.native.lib.available | Major | native | Akira Ajisaka | Brahma Reddy Battula | | [HDFS-7281](https://issues.apache.org/jira/browse/HDFS-7281) | Missing block is marked as corrupted block | Major | namenode | Ming Ma | Ming Ma | | [HDFS-8226](https://issues.apache.org/jira/browse/HDFS-8226) | Non-HA rollback compatibility broken | Blocker | . | J.Andreina | J.Andreina | -| [MAPREDUCE-2632](https://issues.apache.org/jira/browse/MAPREDUCE-2632) | Avoid calling the partitioner when the numReduceTasks is 1. | Major | . | Ravi Teja Ch N V | Sunil G | +| [MAPREDUCE-2632](https://issues.apache.org/jira/browse/MAPREDUCE-2632) | Avoid calling the partitioner when the numReduceTasks is 1. | Major | . | Ravi Teja Ch N V | Sunil Govindan | | [HDFS-8241](https://issues.apache.org/jira/browse/HDFS-8241) | Remove unused NameNode startup option -finalize | Minor | namenode | Brahma Reddy Battula | Brahma Reddy Battula | | [HDFS-8332](https://issues.apache.org/jira/browse/HDFS-8332) | DFS client API calls should check filesystem closed | Major | . | Rakesh R | Rakesh R | | [HADOOP-11698](https://issues.apache.org/jira/browse/HADOOP-11698) | Remove DistCpV1 and Logalyzer | Major | tools/distcp | Allen Wittenauer | Brahma Reddy Battula | @@ -67,6 +67,7 @@ | [HADOOP-11347](https://issues.apache.org/jira/browse/HADOOP-11347) | RawLocalFileSystem#mkdir and create should honor umask | Major | fs | Chris Nauroth | Varun Saxena | | [HDFS-8591](https://issues.apache.org/jira/browse/HDFS-8591) | Remove support for deprecated configuration key dfs.namenode.decommission.nodes.per.interval | Minor | namenode | Andrew Wang | Andrew Wang | | [HDFS-6564](https://issues.apache.org/jira/browse/HDFS-6564) | Use slf4j instead of common-logging in hdfs-client | Major | build | Haohui Mai | Rakesh R | +| [HDFS-6440](https://issues.apache.org/jira/browse/HDFS-6440) | Support more than 2 NameNodes | Major | auto-failover, ha, namenode | Jesse Yates | Jesse Yates | | [MAPREDUCE-6427](https://issues.apache.org/jira/browse/MAPREDUCE-6427) | Fix typo in JobHistoryEventHandler | Minor | . 
| Brahma Reddy Battula | Ray Chiang | | [HADOOP-12209](https://issues.apache.org/jira/browse/HADOOP-12209) | Comparable type should be in FileStatus | Minor | fs | Yong Zhang | Yong Zhang | | [HADOOP-12269](https://issues.apache.org/jira/browse/HADOOP-12269) | Update aws-sdk dependency to 1.10.6; move to aws-sdk-s3 | Major | fs/s3 | Thomas Demoor | Thomas Demoor | @@ -81,6 +82,7 @@ | [HADOOP-12495](https://issues.apache.org/jira/browse/HADOOP-12495) | Fix posix\_spawn error on OS X | Major | util | Allen Wittenauer | Allen Wittenauer | | [HDFS-9070](https://issues.apache.org/jira/browse/HDFS-9070) | Allow fsck display pending replica location information for being-written blocks | Major | . | Rui Gao | Rui Gao | | [HDFS-9278](https://issues.apache.org/jira/browse/HDFS-9278) | Fix preferredBlockSize typo in OIV XML output | Trivial | tools | Nicole Pazmany | Nicole Pazmany | +| [HADOOP-12436](https://issues.apache.org/jira/browse/HADOOP-12436) | GlobPattern regex library has performance issues with wildcard characters | Major | fs | Matthew Paduano | Matthew Paduano | | [HADOOP-10787](https://issues.apache.org/jira/browse/HADOOP-10787) | Rename/remove non-HADOOP\_\*, etc from the shell scripts | Blocker | scripts | Allen Wittenauer | Allen Wittenauer | | [HADOOP-12294](https://issues.apache.org/jira/browse/HADOOP-12294) | Throw an Exception when fs.permissions.umask-mode is misconfigured | Major | conf | Chang Li | Chang Li | | [HADOOP-10465](https://issues.apache.org/jira/browse/HADOOP-10465) | Fix use of generics within SortedMapWritable | Minor | . | Bertrand Dechoux | Bertrand Dechoux | @@ -125,7 +127,7 @@ | [HADOOP-13534](https://issues.apache.org/jira/browse/HADOOP-13534) | Remove unused TrashPolicy#getInstance and initialize code | Minor | . | Zhe Zhang | Yiqun Lin | | [YARN-5567](https://issues.apache.org/jira/browse/YARN-5567) | Fix script exit code checking in NodeHealthScriptRunner#reportHealthStatus | Major | nodemanager | Yufei Gu | Yufei Gu | | [HDFS-2538](https://issues.apache.org/jira/browse/HDFS-2538) | option to disable fsck dots | Minor | namenode | Allen Wittenauer | Mohammad Kamrul Islam | -| [YARN-5049](https://issues.apache.org/jira/browse/YARN-5049) | Extend NMStateStore to save queued container information | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | +| [YARN-5049](https://issues.apache.org/jira/browse/YARN-5049) | Extend NMStateStore to save queued container information | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Arun Suresh | ### IMPORTANT ISSUES: @@ -162,7 +164,6 @@ | [HADOOP-7947](https://issues.apache.org/jira/browse/HADOOP-7947) | Validate XMLs if a relevant tool is available, when using scripts | Major | scripts | Harsh J | Kengo Seki | | [HDFS-8487](https://issues.apache.org/jira/browse/HDFS-8487) | Generalize BlockInfo in preparation of merging HDFS-7285 into trunk and branch-2 | Major | namenode | Zhe Zhang | Zhe Zhang | | [HDFS-8608](https://issues.apache.org/jira/browse/HDFS-8608) | Merge HDFS-7912 to trunk and branch-2 (track BlockInfo instead of Block in UnderReplicatedBlocks and PendingReplicationBlocks) | Major | . 
| Zhe Zhang | Zhe Zhang | -| [HDFS-6440](https://issues.apache.org/jira/browse/HDFS-6440) | Support more than 2 NameNodes | Major | auto-failover, ha, namenode | Jesse Yates | Jesse Yates | | [HADOOP-5732](https://issues.apache.org/jira/browse/HADOOP-5732) | Add SFTP FileSystem | Minor | fs | Íñigo Goiri | ramtin | | [HDFS-8622](https://issues.apache.org/jira/browse/HDFS-8622) | Implement GETCONTENTSUMMARY operation for WebImageViewer | Major | . | Jagadesh Kiran N | Jagadesh Kiran N | | [HDFS-8155](https://issues.apache.org/jira/browse/HDFS-8155) | Support OAuth2 in WebHDFS | Major | webhdfs | Jakob Homan | Jakob Homan | @@ -175,13 +176,13 @@ | [HDFS-9057](https://issues.apache.org/jira/browse/HDFS-9057) | allow/disallow snapshots via webhdfs | Major | webhdfs | Allen Wittenauer | Brahma Reddy Battula | | [YARN-4349](https://issues.apache.org/jira/browse/YARN-4349) | Support CallerContext in YARN | Major | . | Wangda Tan | Wangda Tan | | [HADOOP-12366](https://issues.apache.org/jira/browse/HADOOP-12366) | expose calculated paths | Major | . | Allen Wittenauer | Allen Wittenauer | -| [HADOOP-12321](https://issues.apache.org/jira/browse/HADOOP-12321) | Make JvmPauseMonitor an AbstractService | Major | . | Steve Loughran | Sunil G | +| [HADOOP-12321](https://issues.apache.org/jira/browse/HADOOP-12321) | Make JvmPauseMonitor an AbstractService | Major | . | Steve Loughran | Sunil Govindan | | [YARN-3623](https://issues.apache.org/jira/browse/YARN-3623) | We should have a config to indicate the Timeline Service version | Major | timelineserver | Zhijie Shen | Xuan Gong | | [HADOOP-12657](https://issues.apache.org/jira/browse/HADOOP-12657) | Add a option to skip newline on empty files with getMerge -nl | Minor | . | Jan Filipiak | Kanaka Kumar Avvaru | | [YARN-3458](https://issues.apache.org/jira/browse/YARN-3458) | CPU resource monitoring in Windows | Minor | nodemanager | Íñigo Goiri | Íñigo Goiri | | [HADOOP-12691](https://issues.apache.org/jira/browse/HADOOP-12691) | Add CSRF Filter for REST APIs to Hadoop Common | Major | security | Larry McCay | Larry McCay | | [HADOOP-12635](https://issues.apache.org/jira/browse/HADOOP-12635) | Adding Append API support for WASB | Major | fs/azure | Dushyanth | Dushyanth | -| [HDFS-9525](https://issues.apache.org/jira/browse/HDFS-9525) | hadoop utilities need to support provided delegation tokens | Blocker | security | Allen Wittenauer | HeeSoo Kim | +| [HDFS-9525](https://issues.apache.org/jira/browse/HDFS-9525) | hadoop utilities need to support provided delegation tokens | Blocker | security | Allen Wittenauer | Heesoo Kim | | [HADOOP-12702](https://issues.apache.org/jira/browse/HADOOP-12702) | Add an HDFS metrics sink | Major | metrics | Daniel Templeton | Daniel Templeton | | [HADOOP-12426](https://issues.apache.org/jira/browse/HADOOP-12426) | Add Entry point for Kerberos health check | Minor | security | Steve Loughran | Steve Loughran | | [HDFS-9244](https://issues.apache.org/jira/browse/HDFS-9244) | Support nested encryption zones | Major | encryption | Xiaoyu Yao | Zhe Zhang | @@ -208,6 +209,7 @@ | [HADOOP-13396](https://issues.apache.org/jira/browse/HADOOP-13396) | Allow pluggable audit loggers in KMS | Major | kms | Xiao Chen | Xiao Chen | | [HDFS-10584](https://issues.apache.org/jira/browse/HDFS-10584) | Allow long-running Mover tool to login with keytab | Major | balancer & mover, security | Rakesh R | Rakesh R | | [MAPREDUCE-6304](https://issues.apache.org/jira/browse/MAPREDUCE-6304) | Specifying node labels when submitting MR jobs | Major 
| job submission | Jian Fang | Naganarasimha G R | +| [YARN-1963](https://issues.apache.org/jira/browse/YARN-1963) | Support priorities across applications within the same queue | Major | api, resourcemanager | Arun C Murthy | Sunil Govindan | ### IMPROVEMENTS: @@ -488,7 +490,7 @@ | [MAPREDUCE-6408](https://issues.apache.org/jira/browse/MAPREDUCE-6408) | Queue name and user name should be printed on the job page | Major | applicationmaster | Siqi Li | Siqi Li | | [HDFS-8639](https://issues.apache.org/jira/browse/HDFS-8639) | Option for HTTP port of NameNode by MiniDFSClusterManager | Minor | test | Kai Sasaki | Kai Sasaki | | [YARN-3360](https://issues.apache.org/jira/browse/YARN-3360) | Add JMX metrics to TimelineDataManager | Major | timelineserver | Jason Lowe | Jason Lowe | -| [HADOOP-12049](https://issues.apache.org/jira/browse/HADOOP-12049) | Control http authentication cookie persistence via configuration | Major | security | Benoy Antony | Huizhi Lu | +| [HADOOP-12049](https://issues.apache.org/jira/browse/HADOOP-12049) | Control http authentication cookie persistence via configuration | Major | security | Benoy Antony | H Lu | | [HDFS-8462](https://issues.apache.org/jira/browse/HDFS-8462) | Implement GETXATTRS and LISTXATTRS operations for WebImageViewer | Major | . | Akira Ajisaka | Jagadesh Kiran N | | [HDFS-8640](https://issues.apache.org/jira/browse/HDFS-8640) | Make reserved RBW space visible through JMX | Major | . | Kanaka Kumar Avvaru | Kanaka Kumar Avvaru | | [HDFS-8546](https://issues.apache.org/jira/browse/HDFS-8546) | Use try with resources in DataStorage and Storage | Minor | datanode | Andrew Wang | Andrew Wang | @@ -559,7 +561,7 @@ | [HDFS-6407](https://issues.apache.org/jira/browse/HDFS-6407) | Add sorting and pagination in the datanode tab of the NN Web UI | Critical | namenode | Nathan Roberts | Haohui Mai | | [HDFS-8880](https://issues.apache.org/jira/browse/HDFS-8880) | NameNode metrics logging | Major | namenode | Arpit Agarwal | Arpit Agarwal | | [YARN-4057](https://issues.apache.org/jira/browse/YARN-4057) | If ContainersMonitor is not enabled, only print related log info one time | Minor | nodemanager | Jun Gong | Jun Gong | -| [HADOOP-12050](https://issues.apache.org/jira/browse/HADOOP-12050) | Enable MaxInactiveInterval for hadoop http auth token | Major | security | Benoy Antony | Huizhi Lu | +| [HADOOP-12050](https://issues.apache.org/jira/browse/HADOOP-12050) | Enable MaxInactiveInterval for hadoop http auth token | Major | security | Benoy Antony | H Lu | | [HDFS-8435](https://issues.apache.org/jira/browse/HDFS-8435) | Support CreateFlag in WebHdfs | Major | webhdfs | Vinoth Sathappan | Jakob Homan | | [HDFS-8911](https://issues.apache.org/jira/browse/HDFS-8911) | NameNode Metric : Add Editlog counters as a JMX metric | Major | namenode | Anu Engineer | Anu Engineer | | [HDFS-8917](https://issues.apache.org/jira/browse/HDFS-8917) | Cleanup BlockInfoUnderConstruction from comments and tests | Minor | namenode | Zhe Zhang | Zhe Zhang | @@ -568,7 +570,6 @@ | [HDFS-8924](https://issues.apache.org/jira/browse/HDFS-8924) | Add pluggable interface for reading replicas in DFSClient | Major | hdfs-client | Colin P. McCabe | Colin P. 
McCabe | | [HDFS-8928](https://issues.apache.org/jira/browse/HDFS-8928) | Improvements for BlockUnderConstructionFeature: ReplicaUnderConstruction as a separate class and replicas as an array | Minor | namenode | Zhe Zhang | Jing Zhao | | [HDFS-2390](https://issues.apache.org/jira/browse/HDFS-2390) | dfsadmin -setBalancerBandwidth doesnot validate -ve value | Minor | balancer & mover | Rajit Saha | Gautam Gopalakrishnan | -| [HDFS-8865](https://issues.apache.org/jira/browse/HDFS-8865) | Improve quota initialization performance | Major | . | Kihwal Lee | Kihwal Lee | | [HDFS-8983](https://issues.apache.org/jira/browse/HDFS-8983) | NameNode support for protected directories | Major | namenode | Arpit Agarwal | Arpit Agarwal | | [HDFS-8946](https://issues.apache.org/jira/browse/HDFS-8946) | Improve choosing datanode storage for block placement | Major | namenode | Yi Liu | Yi Liu | | [HDFS-8965](https://issues.apache.org/jira/browse/HDFS-8965) | Harden edit log reading code against out of memory errors | Major | . | Colin P. McCabe | Colin P. McCabe | @@ -643,7 +644,6 @@ | [MAPREDUCE-6489](https://issues.apache.org/jira/browse/MAPREDUCE-6489) | Fail fast rogue tasks that write too much to local disk | Major | task | Maysam Yabandeh | Maysam Yabandeh | | [HDFS-8647](https://issues.apache.org/jira/browse/HDFS-8647) | Abstract BlockManager's rack policy into BlockPlacementPolicy | Major | . | Ming Ma | Brahma Reddy Battula | | [HDFS-7087](https://issues.apache.org/jira/browse/HDFS-7087) | Ability to list /.reserved | Major | . | Andrew Wang | Xiao Chen | -| [HADOOP-12436](https://issues.apache.org/jira/browse/HADOOP-12436) | GlobPattern regex library has performance issues with wildcard characters | Major | fs | Matthew Paduano | Matthew Paduano | | [HDFS-9280](https://issues.apache.org/jira/browse/HDFS-9280) | Document NFS gateway export point parameter | Trivial | documentation | Zhe Zhang | Xiao Chen | | [HADOOP-12334](https://issues.apache.org/jira/browse/HADOOP-12334) | Change Mode Of Copy Operation of HBase WAL Archiving to bypass Azure Storage Throttling after retries | Major | tools | Gaurav Kanade | Gaurav Kanade | | [HADOOP-7266](https://issues.apache.org/jira/browse/HADOOP-7266) | Deprecate metrics v1 | Blocker | metrics | Luke Lu | Akira Ajisaka | @@ -702,7 +702,7 @@ | [HDFS-9024](https://issues.apache.org/jira/browse/HDFS-9024) | Deprecate the TotalFiles metric | Major | . | Akira Ajisaka | Akira Ajisaka | | [HDFS-7988](https://issues.apache.org/jira/browse/HDFS-7988) | Replace usage of ExactSizeInputStream with LimitInputStream. | Minor | . | Chris Nauroth | Walter Su | | [HDFS-9314](https://issues.apache.org/jira/browse/HDFS-9314) | Improve BlockPlacementPolicyDefault's picking of excess replicas | Major | . 
| Ming Ma | Xiao Chen | -| [MAPREDUCE-5870](https://issues.apache.org/jira/browse/MAPREDUCE-5870) | Support for passing Job priority through Application Submission Context in Mapreduce Side | Major | client | Sunil G | Sunil G | +| [MAPREDUCE-5870](https://issues.apache.org/jira/browse/MAPREDUCE-5870) | Support for passing Job priority through Application Submission Context in Mapreduce Side | Major | client | Sunil Govindan | Sunil Govindan | | [HDFS-9434](https://issues.apache.org/jira/browse/HDFS-9434) | Recommission a datanode with 500k blocks may pause NN for 30 seconds | Major | namenode | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [YARN-4132](https://issues.apache.org/jira/browse/YARN-4132) | Separate configs for nodemanager to resourcemanager connection timeout and retries | Major | nodemanager | Chang Li | Chang Li | | [HDFS-8512](https://issues.apache.org/jira/browse/HDFS-8512) | WebHDFS : GETFILESTATUS should return LocatedBlock with storage type info | Major | webhdfs | Sumana Sathish | Xiaoyu Yao | @@ -734,7 +734,7 @@ | [HADOOP-12570](https://issues.apache.org/jira/browse/HADOOP-12570) | HDFS Secure Mode Documentation updates | Major | documentation | Arpit Agarwal | Arpit Agarwal | | [YARN-4480](https://issues.apache.org/jira/browse/YARN-4480) | Clean up some inappropriate imports | Major | . | Kai Zheng | Kai Zheng | | [HDFS-9582](https://issues.apache.org/jira/browse/HDFS-9582) | TestLeaseRecoveryStriped file missing Apache License header and not well formatted | Minor | . | Uma Maheswara Rao G | Uma Maheswara Rao G | -| [YARN-4290](https://issues.apache.org/jira/browse/YARN-4290) | Add -showDetails option to YARN Nodes CLI to print all nodes reports information | Major | client | Wangda Tan | Sunil G | +| [YARN-4290](https://issues.apache.org/jira/browse/YARN-4290) | Add -showDetails option to YARN Nodes CLI to print all nodes reports information | Major | client | Wangda Tan | Sunil Govindan | | [YARN-4400](https://issues.apache.org/jira/browse/YARN-4400) | AsyncDispatcher.waitForDrained should be final | Trivial | yarn | Daniel Templeton | Daniel Templeton | | [HADOOP-12566](https://issues.apache.org/jira/browse/HADOOP-12566) | Add NullGroupMapping | Major | . | Daniel Templeton | Daniel Templeton | | [YARN-2934](https://issues.apache.org/jira/browse/YARN-2934) | Improve handling of container's stderr | Critical | . | Gera Shegalov | Naganarasimha G R | @@ -757,7 +757,7 @@ | [YARN-4603](https://issues.apache.org/jira/browse/YARN-4603) | FairScheduler should mention user requested queuename in error message when failed in queue ACL check | Trivial | fairscheduler | Tao Jie | Tao Jie | | [YARN-4492](https://issues.apache.org/jira/browse/YARN-4492) | Add documentation for preemption supported in Capacity scheduler | Minor | capacity scheduler | Naganarasimha G R | Naganarasimha G R | | [HDFS-9674](https://issues.apache.org/jira/browse/HDFS-9674) | The HTrace span for OpWriteBlock should record the maxWriteToDisk time | Major | datanode, tracing | Colin P. McCabe | Colin P. McCabe | -| [YARN-4371](https://issues.apache.org/jira/browse/YARN-4371) | "yarn application -kill" should take multiple application ids | Major | . | Tsuyoshi Ozawa | Sunil G | +| [YARN-4371](https://issues.apache.org/jira/browse/YARN-4371) | "yarn application -kill" should take multiple application ids | Major | . 
| Tsuyoshi Ozawa | Sunil Govindan | | [YARN-4496](https://issues.apache.org/jira/browse/YARN-4496) | Improve HA ResourceManager Failover detection on the client | Major | client, resourcemanager | Arun Suresh | Jian He | | [HDFS-9653](https://issues.apache.org/jira/browse/HDFS-9653) | Expose the number of blocks pending deletion through dfsadmin report command | Major | hdfs-client, tools | Weiwei Yang | Weiwei Yang | | [HADOOP-12731](https://issues.apache.org/jira/browse/HADOOP-12731) | Remove useless boxing/unboxing code | Minor | performance | Kousuke Saruta | Kousuke Saruta | @@ -894,7 +894,7 @@ | [HADOOP-12963](https://issues.apache.org/jira/browse/HADOOP-12963) | Allow using path style addressing for accessing the s3 endpoint | Minor | fs/s3 | Andrew Baptist | Stephen Montgomery | | [HDFS-10280](https://issues.apache.org/jira/browse/HDFS-10280) | Document new dfsadmin command -evictWriters | Minor | documentation | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-10292](https://issues.apache.org/jira/browse/HDFS-10292) | Add block id when client got Unable to close file exception | Minor | . | Brahma Reddy Battula | Brahma Reddy Battula | -| [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | getBlocks occupies FSLock and takes too long to complete | Major | . | He Tianyi | He Tianyi | +| [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | getBlocks occupies FSLock and takes too long to complete | Major | balancer & mover, namenode | He Tianyi | He Tianyi | | [HDFS-10302](https://issues.apache.org/jira/browse/HDFS-10302) | BlockPlacementPolicyDefault should use default replication considerload value | Trivial | . | Yiqun Lin | Yiqun Lin | | [HDFS-10264](https://issues.apache.org/jira/browse/HDFS-10264) | Logging improvements in FSImageFormatProtobuf.Saver | Major | namenode | Konstantin Shvachko | Xiaobing Zhou | | [HADOOP-12985](https://issues.apache.org/jira/browse/HADOOP-12985) | Support MetricsSource interface for DecayRpcScheduler Metrics | Major | . | Xiaoyu Yao | Xiaoyu Yao | @@ -1010,7 +1010,7 @@ | [HDFS-10676](https://issues.apache.org/jira/browse/HDFS-10676) | Add namenode metric to measure time spent in generating EDEKs | Major | namenode | Hanisha Koneru | Hanisha Koneru | | [MAPREDUCE-6746](https://issues.apache.org/jira/browse/MAPREDUCE-6746) | Replace org.apache.commons.io.Charsets with java.nio.charset.StandardCharsets | Minor | . | Vincent Poon | Vincent Poon | | [HDFS-10703](https://issues.apache.org/jira/browse/HDFS-10703) | HA NameNode Web UI should show last checkpoint time | Minor | ui | John Zhuge | John Zhuge | -| [MAPREDUCE-6729](https://issues.apache.org/jira/browse/MAPREDUCE-6729) | Accurately compute the test execute time in DFSIO | Minor | benchmarks, performance, test | mingleizhang | mingleizhang | +| [MAPREDUCE-6729](https://issues.apache.org/jira/browse/MAPREDUCE-6729) | Accurately compute the test execute time in DFSIO | Minor | benchmarks, performance, test | zhangminglei | zhangminglei | | [HADOOP-13444](https://issues.apache.org/jira/browse/HADOOP-13444) | Replace org.apache.commons.io.Charsets with java.nio.charset.StandardCharsets | Minor | . 
| Vincent Poon | Vincent Poon | | [YARN-5456](https://issues.apache.org/jira/browse/YARN-5456) | container-executor support for FreeBSD, NetBSD, and others if conf path is absolute | Major | nodemanager, security | Allen Wittenauer | Allen Wittenauer | | [YARN-5460](https://issues.apache.org/jira/browse/YARN-5460) | Change container runtime type logging in DelegatingLinuxContainerRuntime to debug | Trivial | yarn | Shane Kumpf | Shane Kumpf | @@ -1037,7 +1037,7 @@ | [YARN-4491](https://issues.apache.org/jira/browse/YARN-4491) | yarn list command to support filtering by tags | Minor | client | Steve Loughran | Varun Saxena | | [HADOOP-13538](https://issues.apache.org/jira/browse/HADOOP-13538) | Deprecate getInstance and initialize methods with Path in TrashPolicy | Minor | . | Yiqun Lin | Yiqun Lin | | [HDFS-8986](https://issues.apache.org/jira/browse/HDFS-8986) | Add option to -du to calculate directory space usage excluding snapshots | Major | snapshots | Gautam Gopalakrishnan | Xiao Chen | -| [HDFS-10795](https://issues.apache.org/jira/browse/HDFS-10795) | Fix an error in ReaderStrategy#ByteBufferStrategy | Major | . | SammiChen | SammiChen | +| [HDFS-10795](https://issues.apache.org/jira/browse/HDFS-10795) | Fix an error in ReaderStrategy#ByteBufferStrategy | Major | . | Sammi Chen | Sammi Chen | | [HDFS-10798](https://issues.apache.org/jira/browse/HDFS-10798) | Make the threshold of reporting FSNamesystem lock contention configurable | Major | logging, namenode | Zhe Zhang | Erik Krogen | | [YARN-5550](https://issues.apache.org/jira/browse/YARN-5550) | TestYarnCLI#testGetContainers should format according to CONTAINER\_PATTERN | Minor | client, test | Jonathan Hung | Jonathan Hung | | [MAPREDUCE-6741](https://issues.apache.org/jira/browse/MAPREDUCE-6741) | add MR support to redact job conf properties | Major | mrv2 | Haibo Chen | Haibo Chen | @@ -1045,6 +1045,7 @@ | [HADOOP-7930](https://issues.apache.org/jira/browse/HADOOP-7930) | Kerberos relogin interval in UserGroupInformation should be configurable | Major | security | Alejandro Abdelnur | Robert Kanter | | [HDFS-8873](https://issues.apache.org/jira/browse/HDFS-8873) | Allow the directoryScanner to be rate-limited | Major | datanode | Nathan Roberts | Daniel Templeton | | [HADOOP-12825](https://issues.apache.org/jira/browse/HADOOP-12825) | Log slow name resolutions | Major | . | Sidharta Seethana | Sidharta Seethana | +| [HDFS-8865](https://issues.apache.org/jira/browse/HDFS-8865) | Improve quota initialization performance | Major | . | Kihwal Lee | Kihwal Lee | ### BUG FIXES: @@ -1162,7 +1163,7 @@ | [YARN-2894](https://issues.apache.org/jira/browse/YARN-2894) | When ACL's are enabled, if RM switches then application can not be viewed from web. | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | | [HADOOP-11296](https://issues.apache.org/jira/browse/HADOOP-11296) | hadoop-daemons.sh throws 'host1: bash: host3: command not found...' 
| Major | scripts | Vinayakumar B | Vinayakumar B | | [YARN-2874](https://issues.apache.org/jira/browse/YARN-2874) | Dead lock in "DelegationTokenRenewer" which blocks RM to execute any further apps | Blocker | resourcemanager | Naganarasimha G R | Naganarasimha G R | -| [HADOOP-11343](https://issues.apache.org/jira/browse/HADOOP-11343) | Overflow is not properly handled in caclulating final iv for AES CTR | Blocker | security | Jerry Chen | Jerry Chen | +| [HADOOP-11343](https://issues.apache.org/jira/browse/HADOOP-11343) | Overflow is not properly handled in caclulating final iv for AES CTR | Blocker | security | Haifeng Chen | Haifeng Chen | | [HADOOP-10134](https://issues.apache.org/jira/browse/HADOOP-10134) | [JDK8] Fix Javadoc errors caused by incorrect or illegal tags in doc comments | Minor | . | Andrew Purtell | Andrew Purtell | | [HADOOP-11368](https://issues.apache.org/jira/browse/HADOOP-11368) | Fix SSLFactory truststore reloader thread leak in KMSClientProvider | Major | kms | Arun Suresh | Arun Suresh | | [HDFS-7489](https://issues.apache.org/jira/browse/HDFS-7489) | Incorrect locking in FsVolumeList#checkDirs can hang datanodes | Critical | datanode | Noah Lorang | Noah Lorang | @@ -1446,7 +1447,7 @@ | [HADOOP-11922](https://issues.apache.org/jira/browse/HADOOP-11922) | Misspelling of threshold in log4j.properties for tests in hadoop-tools | Minor | . | Brahma Reddy Battula | Gabor Liptak | | [HDFS-8257](https://issues.apache.org/jira/browse/HDFS-8257) | Namenode rollingUpgrade option is incorrect in document | Major | documentation | J.Andreina | J.Andreina | | [HDFS-8067](https://issues.apache.org/jira/browse/HDFS-8067) | haadmin prints out stale help messages | Minor | hdfs-client | Ajith S | Ajith S | -| [YARN-3592](https://issues.apache.org/jira/browse/YARN-3592) | Fix typos in RMNodeLabelsManager | Trivial | resourcemanager | Junping Du | Sunil G | +| [YARN-3592](https://issues.apache.org/jira/browse/YARN-3592) | Fix typos in RMNodeLabelsManager | Trivial | resourcemanager | Junping Du | Sunil Govindan | | [HDFS-8174](https://issues.apache.org/jira/browse/HDFS-8174) | Update replication count to live rep count in fsck report | Minor | . | J.Andreina | J.Andreina | | [HDFS-6291](https://issues.apache.org/jira/browse/HDFS-6291) | FSImage may be left unclosed in BootstrapStandby#doRun() | Minor | ha | Ted Yu | Sanghyun Yun | | [YARN-3358](https://issues.apache.org/jira/browse/YARN-3358) | Audit log not present while refreshing Service ACLs | Minor | resourcemanager | Varun Saxena | Varun Saxena | @@ -1476,7 +1477,7 @@ | [HADOOP-11942](https://issues.apache.org/jira/browse/HADOOP-11942) | Add links to SLGUserGuide to site index | Minor | documentation | Masatake Iwasaki | Masatake Iwasaki | | [HDFS-8245](https://issues.apache.org/jira/browse/HDFS-8245) | Standby namenode doesn't process DELETED\_BLOCK if the add block request is in edit log. | Major | . 
| Rushabh S Shah | Rushabh S Shah | | [YARN-3018](https://issues.apache.org/jira/browse/YARN-3018) | Unify the default value for yarn.scheduler.capacity.node-locality-delay in code and default xml file | Trivial | capacityscheduler | nijel | nijel | -| [HDFS-8326](https://issues.apache.org/jira/browse/HDFS-8326) | Documentation about when checkpoints are run is out of date | Major | documentation | Misty Stanley-Jones | Misty Stanley-Jones | +| [HDFS-8326](https://issues.apache.org/jira/browse/HDFS-8326) | Documentation about when checkpoints are run is out of date | Major | documentation | Misty Linville | Misty Linville | | [YARN-3604](https://issues.apache.org/jira/browse/YARN-3604) | removeApplication in ZKRMStateStore should also disable watch. | Minor | resourcemanager | zhihai xu | zhihai xu | | [YARN-3476](https://issues.apache.org/jira/browse/YARN-3476) | Nodemanager can fail to delete local logs if log aggregation fails | Major | log-aggregation, nodemanager | Jason Lowe | Rohith Sharma K S | | [YARN-3473](https://issues.apache.org/jira/browse/YARN-3473) | Fix RM Web UI configuration for some properties | Minor | resourcemanager | Ray Chiang | Ray Chiang | @@ -1570,7 +1571,7 @@ | [HDFS-8470](https://issues.apache.org/jira/browse/HDFS-8470) | fsimage loading progress should update inode, delegation token and cache pool count. | Minor | namenode | tongshiquan | Surendra Singh Lilhore | | [HDFS-8523](https://issues.apache.org/jira/browse/HDFS-8523) | Remove usage information on unsupported operation "fsck -showprogress" from branch-2 | Major | documentation | J.Andreina | J.Andreina | | [HDFS-3716](https://issues.apache.org/jira/browse/HDFS-3716) | Purger should remove stale fsimage ckpt files | Minor | namenode | suja s | J.Andreina | -| [YARN-3751](https://issues.apache.org/jira/browse/YARN-3751) | TestAHSWebServices fails after YARN-3467 | Major | . | Zhijie Shen | Sunil G | +| [YARN-3751](https://issues.apache.org/jira/browse/YARN-3751) | TestAHSWebServices fails after YARN-3467 | Major | . | Zhijie Shen | Sunil Govindan | | [YARN-3585](https://issues.apache.org/jira/browse/YARN-3585) | NodeManager cannot exit on SHUTDOWN event triggered and NM recovery is enabled | Critical | . | Peng Zhang | Rohith Sharma K S | | [MAPREDUCE-6374](https://issues.apache.org/jira/browse/MAPREDUCE-6374) | Distributed Cache File visibility should check permission of full path | Major | . | Chang Li | Chang Li | | [YARN-3762](https://issues.apache.org/jira/browse/YARN-3762) | FairScheduler: CME on FSParentQueue#getQueueUserAclInfo | Critical | fairscheduler | Karthik Kambatla | Karthik Kambatla | @@ -1603,7 +1604,7 @@ | [MAPREDUCE-6350](https://issues.apache.org/jira/browse/MAPREDUCE-6350) | JobHistory doesn't support fully-functional search | Critical | jobhistoryserver | Siqi Li | Siqi Li | | [MAPREDUCE-6389](https://issues.apache.org/jira/browse/MAPREDUCE-6389) | Fix BaileyBorweinPlouffe CLI usage message | Trivial | . | Brahma Reddy Battula | Brahma Reddy Battula | | [HADOOP-12052](https://issues.apache.org/jira/browse/HADOOP-12052) | IPC client downgrades all exception types to IOE, breaks callers trying to use them | Critical | . 
| Steve Loughran | Brahma Reddy Battula | -| [YARN-3785](https://issues.apache.org/jira/browse/YARN-3785) | Support for Resource as an argument during submitApp call in MockRM test class | Minor | resourcemanager | Sunil G | Sunil G | +| [YARN-3785](https://issues.apache.org/jira/browse/YARN-3785) | Support for Resource as an argument during submitApp call in MockRM test class | Minor | resourcemanager | Sunil Govindan | Sunil Govindan | | [HADOOP-12074](https://issues.apache.org/jira/browse/HADOOP-12074) | in Shell.java#runCommand() rethrow InterruptedException as InterruptedIOException | Minor | . | Lavkesh Lahngir | Lavkesh Lahngir | | [HDFS-8566](https://issues.apache.org/jira/browse/HDFS-8566) | HDFS documentation about debug commands wrongly identifies them as "hdfs dfs" commands | Major | documentation | Surendra Singh Lilhore | Surendra Singh Lilhore | | [HDFS-8583](https://issues.apache.org/jira/browse/HDFS-8583) | Document that NFS gateway does not work with rpcbind on SLES 11 | Major | documentation | Arpit Agarwal | Arpit Agarwal | @@ -1711,7 +1712,7 @@ | [HDFS-8749](https://issues.apache.org/jira/browse/HDFS-8749) | Fix findbugs warning in BlockManager.java | Minor | . | Akira Ajisaka | Brahma Reddy Battula | | [HDFS-2956](https://issues.apache.org/jira/browse/HDFS-2956) | calling fetchdt without a --renewer argument throws NPE | Major | security | Todd Lipcon | Vinayakumar B | | [HDFS-8751](https://issues.apache.org/jira/browse/HDFS-8751) | Remove setBlocks API from INodeFile and misc code cleanup | Major | namenode | Zhe Zhang | Zhe Zhang | -| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil G | Sunil G | +| [YARN-3849](https://issues.apache.org/jira/browse/YARN-3849) | Too much of preemption activity causing continuos killing of containers across queues | Critical | capacityscheduler | Sunil Govindan | Sunil Govindan | | [YARN-3917](https://issues.apache.org/jira/browse/YARN-3917) | getResourceCalculatorPlugin for the default should intercept all exceptions | Major | . | Gera Shegalov | Gera Shegalov | | [YARN-3894](https://issues.apache.org/jira/browse/YARN-3894) | RM startup should fail for wrong CS xml NodeLabel capacity configuration | Critical | capacityscheduler | Bibin A Chundatt | Bibin A Chundatt | | [MAPREDUCE-6421](https://issues.apache.org/jira/browse/MAPREDUCE-6421) | Fix findbugs warning in RMContainerAllocator.reduceNodeLabelExpression | Major | . | Ray Chiang | Brahma Reddy Battula | @@ -1747,7 +1748,7 @@ | [YARN-3954](https://issues.apache.org/jira/browse/YARN-3954) | TestYarnConfigurationFields#testCompareConfigurationClassAgainstXml fails in trunk | Major | . | Varun Saxena | Varun Saxena | | [YARN-2019](https://issues.apache.org/jira/browse/YARN-2019) | Retrospect on decision of making RM crashed if any exception throw in ZKRMStateStore | Critical | . 
| Junping Du | Jian He | | [HDFS-8797](https://issues.apache.org/jira/browse/HDFS-8797) | WebHdfsFileSystem creates too many connections for pread | Major | webhdfs | Jing Zhao | Jing Zhao | -| [YARN-3941](https://issues.apache.org/jira/browse/YARN-3941) | Proportional Preemption policy should try to avoid sending duplicate PREEMPT\_CONTAINER event to scheduler | Major | capacityscheduler | Sunil G | Sunil G | +| [YARN-3941](https://issues.apache.org/jira/browse/YARN-3941) | Proportional Preemption policy should try to avoid sending duplicate PREEMPT\_CONTAINER event to scheduler | Major | capacityscheduler | Sunil Govindan | Sunil Govindan | | [YARN-3900](https://issues.apache.org/jira/browse/YARN-3900) | Protobuf layout of yarn\_security\_token causes errors in other protos that include it | Major | . | Anubhav Dhoot | Anubhav Dhoot | | [YARN-3845](https://issues.apache.org/jira/browse/YARN-3845) | Scheduler page does not render RGBA color combinations in IE11 | Minor | . | Jagadesh Kiran N | Mohammad Shahid Khan | | [HDFS-8806](https://issues.apache.org/jira/browse/HDFS-8806) | Inconsistent metrics: number of missing blocks with replication factor 1 not properly cleared | Major | . | Zhe Zhang | Zhe Zhang | @@ -1868,13 +1869,13 @@ | [MAPREDUCE-6481](https://issues.apache.org/jira/browse/MAPREDUCE-6481) | LineRecordReader may give incomplete record and wrong position/key information for uncompressed input sometimes. | Critical | mrv2 | zhihai xu | zhihai xu | | [MAPREDUCE-5002](https://issues.apache.org/jira/browse/MAPREDUCE-5002) | AM could potentially allocate a reduce container to a map attempt | Major | mr-am | Jason Lowe | Chang Li | | [MAPREDUCE-5982](https://issues.apache.org/jira/browse/MAPREDUCE-5982) | Task attempts that fail from the ASSIGNED state can disappear | Major | mr-am | Jason Lowe | Chang Li | -| [HADOOP-12386](https://issues.apache.org/jira/browse/HADOOP-12386) | RetryPolicies.RETRY\_FOREVER should be able to specify a retry interval | Major | . | Wangda Tan | Sunil G | +| [HADOOP-12386](https://issues.apache.org/jira/browse/HADOOP-12386) | RetryPolicies.RETRY\_FOREVER should be able to specify a retry interval | Major | . | Wangda Tan | Sunil Govindan | | [YARN-3697](https://issues.apache.org/jira/browse/YARN-3697) | FairScheduler: ContinuousSchedulingThread can fail to shutdown | Critical | fairscheduler | zhihai xu | zhihai xu | | [HDFS-6955](https://issues.apache.org/jira/browse/HDFS-6955) | DN should reserve disk space for a full block when creating tmp files | Major | datanode | Arpit Agarwal | Kanaka Kumar Avvaru | | [HDFS-5802](https://issues.apache.org/jira/browse/HDFS-5802) | NameNode does not check for inode type before traversing down a path | Trivial | namenode | Harsh J | Xiao Chen | | [MAPREDUCE-6460](https://issues.apache.org/jira/browse/MAPREDUCE-6460) | TestRMContainerAllocator.testAttemptNotFoundCausesRMCommunicatorException fails | Major | test | zhihai xu | zhihai xu | | [YARN-4167](https://issues.apache.org/jira/browse/YARN-4167) | NPE on RMActiveServices#serviceStop when store is null | Minor | . | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-4113](https://issues.apache.org/jira/browse/YARN-4113) | RM should respect retry-interval when uses RetryPolicies.RETRY\_FOREVER | Critical | . | Wangda Tan | Sunil G | +| [YARN-4113](https://issues.apache.org/jira/browse/YARN-4113) | RM should respect retry-interval when uses RetryPolicies.RETRY\_FOREVER | Critical | . 
| Wangda Tan | Sunil Govindan | | [YARN-4188](https://issues.apache.org/jira/browse/YARN-4188) | MoveApplicationAcrossQueuesResponse should be an abstract class | Minor | resourcemanager | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [HDFS-9043](https://issues.apache.org/jira/browse/HDFS-9043) | Doc updation for commands in HDFS Federation | Minor | documentation | J.Andreina | J.Andreina | | [HDFS-9013](https://issues.apache.org/jira/browse/HDFS-9013) | Deprecate NameNodeMXBean#getNNStarted in branch2 and remove from trunk | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | @@ -1882,7 +1883,7 @@ | [HADOOP-12438](https://issues.apache.org/jira/browse/HADOOP-12438) | Reset RawLocalFileSystem.useDeprecatedFileStatus in TestLocalFileSystem | Trivial | test | Chris Nauroth | Chris Nauroth | | [HDFS-9128](https://issues.apache.org/jira/browse/HDFS-9128) | TestWebHdfsFileContextMainOperations and TestSWebHdfsFileContextMainOperations fail due to invalid HDFS path on Windows. | Trivial | test | Chris Nauroth | Chris Nauroth | | [YARN-4152](https://issues.apache.org/jira/browse/YARN-4152) | NM crash with NPE when LogAggregationService#stopContainer called for absent container | Critical | log-aggregation, nodemanager | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-4044](https://issues.apache.org/jira/browse/YARN-4044) | Running applications information changes such as movequeue is not published to TimeLine server | Critical | resourcemanager, timelineserver | Sunil G | Sunil G | +| [YARN-4044](https://issues.apache.org/jira/browse/YARN-4044) | Running applications information changes such as movequeue is not published to TimeLine server | Critical | resourcemanager, timelineserver | Sunil Govindan | Sunil Govindan | | [HDFS-9076](https://issues.apache.org/jira/browse/HDFS-9076) | Log full path instead of inodeId in DFSClient#closeAllFilesBeingWritten() | Major | hdfs-client | Surendra Singh Lilhore | Surendra Singh Lilhore | | [MAPREDUCE-6484](https://issues.apache.org/jira/browse/MAPREDUCE-6484) | Yarn Client uses local address instead of RM address as token renewer in a secure cluster when RM HA is enabled. | Major | client, security | zhihai xu | zhihai xu | | [HADOOP-12437](https://issues.apache.org/jira/browse/HADOOP-12437) | Allow SecurityUtil to lookup alternate hostnames | Major | net, security | Arpit Agarwal | Arpit Agarwal | @@ -1978,7 +1979,7 @@ | [HADOOP-12484](https://issues.apache.org/jira/browse/HADOOP-12484) | Single File Rename Throws Incorrectly In Potential Race Condition Scenarios | Major | tools | Gaurav Kanade | Gaurav Kanade | | [HDFS-9286](https://issues.apache.org/jira/browse/HDFS-9286) | HttpFs does not parse ACL syntax correctly for operation REMOVEACLENTRIES | Major | fs | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4009](https://issues.apache.org/jira/browse/YARN-4009) | CORS support for ResourceManager REST API | Major | . | Prakash Ramachandran | Varun Vasudev | -| [YARN-4041](https://issues.apache.org/jira/browse/YARN-4041) | Slow delegation token renewal can severely prolong RM recovery | Major | resourcemanager | Jason Lowe | Sunil G | +| [YARN-4041](https://issues.apache.org/jira/browse/YARN-4041) | Slow delegation token renewal can severely prolong RM recovery | Major | resourcemanager | Jason Lowe | Sunil Govindan | | [HDFS-9290](https://issues.apache.org/jira/browse/HDFS-9290) | DFSClient#callAppend() is not backward compatible for slightly older NameNodes | Blocker | . 
| Tony Wu | Tony Wu | | [HDFS-9301](https://issues.apache.org/jira/browse/HDFS-9301) | HDFS clients can't construct HdfsConfiguration instances | Major | . | Steve Loughran | Mingliang Liu | | [YARN-4294](https://issues.apache.org/jira/browse/YARN-4294) | [JDK8] Fix javadoc errors caused by wrong reference and illegal tag | Blocker | build, documentation | Akira Ajisaka | Akira Ajisaka | @@ -1996,7 +1997,7 @@ | [HADOOP-12515](https://issues.apache.org/jira/browse/HADOOP-12515) | Mockito dependency is missing in hadoop-kafka module | Major | test | Kai Zheng | Kai Zheng | | [YARN-4302](https://issues.apache.org/jira/browse/YARN-4302) | SLS not able start due to NPE in SchedulerApplicationAttempt#getResourceUsageReport | Major | . | Bibin A Chundatt | Bibin A Chundatt | | [HADOOP-12178](https://issues.apache.org/jira/browse/HADOOP-12178) | NPE during handling of SASL setup if problem with SASL resolver class | Minor | ipc | Steve Loughran | Steve Loughran | -| [HADOOP-12494](https://issues.apache.org/jira/browse/HADOOP-12494) | fetchdt stores the token based on token kind instead of token service | Major | . | HeeSoo Kim | HeeSoo Kim | +| [HADOOP-12494](https://issues.apache.org/jira/browse/HADOOP-12494) | fetchdt stores the token based on token kind instead of token service | Major | . | Heesoo Kim | Heesoo Kim | | [HADOOP-11685](https://issues.apache.org/jira/browse/HADOOP-11685) | StorageException complaining " no lease ID" during HBase distributed log splitting | Major | tools | Duo Xu | Duo Xu | | [HDFS-9231](https://issues.apache.org/jira/browse/HDFS-9231) | fsck doesn't list correct file path when Bad Replicas/Blocks are in a snapshot | Major | snapshots | Xiao Chen | Xiao Chen | | [HDFS-9302](https://issues.apache.org/jira/browse/HDFS-9302) | WebHDFS truncate throws NullPointerException if newLength is not provided | Minor | webhdfs | Karthik Palaniappan | Jagadesh Kiran N | @@ -2006,7 +2007,7 @@ | [HDFS-9044](https://issues.apache.org/jira/browse/HDFS-9044) | Give Priority to FavouredNodes , before selecting nodes from FavouredNode's Node Group | Major | . | J.Andreina | J.Andreina | | [YARN-4130](https://issues.apache.org/jira/browse/YARN-4130) | Duplicate declaration of ApplicationId in RMAppManager#submitApplication method | Trivial | resourcemanager | Kai Sasaki | Kai Sasaki | | [YARN-4288](https://issues.apache.org/jira/browse/YARN-4288) | NodeManager restart should keep retrying to register to RM while connection exception happens during RM failed over. | Critical | nodemanager | Junping Du | Junping Du | -| [MAPREDUCE-6515](https://issues.apache.org/jira/browse/MAPREDUCE-6515) | Update Application priority in AM side from AM-RM heartbeat | Major | applicationmaster | Sunil G | Sunil G | +| [MAPREDUCE-6515](https://issues.apache.org/jira/browse/MAPREDUCE-6515) | Update Application priority in AM side from AM-RM heartbeat | Major | applicationmaster | Sunil Govindan | Sunil Govindan | | [HDFS-9332](https://issues.apache.org/jira/browse/HDFS-9332) | Fix Precondition failures from NameNodeEditLogRoller while saving namespace | Major | . | Andrew Wang | Andrew Wang | | [YARN-4313](https://issues.apache.org/jira/browse/YARN-4313) | Race condition in MiniMRYarnCluster when getting history server address | Major | . | Jian He | Jian He | | [YARN-4320](https://issues.apache.org/jira/browse/YARN-4320) | TestJobHistoryEventHandler fails as AHS in MiniYarnCluster no longer binds to default port 8188 | Major | . 
| Varun Saxena | Varun Saxena | @@ -2078,7 +2079,7 @@ | [YARN-4344](https://issues.apache.org/jira/browse/YARN-4344) | NMs reconnecting with changed capabilities can lead to wrong cluster resource calculations | Critical | resourcemanager | Varun Vasudev | Varun Vasudev | | [HADOOP-9822](https://issues.apache.org/jira/browse/HADOOP-9822) | create constant MAX\_CAPACITY in RetryCache rather than hard-coding 16 in RetryCache constructor | Minor | . | Tsuyoshi Ozawa | Tsuyoshi Ozawa | | [HDFS-9423](https://issues.apache.org/jira/browse/HDFS-9423) | Fix intermittent failure of TestEditLogTailer | Minor | test | Masatake Iwasaki | Masatake Iwasaki | -| [YARN-4298](https://issues.apache.org/jira/browse/YARN-4298) | Fix findbugs warnings in hadoop-yarn-common | Minor | . | Varun Saxena | Sunil G | +| [YARN-4298](https://issues.apache.org/jira/browse/YARN-4298) | Fix findbugs warnings in hadoop-yarn-common | Minor | . | Varun Saxena | Sunil Govindan | | [YARN-4387](https://issues.apache.org/jira/browse/YARN-4387) | Fix typo in FairScheduler log message | Minor | fairscheduler | Xin Wang | Xin Wang | | [HDFS-6101](https://issues.apache.org/jira/browse/HDFS-6101) | TestReplaceDatanodeOnFailure fails occasionally | Major | test | Arpit Agarwal | Wei-Chiu Chuang | | [HDFS-8855](https://issues.apache.org/jira/browse/HDFS-8855) | Webhdfs client leaks active NameNode connections | Major | webhdfs | Bob Hansen | Xiaobing Zhou | @@ -2122,7 +2123,7 @@ | [HDFS-9519](https://issues.apache.org/jira/browse/HDFS-9519) | Some coding improvement in SecondaryNameNode#main | Major | namenode | Yongjun Zhang | Xiao Chen | | [HDFS-9514](https://issues.apache.org/jira/browse/HDFS-9514) | TestDistributedFileSystem.testDFSClientPeerWriteTimeout failing; exception being swallowed | Major | hdfs-client, test | Steve Loughran | Wei-Chiu Chuang | | [HDFS-9535](https://issues.apache.org/jira/browse/HDFS-9535) | Newly completed blocks in IBR should not be considered under-replicated too quickly | Major | namenode | Jing Zhao | Mingliang Liu | -| [YARN-4418](https://issues.apache.org/jira/browse/YARN-4418) | AM Resource Limit per partition can be updated to ResourceUsage as well | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4418](https://issues.apache.org/jira/browse/YARN-4418) | AM Resource Limit per partition can be updated to ResourceUsage as well | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HADOOP-12638](https://issues.apache.org/jira/browse/HADOOP-12638) | UnsatisfiedLinkError while checking ISA-L in checknative command. | Minor | native | Kai Sasaki | Kai Sasaki | | [YARN-4403](https://issues.apache.org/jira/browse/YARN-4403) | (AM/NM/Container)LivelinessMonitor should use monotonic time when calculating period | Critical | . | Junping Du | Junping Du | | [YARN-4402](https://issues.apache.org/jira/browse/YARN-4402) | TestNodeManagerShutdown And TestNodeManagerResync fails with bind exception | Major | test | Brahma Reddy Battula | Brahma Reddy Battula | @@ -2203,7 +2204,7 @@ | [HADOOP-12653](https://issues.apache.org/jira/browse/HADOOP-12653) | Use SO\_REUSEADDR to avoid getting "Address already in use" when using kerberos and attempting to bind to any port on the local IP address | Major | net | Colin P. McCabe | Colin P. 
McCabe | | [HADOOP-12603](https://issues.apache.org/jira/browse/HADOOP-12603) | TestSymlinkLocalFSFileContext#testSetTimesSymlinkToDir occasionally fail | Major | test | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4571](https://issues.apache.org/jira/browse/YARN-4571) | Make app id/name available to the yarn authorizer provider for better auditing | Major | . | Jian He | Jian He | -| [YARN-4551](https://issues.apache.org/jira/browse/YARN-4551) | Address the duplication between StatusUpdateWhenHealthy and StatusUpdateWhenUnhealthy transitions | Minor | nodemanager | Karthik Kambatla | Sunil G | +| [YARN-4551](https://issues.apache.org/jira/browse/YARN-4551) | Address the duplication between StatusUpdateWhenHealthy and StatusUpdateWhenUnhealthy transitions | Minor | nodemanager | Karthik Kambatla | Sunil Govindan | | [HDFS-9517](https://issues.apache.org/jira/browse/HDFS-9517) | Fix missing @Test annotation on TestDistCpUtils.testUnpackAttributes | Trivial | distcp | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4534](https://issues.apache.org/jira/browse/YARN-4534) | Remove the redundant symbol in yarn rmadmin help msg | Trivial | . | Yiqun Lin | Yiqun Lin | | [HADOOP-12700](https://issues.apache.org/jira/browse/HADOOP-12700) | Remove unused import in TestCompressorDecompressor.java | Minor | . | John Zhuge | John Zhuge | @@ -2213,7 +2214,7 @@ | [HADOOP-12706](https://issues.apache.org/jira/browse/HADOOP-12706) | TestLocalFsFCStatistics#testStatisticsThreadLocalDataCleanUp times out occasionally | Major | test | Jason Lowe | Sangjin Lee | | [YARN-4581](https://issues.apache.org/jira/browse/YARN-4581) | AHS writer thread leak makes RM crash while RM is recovering | Major | resourcemanager | sandflee | sandflee | | [MAPREDUCE-6554](https://issues.apache.org/jira/browse/MAPREDUCE-6554) | MRAppMaster servicestart failing with NPE in MRAppMaster#parsePreviousJobHistory | Critical | . | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-4389](https://issues.apache.org/jira/browse/YARN-4389) | "yarn.am.blacklisting.enabled" and "yarn.am.blacklisting.disable-failure-threshold" should be app specific rather than a setting for whole YARN cluster | Critical | applications | Junping Du | Sunil G | +| [YARN-4389](https://issues.apache.org/jira/browse/YARN-4389) | "yarn.am.blacklisting.enabled" and "yarn.am.blacklisting.disable-failure-threshold" should be app specific rather than a setting for whole YARN cluster | Critical | applications | Junping Du | Sunil Govindan | | [HDFS-9612](https://issues.apache.org/jira/browse/HDFS-9612) | DistCp worker threads are not terminated after jobs are done. | Major | distcp | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-9624](https://issues.apache.org/jira/browse/HDFS-9624) | DataNode start slowly due to the initial DU command operations | Major | . | Yiqun Lin | Yiqun Lin | | [HADOOP-12712](https://issues.apache.org/jira/browse/HADOOP-12712) | Fix some cmake plugin and native build warnings | Minor | native | Colin P. McCabe | Colin P. McCabe | @@ -2412,7 +2413,7 @@ | [HDFS-10197](https://issues.apache.org/jira/browse/HDFS-10197) | TestFsDatasetCache failing intermittently due to timeout | Major | test | Yiqun Lin | Yiqun Lin | | [HDFS-9478](https://issues.apache.org/jira/browse/HDFS-9478) | Reason for failing ipc.FairCallQueue contruction should be thrown | Minor | . 
| Archana T | Ajith S | | [HDFS-10228](https://issues.apache.org/jira/browse/HDFS-10228) | TestHDFSCLI fails | Major | test | Akira Ajisaka | Akira Ajisaka | -| [YARN-4865](https://issues.apache.org/jira/browse/YARN-4865) | Track Reserved resources in ResourceUsage and QueueCapacities | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4865](https://issues.apache.org/jira/browse/YARN-4865) | Track Reserved resources in ResourceUsage and QueueCapacities | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HADOOP-12972](https://issues.apache.org/jira/browse/HADOOP-12972) | Lz4Compressor#getLibraryName returns the wrong version number | Trivial | native | John Zhuge | Colin P. McCabe | | [HDFS-5177](https://issues.apache.org/jira/browse/HDFS-5177) | blocksScheduled count should be decremented for abandoned blocks | Major | namenode | Vinayakumar B | Vinayakumar B | | [HDFS-10223](https://issues.apache.org/jira/browse/HDFS-10223) | peerFromSocketAndKey performs SASL exchange before setting connection timeouts | Major | . | Colin P. McCabe | Colin P. McCabe | @@ -2428,7 +2429,7 @@ | [HDFS-10178](https://issues.apache.org/jira/browse/HDFS-10178) | Permanent write failures can happen if pipeline recoveries occur for the first packet | Critical | . | Kihwal Lee | Kihwal Lee | | [HDFS-8496](https://issues.apache.org/jira/browse/HDFS-8496) | Calling stopWriter() with FSDatasetImpl lock held may block other threads | Major | . | zhouyingchao | Colin P. McCabe | | [HDFS-9917](https://issues.apache.org/jira/browse/HDFS-9917) | IBR accumulate more objects when SNN was down for sometime. | Critical | . | Brahma Reddy Battula | Brahma Reddy Battula | -| [YARN-4880](https://issues.apache.org/jira/browse/YARN-4880) | Running TestZKRMStateStorePerf with real zookeeper cluster throws NPE | Major | . | Rohith Sharma K S | Sunil G | +| [YARN-4880](https://issues.apache.org/jira/browse/YARN-4880) | Running TestZKRMStateStorePerf with real zookeeper cluster throws NPE | Major | . | Rohith Sharma K S | Sunil Govindan | | [YARN-4609](https://issues.apache.org/jira/browse/YARN-4609) | RM Nodes list page takes too much time to load | Major | webapp | Bibin A Chundatt | Bibin A Chundatt | | [HDFS-10239](https://issues.apache.org/jira/browse/HDFS-10239) | Fsshell mv fails if port usage doesn't match in src and destination paths | Major | . | Kuhu Shukla | Kuhu Shukla | | [YARN-4893](https://issues.apache.org/jira/browse/YARN-4893) | Fix some intermittent test failures in TestRMAdminService | Blocker | . | Junping Du | Brahma Reddy Battula | @@ -2436,7 +2437,7 @@ | [YARN-4915](https://issues.apache.org/jira/browse/YARN-4915) | Fix typo in YARN Secure Containers documentation | Trivial | documentation, yarn | Takashi Ohnishi | Takashi Ohnishi | | [YARN-4917](https://issues.apache.org/jira/browse/YARN-4917) | Fix typos in documentation of Capacity Scheduler. | Minor | documentation | Takashi Ohnishi | Takashi Ohnishi | | [HDFS-10261](https://issues.apache.org/jira/browse/HDFS-10261) | TestBookKeeperHACheckpoints doesn't handle ephemeral HTTP ports | Major | . 
| Eric Badger | Eric Badger | -| [YARN-4699](https://issues.apache.org/jira/browse/YARN-4699) | Scheduler UI and REST o/p is not in sync when -replaceLabelsOnNode is used to change label of a node | Critical | capacity scheduler | Sunil G | Sunil G | +| [YARN-4699](https://issues.apache.org/jira/browse/YARN-4699) | Scheduler UI and REST o/p is not in sync when -replaceLabelsOnNode is used to change label of a node | Critical | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-4906](https://issues.apache.org/jira/browse/YARN-4906) | Capture container start/finish time in container metrics | Major | . | Jian He | Jian He | | [HADOOP-12022](https://issues.apache.org/jira/browse/HADOOP-12022) | fix site -Pdocs -Pdist in hadoop-project-dist; cleanout remaining forrest bits | Blocker | build | Allen Wittenauer | Allen Wittenauer | | [MAPREDUCE-6670](https://issues.apache.org/jira/browse/MAPREDUCE-6670) | TestJobListCache#testEviction sometimes fails on Windows with timeout | Minor | test | Gergely Novák | Gergely Novák | @@ -2470,7 +2471,7 @@ | [HDFS-10283](https://issues.apache.org/jira/browse/HDFS-10283) | o.a.h.hdfs.server.namenode.TestFSImageWithSnapshot#testSaveLoadImageWithAppending fails intermittently | Major | test | Mingliang Liu | Mingliang Liu | | [YARN-4940](https://issues.apache.org/jira/browse/YARN-4940) | yarn node -list -all failed if RM start with decommissioned node | Major | . | sandflee | sandflee | | [YARN-4965](https://issues.apache.org/jira/browse/YARN-4965) | Distributed shell AM failed due to ClientHandlerException thrown by jersey | Critical | . | Sumana Sathish | Junping Du | -| [YARN-4934](https://issues.apache.org/jira/browse/YARN-4934) | Reserved Resource for QueueMetrics needs to be handled correctly in few cases | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-4934](https://issues.apache.org/jira/browse/YARN-4934) | Reserved Resource for QueueMetrics needs to be handled correctly in few cases | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HDFS-10291](https://issues.apache.org/jira/browse/HDFS-10291) | TestShortCircuitLocalRead failing | Major | test | Steve Loughran | Steve Loughran | | [HDFS-10275](https://issues.apache.org/jira/browse/HDFS-10275) | TestDataNodeMetrics failing intermittently due to TotalWriteTime counted incorrectly | Major | test | Yiqun Lin | Yiqun Lin | | [MAPREDUCE-6649](https://issues.apache.org/jira/browse/MAPREDUCE-6649) | getFailureInfo not returning any failure info | Major | . | Eric Badger | Eric Badger | @@ -2527,7 +2528,6 @@ | [HDFS-10324](https://issues.apache.org/jira/browse/HDFS-10324) | Trash directory in an encryption zone should be pre-created with correct permissions | Major | encryption | Wei-Chiu Chuang | Wei-Chiu Chuang | | [MAPREDUCE-6514](https://issues.apache.org/jira/browse/MAPREDUCE-6514) | Job hangs as ask is not updated after ramping down of all reducers | Blocker | applicationmaster | Varun Saxena | Varun Saxena | | [HDFS-2043](https://issues.apache.org/jira/browse/HDFS-2043) | TestHFlush failing intermittently | Major | test | Aaron T. Myers | Yiqun Lin | -| [HADOOP-12751](https://issues.apache.org/jira/browse/HADOOP-12751) | While using kerberos Hadoop incorrectly assumes names with '@' to be non-simple | Critical | security | Bolke de Bruin | Bolke de Bruin | | [YARN-5002](https://issues.apache.org/jira/browse/YARN-5002) | getApplicationReport call may raise NPE for removed queues | Critical | . 
| Sumana Sathish | Jian He | | [MAPREDUCE-6689](https://issues.apache.org/jira/browse/MAPREDUCE-6689) | MapReduce job can infinitely increase number of reducer resource requests | Blocker | . | Wangda Tan | Wangda Tan | | [YARN-4747](https://issues.apache.org/jira/browse/YARN-4747) | AHS error 500 due to NPE when container start event is missing | Major | timelineserver | Jason Lowe | Varun Saxena | @@ -2593,7 +2593,7 @@ | [YARN-4866](https://issues.apache.org/jira/browse/YARN-4866) | FairScheduler: AMs can consume all vcores leading to a livelock when using FAIR policy | Major | fairscheduler | Karthik Kambatla | Yufei Gu | | [HDFS-10463](https://issues.apache.org/jira/browse/HDFS-10463) | TestRollingFileSystemSinkWithHdfs needs some cleanup | Critical | . | Daniel Templeton | Daniel Templeton | | [YARN-5166](https://issues.apache.org/jira/browse/YARN-5166) | javadoc:javadoc goal fails on hadoop-yarn-client | Major | . | Andras Bokor | Andras Bokor | -| [HDFS-10276](https://issues.apache.org/jira/browse/HDFS-10276) | HDFS should not expose path info that user has no permission to see. | Major | . | Kevin Cox | Yuanbo Liu | +| [HDFS-10276](https://issues.apache.org/jira/browse/HDFS-10276) | HDFS should not expose path info that user has no permission to see. | Major | fs, security | Kevin Cox | Yuanbo Liu | | [YARN-5132](https://issues.apache.org/jira/browse/YARN-5132) | Exclude generated protobuf sources from YARN Javadoc build | Critical | . | Subru Krishnan | Subru Krishnan | | [HADOOP-13132](https://issues.apache.org/jira/browse/HADOOP-13132) | Handle ClassCastException on AuthenticationException in LoadBalancingKMSClientProvider | Major | kms | Miklos Szurap | Wei-Chiu Chuang | | [HDFS-10415](https://issues.apache.org/jira/browse/HDFS-10415) | TestDistributedFileSystem#MyDistributedFileSystem attempts to set up statistics before initialize() is called | Major | test | Sangjin Lee | Mingliang Liu | @@ -2611,7 +2611,7 @@ | [YARN-5206](https://issues.apache.org/jira/browse/YARN-5206) | RegistrySecurity includes id:pass in exception text if considered invalid | Minor | client, security | Steve Loughran | Steve Loughran | | [HDFS-10220](https://issues.apache.org/jira/browse/HDFS-10220) | A large number of expired leases can make namenode unresponsive and cause failover | Major | namenode | Nicolas Fraison | Nicolas Fraison | | [MAPREDUCE-6240](https://issues.apache.org/jira/browse/MAPREDUCE-6240) | Hadoop client displays confusing error message | Major | client | Mohammad Kamrul Islam | Gera Shegalov | -| [YARN-4308](https://issues.apache.org/jira/browse/YARN-4308) | ContainersAggregated CPU resource utilization reports negative usage in first few heartbeats | Major | nodemanager | Sunil G | Sunil G | +| [YARN-4308](https://issues.apache.org/jira/browse/YARN-4308) | ContainersAggregated CPU resource utilization reports negative usage in first few heartbeats | Major | nodemanager | Sunil Govindan | Sunil Govindan | | [HDFS-10508](https://issues.apache.org/jira/browse/HDFS-10508) | DFSInputStream should set thread's interrupt status after catching InterruptException from sleep | Major | . 
| Jing Zhao | Jing Zhao | | [HADOOP-13249](https://issues.apache.org/jira/browse/HADOOP-13249) | RetryInvocationHandler need wrap InterruptedException in IOException when call Thread.sleep | Major | ipc | zhihai xu | zhihai xu | | [HADOOP-13213](https://issues.apache.org/jira/browse/HADOOP-13213) | Small Documentation bug with AuthenticatedURL in hadoop-auth | Minor | documentation | Tom Ellis | Tom Ellis | @@ -2639,7 +2639,6 @@ | [HDFS-10448](https://issues.apache.org/jira/browse/HDFS-10448) | CacheManager#addInternal tracks bytesNeeded incorrectly when dealing with replication factors other than 1 | Major | caching | Yiqun Lin | Yiqun Lin | | [HADOOP-13287](https://issues.apache.org/jira/browse/HADOOP-13287) | TestS3ACredentials#testInstantiateFromURL fails if AWS secret key contains '+'. | Minor | fs/s3, test | Chris Nauroth | Chris Nauroth | | [MAPREDUCE-6197](https://issues.apache.org/jira/browse/MAPREDUCE-6197) | Cache MapOutputLocations in ShuffleHandler | Major | . | Siddharth Seth | Junping Du | -| [MAPREDUCE-6641](https://issues.apache.org/jira/browse/MAPREDUCE-6641) | TestTaskAttempt fails in trunk | Major | test | Tsuyoshi Ozawa | Haibo Chen | | [YARN-5266](https://issues.apache.org/jira/browse/YARN-5266) | Wrong exit code while trying to get app logs using regex via CLI | Critical | yarn | Sumana Sathish | Xuan Gong | | [HDFS-10561](https://issues.apache.org/jira/browse/HDFS-10561) | test\_native\_mini\_dfs fails by NoClassDefFoundError | Major | native, test | Akira Ajisaka | Akira Ajisaka | | [HDFS-10555](https://issues.apache.org/jira/browse/HDFS-10555) | Unable to loadFSEdits due to a failure in readCachePoolInfo | Critical | caching, namenode | Uma Maheswara Rao G | Uma Maheswara Rao G | @@ -2716,7 +2715,6 @@ | [HDFS-10688](https://issues.apache.org/jira/browse/HDFS-10688) | BPServiceActor may run into a tight loop for sending block report when hitting IOException | Major | datanode | Jing Zhao | Chen Liang | | [HDFS-10671](https://issues.apache.org/jira/browse/HDFS-10671) | Fix typo in HdfsRollingUpgrade.md | Trivial | documentation | Yiqun Lin | Yiqun Lin | | [HADOOP-13422](https://issues.apache.org/jira/browse/HADOOP-13422) | ZKDelegationTokenSecretManager JaasConfig does not work well with other ZK users in process | Major | . | Sergey Shelukhin | Sergey Shelukhin | -| [YARN-5195](https://issues.apache.org/jira/browse/YARN-5195) | RM intermittently crashed with NPE while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | resourcemanager | Karam Singh | sandflee | | [HADOOP-9427](https://issues.apache.org/jira/browse/HADOOP-9427) | Use JUnit assumptions to skip platform-specific tests | Major | test | Arpit Agarwal | Gergely Novák | | [YARN-5441](https://issues.apache.org/jira/browse/YARN-5441) | Fixing minor Scheduler test case failures | Major | . | Subru Krishnan | Subru Krishnan | | [YARN-5431](https://issues.apache.org/jira/browse/YARN-5431) | TimeLineReader daemon start should allow to pass its own reader opts | Major | scripts, timelinereader | Rohith Sharma K S | Rohith Sharma K S | @@ -2832,6 +2830,9 @@ | [HDFS-10620](https://issues.apache.org/jira/browse/HDFS-10620) | StringBuilder created and appended even if logging is disabled | Major | namenode | Staffan Friberg | Staffan Friberg | | [YARN-4925](https://issues.apache.org/jira/browse/YARN-4925) | ContainerRequest in AMRMClient, application should be able to specify nodes/racks together with nodeLabelExpression | Major | . 
| Bibin A Chundatt | Bibin A Chundatt | | [MAPREDUCE-6433](https://issues.apache.org/jira/browse/MAPREDUCE-6433) | launchTime may be negative | Major | jobhistoryserver, mrv2 | Allen Wittenauer | zhihai xu | +| [MAPREDUCE-6641](https://issues.apache.org/jira/browse/MAPREDUCE-6641) | TestTaskAttempt fails in trunk | Major | test | Tsuyoshi Ozawa | Haibo Chen | +| [YARN-5195](https://issues.apache.org/jira/browse/YARN-5195) | RM intermittently crashed with NPE while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | resourcemanager | Karam Singh | sandflee | +| [HADOOP-12751](https://issues.apache.org/jira/browse/HADOOP-12751) | While using kerberos Hadoop incorrectly assumes names with '@' to be non-simple | Critical | security | Bolke de Bruin | Bolke de Bruin | ### TESTS: @@ -2874,7 +2875,7 @@ | [YARN-3956](https://issues.apache.org/jira/browse/YARN-3956) | Fix TestNodeManagerHardwareUtils fails on Mac | Minor | nodemanager | Varun Vasudev | Varun Vasudev | | [HDFS-8834](https://issues.apache.org/jira/browse/HDFS-8834) | TestReplication#testReplicationWhenBlockCorruption is not valid after HDFS-6482 | Minor | datanode | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HADOOP-10854](https://issues.apache.org/jira/browse/HADOOP-10854) | unit tests for the shell scripts | Major | scripts | Allen Wittenauer | Allen Wittenauer | -| [YARN-3992](https://issues.apache.org/jira/browse/YARN-3992) | TestApplicationPriority.testApplicationPriorityAllocation fails intermittently | Major | . | Zhijie Shen | Sunil G | +| [YARN-3992](https://issues.apache.org/jira/browse/YARN-3992) | TestApplicationPriority.testApplicationPriorityAllocation fails intermittently | Major | . | Zhijie Shen | Sunil Govindan | | [HDFS-2070](https://issues.apache.org/jira/browse/HDFS-2070) | Add more unit tests for FsShell getmerge | Major | test | XieXianshan | Daniel Templeton | | [MAPREDUCE-5045](https://issues.apache.org/jira/browse/MAPREDUCE-5045) | UtilTest#isCygwin method appears to be unused | Trivial | contrib/streaming, test | Chris Nauroth | Neelesh Srinivas Salian | | [YARN-3573](https://issues.apache.org/jira/browse/YARN-3573) | MiniMRYarnCluster constructor that starts the timeline server using a boolean should be marked deprecated | Major | timelineserver | Mit Desai | Brahma Reddy Battula | @@ -2926,7 +2927,7 @@ | [YARN-5343](https://issues.apache.org/jira/browse/YARN-5343) | TestContinuousScheduling#testSortedNodes fails intermittently | Minor | . | sandflee | Yufei Gu | | [YARN-2398](https://issues.apache.org/jira/browse/YARN-2398) | TestResourceTrackerOnHA crashes | Major | test | Jason Lowe | Ajith S | | [YARN-5492](https://issues.apache.org/jira/browse/YARN-5492) | TestSubmitApplicationWithRMHA is failing sporadically during precommit builds | Major | test | Jason Lowe | Vrushali C | -| [YARN-5544](https://issues.apache.org/jira/browse/YARN-5544) | TestNodeBlacklistingOnAMFailures fails on trunk | Major | test | Varun Saxena | Sunil G | +| [YARN-5544](https://issues.apache.org/jira/browse/YARN-5544) | TestNodeBlacklistingOnAMFailures fails on trunk | Major | test | Varun Saxena | Sunil Govindan | | [HDFS-9745](https://issues.apache.org/jira/browse/HDFS-9745) | TestSecureNNWithQJM#testSecureMode sometimes fails with timeouts | Minor | . | Xiao Chen | Xiao Chen | | [YARN-5389](https://issues.apache.org/jira/browse/YARN-5389) | TestYarnClient#testReservationDelete fails | Major | . 
| Rohith Sharma K S | Sean Po | | [YARN-5560](https://issues.apache.org/jira/browse/YARN-5560) | Clean up bad exception catching practices in TestYarnClient | Major | . | Sean Po | Sean Po | @@ -2942,7 +2943,7 @@ | [HDFS-3292](https://issues.apache.org/jira/browse/HDFS-3292) | Remove the deprecated DistributedFileSystem.DiskStatus and the related methods | Major | hdfs-client | Tsz Wo Nicholas Sze | Arpit Gupta | | [HADOOP-9258](https://issues.apache.org/jira/browse/HADOOP-9258) | Add stricter tests to FileSystemContractTestBase | Major | test | Steve Loughran | Steve Loughran | | [MAPREDUCE-5189](https://issues.apache.org/jira/browse/MAPREDUCE-5189) | Basic AM changes to support preemption requests (per YARN-45) | Major | mr-am, mrv2 | Carlo Curino | Carlo Curino | -| [MAPREDUCE-5867](https://issues.apache.org/jira/browse/MAPREDUCE-5867) | Possible NPE in KillAMPreemptionPolicy related to ProportionalCapacityPreemptionPolicy | Major | resourcemanager | Sunil G | Sunil G | +| [MAPREDUCE-5867](https://issues.apache.org/jira/browse/MAPREDUCE-5867) | Possible NPE in KillAMPreemptionPolicy related to ProportionalCapacityPreemptionPolicy | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-6922](https://issues.apache.org/jira/browse/HDFS-6922) | Add LazyPersist flag to INodeFile, save it in FsImage and edit logs | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [HDFS-7071](https://issues.apache.org/jira/browse/HDFS-7071) | Updated editsStored and editsStored.xml to bump layout version and add LazyPersist flag | Major | namenode | Xiaoyu Yao | Xiaoyu Yao | | [MAPREDUCE-6078](https://issues.apache.org/jira/browse/MAPREDUCE-6078) | native-task: fix gtest build on macosx | Trivial | task | Binglin Chang | Binglin Chang | @@ -3009,7 +3010,7 @@ | [YARN-1402](https://issues.apache.org/jira/browse/YARN-1402) | Related Web UI, CLI changes on exposing client API to check log aggregation status | Major | . | Xuan Gong | Xuan Gong | | [YARN-2696](https://issues.apache.org/jira/browse/YARN-2696) | Queue sorting in CapacityScheduler should consider node label | Major | capacityscheduler, resourcemanager | Wangda Tan | Wangda Tan | | [YARN-3487](https://issues.apache.org/jira/browse/YARN-3487) | CapacityScheduler scheduler lock obtained unnecessarily when calling getQueue | Critical | capacityscheduler | Jason Lowe | Jason Lowe | -| [YARN-3136](https://issues.apache.org/jira/browse/YARN-3136) | getTransferredContainers can be a bottleneck during AM registration | Major | scheduler | Jason Lowe | Sunil G | +| [YARN-3136](https://issues.apache.org/jira/browse/YARN-3136) | getTransferredContainers can be a bottleneck during AM registration | Major | scheduler | Jason Lowe | Sunil Govindan | | [HDFS-8169](https://issues.apache.org/jira/browse/HDFS-8169) | Move LocatedBlocks and related classes to hdfs-client | Major | build, hdfs-client | Haohui Mai | Haohui Mai | | [YARN-3463](https://issues.apache.org/jira/browse/YARN-3463) | Integrate OrderingPolicy Framework with CapacityScheduler | Major | capacityscheduler | Craig Welch | Craig Welch | | [MAPREDUCE-6327](https://issues.apache.org/jira/browse/MAPREDUCE-6327) | [Event producers] Implement MapReduce AM writing MR events/counters to v2 ATS | Major | . 
| Sangjin Lee | Junping Du | @@ -3047,22 +3048,22 @@ | [YARN-3134](https://issues.apache.org/jira/browse/YARN-3134) | [Storage implementation] Exploiting the option of using Phoenix to access HBase backend | Major | timelineserver | Zhijie Shen | Li Lu | | [YARN-3529](https://issues.apache.org/jira/browse/YARN-3529) | Add miniHBase cluster and Phoenix support to ATS v2 unit tests | Major | timelineserver | Li Lu | Li Lu | | [YARN-3634](https://issues.apache.org/jira/browse/YARN-3634) | TestMRTimelineEventHandling and TestApplication are broken | Major | timelineserver | Sangjin Lee | Sangjin Lee | -| [YARN-3579](https://issues.apache.org/jira/browse/YARN-3579) | CommonNodeLabelsManager should support NodeLabel instead of string label name when getting node-to-label/label-to-label mappings | Minor | resourcemanager | Sunil G | Sunil G | +| [YARN-3579](https://issues.apache.org/jira/browse/YARN-3579) | CommonNodeLabelsManager should support NodeLabel instead of string label name when getting node-to-label/label-to-label mappings | Minor | resourcemanager | Sunil Govindan | Sunil Govindan | | [MAPREDUCE-6337](https://issues.apache.org/jira/browse/MAPREDUCE-6337) | add a mode to replay MR job history files to the timeline service | Major | . | Sangjin Lee | Sangjin Lee | | [YARN-3505](https://issues.apache.org/jira/browse/YARN-3505) | Node's Log Aggregation Report with SUCCEED should not cached in RMApps | Critical | log-aggregation | Junping Du | Xuan Gong | | [HDFS-8403](https://issues.apache.org/jira/browse/HDFS-8403) | Eliminate retries in TestFileCreation#testOverwriteOpenForWrite | Major | test | Arpit Agarwal | Arpit Agarwal | | [HDFS-8157](https://issues.apache.org/jira/browse/HDFS-8157) | Writes to RAM DISK reserve locked memory for block files | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [YARN-3541](https://issues.apache.org/jira/browse/YARN-3541) | Add version info on timeline service / generic history web UI and REST API | Major | timelineserver | Zhijie Shen | Zhijie Shen | | [YARN-3565](https://issues.apache.org/jira/browse/YARN-3565) | NodeHeartbeatRequest/RegisterNodeManagerRequest should use NodeLabel object instead of String | Blocker | api, client, resourcemanager | Wangda Tan | Naganarasimha G R | -| [YARN-3583](https://issues.apache.org/jira/browse/YARN-3583) | Support of NodeLabel object instead of plain String in YarnClient side. | Major | client | Sunil G | Sunil G | +| [YARN-3583](https://issues.apache.org/jira/browse/YARN-3583) | Support of NodeLabel object instead of plain String in YarnClient side. | Major | client | Sunil Govindan | Sunil Govindan | | [YARN-3609](https://issues.apache.org/jira/browse/YARN-3609) | Move load labels from storage from serviceInit to serviceStart to make it works with RM HA case. | Major | resourcemanager | Wangda Tan | Wangda Tan | | [YARN-3684](https://issues.apache.org/jira/browse/YARN-3684) | Change ContainerExecutor's primary lifecycle methods to use a more extensible mechanism for passing information. 
| Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-3411](https://issues.apache.org/jira/browse/YARN-3411) | [Storage implementation] explore & create the native HBase schema for writes | Critical | timelineserver | Sangjin Lee | Vrushali C | | [HDFS-8454](https://issues.apache.org/jira/browse/HDFS-8454) | Remove unnecessary throttling in TestDatanodeDeath | Major | test | Arpit Agarwal | Arpit Agarwal | | [MAPREDUCE-6370](https://issues.apache.org/jira/browse/MAPREDUCE-6370) | Timeline service v2 load generator needs to write event id | Major | . | Li Lu | Li Lu | | [YARN-3632](https://issues.apache.org/jira/browse/YARN-3632) | Ordering policy should be allowed to reorder an application when demand changes | Major | capacityscheduler | Craig Welch | Craig Welch | -| [YARN-3686](https://issues.apache.org/jira/browse/YARN-3686) | CapacityScheduler should trim default\_node\_label\_expression | Critical | api, client, resourcemanager | Wangda Tan | Sunil G | -| [YARN-3647](https://issues.apache.org/jira/browse/YARN-3647) | RMWebServices api's should use updated api from CommonNodeLabelsManager to get NodeLabel object | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-3686](https://issues.apache.org/jira/browse/YARN-3686) | CapacityScheduler should trim default\_node\_label\_expression | Critical | api, client, resourcemanager | Wangda Tan | Sunil Govindan | +| [YARN-3647](https://issues.apache.org/jira/browse/YARN-3647) | RMWebServices api's should use updated api from CommonNodeLabelsManager to get NodeLabel object | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-3581](https://issues.apache.org/jira/browse/YARN-3581) | Deprecate -directlyAccessNodeLabelStore in RMAdminCLI | Major | api, client, resourcemanager | Wangda Tan | Naganarasimha G R | | [HDFS-8482](https://issues.apache.org/jira/browse/HDFS-8482) | Rename BlockInfoContiguous to BlockInfo | Major | . 
| Zhe Zhang | Zhe Zhang | | [YARN-3700](https://issues.apache.org/jira/browse/YARN-3700) | ATS Web Performance issue at load time when large number of jobs | Major | resourcemanager, webapp, yarn | Xuan Gong | Xuan Gong | @@ -3088,7 +3089,7 @@ | [HDFS-8238](https://issues.apache.org/jira/browse/HDFS-8238) | Move ClientProtocol to the hdfs-client | Major | build | Haohui Mai | Takanobu Asanuma | | [HDFS-6249](https://issues.apache.org/jira/browse/HDFS-6249) | Output AclEntry in PBImageXmlWriter | Minor | tools | Akira Ajisaka | Surendra Singh Lilhore | | [YARN-3706](https://issues.apache.org/jira/browse/YARN-3706) | Generalize native HBase writer for additional tables | Minor | timelineserver | Joep Rottinghuis | Joep Rottinghuis | -| [YARN-3521](https://issues.apache.org/jira/browse/YARN-3521) | Support return structured NodeLabel objects in REST API | Major | api, client, resourcemanager | Wangda Tan | Sunil G | +| [YARN-3521](https://issues.apache.org/jira/browse/YARN-3521) | Support return structured NodeLabel objects in REST API | Major | api, client, resourcemanager | Wangda Tan | Sunil Govindan | | [HDFS-8192](https://issues.apache.org/jira/browse/HDFS-8192) | Eviction should key off used locked memory instead of ram disk free space | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [YARN-3792](https://issues.apache.org/jira/browse/YARN-3792) | Test case failures in TestDistributedShell and some issue fixes related to ATSV2 | Major | timelineserver | Naganarasimha G R | Naganarasimha G R | | [HDFS-8651](https://issues.apache.org/jira/browse/HDFS-8651) | Make hadoop-hdfs-project Native code -Wall-clean | Major | native | Alan Burlison | Alan Burlison | @@ -3118,7 +3119,7 @@ | [YARN-3844](https://issues.apache.org/jira/browse/YARN-3844) | Make hadoop-yarn-project Native code -Wall-clean | Major | build | Alan Burlison | Alan Burlison | | [HDFS-8794](https://issues.apache.org/jira/browse/HDFS-8794) | Improve CorruptReplicasMap#corruptReplicasMap | Major | . | Yi Liu | Yi Liu | | [HDFS-7483](https://issues.apache.org/jira/browse/HDFS-7483) | Display information per tier on the Namenode UI | Major | . 
| Benoy Antony | Benoy Antony | -| [YARN-2003](https://issues.apache.org/jira/browse/YARN-2003) | Support for Application priority : Changes in RM and Capacity Scheduler | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-2003](https://issues.apache.org/jira/browse/YARN-2003) | Support for Application priority : Changes in RM and Capacity Scheduler | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-8721](https://issues.apache.org/jira/browse/HDFS-8721) | Add a metric for number of encryption zones | Major | encryption | Rakesh R | Rakesh R | | [YARN-1645](https://issues.apache.org/jira/browse/YARN-1645) | ContainerManager implementation to support container resizing | Major | nodemanager | Wangda Tan | MENG DING | | [HDFS-8495](https://issues.apache.org/jira/browse/HDFS-8495) | Consolidate append() related implementation into a single class | Major | namenode | Rakesh R | Rakesh R | @@ -3138,20 +3139,20 @@ | [MAPREDUCE-6394](https://issues.apache.org/jira/browse/MAPREDUCE-6394) | Speed up Task processing loop in HsTasksBlock#render() | Major | jobhistoryserver | Ray Chiang | Ray Chiang | | [HADOOP-7824](https://issues.apache.org/jira/browse/HADOOP-7824) | NativeIO.java flags and identifiers must be set correctly for each platform, not hardcoded to their Linux values | Major | native | Dmytro Shteflyuk | Martin Walsh | | [YARN-3543](https://issues.apache.org/jira/browse/YARN-3543) | ApplicationReport should be able to tell whether the Application is AM managed or not. | Major | api | Spandan Dutta | Rohith Sharma K S | -| [YARN-3993](https://issues.apache.org/jira/browse/YARN-3993) | Change to use the AM flag in ContainerContext determine AM container | Major | timelineserver | Zhijie Shen | Sunil G | +| [YARN-3993](https://issues.apache.org/jira/browse/YARN-3993) | Change to use the AM flag in ContainerContext determine AM container | Major | timelineserver | Zhijie Shen | Sunil Govindan | | [YARN-4004](https://issues.apache.org/jira/browse/YARN-4004) | container-executor should print output of docker logs if the docker container exits with non-0 exit status | Major | nodemanager | Varun Vasudev | Varun Vasudev | | [YARN-3736](https://issues.apache.org/jira/browse/YARN-3736) | Add RMStateStore apis to store and load accepted reservations for failover | Major | capacityscheduler, fairscheduler, resourcemanager | Subru Krishnan | Anubhav Dhoot | | [YARN-1643](https://issues.apache.org/jira/browse/YARN-1643) | Make ContainersMonitor can support change monitoring size of an allocated container in NM side | Major | nodemanager | Wangda Tan | MENG DING | | [YARN-3984](https://issues.apache.org/jira/browse/YARN-3984) | Rethink event column key issue | Major | timelineserver | Zhijie Shen | Vrushali C | | [YARN-3974](https://issues.apache.org/jira/browse/YARN-3974) | Refactor the reservation system test cases to use parameterized base test | Major | capacityscheduler, fairscheduler | Subru Krishnan | Subru Krishnan | -| [YARN-3948](https://issues.apache.org/jira/browse/YARN-3948) | Display Application Priority in RM Web UI | Major | webapp | Sunil G | Sunil G | +| [YARN-3948](https://issues.apache.org/jira/browse/YARN-3948) | Display Application Priority in RM Web UI | Major | webapp | Sunil Govindan | Sunil Govindan | | [YARN-3049](https://issues.apache.org/jira/browse/YARN-3049) | [Storage Implementation] Implement storage reader interface to fetch raw data from HBase backend | Major | timelineserver | Sangjin Lee | Zhijie Shen | -| 
[YARN-3873](https://issues.apache.org/jira/browse/YARN-3873) | pendingApplications in LeafQueue should also use OrderingPolicy | Major | capacityscheduler | Sunil G | Sunil G | -| [YARN-3887](https://issues.apache.org/jira/browse/YARN-3887) | Support for changing Application priority during runtime | Major | capacityscheduler, resourcemanager | Sunil G | Sunil G | +| [YARN-3873](https://issues.apache.org/jira/browse/YARN-3873) | pendingApplications in LeafQueue should also use OrderingPolicy | Major | capacityscheduler | Sunil Govindan | Sunil Govindan | +| [YARN-3887](https://issues.apache.org/jira/browse/YARN-3887) | Support for changing Application priority during runtime | Major | capacityscheduler, resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-8805](https://issues.apache.org/jira/browse/HDFS-8805) | Archival Storage: getStoragePolicy should not need superuser privilege | Major | balancer & mover, namenode | Hui Zheng | Brahma Reddy Battula | | [HDFS-8052](https://issues.apache.org/jira/browse/HDFS-8052) | Move WebHdfsFileSystem into hadoop-hdfs-client | Major | build | Haohui Mai | Haohui Mai | | [YARN-3906](https://issues.apache.org/jira/browse/YARN-3906) | split the application table from the entity table | Major | timelineserver | Sangjin Lee | Sangjin Lee | -| [YARN-4023](https://issues.apache.org/jira/browse/YARN-4023) | Publish Application Priority to TimelineServer | Major | timelineserver | Sunil G | Sunil G | +| [YARN-4023](https://issues.apache.org/jira/browse/YARN-4023) | Publish Application Priority to TimelineServer | Major | timelineserver | Sunil Govindan | Sunil Govindan | | [HDFS-8824](https://issues.apache.org/jira/browse/HDFS-8824) | Do not use small blocks for balancing the cluster | Major | balancer & mover | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [YARN-3534](https://issues.apache.org/jira/browse/YARN-3534) | Collect memory/cpu usage on the node | Major | nodemanager, resourcemanager | Íñigo Goiri | Íñigo Goiri | | [HDFS-8801](https://issues.apache.org/jira/browse/HDFS-8801) | Convert BlockInfoUnderConstruction as a feature | Major | namenode | Zhe Zhang | Jing Zhao | @@ -3177,7 +3178,7 @@ | [HDFS-8951](https://issues.apache.org/jira/browse/HDFS-8951) | Move the shortcircuit package to hdfs-client | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-8248](https://issues.apache.org/jira/browse/HDFS-8248) | Store INodeId instead of the INodeFile object in BlockInfoContiguous | Major | . 
| Haohui Mai | Haohui Mai | | [HDFS-8962](https://issues.apache.org/jira/browse/HDFS-8962) | Clean up checkstyle warnings in o.a.h.hdfs.DfsClientConf | Major | build | Mingliang Liu | Mingliang Liu | -| [YARN-3250](https://issues.apache.org/jira/browse/YARN-3250) | Support admin cli interface in for Application Priority | Major | resourcemanager | Sunil G | Rohith Sharma K S | +| [YARN-3250](https://issues.apache.org/jira/browse/YARN-3250) | Support admin cli interface in for Application Priority | Major | resourcemanager | Sunil Govindan | Rohith Sharma K S | | [HDFS-8925](https://issues.apache.org/jira/browse/HDFS-8925) | Move BlockReaderLocal to hdfs-client | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-8980](https://issues.apache.org/jira/browse/HDFS-8980) | Remove unnecessary block replacement in INodeFile | Major | namenode | Jing Zhao | Jing Zhao | | [HDFS-8990](https://issues.apache.org/jira/browse/HDFS-8990) | Move RemoteBlockReader to hdfs-client module | Major | build | Mingliang Liu | Mingliang Liu | @@ -3187,7 +3188,7 @@ | [YARN-3893](https://issues.apache.org/jira/browse/YARN-3893) | Both RM in active state when Admin#transitionToActive failure from refeshAll() | Critical | resourcemanager | Bibin A Chundatt | Bibin A Chundatt | | [HDFS-8890](https://issues.apache.org/jira/browse/HDFS-8890) | Allow admin to specify which blockpools the balancer should run on | Major | balancer & mover | Chris Trezzo | Chris Trezzo | | [YARN-4101](https://issues.apache.org/jira/browse/YARN-4101) | RM should print alert messages if Zookeeper and Resourcemanager gets connection issue | Critical | yarn | Yesha Vora | Xuan Gong | -| [YARN-3970](https://issues.apache.org/jira/browse/YARN-3970) | REST api support for Application Priority | Major | webapp | Sunil G | Naganarasimha G R | +| [YARN-3970](https://issues.apache.org/jira/browse/YARN-3970) | REST api support for Application Priority | Major | webapp | Sunil Govindan | Naganarasimha G R | | [HDFS-9002](https://issues.apache.org/jira/browse/HDFS-9002) | Move o.a.h.hdfs.net/\*Peer classes to hdfs-client | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-9012](https://issues.apache.org/jira/browse/HDFS-9012) | Move o.a.h.hdfs.protocol.datatransfer.PipelineAck class to hadoop-hdfs-client module | Major | build | Mingliang Liu | Mingliang Liu | | [HDFS-8984](https://issues.apache.org/jira/browse/HDFS-8984) | Move replication queues related methods in FSNamesystem to BlockManager | Major | . | Haohui Mai | Haohui Mai | @@ -3221,7 +3222,7 @@ | [HADOOP-11918](https://issues.apache.org/jira/browse/HADOOP-11918) | Listing an empty s3a root directory throws FileNotFound. 
| Minor | fs/s3 | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HDFS-8053](https://issues.apache.org/jira/browse/HDFS-8053) | Move DFSIn/OutputStream and related classes to hadoop-hdfs-client | Major | build | Haohui Mai | Mingliang Liu | | [HDFS-8740](https://issues.apache.org/jira/browse/HDFS-8740) | Move DistributedFileSystem to hadoop-hdfs-client | Major | build | Yi Liu | Mingliang Liu | -| [YARN-4141](https://issues.apache.org/jira/browse/YARN-4141) | Runtime Application Priority change should not throw exception for applications at finishing states | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4141](https://issues.apache.org/jira/browse/YARN-4141) | Runtime Application Priority change should not throw exception for applications at finishing states | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-9165](https://issues.apache.org/jira/browse/HDFS-9165) | Move entries in META-INF/services/o.a.h.fs.FileSystem to hdfs-client | Major | build | Haohui Mai | Mingliang Liu | | [HDFS-8971](https://issues.apache.org/jira/browse/HDFS-8971) | Remove guards when calling LOG.debug() and LOG.trace() in client package | Major | build | Mingliang Liu | Mingliang Liu | | [YARN-4203](https://issues.apache.org/jira/browse/YARN-4203) | Add request/response logging & timing for each REST endpoint call | Minor | timelineserver | Vrushali C | Varun Saxena | @@ -3252,7 +3253,7 @@ | [YARN-4237](https://issues.apache.org/jira/browse/YARN-4237) | Support additional queries for ATSv2 Web UI | Major | . | Varun Saxena | Varun Saxena | | [HADOOP-12475](https://issues.apache.org/jira/browse/HADOOP-12475) | Replace guava Cache with ConcurrentHashMap for caching Connection in ipc Client | Major | conf, io, ipc | Walter Su | Walter Su | | [YARN-4162](https://issues.apache.org/jira/browse/YARN-4162) | CapacityScheduler: Add resource usage by partition and queue capacity by partition to REST API | Major | api, client, resourcemanager | Naganarasimha G R | Naganarasimha G R | -| [YARN-4170](https://issues.apache.org/jira/browse/YARN-4170) | AM need to be notified with priority in AllocateResponse | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-4170](https://issues.apache.org/jira/browse/YARN-4170) | AM need to be notified with priority in AllocateResponse | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-2556](https://issues.apache.org/jira/browse/YARN-2556) | Tool to measure the performance of the timeline server | Major | timelineserver | Jonathan Eagles | Chang Li | | [YARN-4262](https://issues.apache.org/jira/browse/YARN-4262) | Allow whitelisted users to run privileged docker containers. | Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-4267](https://issues.apache.org/jira/browse/YARN-4267) | Add additional logging to container launch implementations in container-executor | Major | yarn | Sidharta Seethana | Sidharta Seethana | @@ -3267,7 +3268,7 @@ | [YARN-3724](https://issues.apache.org/jira/browse/YARN-3724) | Use POSIX nftw(3) instead of fts(3) | Major | . 
| Malcolm Kavalsky | Alan Burlison | | [YARN-2729](https://issues.apache.org/jira/browse/YARN-2729) | Support script based NodeLabelsProvider Interface in Distributed Node Label Configuration Setup | Major | nodemanager | Naganarasimha G R | Naganarasimha G R | | [HDFS-9304](https://issues.apache.org/jira/browse/HDFS-9304) | Add HdfsClientConfigKeys class to TestHdfsConfigFields#configurationClasses | Major | build | Mingliang Liu | Mingliang Liu | -| [YARN-3216](https://issues.apache.org/jira/browse/YARN-3216) | Max-AM-Resource-Percentage should respect node labels | Critical | resourcemanager | Wangda Tan | Sunil G | +| [YARN-3216](https://issues.apache.org/jira/browse/YARN-3216) | Max-AM-Resource-Percentage should respect node labels | Critical | resourcemanager | Wangda Tan | Sunil Govindan | | [HADOOP-12457](https://issues.apache.org/jira/browse/HADOOP-12457) | [JDK8] Fix a failure of compiling common by javadoc | Major | . | Tsuyoshi Ozawa | Akira Ajisaka | | [MAPREDUCE-6391](https://issues.apache.org/jira/browse/MAPREDUCE-6391) | util/Timer.cc completely misunderstands \_POSIX\_CPUTIME | Major | build | Alan Burlison | Alan Burlison | | [MAPREDUCE-6412](https://issues.apache.org/jira/browse/MAPREDUCE-6412) | Make hadoop-mapreduce-client Native code -Wall-clean | Major | build | Alan Burlison | Alan Burlison | @@ -3305,7 +3306,7 @@ | [HDFS-9438](https://issues.apache.org/jira/browse/HDFS-9438) | TestPipelinesFailover assumes Linux ifconfig | Minor | test | Alan Burlison | John Zhuge | | [YARN-4297](https://issues.apache.org/jira/browse/YARN-4297) | TestJobHistoryEventHandler and TestRMContainerAllocator failing on YARN-2928 branch | Major | . | Varun Saxena | Varun Saxena | | [YARN-3862](https://issues.apache.org/jira/browse/YARN-3862) | Support for fetching specific configs and metrics based on prefixes | Major | timelineserver | Varun Saxena | Varun Saxena | -| [YARN-4292](https://issues.apache.org/jira/browse/YARN-4292) | ResourceUtilization should be a part of NodeInfo REST API | Major | . | Wangda Tan | Sunil G | +| [YARN-4292](https://issues.apache.org/jira/browse/YARN-4292) | ResourceUtilization should be a part of NodeInfo REST API | Major | . | Wangda Tan | Sunil Govindan | | [HDFS-9436](https://issues.apache.org/jira/browse/HDFS-9436) | Make NNThroughputBenchmark$BlockReportStats run with 10 datanodes by default | Minor | test | Mingliang Liu | Mingliang Liu | | [HDFS-9484](https://issues.apache.org/jira/browse/HDFS-9484) | NNThroughputBenchmark$BlockReportStats should not send empty block reports | Major | test | Mingliang Liu | Mingliang Liu | | [YARN-4405](https://issues.apache.org/jira/browse/YARN-4405) | Support node label store in non-appendable file system | Major | api, client, resourcemanager | Wangda Tan | Wangda Tan | @@ -3321,11 +3322,11 @@ | [HDFS-9371](https://issues.apache.org/jira/browse/HDFS-9371) | Code cleanup for DatanodeManager | Major | namenode | Jing Zhao | Jing Zhao | | [YARN-4460](https://issues.apache.org/jira/browse/YARN-4460) | [Bug fix] RM fails to start when SMP is enabled | Major | timelineserver | Li Lu | Li Lu | | [YARN-4445](https://issues.apache.org/jira/browse/YARN-4445) | Unify the term flowId and flowName in timeline v2 codebase | Major | timelineserver | Li Lu | Zhan Zhang | -| [YARN-4293](https://issues.apache.org/jira/browse/YARN-4293) | ResourceUtilization should be a part of yarn node CLI | Major | . 
| Wangda Tan | Sunil G | +| [YARN-4293](https://issues.apache.org/jira/browse/YARN-4293) | ResourceUtilization should be a part of yarn node CLI | Major | . | Wangda Tan | Sunil Govindan | | [YARN-4416](https://issues.apache.org/jira/browse/YARN-4416) | Deadlock due to synchronised get Methods in AbstractCSQueue | Minor | capacity scheduler, resourcemanager | Naganarasimha G R | Naganarasimha G R | | [YARN-1856](https://issues.apache.org/jira/browse/YARN-1856) | cgroups based memory monitoring for containers | Major | nodemanager | Karthik Kambatla | Varun Vasudev | | [HDFS-9373](https://issues.apache.org/jira/browse/HDFS-9373) | Erasure coding: friendly log information for write operations with some failed streamers | Major | erasure-coding | Li Bo | Li Bo | -| [YARN-3226](https://issues.apache.org/jira/browse/YARN-3226) | UI changes for decommissioning node | Major | graceful | Junping Du | Sunil G | +| [YARN-3226](https://issues.apache.org/jira/browse/YARN-3226) | UI changes for decommissioning node | Major | graceful | Junping Du | Sunil Govindan | | [YARN-4164](https://issues.apache.org/jira/browse/YARN-4164) | Retrospect update ApplicationPriority API return type | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | | [YARN-4350](https://issues.apache.org/jira/browse/YARN-4350) | TestDistributedShell fails for V2 scenarios | Major | timelineserver | Sangjin Lee | Naganarasimha G R | | [HDFS-9173](https://issues.apache.org/jira/browse/HDFS-9173) | Erasure Coding: Lease recovery for striped file | Major | erasure-coding | Walter Su | Walter Su | @@ -3349,7 +3350,7 @@ | [HADOOP-11262](https://issues.apache.org/jira/browse/HADOOP-11262) | Enable YARN to use S3A | Major | fs/s3 | Thomas Demoor | Pieter Reuse | | [YARN-4553](https://issues.apache.org/jira/browse/YARN-4553) | Add cgroups support for docker containers | Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-4265](https://issues.apache.org/jira/browse/YARN-4265) | Provide new timeline plugin storage to support fine-grained entity caching | Major | timelineserver | Li Lu | Li Lu | -| [YARN-4304](https://issues.apache.org/jira/browse/YARN-4304) | AM max resource configuration per partition to be displayed/updated correctly in UI and in various partition related metrics | Major | webapp | Sunil G | Sunil G | +| [YARN-4304](https://issues.apache.org/jira/browse/YARN-4304) | AM max resource configuration per partition to be displayed/updated correctly in UI and in various partition related metrics | Major | webapp | Sunil Govindan | Sunil Govindan | | [YARN-4526](https://issues.apache.org/jira/browse/YARN-4526) | Make SystemClock singleton so AppSchedulingInfo could use it | Major | scheduler | Karthik Kambatla | Karthik Kambatla | | [YARN-4557](https://issues.apache.org/jira/browse/YARN-4557) | Fix improper Queues sorting in PartitionedQueueComparator when accessible-node-labels=\* | Major | resourcemanager | Naganarasimha G R | Naganarasimha G R | | [HADOOP-11828](https://issues.apache.org/jira/browse/HADOOP-11828) | Implement the Hitchhiker erasure coding algorithm | Major | . 
| Zhe Zhang | jack liuquan | @@ -3359,7 +3360,7 @@ | [YARN-4578](https://issues.apache.org/jira/browse/YARN-4578) | Directories that are mounted in docker containers need to be more restrictive/container-specific | Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-4574](https://issues.apache.org/jira/browse/YARN-4574) | TestAMRMClientOnRMRestart fails on trunk | Major | client, test | Takashi Ohnishi | Takashi Ohnishi | | [HDFS-9646](https://issues.apache.org/jira/browse/HDFS-9646) | ErasureCodingWorker may fail when recovering data blocks with length less than the first internal block | Critical | erasure-coding | Takuya Fukudome | Jing Zhao | -| [YARN-4614](https://issues.apache.org/jira/browse/YARN-4614) | TestApplicationPriority#testApplicationPriorityAllocationWithChangeInPriority fails occasionally | Major | test | Jason Lowe | Sunil G | +| [YARN-4614](https://issues.apache.org/jira/browse/YARN-4614) | TestApplicationPriority#testApplicationPriorityAllocationWithChangeInPriority fails occasionally | Major | test | Jason Lowe | Sunil Govindan | | [YARN-4613](https://issues.apache.org/jira/browse/YARN-4613) | TestClientRMService#testGetClusterNodes fails occasionally | Major | test | Jason Lowe | Takashi Ohnishi | | [HDFS-9094](https://issues.apache.org/jira/browse/HDFS-9094) | Add command line option to ask NameNode reload configuration. | Major | namenode | Xiaobing Zhou | Xiaobing Zhou | | [HDFS-9672](https://issues.apache.org/jira/browse/HDFS-9672) | o.a.h.hdfs.TestLeaseRecovery2 fails intermittently | Major | test | Mingliang Liu | Mingliang Liu | @@ -3371,7 +3372,7 @@ | [YARN-4643](https://issues.apache.org/jira/browse/YARN-4643) | Container recovery is broken with delegating container runtime | Critical | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-4219](https://issues.apache.org/jira/browse/YARN-4219) | New levelDB cache storage for timeline v1.5 | Major | . 
| Li Lu | Li Lu | | [YARN-4543](https://issues.apache.org/jira/browse/YARN-4543) | TestNodeStatusUpdater.testStopReentrant fails + JUnit misusage | Minor | nodemanager | Akihiro Suda | Akihiro Suda | -| [YARN-4615](https://issues.apache.org/jira/browse/YARN-4615) | TestAbstractYarnScheduler#testResourceRequestRecoveryToTheRightAppAttempt fails occasionally | Major | test | Jason Lowe | Sunil G | +| [YARN-4615](https://issues.apache.org/jira/browse/YARN-4615) | TestAbstractYarnScheduler#testResourceRequestRecoveryToTheRightAppAttempt fails occasionally | Major | test | Jason Lowe | Sunil Govindan | | [HDFS-9494](https://issues.apache.org/jira/browse/HDFS-9494) | Parallel optimization of DFSStripedOutputStream#flushAllInternals( ) | Minor | hdfs-client | Rui Gao | Rui Gao | | [YARN-4340](https://issues.apache.org/jira/browse/YARN-4340) | Add "list" API to reservation system | Major | capacityscheduler, fairscheduler, resourcemanager | Carlo Curino | Sean Po | | [YARN-4100](https://issues.apache.org/jira/browse/YARN-4100) | Add Documentation for Distributed and Delegated-Centralized Node Labels feature | Major | api, client, resourcemanager | Naganarasimha G R | Naganarasimha G R | @@ -3393,7 +3394,6 @@ | [YARN-2575](https://issues.apache.org/jira/browse/YARN-2575) | Create separate ACLs for Reservation create/update/delete/list ops | Major | capacityscheduler, fairscheduler, resourcemanager | Subru Krishnan | Sean Po | | [YARN-4684](https://issues.apache.org/jira/browse/YARN-4684) | TestYarnCLI#testGetContainers failing in CN locale | Major | yarn | Bibin A Chundatt | Bibin A Chundatt | | [HDFS-9794](https://issues.apache.org/jira/browse/HDFS-9794) | Streamer threads may leak if failure happens when closing the striped outputstream | Critical | hdfs-client | Namit Maheshwari | Jing Zhao | -| [HDFS-9754](https://issues.apache.org/jira/browse/HDFS-9754) | Avoid unnecessary getBlockCollection calls in BlockManager | Major | namenode | Jing Zhao | Jing Zhao | | [HDFS-9691](https://issues.apache.org/jira/browse/HDFS-9691) | TestBlockManagerSafeMode#testCheckSafeMode fails intermittently | Major | test | Mingliang Liu | Mingliang Liu | | [HADOOP-12710](https://issues.apache.org/jira/browse/HADOOP-12710) | Remove dependency on commons-httpclient for TestHttpServerLogs | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-9830](https://issues.apache.org/jira/browse/HDFS-9830) | Remove references to hftp in ViewFs documentation in trunk | Minor | documentation, fs | Wei-Chiu Chuang | Wei-Chiu Chuang | @@ -3440,7 +3440,7 @@ | [HDFS-9349](https://issues.apache.org/jira/browse/HDFS-9349) | Support reconfiguring fs.protected.directories without NN restart | Major | namenode | Xiaobing Zhou | Xiaobing Zhou | | [YARN-4822](https://issues.apache.org/jira/browse/YARN-4822) | Refactor existing Preemption Policy of CS for easier adding new approach to select preemption candidates | Major | . | Wangda Tan | Wangda Tan | | [YARN-4811](https://issues.apache.org/jira/browse/YARN-4811) | Generate histograms in ContainerMetrics for actual container resource usage | Major | . | Varun Vasudev | Varun Vasudev | -| [YARN-4634](https://issues.apache.org/jira/browse/YARN-4634) | Scheduler UI/Metrics need to consider cases like non-queue label mappings | Major | . | Sunil G | Sunil G | +| [YARN-4634](https://issues.apache.org/jira/browse/YARN-4634) | Scheduler UI/Metrics need to consider cases like non-queue label mappings | Major | . 
| Sunil Govindan | Sunil Govindan | | [HADOOP-12169](https://issues.apache.org/jira/browse/HADOOP-12169) | ListStatus on empty dir in S3A lists itself instead of returning an empty list | Major | fs/s3 | Pieter Reuse | Pieter Reuse | | [HADOOP-12959](https://issues.apache.org/jira/browse/HADOOP-12959) | Add additional github web site for ISA-L library | Major | . | Li Bo | Li Bo | | [HDFS-10186](https://issues.apache.org/jira/browse/HDFS-10186) | DirectoryScanner: Improve logs by adding full path of both actual and expected block directories | Minor | datanode | Rakesh R | Rakesh R | @@ -3464,7 +3464,7 @@ | [HADOOP-12924](https://issues.apache.org/jira/browse/HADOOP-12924) | Configure raw erasure coders for supported codecs | Minor | . | Rui Li | Rui Li | | [HDFS-10284](https://issues.apache.org/jira/browse/HDFS-10284) | o.a.h.hdfs.server.blockmanagement.TestBlockManagerSafeMode.testCheckSafeMode fails intermittently | Major | test | Mingliang Liu | Mingliang Liu | | [YARN-2883](https://issues.apache.org/jira/browse/YARN-2883) | Queuing of container requests in the NM | Major | nodemanager, resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | -| [YARN-4890](https://issues.apache.org/jira/browse/YARN-4890) | Unit test intermittent failure: TestNodeLabelContainerAllocation#testQueueUsedCapacitiesUpdate | Major | . | Wangda Tan | Sunil G | +| [YARN-4890](https://issues.apache.org/jira/browse/YARN-4890) | Unit test intermittent failure: TestNodeLabelContainerAllocation#testQueueUsedCapacitiesUpdate | Major | . | Wangda Tan | Sunil Govindan | | [HDFS-10207](https://issues.apache.org/jira/browse/HDFS-10207) | Support enable Hadoop IPC backoff without namenode restart | Major | . | Xiaoyu Yao | Xiaobing Zhou | | [YARN-4968](https://issues.apache.org/jira/browse/YARN-4968) | A couple of AM retry unit tests need to wait SchedulerApplicationAttempt stopped. | Major | . | Wangda Tan | Wangda Tan | | [HADOOP-13011](https://issues.apache.org/jira/browse/HADOOP-13011) | Clearly Document the Password Details for Keystore-based Credential Providers | Major | documentation | Larry McCay | Larry McCay | @@ -3636,7 +3636,7 @@ | [YARN-5298](https://issues.apache.org/jira/browse/YARN-5298) | Mount usercache and NM filecache directories into Docker container | Major | yarn | Varun Vasudev | Sidharta Seethana | | [YARN-5361](https://issues.apache.org/jira/browse/YARN-5361) | Obtaining logs for completed container says 'file belongs to a running container ' at the end | Critical | . | Sumana Sathish | Xuan Gong | | [YARN-5320](https://issues.apache.org/jira/browse/YARN-5320) | [YARN-3368] Add resource usage by applications and queues to cluster overview page. | Major | . | Wangda Tan | Wangda Tan | -| [YARN-4484](https://issues.apache.org/jira/browse/YARN-4484) | Available Resource calculation for a queue is not correct when used with labels | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-4484](https://issues.apache.org/jira/browse/YARN-4484) | Available Resource calculation for a queue is not correct when used with labels | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HADOOP-13368](https://issues.apache.org/jira/browse/HADOOP-13368) | DFSOpsCountStatistics$OpType#fromSymbol and s3a.Statistic#fromSymbol should be O(1) operation | Major | fs | Mingliang Liu | Mingliang Liu | | [YARN-5350](https://issues.apache.org/jira/browse/YARN-5350) | Distributed Scheduling: Ensure sort order of allocatable nodes returned by the RM is not lost | Major | . 
| Arun Suresh | Arun Suresh | | [HADOOP-13212](https://issues.apache.org/jira/browse/HADOOP-13212) | Provide an option to set the socket buffers in S3AFileSystem | Minor | fs/s3 | Rajesh Balamohan | Rajesh Balamohan | @@ -3660,7 +3660,7 @@ | [HADOOP-13426](https://issues.apache.org/jira/browse/HADOOP-13426) | More efficiently build IPC responses | Major | . | Daryn Sharp | Daryn Sharp | | [HDFS-10656](https://issues.apache.org/jira/browse/HDFS-10656) | Optimize conversion of byte arrays back to path string | Major | hdfs | Daryn Sharp | Daryn Sharp | | [HDFS-10674](https://issues.apache.org/jira/browse/HDFS-10674) | Optimize creating a full path from an inode | Major | hdfs | Daryn Sharp | Daryn Sharp | -| [YARN-5342](https://issues.apache.org/jira/browse/YARN-5342) | Improve non-exclusive node partition resource allocation in Capacity Scheduler | Major | . | Wangda Tan | Sunil G | +| [YARN-5342](https://issues.apache.org/jira/browse/YARN-5342) | Improve non-exclusive node partition resource allocation in Capacity Scheduler | Major | . | Wangda Tan | Sunil Govindan | | [HADOOP-13438](https://issues.apache.org/jira/browse/HADOOP-13438) | Optimize IPC server protobuf decoding | Major | . | Daryn Sharp | Daryn Sharp | | [YARN-5459](https://issues.apache.org/jira/browse/YARN-5459) | Add support for docker rm | Minor | yarn | Shane Kumpf | Shane Kumpf | | [HDFS-10588](https://issues.apache.org/jira/browse/HDFS-10588) | False alarm in datanode log - ERROR - Disk Balancer is not enabled | Major | datanode, hdfs | Weiwei Yang | Weiwei Yang | @@ -3672,7 +3672,7 @@ | [HDFS-10681](https://issues.apache.org/jira/browse/HDFS-10681) | DiskBalancer: query command should report Plan file path apart from PlanID | Minor | diskbalancer | Manoj Govindassamy | Manoj Govindassamy | | [YARN-5137](https://issues.apache.org/jira/browse/YARN-5137) | Make DiskChecker pluggable in NodeManager | Major | nodemanager | Ray Chiang | Yufei Gu | | [HADOOP-11588](https://issues.apache.org/jira/browse/HADOOP-11588) | Benchmark framework and test for erasure coders | Major | io | Kai Zheng | Rui Li | -| [HDFS-8668](https://issues.apache.org/jira/browse/HDFS-8668) | Erasure Coding: revisit buffer used for encoding and decoding. | Major | . | Yi Liu | SammiChen | +| [HDFS-8668](https://issues.apache.org/jira/browse/HDFS-8668) | Erasure Coding: revisit buffer used for encoding and decoding. | Major | . 
| Yi Liu | Sammi Chen | | [HDFS-10724](https://issues.apache.org/jira/browse/HDFS-10724) | Document the caller context config keys | Minor | ipc, namenode | Mingliang Liu | Mingliang Liu | | [HDFS-10678](https://issues.apache.org/jira/browse/HDFS-10678) | Documenting NNThroughputBenchmark tool | Major | benchmarks, test | Mingliang Liu | Mingliang Liu | | [HDFS-10641](https://issues.apache.org/jira/browse/HDFS-10641) | TestBlockManager#testBlockReportQueueing fails intermittently | Major | namenode, test | Mingliang Liu | Daryn Sharp | @@ -3681,13 +3681,13 @@ | [HADOOP-13208](https://issues.apache.org/jira/browse/HADOOP-13208) | S3A listFiles(recursive=true) to do a bulk listObjects instead of walking the pseudo-tree of directories | Minor | fs/s3 | Steve Loughran | Steve Loughran | | [HADOOP-13405](https://issues.apache.org/jira/browse/HADOOP-13405) | doc for “fs.s3a.acl.default” indicates incorrect values | Minor | fs/s3 | Shen Yinjie | Shen Yinjie | | [YARN-4676](https://issues.apache.org/jira/browse/YARN-4676) | Automatic and Asynchronous Decommissioning Nodes Status Tracking | Major | resourcemanager | Daniel Zhi | Daniel Zhi | -| [HADOOP-13504](https://issues.apache.org/jira/browse/HADOOP-13504) | Refactor jni\_common to conform to C89 restrictions imposed by Visual Studio 2010 | Major | io | SammiChen | SammiChen | +| [HADOOP-13504](https://issues.apache.org/jira/browse/HADOOP-13504) | Refactor jni\_common to conform to C89 restrictions imposed by Visual Studio 2010 | Major | io | Sammi Chen | Sammi Chen | | [HDFS-10711](https://issues.apache.org/jira/browse/HDFS-10711) | Optimize FSPermissionChecker group membership check | Major | hdfs | Daryn Sharp | Daryn Sharp | | [HADOOP-13252](https://issues.apache.org/jira/browse/HADOOP-13252) | Tune S3A provider plugin mechanism | Minor | fs/s3 | Steve Loughran | Steve Loughran | | [HADOOP-13446](https://issues.apache.org/jira/browse/HADOOP-13446) | Support running isolated unit tests separate from AWS integration tests. 
| Major | fs/s3 | Chris Nauroth | Chris Nauroth | | [YARN-5457](https://issues.apache.org/jira/browse/YARN-5457) | Refactor DistributedScheduling framework to pull out common functionality | Major | resourcemanager | Arun Suresh | Arun Suresh | | [HDFS-10762](https://issues.apache.org/jira/browse/HDFS-10762) | Pass IIP for file status related methods | Major | hdfs | Daryn Sharp | Daryn Sharp | -| [HDFS-8905](https://issues.apache.org/jira/browse/HDFS-8905) | Refactor DFSInputStream#ReaderStrategy | Major | erasure-coding | Kai Zheng | SammiChen | +| [HDFS-8905](https://issues.apache.org/jira/browse/HDFS-8905) | Refactor DFSInputStream#ReaderStrategy | Major | erasure-coding | Kai Zheng | Sammi Chen | | [HDFS-10772](https://issues.apache.org/jira/browse/HDFS-10772) | Reduce byte/string conversions for get listing | Major | hdfs | Daryn Sharp | Daryn Sharp | | [YARN-5042](https://issues.apache.org/jira/browse/YARN-5042) | Mount /sys/fs/cgroup into Docker containers as read only mount | Major | yarn | Varun Vasudev | luhuichun | | [YARN-5564](https://issues.apache.org/jira/browse/YARN-5564) | Fix typo in RM\_SCHEDULER\_RESERVATION\_THRESHOLD\_INCREMENT\_MULTIPLE | Trivial | fairscheduler | Ray Chiang | Ray Chiang | @@ -3701,7 +3701,7 @@ | [HDFS-10662](https://issues.apache.org/jira/browse/HDFS-10662) | Optimize UTF8 string/byte conversions | Major | hdfs | Daryn Sharp | Daryn Sharp | | [HDFS-10673](https://issues.apache.org/jira/browse/HDFS-10673) | Optimize FSPermissionChecker's internal path usage | Major | hdfs | Daryn Sharp | Daryn Sharp | | [HDFS-10744](https://issues.apache.org/jira/browse/HDFS-10744) | Internally optimize path component resolution | Major | hdfs | Daryn Sharp | Daryn Sharp | -| [YARN-4091](https://issues.apache.org/jira/browse/YARN-4091) | Add REST API to retrieve scheduler activity | Major | capacity scheduler, resourcemanager | Sunil G | Chen Ge | +| [YARN-4091](https://issues.apache.org/jira/browse/YARN-4091) | Add REST API to retrieve scheduler activity | Major | capacity scheduler, resourcemanager | Sunil Govindan | Chen Ge | | [HDFS-10745](https://issues.apache.org/jira/browse/HDFS-10745) | Directly resolve paths into INodesInPath | Major | hdfs | Daryn Sharp | Daryn Sharp | | [HADOOP-13419](https://issues.apache.org/jira/browse/HADOOP-13419) | Fix javadoc warnings by JDK8 in hadoop-common package | Major | . | Kai Sasaki | Kai Sasaki | | [HADOOP-10597](https://issues.apache.org/jira/browse/HADOOP-10597) | RPC Server signals backoff to clients when all request queues are full | Major | . | Ming Ma | Ming Ma | @@ -3715,6 +3715,7 @@ | [HDFS-9922](https://issues.apache.org/jira/browse/HDFS-9922) | Upgrade Domain placement policy status marks a good block in violation when there are decommissioned nodes | Minor | . | Chris Trezzo | Chris Trezzo | | [HDFS-8818](https://issues.apache.org/jira/browse/HDFS-8818) | Allow Balancer to run faster | Major | balancer & mover | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [YARN-4140](https://issues.apache.org/jira/browse/YARN-4140) | RM container allocation delayed incase of app submitted to Nodelabel partition | Major | scheduler | Bibin A Chundatt | Bibin A Chundatt | +| [HDFS-9754](https://issues.apache.org/jira/browse/HDFS-9754) | Avoid unnecessary getBlockCollection calls in BlockManager | Major | namenode | Jing Zhao | Jing Zhao | ### OTHER: @@ -3744,7 +3745,7 @@ | [YARN-5297](https://issues.apache.org/jira/browse/YARN-5297) | Avoid printing a stack trace when recovering an app after the RM restarts | Major | . 
| Siddharth Seth | Junping Du | | [HDFS-10651](https://issues.apache.org/jira/browse/HDFS-10651) | Clean up some configuration related codes about legacy block reader | Minor | . | Kai Zheng | Youwei Wang | | [HDFS-9353](https://issues.apache.org/jira/browse/HDFS-9353) | Code and comment mismatch in JavaKeyStoreProvider | Trivial | . | nijel | Andras Bokor | -| [HDFS-10718](https://issues.apache.org/jira/browse/HDFS-10718) | Prefer direct ByteBuffer in native RS encoder and decoder | Major | . | SammiChen | SammiChen | +| [HDFS-10718](https://issues.apache.org/jira/browse/HDFS-10718) | Prefer direct ByteBuffer in native RS encoder and decoder | Major | . | Sammi Chen | Sammi Chen | | [YARN-5495](https://issues.apache.org/jira/browse/YARN-5495) | Remove import wildcard in CapacityScheduler | Trivial | capacityscheduler | Ray Chiang | Ray Chiang | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/RELEASENOTES.3.0.0-alpha1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/RELEASENOTES.3.0.0-alpha1.md index 1a476b11d9c..2727ceb798c 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/RELEASENOTES.3.0.0-alpha1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha1/RELEASENOTES.3.0.0-alpha1.md @@ -1006,6 +1006,13 @@ The output of fsck command for being written hdfs files had been changed. When u The preferred block size XML element has been corrected from "\\\" to "\\\". +--- + +* [HADOOP-12436](https://issues.apache.org/jira/browse/HADOOP-12436) | *Major* | **GlobPattern regex library has performance issues with wildcard characters** + +GlobFilter and RegexFilter.compile() now returns com.google.re2j.pattern.Pattern instead of java.util.regex.Pattern + + --- * [HDFS-9184](https://issues.apache.org/jira/browse/HDFS-9184) | *Major* | **Logging HDFS operation's caller context into audit logs** @@ -1463,6 +1470,13 @@ Add new flag to allow supporting path style addressing for s3a The default port for KMS service is now 9600. This is to avoid conflicts on the previous port 16000, which is also used by HMaster as the default port. +--- + +* [HDFS-9412](https://issues.apache.org/jira/browse/HDFS-9412) | *Major* | **getBlocks occupies FSLock and takes too long to complete** + +Skip blocks with size below dfs.balancer.getBlocks.min-block-size (default 10MB) when a balancer asks for a list of blocks. 
+ + --- * [YARN-4784](https://issues.apache.org/jira/browse/YARN-4784) | *Major* | **Fairscheduler: defaultQueueSchedulingPolicy should not accept FIFO** diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/CHANGES.3.0.0-alpha2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/CHANGELOG.3.0.0-alpha2.md similarity index 98% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/CHANGES.3.0.0-alpha2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/CHANGELOG.3.0.0-alpha2.md index b23bbf717b9..97ab70082db 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/CHANGES.3.0.0-alpha2.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/CHANGELOG.3.0.0-alpha2.md @@ -70,8 +70,8 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | -| [HADOOP-12756](https://issues.apache.org/jira/browse/HADOOP-12756) | Incorporate Aliyun OSS file system implementation | Major | fs | shimingfei | mingfei.shi | -| [MAPREDUCE-6774](https://issues.apache.org/jira/browse/MAPREDUCE-6774) | Add support for HDFS erasure code policy to TestDFSIO | Major | . | SammiChen | SammiChen | +| [HADOOP-12756](https://issues.apache.org/jira/browse/HADOOP-12756) | Incorporate Aliyun OSS file system implementation | Major | fs, fs/oss | shimingfei | mingfei.shi | +| [MAPREDUCE-6774](https://issues.apache.org/jira/browse/MAPREDUCE-6774) | Add support for HDFS erasure code policy to TestDFSIO | Major | . | Sammi Chen | Sammi Chen | | [HDFS-10918](https://issues.apache.org/jira/browse/HDFS-10918) | Add a tool to get FileEncryptionInfo from CLI | Major | encryption | Xiao Chen | Xiao Chen | | [HADOOP-13584](https://issues.apache.org/jira/browse/HADOOP-13584) | hadoop-aliyun: merge HADOOP-12756 branch back | Major | fs | shimingfei | Genmao Yu | | [HDFS-9820](https://issues.apache.org/jira/browse/HDFS-9820) | Improve distcp to support efficient restore to an earlier snapshot | Major | distcp | Yongjun Zhang | Yongjun Zhang | @@ -81,6 +81,7 @@ | [HADOOP-13578](https://issues.apache.org/jira/browse/HADOOP-13578) | Add Codec for ZStandard Compression | Major | . 
| churro morales | churro morales | | [HADOOP-13933](https://issues.apache.org/jira/browse/HADOOP-13933) | Add haadmin -getAllServiceState option to get the HA state of all the NameNodes/ResourceManagers | Major | tools | Surendra Singh Lilhore | Surendra Singh Lilhore | | [HADOOP-13673](https://issues.apache.org/jira/browse/HADOOP-13673) | Update scripts to be smarter when running with privilege | Major | scripts | Allen Wittenauer | Allen Wittenauer | +| [YARN-2877](https://issues.apache.org/jira/browse/YARN-2877) | Extend YARN to support distributed scheduling | Major | nodemanager, resourcemanager | Sriram Rao | Konstantinos Karanasos | ### IMPROVEMENTS: @@ -173,7 +174,7 @@ | [YARN-4710](https://issues.apache.org/jira/browse/YARN-4710) | Reduce logging application reserved debug info in FSAppAttempt#assignContainer | Minor | fairscheduler | Yiqun Lin | Yiqun Lin | | [YARN-4668](https://issues.apache.org/jira/browse/YARN-4668) | Reuse objectMapper instance in Yarn | Major | timelineclient | Yiqun Lin | Yiqun Lin | | [HDFS-11064](https://issues.apache.org/jira/browse/HDFS-11064) | Mention the default NN rpc ports in hdfs-default.xml | Minor | documentation | Andrew Wang | Yiqun Lin | -| [HDFS-10926](https://issues.apache.org/jira/browse/HDFS-10926) | Update staled configuration properties related to erasure coding | Major | . | SammiChen | SammiChen | +| [HDFS-10926](https://issues.apache.org/jira/browse/HDFS-10926) | Update staled configuration properties related to erasure coding | Major | . | Sammi Chen | Sammi Chen | | [YARN-4963](https://issues.apache.org/jira/browse/YARN-4963) | capacity scheduler: Make number of OFF\_SWITCH assignments per heartbeat configurable | Major | capacityscheduler | Nathan Roberts | Nathan Roberts | | [HDFS-11047](https://issues.apache.org/jira/browse/HDFS-11047) | Remove deep copies of FinalizedReplica to alleviate heap consumption on DataNode | Major | datanode | Xiaobing Zhou | Xiaobing Zhou | | [HADOOP-10075](https://issues.apache.org/jira/browse/HADOOP-10075) | Update jetty dependency to version 9 | Critical | . | Robert Rati | Robert Kanter | @@ -184,7 +185,6 @@ | [MAPREDUCE-6795](https://issues.apache.org/jira/browse/MAPREDUCE-6795) | Update the document for JobConf#setNumReduceTasks | Major | documentation | Akira Ajisaka | Yiqun Lin | | [HADOOP-13603](https://issues.apache.org/jira/browse/HADOOP-13603) | Ignore package line length checkstyle rule | Major | build | Shane Kumpf | Shane Kumpf | | [HADOOP-13583](https://issues.apache.org/jira/browse/HADOOP-13583) | Incorporate checkcompatibility script which runs Java API Compliance Checker | Major | scripts | Andrew Wang | Andrew Wang | -| [HADOOP-13738](https://issues.apache.org/jira/browse/HADOOP-13738) | DiskChecker should perform some disk IO | Major | . | Arpit Agarwal | Arpit Agarwal | | [HADOOP-13667](https://issues.apache.org/jira/browse/HADOOP-13667) | Fix typing mistake of inline document in hadoop-metrics2.properties | Major | documentation | Rui Gao | Rui Gao | | [HDFS-10909](https://issues.apache.org/jira/browse/HDFS-10909) | De-duplicate code in ErasureCodingWorker#initializeStripedReadThreadPool and DFSClient#initThreadsNumForStripedReads | Minor | . 
| Wei-Chiu Chuang | Manoj Govindassamy | | [HADOOP-13784](https://issues.apache.org/jira/browse/HADOOP-13784) | Output javadoc inside the target directory | Major | documentation | Andrew Wang | Andrew Wang | @@ -214,7 +214,7 @@ | [YARN-5375](https://issues.apache.org/jira/browse/YARN-5375) | invoke MockRM#drainEvents implicitly in MockRM methods to reduce test failures | Major | resourcemanager | sandflee | sandflee | | [HDFS-11147](https://issues.apache.org/jira/browse/HDFS-11147) | Remove confusing log output in FsDatasetImpl#getInitialVolumeFailureInfos | Minor | datanode | Chen Liang | Chen Liang | | [HADOOP-13742](https://issues.apache.org/jira/browse/HADOOP-13742) | Expose "NumOpenConnectionsPerUser" as a metric | Major | . | Brahma Reddy Battula | Brahma Reddy Battula | -| [HDFS-11117](https://issues.apache.org/jira/browse/HDFS-11117) | Refactor striped file tests to allow flexibly test erasure coding policy | Major | . | SammiChen | SammiChen | +| [HDFS-11117](https://issues.apache.org/jira/browse/HDFS-11117) | Refactor striped file tests to allow flexibly test erasure coding policy | Major | . | Sammi Chen | Sammi Chen | | [HADOOP-13646](https://issues.apache.org/jira/browse/HADOOP-13646) | Remove outdated overview.html | Minor | . | Akira Ajisaka | Brahma Reddy Battula | | [YARN-5722](https://issues.apache.org/jira/browse/YARN-5722) | FairScheduler hides group resolution exceptions when assigning queue | Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [HADOOP-13166](https://issues.apache.org/jira/browse/HADOOP-13166) | add getFileStatus("/") test to AbstractContractGetFileStatusTest | Minor | fs, test | Steve Loughran | Steve Loughran | @@ -234,7 +234,7 @@ | [HDFS-10581](https://issues.apache.org/jira/browse/HDFS-10581) | Hide redundant table on NameNode WebUI when no nodes are decomissioning | Trivial | hdfs, ui | Weiwei Yang | Weiwei Yang | | [HDFS-11211](https://issues.apache.org/jira/browse/HDFS-11211) | Add a time unit to the DataNode client trace format | Minor | datanode | Akira Ajisaka | Jagadesh Kiran N | | [HADOOP-13827](https://issues.apache.org/jira/browse/HADOOP-13827) | Add reencryptEncryptedKey interface to KMS | Major | kms | Xiao Chen | Xiao Chen | -| [HDFS-10206](https://issues.apache.org/jira/browse/HDFS-10206) | Datanodes not sorted properly by distance when the reader isn't a datanode | Major | . | Ming Ma | Nandakumar | +| [HDFS-10206](https://issues.apache.org/jira/browse/HDFS-10206) | Datanodes not sorted properly by distance when the reader isn't a datanode | Major | . | Ming Ma | Nanda kumar | | [HDFS-11217](https://issues.apache.org/jira/browse/HDFS-11217) | Annotate NameNode and DataNode MXBean interfaces as Private/Stable | Major | . | Akira Ajisaka | Jagadesh Kiran N | | [YARN-4457](https://issues.apache.org/jira/browse/YARN-4457) | Cleanup unchecked types for EventHandler | Major | . | Daniel Templeton | Daniel Templeton | | [HADOOP-13900](https://issues.apache.org/jira/browse/HADOOP-13900) | Remove snapshot version of SDK dependency from Azure Data Lake Store File System | Major | fs/adl | Vishwajeet Dusane | Vishwajeet Dusane | @@ -254,7 +254,6 @@ | [HDFS-11275](https://issues.apache.org/jira/browse/HDFS-11275) | Check groupEntryIndex and throw a helpful exception on failures when removing ACL. 
| Major | namenode | Xiao Chen | Xiao Chen | | [YARN-5709](https://issues.apache.org/jira/browse/YARN-5709) | Cleanup leader election configs and pluggability | Critical | resourcemanager | Karthik Kambatla | Karthik Kambatla | | [HDFS-11279](https://issues.apache.org/jira/browse/HDFS-11279) | Cleanup unused DataNode#checkDiskErrorAsync() | Minor | . | Xiaoyu Yao | Hanisha Koneru | -| [HDFS-11156](https://issues.apache.org/jira/browse/HDFS-11156) | Add new op GETFILEBLOCKLOCATIONS to WebHDFS REST API | Major | webhdfs | Weiwei Yang | Weiwei Yang | | [HDFS-9483](https://issues.apache.org/jira/browse/HDFS-9483) | Documentation does not cover use of "swebhdfs" as URL scheme for SSL-secured WebHDFS. | Major | documentation | Chris Nauroth | Surendra Singh Lilhore | | [YARN-5991](https://issues.apache.org/jira/browse/YARN-5991) | Yarn Distributed Shell does not print throwable t to App Master When failed to start container | Minor | . | dashwang | Jim Frankola | | [HDFS-11292](https://issues.apache.org/jira/browse/HDFS-11292) | log lastWrittenTxId etc info in logSyncAll | Major | hdfs | Yongjun Zhang | Yongjun Zhang | @@ -275,16 +274,18 @@ | [MAPREDUCE-6728](https://issues.apache.org/jira/browse/MAPREDUCE-6728) | Give fetchers hint when ShuffleHandler rejects a shuffling connection | Major | mrv2 | Haibo Chen | Haibo Chen | | [HADOOP-13606](https://issues.apache.org/jira/browse/HADOOP-13606) | swift FS to add a service load metadata file | Major | fs/swift | Steve Loughran | Steve Loughran | | [HADOOP-13037](https://issues.apache.org/jira/browse/HADOOP-13037) | Refactor Azure Data Lake Store as an independent FileSystem | Major | fs/adl | Shrikant Naidu | Vishwajeet Dusane | +| [HDFS-11156](https://issues.apache.org/jira/browse/HDFS-11156) | Add new op GETFILEBLOCKLOCATIONS to WebHDFS REST API | Major | webhdfs | Weiwei Yang | Weiwei Yang | +| [HADOOP-13738](https://issues.apache.org/jira/browse/HADOOP-13738) | DiskChecker should perform some disk IO | Major | . | Arpit Agarwal | Arpit Agarwal | ### BUG FIXES: | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-9034](https://issues.apache.org/jira/browse/HDFS-9034) | "StorageTypeStats" Metric should not count failed storage. | Major | namenode | Archana T | Surendra Singh Lilhore | | [MAPREDUCE-4784](https://issues.apache.org/jira/browse/MAPREDUCE-4784) | TestRecovery occasionally fails | Major | mrv2, test | Jason Lowe | Haibo Chen | | [HDFS-10760](https://issues.apache.org/jira/browse/HDFS-10760) | DataXceiver#run() should not log InvalidToken exception as an error | Major | . | Pan Yuxuan | Pan Yuxuan | | [HDFS-10729](https://issues.apache.org/jira/browse/HDFS-10729) | Improve log message for edit loading failures caused by FS limit checks. | Major | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | -| [YARN-5221](https://issues.apache.org/jira/browse/YARN-5221) | Expose UpdateResourceRequest API to allow AM to request for change in container properties | Major | . | Arun Suresh | Arun Suresh | | [HADOOP-13375](https://issues.apache.org/jira/browse/HADOOP-13375) | o.a.h.security.TestGroupsCaching.testBackgroundRefreshCounters seems flaky | Major | security, test | Mingliang Liu | Weiwei Yang | | [HDFS-10820](https://issues.apache.org/jira/browse/HDFS-10820) | Reuse closeResponder to reset the response variable in DataStreamer#run | Minor | . 
| Yiqun Lin | Yiqun Lin | | [YARN-5555](https://issues.apache.org/jira/browse/YARN-5555) | Scheduler UI: "% of Queue" is inaccurate if leaf queue is hierarchically nested. | Minor | . | Eric Payne | Eric Payne | @@ -358,7 +359,7 @@ | [MAPREDUCE-6789](https://issues.apache.org/jira/browse/MAPREDUCE-6789) | Fix TestAMWebApp failure | Major | test | Akira Ajisaka | Daniel Templeton | | [MAPREDUCE-6740](https://issues.apache.org/jira/browse/MAPREDUCE-6740) | Enforce mapreduce.task.timeout to be at least mapreduce.task.progress-report.interval | Minor | mr-am | Haibo Chen | Haibo Chen | | [HADOOP-13690](https://issues.apache.org/jira/browse/HADOOP-13690) | Fix typos in core-default.xml | Minor | documentation | Yiqun Lin | Yiqun Lin | -| [YARN-5101](https://issues.apache.org/jira/browse/YARN-5101) | YARN\_APPLICATION\_UPDATED event is parsed in ApplicationHistoryManagerOnTimelineStore#convertToApplicationReport with reversed order | Major | . | Xuan Gong | Sunil G | +| [YARN-5101](https://issues.apache.org/jira/browse/YARN-5101) | YARN\_APPLICATION\_UPDATED event is parsed in ApplicationHistoryManagerOnTimelineStore#convertToApplicationReport with reversed order | Major | . | Xuan Gong | Sunil Govindan | | [YARN-5659](https://issues.apache.org/jira/browse/YARN-5659) | getPathFromYarnURL should use standard methods | Major | . | Sergey Shelukhin | Sergey Shelukhin | | [HADOOP-12611](https://issues.apache.org/jira/browse/HADOOP-12611) | TestZKSignerSecretProvider#testMultipleInit occasionally fail | Major | . | Wei-Chiu Chuang | Eric Badger | | [HDFS-10969](https://issues.apache.org/jira/browse/HDFS-10969) | Fix typos in hdfs-default.xml | Minor | documentation | Yiqun Lin | Yiqun Lin | @@ -401,7 +402,7 @@ | [HDFS-11040](https://issues.apache.org/jira/browse/HDFS-11040) | Add documentation for HDFS-9820 distcp improvement | Major | distcp | Yongjun Zhang | Yongjun Zhang | | [YARN-5677](https://issues.apache.org/jira/browse/YARN-5677) | RM should transition to standby when connection is lost for an extended period | Critical | resourcemanager | Daniel Templeton | Daniel Templeton | | [HDFS-11054](https://issues.apache.org/jira/browse/HDFS-11054) | Suppress verbose log message in BlockPlacementPolicyDefault | Major | . | Arpit Agarwal | Chen Liang | -| [HDFS-10935](https://issues.apache.org/jira/browse/HDFS-10935) | TestFileChecksum fails in some cases | Major | . | Wei-Chiu Chuang | SammiChen | +| [HDFS-10935](https://issues.apache.org/jira/browse/HDFS-10935) | TestFileChecksum fails in some cases | Major | . | Wei-Chiu Chuang | Sammi Chen | | [YARN-5753](https://issues.apache.org/jira/browse/YARN-5753) | fix NPE in AMRMClientImpl.getMatchingRequests() | Major | yarn | Haibo Chen | Haibo Chen | | [HDFS-11050](https://issues.apache.org/jira/browse/HDFS-11050) | Change log level to 'warn' when ssl initialization fails and defaults to DEFAULT\_TIMEOUT\_CONN\_CONFIGURATOR | Major | . | Kuhu Shukla | Kuhu Shukla | | [HDFS-11053](https://issues.apache.org/jira/browse/HDFS-11053) | Unnecessary superuser check in versionRequest() | Major | namenode, security | Kihwal Lee | Kihwal Lee | @@ -418,7 +419,7 @@ | [YARN-5757](https://issues.apache.org/jira/browse/YARN-5757) | RM REST API documentation is not up to date | Trivial | resourcemanager, yarn | Miklos Szegedi | Miklos Szegedi | | [MAPREDUCE-6541](https://issues.apache.org/jira/browse/MAPREDUCE-6541) | Exclude scheduled reducer memory when calculating available mapper slots from headroom to avoid deadlock | Major | . 
| Wangda Tan | Varun Saxena | | [YARN-3848](https://issues.apache.org/jira/browse/YARN-3848) | TestNodeLabelContainerAllocation is not timing out | Major | test | Jason Lowe | Varun Saxena | -| [YARN-5420](https://issues.apache.org/jira/browse/YARN-5420) | Delete org.apache.hadoop.yarn.server.resourcemanager.resource.Priority as its not necessary | Minor | resourcemanager | Sunil G | Sunil G | +| [YARN-5420](https://issues.apache.org/jira/browse/YARN-5420) | Delete org.apache.hadoop.yarn.server.resourcemanager.resource.Priority as its not necessary | Minor | resourcemanager | Sunil Govindan | Sunil Govindan | | [HADOOP-13201](https://issues.apache.org/jira/browse/HADOOP-13201) | Print the directory paths when ViewFs denies the rename operation on internal dirs | Major | viewfs | Tianyin Xu | Rakesh R | | [YARN-5172](https://issues.apache.org/jira/browse/YARN-5172) | Update yarn daemonlog documentation due to HADOOP-12847 | Trivial | documentation | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-4831](https://issues.apache.org/jira/browse/YARN-4831) | Recovered containers will be killed after NM stateful restart | Major | nodemanager | Siqi Li | Siqi Li | @@ -426,7 +427,7 @@ | [YARN-5776](https://issues.apache.org/jira/browse/YARN-5776) | Checkstyle: MonitoringThread.Run method length is too long | Trivial | nodemanager | Miklos Szegedi | Miklos Szegedi | | [YARN-3432](https://issues.apache.org/jira/browse/YARN-3432) | Cluster metrics have wrong Total Memory when there is reserved memory on CS | Major | capacityscheduler, resourcemanager | Thomas Graves | Brahma Reddy Battula | | [HDFS-9500](https://issues.apache.org/jira/browse/HDFS-9500) | datanodesSoftwareVersions map may counting wrong when rolling upgrade | Major | . | Phil Yang | Erik Krogen | -| [MAPREDUCE-2631](https://issues.apache.org/jira/browse/MAPREDUCE-2631) | Potential resource leaks in BinaryProtocol$TeeOutputStream.java | Major | . | Ravi Teja Ch N V | Sunil G | +| [MAPREDUCE-2631](https://issues.apache.org/jira/browse/MAPREDUCE-2631) | Potential resource leaks in BinaryProtocol$TeeOutputStream.java | Major | . | Ravi Teja Ch N V | Sunil Govindan | | [YARN-2306](https://issues.apache.org/jira/browse/YARN-2306) | Add test for leakage of reservation metrics in fair scheduler | Minor | fairscheduler | Hong Zhiguo | Hong Zhiguo | | [YARN-4743](https://issues.apache.org/jira/browse/YARN-4743) | FairSharePolicy breaks TimSort assumption | Major | fairscheduler | Zephyr Guo | Zephyr Guo | | [HADOOP-13763](https://issues.apache.org/jira/browse/HADOOP-13763) | KMS REST API Documentation Decrypt URL typo | Minor | documentation, kms | Jeffrey E Rodriguez | Jeffrey E Rodriguez | @@ -436,7 +437,6 @@ | [YARN-5767](https://issues.apache.org/jira/browse/YARN-5767) | Fix the order that resources are cleaned up from the local Public/Private caches | Major | . 
| Chris Trezzo | Chris Trezzo | | [HDFS-11061](https://issues.apache.org/jira/browse/HDFS-11061) | Update dfs -count -t command line help and documentation | Minor | documentation, fs | Wei-Chiu Chuang | Yiqun Lin | | [YARN-5773](https://issues.apache.org/jira/browse/YARN-5773) | RM recovery too slow due to LeafQueue#activateApplication() | Critical | capacity scheduler, rolling upgrade | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-5793](https://issues.apache.org/jira/browse/YARN-5793) | Trim configuration values in DockerLinuxContainerRuntime | Minor | nodemanager | Tianyin Xu | Tianyin Xu | | [YARN-5800](https://issues.apache.org/jira/browse/YARN-5800) | Delete LinuxContainerExecutor comment from yarn-default.xml | Trivial | yarn | Daniel Templeton | Jan Hentschel | | [YARN-5809](https://issues.apache.org/jira/browse/YARN-5809) | AsyncDispatcher possibly invokes multiple shutdown thread when handling exception | Major | . | Jian He | Jian He | | [HADOOP-8500](https://issues.apache.org/jira/browse/HADOOP-8500) | Fix javadoc jars to not contain entire target directory | Minor | build | EJ Ciramella | Andrew Wang | @@ -470,9 +470,9 @@ | [HDFS-9337](https://issues.apache.org/jira/browse/HDFS-9337) | Validate required params for WebHDFS requests | Major | . | Jagadesh Kiran N | Jagadesh Kiran N | | [YARN-5834](https://issues.apache.org/jira/browse/YARN-5834) | TestNodeStatusUpdater.testNMRMConnectionConf compares nodemanager wait time to the incorrect value | Trivial | . | Miklos Szegedi | Chang Li | | [HDFS-11128](https://issues.apache.org/jira/browse/HDFS-11128) | CreateEditsLog throws NullPointerException | Major | hdfs | Hanisha Koneru | Hanisha Koneru | -| [YARN-5868](https://issues.apache.org/jira/browse/YARN-5868) | Update npm to latest version in Dockerfile to avoid random failures of npm while run maven build | Major | yarn-ui-v2 | Sunil G | Sunil G | +| [YARN-5868](https://issues.apache.org/jira/browse/YARN-5868) | Update npm to latest version in Dockerfile to avoid random failures of npm while run maven build | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | | [YARN-5545](https://issues.apache.org/jira/browse/YARN-5545) | Fix issues related to Max App in capacity scheduler | Major | capacity scheduler | Bibin A Chundatt | Bibin A Chundatt | -| [YARN-5825](https://issues.apache.org/jira/browse/YARN-5825) | ProportionalPreemptionalPolicy could use readLock over LeafQueue instead of synchronized block | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-5825](https://issues.apache.org/jira/browse/YARN-5825) | ProportionalPreemptionalPolicy could use readLock over LeafQueue instead of synchronized block | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HDFS-11129](https://issues.apache.org/jira/browse/HDFS-11129) | TestAppendSnapshotTruncate fails with bind exception | Major | test | Brahma Reddy Battula | Brahma Reddy Battula | | [HADOOP-13813](https://issues.apache.org/jira/browse/HADOOP-13813) | TestDelegationTokenFetcher#testDelegationTokenWithoutRenewer is failing | Major | security, test | Mingliang Liu | Mingliang Liu | | [HDFS-11135](https://issues.apache.org/jira/browse/HDFS-11135) | The tests in TestBalancer run fails due to NPE | Major | test | Yiqun Lin | Yiqun Lin | @@ -531,7 +531,7 @@ | [MAPREDUCE-6816](https://issues.apache.org/jira/browse/MAPREDUCE-6816) | Progress bars in Web UI always at 100% | Blocker | webapps | Shen Yinjie | Shen Yinjie | | [HADOOP-13859](https://issues.apache.org/jira/browse/HADOOP-13859) | 
TestConfigurationFieldsBase fails for fields that are DEFAULT values of skipped properties. | Major | common | Haibo Chen | Haibo Chen | | [YARN-5184](https://issues.apache.org/jira/browse/YARN-5184) | Fix up incompatible changes introduced on ContainerStatus and NodeReport | Blocker | api | Karthik Kambatla | Sangjin Lee | -| [YARN-5932](https://issues.apache.org/jira/browse/YARN-5932) | Retrospect moveApplicationToQueue in align with YARN-5611 | Major | capacity scheduler, resourcemanager | Sunil G | Sunil G | +| [YARN-5932](https://issues.apache.org/jira/browse/YARN-5932) | Retrospect moveApplicationToQueue in align with YARN-5611 | Major | capacity scheduler, resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-5136](https://issues.apache.org/jira/browse/YARN-5136) | Error in handling event type APP\_ATTEMPT\_REMOVED to the scheduler | Major | . | tangshangwen | Wilfred Spiegelenburg | | [MAPREDUCE-6817](https://issues.apache.org/jira/browse/MAPREDUCE-6817) | The format of job start time in JHS is different from those of submit and finish time | Major | jobhistoryserver | Haibo Chen | Haibo Chen | | [YARN-5963](https://issues.apache.org/jira/browse/YARN-5963) | Spelling errors in logging and exceptions for node manager, client, web-proxy, common, and app history code | Trivial | client, nodemanager | Grant Sohn | Grant Sohn | @@ -583,7 +583,7 @@ | [YARN-5962](https://issues.apache.org/jira/browse/YARN-5962) | Spelling errors in logging and exceptions for resource manager code | Trivial | resourcemanager | Grant Sohn | Grant Sohn | | [YARN-5257](https://issues.apache.org/jira/browse/YARN-5257) | Fix unreleased resources and null dereferences | Major | . | Yufei Gu | Yufei Gu | | [HDFS-11252](https://issues.apache.org/jira/browse/HDFS-11252) | TestFileTruncate#testTruncateWithDataNodesRestartImmediately can fail with BindException | Major | . | Jason Lowe | Yiqun Lin | -| [YARN-6001](https://issues.apache.org/jira/browse/YARN-6001) | Improve moveApplicationQueues command line | Major | client | Sunil G | Sunil G | +| [YARN-6001](https://issues.apache.org/jira/browse/YARN-6001) | Improve moveApplicationQueues command line | Major | client | Sunil Govindan | Sunil Govindan | | [YARN-6024](https://issues.apache.org/jira/browse/YARN-6024) | Capacity Scheduler 'continuous reservation looking' doesn't work when sum of queue's used and reserved resources is equal to max | Major | . | Wangda Tan | Wangda Tan | | [YARN-4882](https://issues.apache.org/jira/browse/YARN-4882) | Change the log level to DEBUG for recovering completed applications | Major | resourcemanager | Rohith Sharma K S | Daniel Templeton | | [HDFS-11251](https://issues.apache.org/jira/browse/HDFS-11251) | ConcurrentModificationException during DataNode#refreshVolumes | Major | . | Jason Lowe | Manoj Govindassamy | @@ -630,7 +630,7 @@ | [HADOOP-13965](https://issues.apache.org/jira/browse/HADOOP-13965) | Groups should be consistent in using default group mapping class | Minor | security | Yiqun Lin | Yiqun Lin | | [HDFS-11316](https://issues.apache.org/jira/browse/HDFS-11316) | TestDataNodeVolumeFailure#testUnderReplicationAfterVolFailure fails in trunk | Minor | . | Yiqun Lin | Yiqun Lin | | [HDFS-11287](https://issues.apache.org/jira/browse/HDFS-11287) | Storage class member storageDirs should be private to avoid unprotected access by derived classes | Major | . 
| Manoj Govindassamy | Manoj Govindassamy | -| [HDFS-11268](https://issues.apache.org/jira/browse/HDFS-11268) | Correctly reconstruct erasure coding file from FSImage | Critical | erasure-coding | SammiChen | SammiChen | +| [HDFS-11268](https://issues.apache.org/jira/browse/HDFS-11268) | Correctly reconstruct erasure coding file from FSImage | Critical | erasure-coding | Sammi Chen | Sammi Chen | | [HADOOP-14001](https://issues.apache.org/jira/browse/HADOOP-14001) | Improve delegation token validity checking | Major | . | Akira Ajisaka | Akira Ajisaka | | [YARN-6110](https://issues.apache.org/jira/browse/YARN-6110) | Fix opportunistic containers documentation | Minor | documentation | Akira Ajisaka | Akira Ajisaka | | [YARN-6104](https://issues.apache.org/jira/browse/YARN-6104) | RegistrySecurity overrides zookeeper sasl system properties | Major | . | Billie Rinaldi | Billie Rinaldi | @@ -667,13 +667,13 @@ |:---- |:---- | :--- |:---- |:---- |:---- | | [YARN-4733](https://issues.apache.org/jira/browse/YARN-4733) | [YARN-3368] Initial commit of new YARN web UI | Major | . | Wangda Tan | Wangda Tan | | [YARN-4517](https://issues.apache.org/jira/browse/YARN-4517) | [YARN-3368] Add nodes page | Major | yarn | Wangda Tan | Varun Saxena | -| [YARN-4514](https://issues.apache.org/jira/browse/YARN-4514) | [YARN-3368] Cleanup hardcoded configurations, such as RM/ATS addresses | Major | . | Wangda Tan | Sunil G | -| [YARN-5019](https://issues.apache.org/jira/browse/YARN-5019) | [YARN-3368] Change urls in new YARN ui from camel casing to hyphens | Major | . | Varun Vasudev | Sunil G | -| [YARN-5000](https://issues.apache.org/jira/browse/YARN-5000) | [YARN-3368] App attempt page is not loading when timeline server is not started | Major | . | Sunil G | Sunil G | -| [YARN-5038](https://issues.apache.org/jira/browse/YARN-5038) | [YARN-3368] Application and Container pages shows wrong values when RM is stopped | Major | . | Sunil G | Sunil G | -| [YARN-4515](https://issues.apache.org/jira/browse/YARN-4515) | [YARN-3368] Support hosting web UI framework inside YARN RM | Major | . | Wangda Tan | Sunil G | +| [YARN-4514](https://issues.apache.org/jira/browse/YARN-4514) | [YARN-3368] Cleanup hardcoded configurations, such as RM/ATS addresses | Major | . | Wangda Tan | Sunil Govindan | +| [YARN-5019](https://issues.apache.org/jira/browse/YARN-5019) | [YARN-3368] Change urls in new YARN ui from camel casing to hyphens | Major | . | Varun Vasudev | Sunil Govindan | +| [YARN-5000](https://issues.apache.org/jira/browse/YARN-5000) | [YARN-3368] App attempt page is not loading when timeline server is not started | Major | . | Sunil Govindan | Sunil Govindan | +| [YARN-5038](https://issues.apache.org/jira/browse/YARN-5038) | [YARN-3368] Application and Container pages shows wrong values when RM is stopped | Major | . | Sunil Govindan | Sunil Govindan | +| [YARN-4515](https://issues.apache.org/jira/browse/YARN-4515) | [YARN-3368] Support hosting web UI framework inside YARN RM | Major | . | Wangda Tan | Sunil Govindan | | [YARN-5183](https://issues.apache.org/jira/browse/YARN-5183) | [YARN-3368] Support for responsive navbar when window is resized | Major | . 
| Kai Sasaki | Kai Sasaki | -| [YARN-5161](https://issues.apache.org/jira/browse/YARN-5161) | [YARN-3368] Add Apache Hadoop logo in YarnUI home page | Major | webapp | Sunil G | Kai Sasaki | +| [YARN-5161](https://issues.apache.org/jira/browse/YARN-5161) | [YARN-3368] Add Apache Hadoop logo in YarnUI home page | Major | webapp | Sunil Govindan | Kai Sasaki | | [YARN-5344](https://issues.apache.org/jira/browse/YARN-5344) | [YARN-3368] Generic UI improvements | Major | . | Sreenath Somarajapuram | Sreenath Somarajapuram | | [YARN-5345](https://issues.apache.org/jira/browse/YARN-5345) | [YARN-3368] Cluster overview page improvements | Major | . | Sreenath Somarajapuram | Sreenath Somarajapuram | | [YARN-5346](https://issues.apache.org/jira/browse/YARN-5346) | [YARN-3368] Queues page improvements | Major | . | Sreenath Somarajapuram | Sreenath Somarajapuram | @@ -681,7 +681,7 @@ | [YARN-5348](https://issues.apache.org/jira/browse/YARN-5348) | [YARN-3368] Node details page improvements | Major | . | Sreenath Somarajapuram | Sreenath Somarajapuram | | [YARN-5321](https://issues.apache.org/jira/browse/YARN-5321) | [YARN-3368] Add resource usage for application by node managers | Major | . | Wangda Tan | Wangda Tan | | [YARN-5322](https://issues.apache.org/jira/browse/YARN-5322) | [YARN-3368] Add a node heat chart map | Major | . | Wangda Tan | Wangda Tan | -| [YARN-5334](https://issues.apache.org/jira/browse/YARN-5334) | [YARN-3368] Introduce REFRESH button in various UI pages | Major | webapp | Sunil G | Sreenath Somarajapuram | +| [YARN-5334](https://issues.apache.org/jira/browse/YARN-5334) | [YARN-3368] Introduce REFRESH button in various UI pages | Major | webapp | Sunil Govindan | Sreenath Somarajapuram | | [YARN-5509](https://issues.apache.org/jira/browse/YARN-5509) | Build error due to preparing 3.0.0-alpha2 deployment | Major | yarn | Kai Sasaki | Kai Sasaki | | [YARN-5488](https://issues.apache.org/jira/browse/YARN-5488) | Applications table overflows beyond the page boundary | Major | . | Harish Jaiprakash | Harish Jaiprakash | | [YARN-5504](https://issues.apache.org/jira/browse/YARN-5504) | [YARN-3368] Fix YARN UI build pom.xml | Major | . | Sreenath Somarajapuram | Sreenath Somarajapuram | @@ -699,6 +699,7 @@ | [HDFS-10813](https://issues.apache.org/jira/browse/HDFS-10813) | DiskBalancer: Add the getNodeList method in Command | Minor | balancer & mover | Yiqun Lin | Yiqun Lin | | [HADOOP-13563](https://issues.apache.org/jira/browse/HADOOP-13563) | hadoop\_subcommand\_opts should print name not actual content during debug | Major | scripts | Allen Wittenauer | Allen Wittenauer | | [HADOOP-13360](https://issues.apache.org/jira/browse/HADOOP-13360) | Documentation for HADOOP\_subcommand\_OPTS | Major | scripts | Allen Wittenauer | Allen Wittenauer | +| [YARN-5221](https://issues.apache.org/jira/browse/YARN-5221) | Expose UpdateResourceRequest API to allow AM to request for change in container properties | Major | . | Arun Suresh | Arun Suresh | | [YARN-5596](https://issues.apache.org/jira/browse/YARN-5596) | Fix failing unit test in TestDockerContainerRuntime | Minor | nodemanager, yarn | Sidharta Seethana | Sidharta Seethana | | [HADOOP-13547](https://issues.apache.org/jira/browse/HADOOP-13547) | Optimize IPC client protobuf decoding | Major | . 
| Daryn Sharp | Daryn Sharp | | [YARN-5264](https://issues.apache.org/jira/browse/YARN-5264) | Store all queue-specific information in FSQueue | Major | fairscheduler | Yufei Gu | Yufei Gu | @@ -708,7 +709,7 @@ | [YARN-5598](https://issues.apache.org/jira/browse/YARN-5598) | [YARN-3368] Fix create-release to be able to generate bits for the new yarn-ui | Major | yarn, yarn-ui-v2 | Wangda Tan | Wangda Tan | | [HDFS-9847](https://issues.apache.org/jira/browse/HDFS-9847) | HDFS configuration should accept time units | Major | . | Yiqun Lin | Yiqun Lin | | [HADOOP-13541](https://issues.apache.org/jira/browse/HADOOP-13541) | explicitly declare the Joda time version S3A depends on | Minor | build, fs/s3 | Steve Loughran | Steve Loughran | -| [HDFS-8901](https://issues.apache.org/jira/browse/HDFS-8901) | Use ByteBuffer in striping positional read | Major | erasure-coding | Kai Zheng | SammiChen | +| [HDFS-8901](https://issues.apache.org/jira/browse/HDFS-8901) | Use ByteBuffer in striping positional read | Major | erasure-coding | Kai Zheng | Sammi Chen | | [HDFS-10845](https://issues.apache.org/jira/browse/HDFS-10845) | Change defaults in hdfs-site.xml to match timeunit type | Minor | datanode, namenode | Yiqun Lin | Yiqun Lin | | [HDFS-10553](https://issues.apache.org/jira/browse/HDFS-10553) | DiskBalancer: Rename Tools/DiskBalancer class to Tools/DiskBalancerCLI | Minor | balancer & mover | Anu Engineer | Manoj Govindassamy | | [HDFS-9849](https://issues.apache.org/jira/browse/HDFS-9849) | DiskBalancer : reduce lock path in shutdown code | Major | balancer & mover | Anu Engineer | Yuanbo Liu | @@ -764,14 +765,14 @@ | [HDFS-10985](https://issues.apache.org/jira/browse/HDFS-10985) | o.a.h.ha.TestZKFailoverController should not use fixed time sleep before assertions | Minor | ha, test | Mingliang Liu | Mingliang Liu | | [HDFS-10972](https://issues.apache.org/jira/browse/HDFS-10972) | Add unit test for HDFS command 'dfsadmin -getDatanodeInfo' | Major | fs, shell, test | Xiaobing Zhou | Xiaobing Zhou | | [HDFS-10965](https://issues.apache.org/jira/browse/HDFS-10965) | Add unit test for HDFS command 'dfsadmin -printTopology' | Major | fs, shell, test | Xiaobing Zhou | Xiaobing Zhou | -| [YARN-5698](https://issues.apache.org/jira/browse/YARN-5698) | [YARN-3368] Launch new YARN UI under hadoop web app port | Major | . | Sunil G | Sunil G | +| [YARN-5698](https://issues.apache.org/jira/browse/YARN-5698) | [YARN-3368] Launch new YARN UI under hadoop web app port | Major | . | Sunil Govindan | Sunil Govindan | | [HDFS-10949](https://issues.apache.org/jira/browse/HDFS-10949) | DiskBalancer: deprecate TestDiskBalancer#setVolumeCapacity | Minor | balancer & mover | Xiaobing Zhou | Xiaobing Zhou | | [HADOOP-13417](https://issues.apache.org/jira/browse/HADOOP-13417) | Fix javac and checkstyle warnings in hadoop-auth package | Major | . | Kai Sasaki | Kai Sasaki | | [HDFS-10827](https://issues.apache.org/jira/browse/HDFS-10827) | When there are unrecoverable ec block groups, Namenode Web UI shows "There are X missing blocks." but doesn't show the block names. 
| Major | erasure-coding | Takanobu Asanuma | Takanobu Asanuma | | [HDFS-11008](https://issues.apache.org/jira/browse/HDFS-11008) | Change unit test for testing parsing "-source" parameter in Balancer CLI | Major | test | Mingliang Liu | Mingliang Liu | | [HDFS-10558](https://issues.apache.org/jira/browse/HDFS-10558) | DiskBalancer: Print the full path to plan file | Minor | balancer & mover | Anu Engineer | Xiaobing Zhou | | [YARN-5699](https://issues.apache.org/jira/browse/YARN-5699) | Retrospect yarn entity fields which are publishing in events info fields. | Major | . | Rohith Sharma K S | Rohith Sharma K S | -| [YARN-5145](https://issues.apache.org/jira/browse/YARN-5145) | [YARN-3368] Move new YARN UI configuration to HADOOP\_CONF\_DIR | Major | . | Wangda Tan | Sunil G | +| [YARN-5145](https://issues.apache.org/jira/browse/YARN-5145) | [YARN-3368] Move new YARN UI configuration to HADOOP\_CONF\_DIR | Major | . | Wangda Tan | Sunil Govindan | | [HDFS-11013](https://issues.apache.org/jira/browse/HDFS-11013) | Correct typos in native erasure coding dump code | Trivial | erasure-coding, native | László Bence Nagy | László Bence Nagy | | [HDFS-10922](https://issues.apache.org/jira/browse/HDFS-10922) | Adding additional unit tests for Trash (II) | Major | test | Xiaoyu Yao | Weiwei Yang | | [HDFS-9390](https://issues.apache.org/jira/browse/HDFS-9390) | Block management for maintenance states | Major | . | Ming Ma | Ming Ma | @@ -784,26 +785,27 @@ | [HDFS-10998](https://issues.apache.org/jira/browse/HDFS-10998) | Add unit tests for HDFS command 'dfsadmin -fetchImage' in HA | Major | test | Xiaobing Zhou | Xiaobing Zhou | | [YARN-4911](https://issues.apache.org/jira/browse/YARN-4911) | Bad placement policy in FairScheduler causes the RM to crash | Major | fairscheduler | Ray Chiang | Ray Chiang | | [YARN-5047](https://issues.apache.org/jira/browse/YARN-5047) | Refactor nodeUpdate across schedulers | Major | capacityscheduler, fairscheduler, scheduler | Ray Chiang | Ray Chiang | -| [HDFS-8410](https://issues.apache.org/jira/browse/HDFS-8410) | Add computation time metrics to datanode for ECWorker | Major | . | Li Bo | SammiChen | +| [HDFS-8410](https://issues.apache.org/jira/browse/HDFS-8410) | Add computation time metrics to datanode for ECWorker | Major | . | Li Bo | Sammi Chen | | [HDFS-10975](https://issues.apache.org/jira/browse/HDFS-10975) | fsck -list-corruptfileblocks does not report corrupt EC files | Major | . | Wei-Chiu Chuang | Takanobu Asanuma | | [HADOOP-13727](https://issues.apache.org/jira/browse/HADOOP-13727) | S3A: Reduce high number of connections to EC2 Instance Metadata Service caused by InstanceProfileCredentialsProvider. | Minor | fs/s3 | Rajesh Balamohan | Chris Nauroth | | [HADOOP-12774](https://issues.apache.org/jira/browse/HADOOP-12774) | s3a should use UGI.getCurrentUser.getShortname() for username | Major | fs/s3 | Steve Loughran | Steve Loughran | | [HADOOP-13309](https://issues.apache.org/jira/browse/HADOOP-13309) | Document S3A known limitations in file ownership and permission model. | Minor | fs/s3 | Chris Nauroth | Chris Nauroth | | [HDFS-11011](https://issues.apache.org/jira/browse/HDFS-11011) | Add unit tests for HDFS command 'dfsadmin -set/clrSpaceQuota' | Major | hdfs-client | Xiaobing Zhou | Xiaobing Zhou | | [HDFS-10638](https://issues.apache.org/jira/browse/HDFS-10638) | Modifications to remove the assumption that StorageLocation is associated with java.io.File in Datanode. 
| Major | datanode, fs | Virajith Jalaparti | Virajith Jalaparti | -| [HADOOP-11798](https://issues.apache.org/jira/browse/HADOOP-11798) | Native raw erasure coder in XOR codes | Major | io | Kai Zheng | SammiChen | +| [HADOOP-11798](https://issues.apache.org/jira/browse/HADOOP-11798) | Native raw erasure coder in XOR codes | Major | io | Kai Zheng | Sammi Chen | | [HADOOP-13614](https://issues.apache.org/jira/browse/HADOOP-13614) | Purge some superfluous/obsolete S3 FS tests that are slowing test runs down | Minor | fs/s3, test | Steve Loughran | Steve Loughran | | [HDFS-11038](https://issues.apache.org/jira/browse/HDFS-11038) | DiskBalancer: support running multiple commands in single test | Major | balancer & mover | Xiaobing Zhou | Xiaobing Zhou | | [YARN-5772](https://issues.apache.org/jira/browse/YARN-5772) | Replace old Hadoop logo with new one | Major | yarn-ui-v2 | Akira Ajisaka | Akhil PB | | [YARN-5500](https://issues.apache.org/jira/browse/YARN-5500) | 'Master node' link under application tab is broken | Critical | . | Sumana Sathish | Akhil PB | | [YARN-5497](https://issues.apache.org/jira/browse/YARN-5497) | Use different color for Undefined and Succeeded for Final State in applications page | Trivial | . | Yesha Vora | Akhil PB | -| [YARN-5490](https://issues.apache.org/jira/browse/YARN-5490) | [YARN-3368] Fix various alignment issues and broken breadcrumb link in Node page | Major | . | Sunil G | Akhil PB | +| [YARN-5490](https://issues.apache.org/jira/browse/YARN-5490) | [YARN-3368] Fix various alignment issues and broken breadcrumb link in Node page | Major | . | Sunil Govindan | Akhil PB | | [YARN-5779](https://issues.apache.org/jira/browse/YARN-5779) | [YARN-3368] Document limits/notes of the new YARN UI | Major | . | Wangda Tan | Wangda Tan | -| [YARN-5785](https://issues.apache.org/jira/browse/YARN-5785) | [YARN-3368] Accessing applications and containers list from Node page is throwing few exceptions in console | Major | yarn-ui-v2 | Sunil G | Akhil PB | +| [YARN-5785](https://issues.apache.org/jira/browse/YARN-5785) | [YARN-3368] Accessing applications and containers list from Node page is throwing few exceptions in console | Major | yarn-ui-v2 | Sunil Govindan | Akhil PB | | [YARN-5799](https://issues.apache.org/jira/browse/YARN-5799) | Fix Opportunistic Allocation to set the correct value of Node Http Address | Major | resourcemanager | Arun Suresh | Arun Suresh | | [YARN-4765](https://issues.apache.org/jira/browse/YARN-4765) | Split TestHBaseTimelineStorage into multiple test classes | Major | timelineserver | Varun Saxena | Varun Saxena | -| [YARN-5804](https://issues.apache.org/jira/browse/YARN-5804) | New UI2 is not able to launch with jetty 9 upgrade post HADOOP-10075 | Major | yarn-ui-v2 | Sunil G | Sunil G | +| [YARN-5804](https://issues.apache.org/jira/browse/YARN-5804) | New UI2 is not able to launch with jetty 9 upgrade post HADOOP-10075 | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | | [HADOOP-13680](https://issues.apache.org/jira/browse/HADOOP-13680) | fs.s3a.readahead.range to use getLongBytes | Major | fs/s3 | Steve Loughran | Abhishek Modi | +| [YARN-5793](https://issues.apache.org/jira/browse/YARN-5793) | Trim configuration values in DockerLinuxContainerRuntime | Minor | nodemanager | Tianyin Xu | Tianyin Xu | | [HDFS-11030](https://issues.apache.org/jira/browse/HDFS-11030) | TestDataNodeVolumeFailure#testVolumeFailure is flaky (though passing) | Major | datanode, test | Mingliang Liu | Mingliang Liu | | 
[HDFS-10997](https://issues.apache.org/jira/browse/HDFS-10997) | Reduce number of path resolving methods | Major | namenode | Daryn Sharp | Daryn Sharp | | [HDFS-10499](https://issues.apache.org/jira/browse/HDFS-10499) | TestNameNodeMetadataConsistency#testGenerationStampInFuture Fails Intermittently | Major | namenode, test | Hanisha Koneru | Yiqun Lin | @@ -833,12 +835,12 @@ | [HDFS-11113](https://issues.apache.org/jira/browse/HDFS-11113) | Document dfs.client.read.striped configuration in hdfs-default.xml | Minor | documentation, hdfs-client | Rakesh R | Rakesh R | | [HDFS-11148](https://issues.apache.org/jira/browse/HDFS-11148) | Update DataNode to use StorageLocationChecker at startup | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [HADOOP-13655](https://issues.apache.org/jira/browse/HADOOP-13655) | document object store use with fs shell and distcp | Major | documentation, fs, fs/s3 | Steve Loughran | Steve Loughran | -| [YARN-5865](https://issues.apache.org/jira/browse/YARN-5865) | Retrospect updateApplicationPriority api to handle state store exception in align with YARN-5611 | Major | . | Sunil G | Sunil G | +| [YARN-5865](https://issues.apache.org/jira/browse/YARN-5865) | Retrospect updateApplicationPriority api to handle state store exception in align with YARN-5611 | Major | . | Sunil Govindan | Sunil Govindan | | [HADOOP-13801](https://issues.apache.org/jira/browse/HADOOP-13801) | regression: ITestS3AMiniYarnCluster failing | Major | fs/s3 | Steve Loughran | Steve Loughran | | [HDFS-10368](https://issues.apache.org/jira/browse/HDFS-10368) | Erasure Coding: Deprecate replication-related config keys | Major | erasure-coding | Rakesh R | Rakesh R | | [YARN-5649](https://issues.apache.org/jira/browse/YARN-5649) | Add REST endpoints for updating application timeouts | Major | scheduler | Rohith Sharma K S | Rohith Sharma K S | | [YARN-4206](https://issues.apache.org/jira/browse/YARN-4206) | Add Application timeouts in Application report and CLI | Major | scheduler | nijel | Rohith Sharma K S | -| [HDFS-10994](https://issues.apache.org/jira/browse/HDFS-10994) | Support an XOR policy XOR-2-1-64k in HDFS | Major | erasure-coding | SammiChen | SammiChen | +| [HDFS-10994](https://issues.apache.org/jira/browse/HDFS-10994) | Support an XOR policy XOR-2-1-64k in HDFS | Major | erasure-coding | Sammi Chen | Sammi Chen | | [HADOOP-13823](https://issues.apache.org/jira/browse/HADOOP-13823) | s3a rename: fail if dest file exists | Blocker | fs/s3 | Steve Loughran | Steve Loughran | | [HDFS-11149](https://issues.apache.org/jira/browse/HDFS-11149) | Support for parallel checking of FsVolumes | Major | datanode | Arpit Agarwal | Arpit Agarwal | | [HDFS-8678](https://issues.apache.org/jira/browse/HDFS-8678) | Bring back the feature to view chunks of files in the HDFS file browser | Major | ui | Ravi Prakash | Ravi Prakash | @@ -848,7 +850,7 @@ | [HADOOP-13257](https://issues.apache.org/jira/browse/HADOOP-13257) | Improve Azure Data Lake contract tests. | Major | fs/adl | Chris Nauroth | Vishwajeet Dusane | | [YARN-5746](https://issues.apache.org/jira/browse/YARN-5746) | The state of the parentQueue and its childQueues should be synchronized. 
| Major | capacity scheduler, resourcemanager | Xuan Gong | Xuan Gong | | [HDFS-11178](https://issues.apache.org/jira/browse/HDFS-11178) | TestAddStripedBlockInFBR#testAddBlockInFullBlockReport fails frequently in trunk | Major | test | Yiqun Lin | Yiqun Lin | -| [HDFS-11172](https://issues.apache.org/jira/browse/HDFS-11172) | Support an erasure coding policy using RS 10 + 4 | Major | erasure-coding | SammiChen | Wei Zhou | +| [HDFS-11172](https://issues.apache.org/jira/browse/HDFS-11172) | Support an erasure coding policy using RS 10 + 4 | Major | erasure-coding | Sammi Chen | Wei Zhou | | [YARN-5965](https://issues.apache.org/jira/browse/YARN-5965) | Retrospect ApplicationReport#getApplicationTimeouts | Major | scheduler | Jian He | Rohith Sharma K S | | [YARN-5922](https://issues.apache.org/jira/browse/YARN-5922) | Remove direct references of HBaseTimelineWriter/Reader in core ATS classes | Major | yarn | Haibo Chen | Haibo Chen | | [HDFS-8630](https://issues.apache.org/jira/browse/HDFS-8630) | WebHDFS : Support get/set/unset StoragePolicy | Major | webhdfs | nijel | Surendra Singh Lilhore | @@ -856,8 +858,8 @@ | [YARN-5982](https://issues.apache.org/jira/browse/YARN-5982) | Simplify opportunistic container parameters and metrics | Major | . | Konstantinos Karanasos | Konstantinos Karanasos | | [YARN-5970](https://issues.apache.org/jira/browse/YARN-5970) | Validate application update timeout request parameters | Major | scheduler | Rohith Sharma K S | Rohith Sharma K S | | [HADOOP-13871](https://issues.apache.org/jira/browse/HADOOP-13871) | ITestS3AInputStreamPerformance.testTimeToOpenAndReadWholeFileBlocks performance awful | Major | fs/s3 | Steve Loughran | Steve Loughran | -| [YARN-2009](https://issues.apache.org/jira/browse/YARN-2009) | CapacityScheduler: Add intra-queue preemption for app priority support | Major | capacityscheduler | Devaraj K | Sunil G | -| [HDFS-8411](https://issues.apache.org/jira/browse/HDFS-8411) | Add bytes count metrics to datanode for ECWorker | Major | . | Li Bo | SammiChen | +| [YARN-2009](https://issues.apache.org/jira/browse/YARN-2009) | CapacityScheduler: Add intra-queue preemption for app priority support | Major | capacityscheduler | Devaraj K | Sunil Govindan | +| [HDFS-8411](https://issues.apache.org/jira/browse/HDFS-8411) | Add bytes count metrics to datanode for ECWorker | Major | . | Li Bo | Sammi Chen | | [HADOOP-11804](https://issues.apache.org/jira/browse/HADOOP-11804) | Shaded Hadoop client artifacts and minicluster | Major | build | Sean Busbey | Sean Busbey | | [HDFS-11188](https://issues.apache.org/jira/browse/HDFS-11188) | Change min supported DN and NN versions back to 2.x | Critical | rolling upgrades | Andrew Wang | Andrew Wang | | [YARN-5524](https://issues.apache.org/jira/browse/YARN-5524) | Yarn live log aggregation does not throw if command line arg is wrong | Major | log-aggregation | Prasanth Jayachandran | Xuan Gong | @@ -876,8 +878,8 @@ | [YARN-5906](https://issues.apache.org/jira/browse/YARN-5906) | Update AppSchedulingInfo to use SchedulingPlacementSet | Major | . 
| Wangda Tan | Wangda Tan | | [YARN-6009](https://issues.apache.org/jira/browse/YARN-6009) | RM fails to start during an upgrade - Failed to load/recover state (YarnException: Invalid application timeout, value=0 for type=LIFETIME) | Critical | resourcemanager | Gour Saha | Rohith Sharma K S | | [YARN-6074](https://issues.apache.org/jira/browse/YARN-6074) | FlowRunEntity does not deserialize long values correctly | Major | timelinereader | Rohith Sharma K S | Rohith Sharma K S | -| [YARN-3955](https://issues.apache.org/jira/browse/YARN-3955) | Support for application priority ACLs in queues of CapacityScheduler | Major | capacityscheduler | Sunil G | Sunil G | -| [HDFS-11072](https://issues.apache.org/jira/browse/HDFS-11072) | Add ability to unset and change directory EC policy | Major | erasure-coding | Andrew Wang | SammiChen | +| [YARN-3955](https://issues.apache.org/jira/browse/YARN-3955) | Support for application priority ACLs in queues of CapacityScheduler | Major | capacityscheduler | Sunil Govindan | Sunil Govindan | +| [HDFS-11072](https://issues.apache.org/jira/browse/HDFS-11072) | Add ability to unset and change directory EC policy | Major | erasure-coding | Andrew Wang | Sammi Chen | | [HDFS-9391](https://issues.apache.org/jira/browse/HDFS-9391) | Update webUI/JMX to display maintenance state info | Major | . | Ming Ma | Manoj Govindassamy | | [YARN-5416](https://issues.apache.org/jira/browse/YARN-5416) | TestRMRestart#testRMRestartWaitForPreviousAMToFinish failed intermittently due to not wait SchedulerApplicationAttempt to be stopped | Minor | test, yarn | Junping Du | Junping Du | | [HADOOP-13336](https://issues.apache.org/jira/browse/HADOOP-13336) | S3A to support per-bucket configuration | Major | fs/s3 | Steve Loughran | Steve Loughran | @@ -895,13 +897,13 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | -| [HDFS-10861](https://issues.apache.org/jira/browse/HDFS-10861) | Refactor StripeReaders and use ECChunk version decode API | Major | . | SammiChen | SammiChen | -| [MAPREDUCE-6780](https://issues.apache.org/jira/browse/MAPREDUCE-6780) | Add support for striping files in benchmarking of TeraGen and TeraSort | Major | . | SammiChen | SammiChen | -| [HDFS-10895](https://issues.apache.org/jira/browse/HDFS-10895) | Update HDFS Erasure Coding doc to add how to use ISA-L based coder | Major | . | SammiChen | SammiChen | +| [HDFS-10861](https://issues.apache.org/jira/browse/HDFS-10861) | Refactor StripeReaders and use ECChunk version decode API | Major | . | Sammi Chen | Sammi Chen | +| [MAPREDUCE-6780](https://issues.apache.org/jira/browse/MAPREDUCE-6780) | Add support for striping files in benchmarking of TeraGen and TeraSort | Major | . | Sammi Chen | Sammi Chen | +| [HDFS-10895](https://issues.apache.org/jira/browse/HDFS-10895) | Update HDFS Erasure Coding doc to add how to use ISA-L based coder | Major | . 
| Sammi Chen | Sammi Chen | | [HDFS-10984](https://issues.apache.org/jira/browse/HDFS-10984) | Expose nntop output as metrics | Major | namenode | Siddharth Wagle | Siddharth Wagle | | [YARN-5717](https://issues.apache.org/jira/browse/YARN-5717) | Add tests for container-executor's is\_feature\_enabled function | Major | yarn | Sidharta Seethana | Sidharta Seethana | | [YARN-5704](https://issues.apache.org/jira/browse/YARN-5704) | Provide config knobs to control enabling/disabling new/work in progress features in container-executor | Major | yarn | Sidharta Seethana | Sidharta Seethana | -| [HDFS-11033](https://issues.apache.org/jira/browse/HDFS-11033) | Add documents for native raw erasure coder in XOR codes | Major | documentation, erasure-coding | SammiChen | SammiChen | +| [HDFS-11033](https://issues.apache.org/jira/browse/HDFS-11033) | Add documents for native raw erasure coder in XOR codes | Major | documentation, erasure-coding | Sammi Chen | Sammi Chen | | [YARN-5308](https://issues.apache.org/jira/browse/YARN-5308) | FairScheduler: Move continuous scheduling related tests to TestContinuousScheduling | Major | fairscheduler, test | Karthik Kambatla | Kai Sasaki | | [YARN-5822](https://issues.apache.org/jira/browse/YARN-5822) | Log ContainerRuntime initialization error in LinuxContainerExecutor | Trivial | nodemanager | Sidharta Seethana | Sidharta Seethana | | [HDFS-11145](https://issues.apache.org/jira/browse/HDFS-11145) | Implement getTrashRoot() for ViewFileSystem | Major | . | Manoj Govindassamy | Manoj Govindassamy | @@ -910,10 +912,11 @@ | [YARN-4395](https://issues.apache.org/jira/browse/YARN-4395) | Typo in comment in ClientServiceDelegate | Trivial | . | Daniel Templeton | Alison Yu | | [MAPREDUCE-6810](https://issues.apache.org/jira/browse/MAPREDUCE-6810) | hadoop-mapreduce-client-nativetask compilation broken on GCC-6.2.1 | Major | . | Ravi Prakash | Ravi Prakash | | [HADOOP-13840](https://issues.apache.org/jira/browse/HADOOP-13840) | Implement getUsed() for ViewFileSystem | Major | viewfs | Manoj Govindassamy | Manoj Govindassamy | -| [HDFS-11216](https://issues.apache.org/jira/browse/HDFS-11216) | Add remoteBytesRead counter metrics for erasure coding reconstruction task | Major | . | SammiChen | SammiChen | +| [HDFS-11216](https://issues.apache.org/jira/browse/HDFS-11216) | Add remoteBytesRead counter metrics for erasure coding reconstruction task | Major | . | Sammi Chen | Sammi Chen | | [YARN-5719](https://issues.apache.org/jira/browse/YARN-5719) | Enforce a C standard for native container-executor | Major | nodemanager | Chris Douglas | Chris Douglas | | [HADOOP-13885](https://issues.apache.org/jira/browse/HADOOP-13885) | Implement getLinkTarget for ViewFileSystem | Major | viewfs | Manoj Govindassamy | Manoj Govindassamy | | [YARN-5646](https://issues.apache.org/jira/browse/YARN-5646) | Add documentation and update config parameter names for scheduling of OPPORTUNISTIC containers | Blocker | . 
| Konstantinos Karanasos | Konstantinos Karanasos | | [HDFS-9809](https://issues.apache.org/jira/browse/HDFS-9809) | Abstract implementation-specific details from the datanode | Major | datanode, fs | Virajith Jalaparti | Virajith Jalaparti | +| [HADOOP-13835](https://issues.apache.org/jira/browse/HADOOP-13835) | Move Google Test Framework code from mapreduce to hadoop-common | Major | test | Varun Vasudev | Varun Vasudev | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/RELEASENOTES.3.0.0-alpha2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/RELEASENOTES.3.0.0-alpha2.md index 66344012bb1..c48ffc5c64d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/RELEASENOTES.3.0.0-alpha2.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha2/RELEASENOTES.3.0.0-alpha2.md @@ -280,7 +280,7 @@ An unnecessary dependency on hadoop-mapreduce-client-shuffle in hadoop-mapreduce * [HADOOP-7352](https://issues.apache.org/jira/browse/HADOOP-7352) | *Major* | **FileSystem#listStatus should throw IOE upon access error** -Change FileSystem#listStatus contract to never return null. Local filesystems prior to 3.0.0 returned null upon access error. It is considered erroneous. We should expect FileSystem#listStatus to throw IOException upon access error. +Change FileSystem#listStatus contract to never return null. Local filesystems prior to 3.0.0 returned null upon access error. It is considered erroneous. We should expect FileSystem#listStatus to throw IOException upon access error. --- @@ -633,3 +633,18 @@ Skip instantiating a Timeline Service client if encountering NoClassDefFoundErro Hadoop now supports integration with Azure Data Lake as an alternative Hadoop-compatible file system. Please refer to the Hadoop site documentation of Azure Data Lake for details on usage and configuration. + +--- + +* [YARN-2877](https://issues.apache.org/jira/browse/YARN-2877) | *Major* | **Extend YARN to support distributed scheduling** + +With this JIRA we are introducing distributed scheduling in YARN. +In particular, we make the following contributions: +- Introduce the notion of container types. GUARANTEED containers follow the semantics of the existing YARN containers. OPPORTUNISTIC ones can be seen as lower priority containers, and can be preempted in order to make space for GUARANTEED containers to run. +- Queuing of tasks at the NMs. This enables us to send more containers in an NM than its available resources. At the moment we are allowing queuing of OPPORTUNISTIC containers. Once resources become available at the NM, such containers can immediately start their execution. +- Introduce the AMRMProxy. This is a service running at each node, intercepting the requests between the AM and the RM. It is instrumental for both distributed scheduling and YARN Federation (YARN-2915). +- Enable distributed scheduling. To minimize their allocation latency, OPPORTUNISTIC containers are dispatched immediately to NMs in a distributed fashion by using the AMRMProxy of the node where the corresponding AM resides, without needing to go through the ResourceManager. + +All the functionality introduced in this JIRA is disabled by default, so it will not affect the behavior of existing applications. 
+We have introduced parameters in YarnConfiguration to enable NM queuing (yarn.nodemanager.container-queuing-enabled), distributed scheduling (yarn.distributed-scheduling.enabled) and the AMRMProxy service (yarn.nodemanager.amrmproxy.enable). +AMs currently need to specify the type of container to be requested for each task. We are in the process of adding in the MapReduce AM the ability to randomly request OPPORTUNISTIC containers for a specified percentage of a job's tasks, so that users can experiment with the new features. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha3/CHANGES.3.0.0-alpha3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha3/CHANGELOG.3.0.0-alpha3.md similarity index 96% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha3/CHANGES.3.0.0-alpha3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha3/CHANGELOG.3.0.0-alpha3.md index 30b58cb23a8..be3aa95e87b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha3/CHANGES.3.0.0-alpha3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha3/CHANGELOG.3.0.0-alpha3.md @@ -40,7 +40,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | -| [YARN-6278](https://issues.apache.org/jira/browse/YARN-6278) | Enforce to use correct node and npm version in new YARN-UI build | Critical | . | Sunil G | Sunil G | -| [YARN-6336](https://issues.apache.org/jira/browse/YARN-6336) | Jenkins report YARN new UI build failure | Blocker | . | Junping Du | Sunil G | +| [YARN-6278](https://issues.apache.org/jira/browse/YARN-6278) | Enforce to use correct node and npm version in new YARN-UI build | Critical | . | Sunil Govindan | Sunil Govindan | +| [YARN-6336](https://issues.apache.org/jira/browse/YARN-6336) | Jenkins report YARN new UI build failure | Blocker | . | Junping Du | Sunil Govindan | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/CHANGES.3.0.0-alpha4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/CHANGELOG.3.0.0-alpha4.md similarity index 98% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/CHANGES.3.0.0-alpha4.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/CHANGELOG.3.0.0-alpha4.md index f013e8b386b..c1e8ef056a3 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/CHANGES.3.0.0-alpha4.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/CHANGELOG.3.0.0-alpha4.md @@ -25,6 +25,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | | [HDFS-10860](https://issues.apache.org/jira/browse/HDFS-10860) | Switch HttpFS from Tomcat to Jetty | Blocker | httpfs | John Zhuge | John Zhuge | +| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . 
| Jeffrey E Rodriguez | Yuanbo Liu | | [HADOOP-13929](https://issues.apache.org/jira/browse/HADOOP-13929) | ADLS connector should not check in contract-test-options.xml | Major | fs/adl, test | John Zhuge | John Zhuge | | [HDFS-11100](https://issues.apache.org/jira/browse/HDFS-11100) | Recursively deleting file protected by sticky bit should fail | Critical | fs | John Zhuge | John Zhuge | | [HADOOP-13805](https://issues.apache.org/jira/browse/HADOOP-13805) | UGI.getCurrentUser() fails if user does not have a keytab associated | Major | security | Alejandro Abdelnur | Xiao Chen | @@ -34,7 +35,7 @@ | [HDFS-11382](https://issues.apache.org/jira/browse/HDFS-11382) | Persist Erasure Coding Policy ID in a new optional field in INodeFile in FSImage | Major | hdfs | Manoj Govindassamy | Manoj Govindassamy | | [HDFS-11428](https://issues.apache.org/jira/browse/HDFS-11428) | Change setErasureCodingPolicy to take a required string EC policy name | Major | erasure-coding | Andrew Wang | Andrew Wang | | [HADOOP-14138](https://issues.apache.org/jira/browse/HADOOP-14138) | Remove S3A ref from META-INF service discovery, rely on existing core-default entry | Critical | fs/s3 | Steve Loughran | Steve Loughran | -| [HDFS-11152](https://issues.apache.org/jira/browse/HDFS-11152) | Start erasure coding policy ID number from 1 instead of 0 to void potential unexpected errors | Blocker | erasure-coding | SammiChen | SammiChen | +| [HDFS-11152](https://issues.apache.org/jira/browse/HDFS-11152) | Start erasure coding policy ID number from 1 instead of 0 to void potential unexpected errors | Blocker | erasure-coding | Sammi Chen | Sammi Chen | | [HDFS-11314](https://issues.apache.org/jira/browse/HDFS-11314) | Enforce set of enabled EC policies on the NameNode | Blocker | erasure-coding | Andrew Wang | Andrew Wang | | [HDFS-11505](https://issues.apache.org/jira/browse/HDFS-11505) | Do not enable any erasure coding policies by default | Major | erasure-coding | Andrew Wang | Manoj Govindassamy | | [HADOOP-10101](https://issues.apache.org/jira/browse/HADOOP-10101) | Update guava dependency to the latest version | Major | . | Rakesh R | Tsuyoshi Ozawa | @@ -64,13 +65,6 @@ | [YARN-6127](https://issues.apache.org/jira/browse/YARN-6127) | Add support for work preserving NM restart when AMRMProxy is enabled | Major | amrmproxy, nodemanager | Subru Krishnan | Botong Huang | -### IMPORTANT ISSUES: - -| JIRA | Summary | Priority | Component | Reporter | Contributor | -|:---- |:---- | :--- |:---- |:---- |:---- | -| [YARN-6959](https://issues.apache.org/jira/browse/YARN-6959) | RM may allocate wrong AM Container for new attempt | Major | capacity scheduler, fairscheduler, scheduler | Yuqi Wang | Yuqi Wang | - - ### NEW FEATURES: | JIRA | Summary | Priority | Component | Reporter | Contributor | @@ -85,6 +79,7 @@ | [MAPREDUCE-6871](https://issues.apache.org/jira/browse/MAPREDUCE-6871) | Allow users to specify racks and nodes for strict locality for AMs | Major | client | Robert Kanter | Robert Kanter | | [HDFS-11417](https://issues.apache.org/jira/browse/HDFS-11417) | Add datanode admin command to get the storage info. | Major | . | Surendra Singh Lilhore | Surendra Singh Lilhore | | [YARN-679](https://issues.apache.org/jira/browse/YARN-679) | add an entry point that can start any Yarn service | Major | api | Steve Loughran | Steve Loughran | +| [HDFS-10480](https://issues.apache.org/jira/browse/HDFS-10480) | Add an admin command to list currently open files | Major | . 
| Kihwal Lee | Manoj Govindassamy | ### IMPROVEMENTS: @@ -94,7 +89,6 @@ | [HADOOP-14002](https://issues.apache.org/jira/browse/HADOOP-14002) | Document -DskipShade property in BUILDING.txt | Minor | build, documentation | Hanisha Koneru | Hanisha Koneru | | [HADOOP-13956](https://issues.apache.org/jira/browse/HADOOP-13956) | Read ADLS credentials from Credential Provider | Critical | fs/adl | John Zhuge | John Zhuge | | [HADOOP-13962](https://issues.apache.org/jira/browse/HADOOP-13962) | Update ADLS SDK to 2.1.4 | Major | fs/adl | John Zhuge | John Zhuge | -| [YARN-5547](https://issues.apache.org/jira/browse/YARN-5547) | NMLeveldbStateStore should be more tolerant of unknown keys | Major | nodemanager | Jason Lowe | Ajith S | | [HADOOP-13990](https://issues.apache.org/jira/browse/HADOOP-13990) | Document KMS usage of CredentialProvider API | Minor | documentation, kms | John Zhuge | John Zhuge | | [HDFS-10534](https://issues.apache.org/jira/browse/HDFS-10534) | NameNode WebUI should display DataNode usage histogram | Major | namenode, ui | Zhe Zhang | Kai Sasaki | | [MAPREDUCE-6829](https://issues.apache.org/jira/browse/MAPREDUCE-6829) | Add peak memory usage counter for each task | Major | mrv2 | Yufei Gu | Miklos Szegedi | @@ -120,14 +114,13 @@ | [YARN-6174](https://issues.apache.org/jira/browse/YARN-6174) | Log files pattern should be same for both running and finished container | Major | yarn | Sumana Sathish | Xuan Gong | | [HDFS-11375](https://issues.apache.org/jira/browse/HDFS-11375) | Display the volume storage type in datanode UI | Minor | datanode, ui | Surendra Singh Lilhore | Surendra Singh Lilhore | | [YARN-6125](https://issues.apache.org/jira/browse/YARN-6125) | The application attempt's diagnostic message should have a maximum size | Critical | resourcemanager | Daniel Templeton | Andras Piros | -| [HADOOP-14077](https://issues.apache.org/jira/browse/HADOOP-14077) | Improve the patch of HADOOP-13119 | Major | security | Yuanbo Liu | Yuanbo Liu | | [HDFS-11406](https://issues.apache.org/jira/browse/HDFS-11406) | Remove unused getStartInstance and getFinalizeInstance in FSEditLogOp | Trivial | . | Andrew Wang | Alison Yu | | [HDFS-11438](https://issues.apache.org/jira/browse/HDFS-11438) | Fix typo in error message of StoragePolicyAdmin tool | Trivial | . | Alison Yu | Alison Yu | | [YARN-6194](https://issues.apache.org/jira/browse/YARN-6194) | Cluster capacity in SchedulingPolicy is updated only on allocation file reload | Major | fairscheduler | Karthik Kambatla | Yufei Gu | | [HADOOP-13321](https://issues.apache.org/jira/browse/HADOOP-13321) | Deprecate FileSystem APIs that promote inefficient call patterns. | Major | fs | Chris Nauroth | Mingliang Liu | | [HADOOP-14097](https://issues.apache.org/jira/browse/HADOOP-14097) | Remove Java6 specific code from GzipCodec.java | Minor | . | Akira Ajisaka | Elek, Marton | | [HADOOP-13817](https://issues.apache.org/jira/browse/HADOOP-13817) | Add a finite shell command timeout to ShellBasedUnixGroupsMapping | Minor | security | Harsh J | Harsh J | -| [HDFS-11295](https://issues.apache.org/jira/browse/HDFS-11295) | Check storage remaining instead of node remaining in BlockPlacementPolicyDefault.chooseReplicaToDelete() | Major | namenode | Xiao Liang | Elek, Marton | +| [HDFS-11295](https://issues.apache.org/jira/browse/HDFS-11295) | Check storage remaining instead of node remaining in BlockPlacementPolicyDefault.chooseReplicaToDelete() | Major | namenode | X. 
Liang | Elek, Marton | | [HADOOP-14127](https://issues.apache.org/jira/browse/HADOOP-14127) | Add log4j configuration to enable logging in hadoop-distcp's tests | Minor | test | Xiao Chen | Xiao Chen | | [HDFS-11466](https://issues.apache.org/jira/browse/HDFS-11466) | Change dfs.namenode.write-lock-reporting-threshold-ms default from 1000ms to 5000ms | Major | namenode | Andrew Wang | Andrew Wang | | [YARN-6189](https://issues.apache.org/jira/browse/YARN-6189) | Improve application status log message when RM restarted when app is in NEW state | Major | . | Yesha Vora | Junping Du | @@ -165,9 +158,9 @@ | [HADOOP-14233](https://issues.apache.org/jira/browse/HADOOP-14233) | Delay construction of PreCondition.check failure message in Configuration#set | Major | . | Jonathan Eagles | Jonathan Eagles | | [HADOOP-14240](https://issues.apache.org/jira/browse/HADOOP-14240) | Configuration#get return value optimization | Major | . | Jonathan Eagles | Jonathan Eagles | | [YARN-6339](https://issues.apache.org/jira/browse/YARN-6339) | Improve performance for createAndGetApplicationReport | Major | . | yunjiong zhao | yunjiong zhao | -| [HDFS-11170](https://issues.apache.org/jira/browse/HDFS-11170) | Add builder-based create API to FileSystem | Major | . | SammiChen | SammiChen | +| [HDFS-11170](https://issues.apache.org/jira/browse/HDFS-11170) | Add builder-based create API to FileSystem | Major | . | Sammi Chen | Sammi Chen | | [YARN-6329](https://issues.apache.org/jira/browse/YARN-6329) | Remove unnecessary TODO comment from AppLogAggregatorImpl.java | Minor | . | Akira Ajisaka | victor bertschinger | -| [HDFS-9705](https://issues.apache.org/jira/browse/HDFS-9705) | Refine the behaviour of getFileChecksum when length = 0 | Minor | . | Kai Zheng | SammiChen | +| [HDFS-9705](https://issues.apache.org/jira/browse/HDFS-9705) | Refine the behaviour of getFileChecksum when length = 0 | Minor | . | Kai Zheng | Sammi Chen | | [HADOOP-14250](https://issues.apache.org/jira/browse/HADOOP-14250) | Correct spelling of 'separate' and variants | Minor | . | Doris Gu | Doris Gu | | [HDFS-10974](https://issues.apache.org/jira/browse/HDFS-10974) | Document replication factor for EC files. | Major | documentation, erasure-coding | Wei-Chiu Chuang | Yiqun Lin | | [HDFS-11551](https://issues.apache.org/jira/browse/HDFS-11551) | Handle SlowDiskReport from DataNode at the NameNode | Major | hdfs | Hanisha Koneru | Hanisha Koneru | @@ -217,13 +210,13 @@ | [MAPREDUCE-6883](https://issues.apache.org/jira/browse/MAPREDUCE-6883) | AuditLogger and TestAuditLogger are dead code | Minor | client | Daniel Templeton | Vrushali C | | [HDFS-11800](https://issues.apache.org/jira/browse/HDFS-11800) | Document output of 'hdfs count -u' should contain PATHNAME | Minor | hdfs | Xiaobing Zhou | Xiaobing Zhou | | [HADOOP-14413](https://issues.apache.org/jira/browse/HADOOP-14413) | Add Javadoc comment for jitter parameter on CachingGetSpaceUsed | Trivial | . 
| Erik Krogen | Erik Krogen | -| [HDFS-11757](https://issues.apache.org/jira/browse/HDFS-11757) | Query StreamCapabilities when creating balancer's lock file | Major | balancer & mover | Andrew Wang | SammiChen | +| [HDFS-11757](https://issues.apache.org/jira/browse/HDFS-11757) | Query StreamCapabilities when creating balancer's lock file | Major | balancer & mover | Andrew Wang | Sammi Chen | | [HDFS-11641](https://issues.apache.org/jira/browse/HDFS-11641) | Reduce cost of audit logging by using FileStatus instead of HdfsFileStatus | Major | hdfs | Daryn Sharp | Daryn Sharp | | [YARN-6447](https://issues.apache.org/jira/browse/YARN-6447) | Provide container sandbox policies for groups | Minor | nodemanager, yarn | Greg Phillips | Greg Phillips | | [HADOOP-14415](https://issues.apache.org/jira/browse/HADOOP-14415) | Use java.lang.AssertionError instead of junit.framework.AssertionFailedError | Minor | . | Akira Ajisaka | Chen Liang | | [HDFS-11803](https://issues.apache.org/jira/browse/HDFS-11803) | Add -v option for du command to show header line | Major | hdfs | Xiaobing Zhou | Xiaobing Zhou | | [YARN-6493](https://issues.apache.org/jira/browse/YARN-6493) | Print requested node partition in assignContainer logs | Major | . | Jonathan Hung | Jonathan Hung | -| [HDFS-11793](https://issues.apache.org/jira/browse/HDFS-11793) | Allow to enable user defined erasure coding policy | Major | erasure-coding | SammiChen | SammiChen | +| [HDFS-11793](https://issues.apache.org/jira/browse/HDFS-11793) | Allow to enable user defined erasure coding policy | Major | erasure-coding | Sammi Chen | Sammi Chen | | [HADOOP-14407](https://issues.apache.org/jira/browse/HADOOP-14407) | DistCp - Introduce a configurable copy buffer size | Major | tools/distcp | Omkar Aradhya K S | Omkar Aradhya K S | | [YARN-6582](https://issues.apache.org/jira/browse/YARN-6582) | FSAppAttempt demand can be updated atomically in updateDemand() | Major | . | Karthik Kambatla | Karthik Kambatla | | [HDFS-11421](https://issues.apache.org/jira/browse/HDFS-11421) | Make WebHDFS' ACLs RegEx configurable | Major | webhdfs | Harsh J | Harsh J | @@ -249,7 +242,7 @@ | [HADOOP-14524](https://issues.apache.org/jira/browse/HADOOP-14524) | Make CryptoCodec Closeable so it can be cleaned up proactively | Major | . | Xiao Chen | Xiao Chen | | [HADOOP-14424](https://issues.apache.org/jira/browse/HADOOP-14424) | Add CRC32C performance test. | Minor | common | LiXin Ge | LiXin Ge | | [HDFS-11345](https://issues.apache.org/jira/browse/HDFS-11345) | Document the configuration key for FSNamesystem lock fairness | Minor | documentation, namenode | Zhe Zhang | Erik Krogen | -| [HDFS-11647](https://issues.apache.org/jira/browse/HDFS-11647) | Add -E option in hdfs "count" command to show erasure policy summarization | Major | . | SammiChen | luhuichun | +| [HDFS-11647](https://issues.apache.org/jira/browse/HDFS-11647) | Add -E option in hdfs "count" command to show erasure policy summarization | Major | . | Sammi Chen | luhuichun | | [HDFS-11789](https://issues.apache.org/jira/browse/HDFS-11789) | Maintain Short-Circuit Read Statistics | Major | hdfs-client | Hanisha Koneru | Hanisha Koneru | | [HDFS-11943](https://issues.apache.org/jira/browse/HDFS-11943) | [Erasure coding] Warn log frequently print to screen in doEncode/doDecode functions | Major | erasure-coding, native | liaoyuxiangqin | liaoyuxiangqin | | [HDFS-11992](https://issues.apache.org/jira/browse/HDFS-11992) | Replace commons-logging APIs with slf4j in FsDatasetImpl | Major | . 
| Akira Ajisaka | hu xiaodong | @@ -258,14 +251,15 @@ | [HADOOP-14536](https://issues.apache.org/jira/browse/HADOOP-14536) | Update azure-storage sdk to version 5.3.0 | Major | fs/azure | Mingliang Liu | Georgi Chalakov | | [YARN-6738](https://issues.apache.org/jira/browse/YARN-6738) | LevelDBCacheTimelineStore should reuse ObjectMapper instances | Major | timelineserver | Zoltan Haindrich | Zoltan Haindrich | | [HADOOP-14515](https://issues.apache.org/jira/browse/HADOOP-14515) | Specifically configure zookeeper-related log levels in KMS log4j | Major | kms | Xiao Chen | Xiao Chen | -| [HDFS-11646](https://issues.apache.org/jira/browse/HDFS-11646) | Add -E option in 'ls' to list erasure coding policy of each file and directory if applicable | Major | erasure-coding | SammiChen | luhuichun | +| [HDFS-11646](https://issues.apache.org/jira/browse/HDFS-11646) | Add -E option in 'ls' to list erasure coding policy of each file and directory if applicable | Major | erasure-coding | Sammi Chen | luhuichun | | [HDFS-11881](https://issues.apache.org/jira/browse/HDFS-11881) | NameNode consumes a lot of memory for snapshot diff report generation | Major | hdfs, snapshots | Manoj Govindassamy | Manoj Govindassamy | | [HADOOP-14602](https://issues.apache.org/jira/browse/HADOOP-14602) | allow custom release notes/changelog during create-release | Minor | build, scripts | Allen Wittenauer | Allen Wittenauer | | [HADOOP-14611](https://issues.apache.org/jira/browse/HADOOP-14611) | NetworkTopology.DEFAULT\_HOST\_LEVEL is unused | Trivial | . | Daniel Templeton | Chen Liang | | [YARN-6751](https://issues.apache.org/jira/browse/YARN-6751) | Display reserved resources in web UI per queue | Major | fairscheduler, webapp | Abdullah Yousufi | Abdullah Yousufi | | [YARN-6280](https://issues.apache.org/jira/browse/YARN-6280) | Introduce deselect query param to skip ResourceRequest from getApp/getApps REST API | Major | resourcemanager, restapi | Lantao Jin | Lantao Jin | -| [HDFS-12138](https://issues.apache.org/jira/browse/HDFS-12138) | Remove redundant 'public' modifiers from BlockCollection | Trivial | namenode | Chen Liang | Chen Liang | | [YARN-6634](https://issues.apache.org/jira/browse/YARN-6634) | [API] Refactor ResourceManager WebServices to make API explicit | Critical | resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | +| [YARN-5547](https://issues.apache.org/jira/browse/YARN-5547) | NMLeveldbStateStore should be more tolerant of unknown keys | Major | nodemanager | Jason Lowe | Ajith S | +| [HADOOP-14077](https://issues.apache.org/jira/browse/HADOOP-14077) | Improve the patch of HADOOP-13119 | Major | security | Yuanbo Liu | Yuanbo Liu | ### BUG FIXES: @@ -275,7 +269,7 @@ | [HADOOP-13858](https://issues.apache.org/jira/browse/HADOOP-13858) | TestGridmixMemoryEmulation and TestResourceUsageEmulators fail on the environment other than Linux or Windows | Major | test | Akira Ajisaka | Akira Ajisaka | | [YARN-6012](https://issues.apache.org/jira/browse/YARN-6012) | Remove node label (removeFromClusterNodeLabels) document is missing | Major | documentation | Weiwei Yang | Ying Zhang | | [YARN-6117](https://issues.apache.org/jira/browse/YARN-6117) | SharedCacheManager does not start up | Major | . | Chris Trezzo | Chris Trezzo | -| [YARN-6082](https://issues.apache.org/jira/browse/YARN-6082) | Invalid REST api response for getApps since queueUsagePercentage is coming as INF | Critical | . 
| Sunil G | Sunil G | +| [YARN-6082](https://issues.apache.org/jira/browse/YARN-6082) | Invalid REST api response for getApps since queueUsagePercentage is coming as INF | Critical | . | Sunil Govindan | Sunil Govindan | | [HDFS-11365](https://issues.apache.org/jira/browse/HDFS-11365) | Log portnumber in PrivilegedNfsGatewayStarter | Minor | nfs | Mukul Kumar Singh | Mukul Kumar Singh | | [MAPREDUCE-6808](https://issues.apache.org/jira/browse/MAPREDUCE-6808) | Log map attempts as part of shuffle handler audit log | Major | . | Jonathan Eagles | Gergő Pásztor | | [HADOOP-13989](https://issues.apache.org/jira/browse/HADOOP-13989) | Remove erroneous source jar option from hadoop-client shade configuration | Minor | build | Joe Pallas | Joe Pallas | @@ -284,7 +278,7 @@ | [HDFS-11364](https://issues.apache.org/jira/browse/HDFS-11364) | Add a test to verify Audit log entries for setfacl/getfacl commands over FS shell | Major | hdfs, test | Manoj Govindassamy | Manoj Govindassamy | | [HDFS-11376](https://issues.apache.org/jira/browse/HDFS-11376) | Revert HDFS-8377 Support HTTP/2 in datanode | Major | datanode | Andrew Wang | Xiao Chen | | [HADOOP-13988](https://issues.apache.org/jira/browse/HADOOP-13988) | KMSClientProvider does not work with WebHDFS and Apache Knox w/ProxyUser | Major | common, kms | Greg Senia | Xiaoyu Yao | -| [HADOOP-14029](https://issues.apache.org/jira/browse/HADOOP-14029) | Fix KMSClientProvider for non-secure proxyuser use case | Major | common,kms | Xiaoyu Yao | Xiaoyu Yao | +| [HADOOP-14029](https://issues.apache.org/jira/browse/HADOOP-14029) | Fix KMSClientProvider for non-secure proxyuser use case | Major | kms | Xiaoyu Yao | Xiaoyu Yao | | [YARN-5641](https://issues.apache.org/jira/browse/YARN-5641) | Localizer leaves behind tarballs after container is complete | Major | . | Eric Badger | Eric Badger | | [HADOOP-13992](https://issues.apache.org/jira/browse/HADOOP-13992) | KMS should load SSL configuration the same way as SSLFactory | Major | kms, security | John Zhuge | John Zhuge | | [HDFS-11378](https://issues.apache.org/jira/browse/HDFS-11378) | Verify multiple DataNodes can be decommissioned/maintenance at the same time | Major | hdfs | Manoj Govindassamy | Manoj Govindassamy | @@ -308,7 +302,6 @@ | [YARN-6112](https://issues.apache.org/jira/browse/YARN-6112) | UpdateCallDuration is calculated only when debug logging is enabled | Major | fairscheduler | Yufei Gu | Yufei Gu | | [YARN-6144](https://issues.apache.org/jira/browse/YARN-6144) | FairScheduler: preempted resources can become negative | Blocker | fairscheduler, resourcemanager | Miklos Szegedi | Miklos Szegedi | | [YARN-6118](https://issues.apache.org/jira/browse/YARN-6118) | Add javadoc for Resources.isNone | Minor | scheduler | Karthik Kambatla | Andres Perez | -| [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users | Major | . | Jeffrey E Rodriguez | Yuanbo Liu | | [YARN-6166](https://issues.apache.org/jira/browse/YARN-6166) | Unnecessary INFO logs in AMRMClientAsyncImpl$CallbackHandlerThread.run | Trivial | . 
| Grant W | Grant W | | [HADOOP-14055](https://issues.apache.org/jira/browse/HADOOP-14055) | SwiftRestClient includes pass length in exception if auth fails | Minor | security | Marcell Hegedus | Marcell Hegedus | | [HDFS-11403](https://issues.apache.org/jira/browse/HDFS-11403) | Zookeper ACLs on NN HA enabled clusters to be handled consistently | Major | hdfs | Laszlo Puskas | Hanisha Koneru | @@ -316,7 +309,7 @@ | [YARN-3933](https://issues.apache.org/jira/browse/YARN-3933) | FairScheduler: Multiple calls to completedContainer are not safe | Major | fairscheduler | Lavkesh Lahngir | Shiwei Guo | | [HDFS-11407](https://issues.apache.org/jira/browse/HDFS-11407) | Document the missing usages of OfflineImageViewer processors | Minor | documentation, tools | Yiqun Lin | Yiqun Lin | | [HDFS-11408](https://issues.apache.org/jira/browse/HDFS-11408) | The config name of balance bandwidth is out of date | Minor | balancer & mover, documentation | Yiqun Lin | Yiqun Lin | -| [HADOOP-14058](https://issues.apache.org/jira/browse/HADOOP-14058) | Fix NativeS3FileSystemContractBaseTest#testDirWithDifferentMarkersWorks | Major | fs/s3, test | Akira Ajisaka | Yiqun Lin | +| [HADOOP-14058](https://issues.apache.org/jira/browse/HADOOP-14058) | Fix NativeS3FileSystemContractBaseTest#testDirWithDifferentMarkersWorks | Minor | fs/s3, test | Akira Ajisaka | Yiqun Lin | | [HDFS-11084](https://issues.apache.org/jira/browse/HDFS-11084) | Add a regression test for sticky bit support of OIV ReverseXML processor | Major | tools | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HDFS-11391](https://issues.apache.org/jira/browse/HDFS-11391) | Numeric usernames do no work with WebHDFS FS (write access) | Major | webhdfs | Pierre Villard | Pierre Villard | | [HADOOP-13924](https://issues.apache.org/jira/browse/HADOOP-13924) | Update checkstyle and checkstyle plugin version to handle indentation of JDK8 Lambdas | Major | . | Xiaoyu Yao | Akira Ajisaka | @@ -380,7 +373,7 @@ | [HADOOP-14156](https://issues.apache.org/jira/browse/HADOOP-14156) | Fix grammar error in ConfTest.java | Trivial | test | Andrey Dyatlov | Andrey Dyatlov | | [HDFS-11512](https://issues.apache.org/jira/browse/HDFS-11512) | Increase timeout on TestShortCircuitLocalRead#testSkipWithVerifyChecksum | Minor | . 
| Eric Badger | Eric Badger | | [HDFS-11499](https://issues.apache.org/jira/browse/HDFS-11499) | Decommissioning stuck because of failing recovery | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | -| [HDFS-11395](https://issues.apache.org/jira/browse/HDFS-11395) | RequestHedgingProxyProvider#RequestHedgingInvocationHandler hides the Exception thrown from NameNode | Major | ha | Nandakumar | Nandakumar | +| [HDFS-11395](https://issues.apache.org/jira/browse/HDFS-11395) | RequestHedgingProxyProvider#RequestHedgingInvocationHandler hides the Exception thrown from NameNode | Major | ha | Nanda kumar | Nanda kumar | | [HDFS-11526](https://issues.apache.org/jira/browse/HDFS-11526) | Fix confusing block recovery message | Minor | datanode | Wei-Chiu Chuang | Yiqun Lin | | [YARN-6327](https://issues.apache.org/jira/browse/YARN-6327) | Removing queues from CapacitySchedulerQueueManager and ParentQueue should be done with iterator | Major | capacityscheduler | Jonathan Hung | Jonathan Hung | | [HADOOP-14170](https://issues.apache.org/jira/browse/HADOOP-14170) | FileSystemContractBaseTest is not cleaning up test directory clearly | Major | fs | Mingliang Liu | Mingliang Liu | @@ -422,7 +415,7 @@ | [HADOOP-14247](https://issues.apache.org/jira/browse/HADOOP-14247) | FileContextMainOperationsBaseTest should clean up test root path | Minor | fs, test | Mingliang Liu | Mingliang Liu | | [MAPREDUCE-6862](https://issues.apache.org/jira/browse/MAPREDUCE-6862) | Fragments are not handled correctly by resource limit checking | Minor | . | Chris Trezzo | Chris Trezzo | | [MAPREDUCE-6873](https://issues.apache.org/jira/browse/MAPREDUCE-6873) | MR Job Submission Fails if MR framework application path not on defaultFS | Minor | mrv2 | Erik Krogen | Erik Krogen | -| [HADOOP-14256](https://issues.apache.org/jira/browse/HADOOP-14256) | [S3A DOC] Correct the format for "Seoul" example | Minor | documentation, s3 | Brahma Reddy Battula | Brahma Reddy Battula | +| [HADOOP-14256](https://issues.apache.org/jira/browse/HADOOP-14256) | [S3A DOC] Correct the format for "Seoul" example | Minor | documentation, fs/s3 | Brahma Reddy Battula | Brahma Reddy Battula | | [MAPREDUCE-6850](https://issues.apache.org/jira/browse/MAPREDUCE-6850) | Shuffle Handler keep-alive connections are closed from the server side | Major | . | Jonathan Eagles | Jonathan Eagles | | [MAPREDUCE-6836](https://issues.apache.org/jira/browse/MAPREDUCE-6836) | exception thrown when accessing the job configuration web UI | Minor | webapps | Sangjin Lee | Haibo Chen | | [HDFS-11592](https://issues.apache.org/jira/browse/HDFS-11592) | Closing a file has a wasteful preconditions in NameNode | Major | namenode | Eric Badger | Eric Badger | @@ -486,7 +479,7 @@ | [YARN-6363](https://issues.apache.org/jira/browse/YARN-6363) | Extending SLS: Synthetic Load Generator | Major | . | Carlo Curino | Carlo Curino | | [YARN-6153](https://issues.apache.org/jira/browse/YARN-6153) | keepContainer does not work when AM retry window is set | Major | resourcemanager | kyungwan nam | kyungwan nam | | [HDFS-11689](https://issues.apache.org/jira/browse/HDFS-11689) | New exception thrown by DFSClient#isHDFSEncryptionEnabled broke hacky hive code | Major | . 
| Yongjun Zhang | Yongjun Zhang | -| [YARN-5889](https://issues.apache.org/jira/browse/YARN-5889) | Improve and refactor user-limit calculation in capacity scheduler | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-5889](https://issues.apache.org/jira/browse/YARN-5889) | Improve and refactor user-limit calculation in capacity scheduler | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HDFS-11529](https://issues.apache.org/jira/browse/HDFS-11529) | Add libHDFS API to return last exception | Critical | libhdfs | Sailesh Mukil | Sailesh Mukil | | [YARN-6500](https://issues.apache.org/jira/browse/YARN-6500) | Do not mount inaccessible cgroups directories in CgroupsLCEResourcesHandler | Major | nodemanager | Miklos Szegedi | Miklos Szegedi | | [HDFS-11691](https://issues.apache.org/jira/browse/HDFS-11691) | Add a proper scheme to the datanode links in NN web UI | Major | . | Kihwal Lee | Kihwal Lee | @@ -523,7 +516,7 @@ | [HADOOP-14306](https://issues.apache.org/jira/browse/HADOOP-14306) | TestLocalFileSystem tests have very low timeouts | Major | . | Eric Badger | Eric Badger | | [HADOOP-14372](https://issues.apache.org/jira/browse/HADOOP-14372) | TestSymlinkLocalFS timeouts are too low | Major | . | Eric Badger | Eric Badger | | [HDFS-11739](https://issues.apache.org/jira/browse/HDFS-11739) | Fix regression in tests caused by YARN-679 | Major | test | Steve Loughran | Steve Loughran | -| [HDFS-11643](https://issues.apache.org/jira/browse/HDFS-11643) | Add shouldReplicate option to create builder | Blocker | balancer & mover, erasure-coding | Andrew Wang | SammiChen | +| [HDFS-11643](https://issues.apache.org/jira/browse/HDFS-11643) | Add shouldReplicate option to create builder | Blocker | balancer & mover, erasure-coding | Andrew Wang | Sammi Chen | | [HADOOP-14380](https://issues.apache.org/jira/browse/HADOOP-14380) | Make the Guava version Hadoop which builds with configurable | Major | build | Steve Loughran | Steve Loughran | | [HDFS-11448](https://issues.apache.org/jira/browse/HDFS-11448) | JN log segment syncing should support HA upgrade | Major | hdfs | Hanisha Koneru | Hanisha Koneru | | [HADOOP-14207](https://issues.apache.org/jira/browse/HADOOP-14207) | "dfsadmin -refreshCallQueue" fails with DecayRpcScheduler | Blocker | rpc-server | Surendra Singh Lilhore | Surendra Singh Lilhore | @@ -587,7 +580,7 @@ | [YARN-6141](https://issues.apache.org/jira/browse/YARN-6141) | ppc64le on Linux doesn't trigger \_\_linux get\_executable codepath | Major | nodemanager | Sonia Garudi | Ayappan | | [HADOOP-14399](https://issues.apache.org/jira/browse/HADOOP-14399) | Configuration does not correctly XInclude absolute file URIs | Blocker | conf | Andrew Wang | Jonathan Eagles | | [HADOOP-14430](https://issues.apache.org/jira/browse/HADOOP-14430) | the accessTime of FileStatus returned by SFTPFileSystem's getFileStatus method is always 0 | Trivial | fs | Hongyuan Li | Hongyuan Li | -| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health stauts as corrupt even one replica is corrupt | Critical | . | Brahma Reddy Battula | Brahma Reddy Battula | +| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health status as corrupt even one replica is corrupt | Critical | . 
| Brahma Reddy Battula | Brahma Reddy Battula | | [HDFS-11878](https://issues.apache.org/jira/browse/HDFS-11878) | Fix journal missing log httpServerUrl address in JournalNodeSyncer | Major | hdfs | Hanisha Koneru | Hanisha Koneru | | [HDFS-11879](https://issues.apache.org/jira/browse/HDFS-11879) | Fix JN sync interval in case of exception | Major | . | Hanisha Koneru | Hanisha Koneru | | [YARN-6643](https://issues.apache.org/jira/browse/YARN-6643) | TestRMFailover fails rarely due to port conflict | Major | test | Robert Kanter | Robert Kanter | @@ -632,7 +625,7 @@ | [HDFS-11945](https://issues.apache.org/jira/browse/HDFS-11945) | Internal lease recovery may not be retried for a long time | Major | namenode | Kihwal Lee | Kihwal Lee | | [HADOOP-14283](https://issues.apache.org/jira/browse/HADOOP-14283) | Upgrade AWS SDK to 1.11.134 | Critical | fs/s3 | Aaron Fabbri | Aaron Fabbri | | [HADOOP-14512](https://issues.apache.org/jira/browse/HADOOP-14512) | WASB atomic rename should not throw exception if the file is neither in src nor in dst when doing the rename | Major | fs/azure | Duo Xu | Duo Xu | -| [YARN-6585](https://issues.apache.org/jira/browse/YARN-6585) | RM fails to start when upgrading from 2.7 to 2.8 for clusters with node labels. | Blocker | . | Eric Payne | Sunil G | +| [YARN-6585](https://issues.apache.org/jira/browse/YARN-6585) | RM fails to start when upgrading from 2.7 to 2.8 for clusters with node labels. | Blocker | . | Eric Payne | Sunil Govindan | | [YARN-6703](https://issues.apache.org/jira/browse/YARN-6703) | RM startup failure with old state store due to version mismatch | Critical | . | Bibin A Chundatt | Varun Saxena | | [HADOOP-14501](https://issues.apache.org/jira/browse/HADOOP-14501) | Switch from aalto-xml to woodstox to handle odd XML features | Blocker | conf | Andrew Wang | Jonathan Eagles | | [HDFS-11967](https://issues.apache.org/jira/browse/HDFS-11967) | TestJMXGet fails occasionally | Major | . | Arpit Agarwal | Arpit Agarwal | @@ -645,7 +638,7 @@ | [MAPREDUCE-6897](https://issues.apache.org/jira/browse/MAPREDUCE-6897) | Add Unit Test to make sure Job end notification get sent even appMaster stop get YarnRuntimeException | Minor | . | Junping Du | Gergely Novák | | [YARN-6517](https://issues.apache.org/jira/browse/YARN-6517) | Fix warnings from Spotbugs in hadoop-yarn-common | Major | . | Weiwei Yang | Weiwei Yang | | [HADOOP-14540](https://issues.apache.org/jira/browse/HADOOP-14540) | Replace MRv1 specific terms in HostsFileReader | Minor | documentation | Akira Ajisaka | hu xiaodong | -| [HDFS-11995](https://issues.apache.org/jira/browse/HDFS-11995) | HDFS Architecture documentation incorrectly describes writing to a local temporary file. | Minor | documentation | Chris Nauroth | Nandakumar | +| [HDFS-11995](https://issues.apache.org/jira/browse/HDFS-11995) | HDFS Architecture documentation incorrectly describes writing to a local temporary file. | Minor | documentation | Chris Nauroth | Nanda kumar | | [HDFS-11890](https://issues.apache.org/jira/browse/HDFS-11890) | Handle NPE in BlockRecoveryWorker when DN is getting shoutdown. | Major | datanode | Surendra Singh Lilhore | Surendra Singh Lilhore | | [HDFS-11736](https://issues.apache.org/jira/browse/HDFS-11736) | OIV tests should not write outside 'target' directory. | Major | . 
| Konstantin Shvachko | Yiqun Lin | | [YARN-6713](https://issues.apache.org/jira/browse/YARN-6713) | Fix dead link in the Javadoc of FairSchedulerEventLog.java | Minor | documentation | Akira Ajisaka | Weiwei Yang | @@ -672,11 +665,9 @@ | [YARN-6467](https://issues.apache.org/jira/browse/YARN-6467) | CSQueueMetrics needs to update the current metrics for default partition only | Major | capacity scheduler | Naganarasimha G R | Manikandan R | | [YARN-6743](https://issues.apache.org/jira/browse/YARN-6743) | yarn.resourcemanager.zk-max-znode-size.bytes description needs spaces in yarn-default.xml | Trivial | . | Daniel Templeton | Lori Loberg | | [MAPREDUCE-6536](https://issues.apache.org/jira/browse/MAPREDUCE-6536) | hadoop-pipes doesn't use maven properties for openssl | Blocker | pipes | Allen Wittenauer | Allen Wittenauer | -| [HADOOP-14479](https://issues.apache.org/jira/browse/HADOOP-14479) | Erasurecode testcase failures with native enabled | Critical | common | Ayappan | SammiChen | -| [HDFS-12043](https://issues.apache.org/jira/browse/HDFS-12043) | Add counters for block re-replication | Major | . | Chen Liang | Chen Liang | +| [HADOOP-14479](https://issues.apache.org/jira/browse/HADOOP-14479) | Erasurecode testcase failures with native enabled | Critical | common | Ayappan | Sammi Chen | | [YARN-6344](https://issues.apache.org/jira/browse/YARN-6344) | Add parameter for rack locality delay in CapacityScheduler | Major | capacityscheduler | Konstantinos Karanasos | Konstantinos Karanasos | | [MAPREDUCE-6697](https://issues.apache.org/jira/browse/MAPREDUCE-6697) | Concurrent task limits should only be applied when necessary | Major | mrv2 | Jason Lowe | Nathan Roberts | -| [HDFS-12107](https://issues.apache.org/jira/browse/HDFS-12107) | FsDatasetImpl#removeVolumes floods the logs when removing the volume | Major | . | Haohui Mai | Kelvin Chu | ### TESTS: @@ -734,14 +725,14 @@ | [HADOOP-14102](https://issues.apache.org/jira/browse/HADOOP-14102) | Relax error message assertion in S3A test ITestS3AEncryptionSSEC | Minor | fs/s3 | Mingliang Liu | Mingliang Liu | | [HDFS-4025](https://issues.apache.org/jira/browse/HDFS-4025) | QJM: Sychronize past log segments to JNs that missed them | Major | ha | Todd Lipcon | Hanisha Koneru | | [YARN-6069](https://issues.apache.org/jira/browse/YARN-6069) | CORS support in timeline v2 | Major | timelinereader | Sreenath Somarajapuram | Rohith Sharma K S | -| [YARN-6143](https://issues.apache.org/jira/browse/YARN-6143) | Fix incompatible issue caused by YARN-3583 | Blocker | rolling upgrade | Wangda Tan | Sunil G | +| [YARN-6143](https://issues.apache.org/jira/browse/YARN-6143) | Fix incompatible issue caused by YARN-3583 | Blocker | rolling upgrade | Wangda Tan | Sunil Govindan | | [HADOOP-14113](https://issues.apache.org/jira/browse/HADOOP-14113) | review ADL Docs | Minor | documentation, fs/adl | Steve Loughran | Steve Loughran | | [YARN-4779](https://issues.apache.org/jira/browse/YARN-4779) | Fix AM container allocation logic in SLS | Major | scheduler-load-simulator | Wangda Tan | Wangda Tan | | [YARN-6228](https://issues.apache.org/jira/browse/YARN-6228) | EntityGroupFSTimelineStore should allow configurable cache stores. 
| Major | timelineserver | Li Lu | Li Lu | -| [YARN-6215](https://issues.apache.org/jira/browse/YARN-6215) | FairScheduler preemption and update should not run concurrently | Major | fairscheduler, test | Sunil G | Tao Jie | +| [YARN-6215](https://issues.apache.org/jira/browse/YARN-6215) | FairScheduler preemption and update should not run concurrently | Major | fairscheduler, test | Sunil Govindan | Tao Jie | | [YARN-6123](https://issues.apache.org/jira/browse/YARN-6123) | [YARN-5864] Add a test to make sure queues of orderingPolicy will be updated when childQueues is added or removed. | Major | . | Wangda Tan | Wangda Tan | | [HADOOP-14118](https://issues.apache.org/jira/browse/HADOOP-14118) | move jets3t into a dependency on hadoop-aws JAR | Major | build, fs/s3 | Steve Loughran | Akira Ajisaka | -| [YARN-5335](https://issues.apache.org/jira/browse/YARN-5335) | Use em-table in app/nodes pages for new YARN UI | Major | . | Sunil G | Sunil G | +| [YARN-5335](https://issues.apache.org/jira/browse/YARN-5335) | Use em-table in app/nodes pages for new YARN UI | Major | . | Sunil Govindan | Sunil Govindan | | [HDFS-11450](https://issues.apache.org/jira/browse/HDFS-11450) | HDFS specific network topology classes with storage type info included | Major | namenode | Chen Liang | Chen Liang | | [HDFS-11412](https://issues.apache.org/jira/browse/HDFS-11412) | Maintenance minimum replication config value allowable range should be [0, DefaultReplication] | Major | datanode, namenode | Manoj Govindassamy | Manoj Govindassamy | | [HADOOP-14057](https://issues.apache.org/jira/browse/HADOOP-14057) | Fix package.html to compile with Java 9 | Major | documentation | Akira Ajisaka | Akira Ajisaka | @@ -778,7 +769,7 @@ | [HDFS-11577](https://issues.apache.org/jira/browse/HDFS-11577) | Combine the old and the new chooseRandom for better performance | Major | namenode | Chen Liang | Chen Liang | | [YARN-6357](https://issues.apache.org/jira/browse/YARN-6357) | Implement putEntitiesAsync API in TimelineCollector | Major | ATSv2, timelineserver | Joep Rottinghuis | Haibo Chen | | [HDFS-10971](https://issues.apache.org/jira/browse/HDFS-10971) | Distcp should not copy replication factor if source file is erasure coded | Blocker | distcp | Wei-Chiu Chuang | Manoj Govindassamy | -| [HDFS-11541](https://issues.apache.org/jira/browse/HDFS-11541) | Call RawErasureEncoder and RawErasureDecoder release() methods | Major | erasure-coding | László Bence Nagy | SammiChen | +| [HDFS-11541](https://issues.apache.org/jira/browse/HDFS-11541) | Call RawErasureEncoder and RawErasureDecoder release() methods | Major | erasure-coding | László Bence Nagy | Sammi Chen | | [YARN-5654](https://issues.apache.org/jira/browse/YARN-5654) | Not be able to run SLS with FairScheduler | Major | . | Wangda Tan | Yufei Gu | | [YARN-6342](https://issues.apache.org/jira/browse/YARN-6342) | Make TimelineV2Client's drain timeout after stop configurable | Major | . | Jian He | Haibo Chen | | [YARN-6376](https://issues.apache.org/jira/browse/YARN-6376) | Exceptions caused by synchronous putEntities requests can be swallowed | Critical | ATSv2 | Haibo Chen | Haibo Chen | @@ -790,15 +781,15 @@ | [HADOOP-14290](https://issues.apache.org/jira/browse/HADOOP-14290) | Update SLF4J from 1.7.10 to 1.7.25 | Major | . 
| Akira Ajisaka | Akira Ajisaka | | [YARN-5153](https://issues.apache.org/jira/browse/YARN-5153) | Add a toggle button to switch between timeline view / table view for containers and application-attempts in new YARN UI | Major | webapp | Wangda Tan | Akhil PB | | [YARN-6372](https://issues.apache.org/jira/browse/YARN-6372) | Add default value for NM disk validator | Major | nodemanager | Yufei Gu | Yufei Gu | -| [HDFS-10996](https://issues.apache.org/jira/browse/HDFS-10996) | Ability to specify per-file EC policy at create time | Major | erasure-coding | Andrew Wang | SammiChen | +| [HDFS-10996](https://issues.apache.org/jira/browse/HDFS-10996) | Ability to specify per-file EC policy at create time | Major | erasure-coding | Andrew Wang | Sammi Chen | | [HADOOP-14255](https://issues.apache.org/jira/browse/HADOOP-14255) | S3A to delete unnecessary fake directory objects in mkdirs() | Major | fs/s3 | Mingliang Liu | Mingliang Liu | | [YARN-6432](https://issues.apache.org/jira/browse/YARN-6432) | FairScheduler: Reserve preempted resources for corresponding applications | Major | . | Miklos Szegedi | Miklos Szegedi | | [HADOOP-14321](https://issues.apache.org/jira/browse/HADOOP-14321) | Explicitly exclude S3A root dir ITests from parallel runs | Minor | fs/s3, test | Steve Loughran | Steve Loughran | | [HADOOP-14241](https://issues.apache.org/jira/browse/HADOOP-14241) | Add ADLS sensitive config keys to default list | Minor | fs, fs/adl, security | John Zhuge | John Zhuge | -| [YARN-6402](https://issues.apache.org/jira/browse/YARN-6402) | Move 'Long Running Services' to an independent tab at top level for new Yarn UI | Major | webapp | Sunil G | Akhil PB | +| [YARN-6402](https://issues.apache.org/jira/browse/YARN-6402) | Move 'Long Running Services' to an independent tab at top level for new Yarn UI | Major | webapp | Sunil Govindan | Akhil PB | | [HADOOP-14324](https://issues.apache.org/jira/browse/HADOOP-14324) | Refine S3 server-side-encryption key as encryption secret; improve error reporting and diagnostics | Blocker | fs/s3 | Steve Loughran | Steve Loughran | -| [HDFS-11604](https://issues.apache.org/jira/browse/HDFS-11604) | Define and parse erasure code policies | Major | erasure-coding | Kai Zheng | Lin Zeng | -| [HADOOP-14261](https://issues.apache.org/jira/browse/HADOOP-14261) | Some refactoring work for erasure coding raw coder | Major | . | Kai Zheng | Lin Zeng | +| [HDFS-11604](https://issues.apache.org/jira/browse/HDFS-11604) | Define and parse erasure code policies | Major | erasure-coding | Kai Zheng | Frank Zeng | +| [HADOOP-14261](https://issues.apache.org/jira/browse/HADOOP-14261) | Some refactoring work for erasure coding raw coder | Major | . | Kai Zheng | Frank Zeng | | [YARN-6291](https://issues.apache.org/jira/browse/YARN-6291) | Introduce query parameters (sort, filter, etc.) for tables to keep state on refresh/navigation in new YARN UI | Major | . 
| Gergely Novák | Gergely Novák | | [HADOOP-14305](https://issues.apache.org/jira/browse/HADOOP-14305) | S3A SSE tests won't run in parallel: Bad request in directory GetFileStatus | Minor | fs/s3, test | Steve Loughran | Steve Moist | | [YARN-6423](https://issues.apache.org/jira/browse/YARN-6423) | Queue metrics doesn't work for Fair Scheduler in SLS | Major | scheduler-load-simulator | Yufei Gu | Yufei Gu | @@ -816,7 +807,7 @@ | [YARN-6522](https://issues.apache.org/jira/browse/YARN-6522) | Make SLS JSON input file format simple and scalable | Major | scheduler-load-simulator | Yufei Gu | Yufei Gu | | [HDFS-11530](https://issues.apache.org/jira/browse/HDFS-11530) | Use HDFS specific network topology to choose datanode in BlockPlacementPolicyDefault | Major | namenode | Yiqun Lin | Yiqun Lin | | [YARN-6565](https://issues.apache.org/jira/browse/YARN-6565) | Fix memory leak and finish app trigger in AMRMProxy | Critical | . | Botong Huang | Botong Huang | -| [HDFS-9342](https://issues.apache.org/jira/browse/HDFS-9342) | Erasure coding: client should update and commit block based on acknowledged size | Critical | erasure-coding | Zhe Zhang | SammiChen | +| [HDFS-9342](https://issues.apache.org/jira/browse/HDFS-9342) | Erasure coding: client should update and commit block based on acknowledged size | Critical | erasure-coding | Zhe Zhang | Sammi Chen | | [YARN-6234](https://issues.apache.org/jira/browse/YARN-6234) | Support multiple attempts on the node when AMRMProxy is enabled | Major | amrmproxy, federation, nodemanager | Subru Krishnan | Giovanni Matteo Fumarola | | [HADOOP-14384](https://issues.apache.org/jira/browse/HADOOP-14384) | Reduce the visibility of FileSystem#newFSDataOutputStreamBuilder before the API becomes stable | Blocker | fs | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-6563](https://issues.apache.org/jira/browse/YARN-6563) | ConcurrentModificationException in TimelineCollectorManager while stopping RM | Major | resourcemanager | Rohith Sharma K S | Haibo Chen | @@ -826,11 +817,10 @@ | [HADOOP-11572](https://issues.apache.org/jira/browse/HADOOP-11572) | s3a delete() operation fails during a concurrent delete of child entries | Major | fs/s3 | Steve Loughran | Steve Loughran | | [YARN-6560](https://issues.apache.org/jira/browse/YARN-6560) | SLS doesn't honor node total resource specified in sls-runner.xml | Major | . | Wangda Tan | Wangda Tan | | [HADOOP-14432](https://issues.apache.org/jira/browse/HADOOP-14432) | S3A copyFromLocalFile to be robust, tested | Major | fs | Steve Loughran | Steve Loughran | -| [YARN-5705](https://issues.apache.org/jira/browse/YARN-5705) | Show timeline data from ATS v2 in new web UI | Major | . | Sunil G | Akhil PB | +| [YARN-5705](https://issues.apache.org/jira/browse/YARN-5705) | Show timeline data from ATS v2 in new web UI | Major | . 
| Sunil Govindan | Akhil PB | | [YARN-6111](https://issues.apache.org/jira/browse/YARN-6111) | Rumen input does't work in SLS | Major | scheduler-load-simulator | YuJie Huang | Yufei Gu | | [HDFS-11535](https://issues.apache.org/jira/browse/HDFS-11535) | Performance analysis of new DFSNetworkTopology#chooseRandom | Major | namenode | Chen Liang | Chen Liang | -| [YARN-6627](https://issues.apache.org/jira/browse/YARN-6627) | Use deployed webapp folder to launch new YARN UI | Major | yarn-ui-v2 | Sunil G | Sunil G | -| [HDFS-11794](https://issues.apache.org/jira/browse/HDFS-11794) | Add ec sub command -listCodec to show currently supported ec codecs | Major | erasure-coding | SammiChen | SammiChen | +| [HDFS-11794](https://issues.apache.org/jira/browse/HDFS-11794) | Add ec sub command -listCodec to show currently supported ec codecs | Major | erasure-coding | Sammi Chen | Sammi Chen | | [HDFS-11823](https://issues.apache.org/jira/browse/HDFS-11823) | Extend TestDFSStripedIutputStream/TestDFSStripedOutputStream with a random EC policy | Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma | | [YARN-6587](https://issues.apache.org/jira/browse/YARN-6587) | Refactor of ResourceManager#startWebApp in a Util class | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [YARN-6555](https://issues.apache.org/jira/browse/YARN-6555) | Store application flow context in NM state store for work-preserving restart | Major | timelineserver | Vrushali C | Rohith Sharma K S | @@ -867,9 +857,10 @@ | [HADOOP-14297](https://issues.apache.org/jira/browse/HADOOP-14297) | Update the documentation about the new ec codecs config keys | Major | documentation | Kai Sasaki | Kai Sasaki | | [HADOOP-14609](https://issues.apache.org/jira/browse/HADOOP-14609) | NPE in AzureNativeFileSystemStore.checkContainer() if StorageException lacks an error code | Major | fs/azure | Steve Loughran | Steve Loughran | | [YARN-5311](https://issues.apache.org/jira/browse/YARN-5311) | Document graceful decommission CLI and usage | Major | documentation | Junping Du | Elek, Marton | -| [HADOOP-14601](https://issues.apache.org/jira/browse/HADOOP-14601) | Azure: Reuse ObjectMapper | Major | fa/azure | Mingliang Liu | Mingliang Liu | +| [HADOOP-14601](https://issues.apache.org/jira/browse/HADOOP-14601) | Azure: Reuse ObjectMapper | Major | fs/azure | Mingliang Liu | Mingliang Liu | | [HADOOP-14596](https://issues.apache.org/jira/browse/HADOOP-14596) | AWS SDK 1.11+ aborts() on close() if \> 0 bytes in stream; logs error | Blocker | fs/s3 | Steve Loughran | Steve Loughran | -| [YARN-2113](https://issues.apache.org/jira/browse/YARN-2113) | Add cross-user preemption within CapacityScheduler's leaf-queue | Major | capacity scheduler | Vinod Kumar Vavilapalli | Sunil G | +| [YARN-2113](https://issues.apache.org/jira/browse/YARN-2113) | Add cross-user preemption within CapacityScheduler's leaf-queue | Major | capacity scheduler | Vinod Kumar Vavilapalli | Sunil Govindan | +| [YARN-6627](https://issues.apache.org/jira/browse/YARN-6627) | Use deployed webapp folder to launch new YARN UI | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | ### OTHER: @@ -882,6 +873,6 @@ | [YARN-6411](https://issues.apache.org/jira/browse/YARN-6411) | Clean up the overwrite of createDispatcher() in subclass of MockRM | Minor | resourcemanager | Yufei Gu | Yufei Gu | | [HADOOP-14344](https://issues.apache.org/jira/browse/HADOOP-14344) | Revert HADOOP-13606 swift FS to add a service load metadata file | Major | . 
| John Zhuge | John Zhuge | | [HDFS-11717](https://issues.apache.org/jira/browse/HDFS-11717) | Add unit test for HDFS-11709 StandbyCheckpointer should handle non-existing legacyOivImageDir gracefully | Minor | ha, namenode | Erik Krogen | Erik Krogen | -| [HDFS-11870](https://issues.apache.org/jira/browse/HDFS-11870) | Add CLI cmd to enable/disable an erasure code policy | Major | erasure-coding | SammiChen | lufei | +| [HDFS-11870](https://issues.apache.org/jira/browse/HDFS-11870) | Add CLI cmd to enable/disable an erasure code policy | Major | erasure-coding | Sammi Chen | lufei | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/RELEASENOTES.3.0.0-alpha4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/RELEASENOTES.3.0.0-alpha4.md index dcffce435be..521feac9cc5 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/RELEASENOTES.3.0.0-alpha4.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-alpha4/RELEASENOTES.3.0.0-alpha4.md @@ -78,6 +78,13 @@ An `invalidateCache` command has been added to the KMS. The `rollNewVersion` semantics of the KMS has been improved so that after a key's version is rolled, `generateEncryptedKey` of that key guarantees to return the `EncryptedKeyVersion` based on the new key version. +--- + +* [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | *Major* | **Add ability to secure log servlet using proxy users** + +**WARNING: No release note provided for this change.** + + --- * [HADOOP-13075](https://issues.apache.org/jira/browse/HADOOP-13075) | *Major* | **Add support for SSE-KMS and SSE-C in s3a filesystem** @@ -498,9 +505,4 @@ The WASB FileSystem now uses version 5.3.0 of the Azure Storage SDK. Fix to wasb:// (Azure) file system that allows the concurrent I/O feature to be used with the secure mode feature. ---- - -* [YARN-6959](https://issues.apache.org/jira/browse/YARN-6959) | *Major* | **RM may allocate wrong AM Container for new attempt** - -ResourceManager will now record ResourceRequests from different attempts into different objects. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/CHANGES.3.0.0-beta1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/CHANGELOG.3.0.0-beta1.md similarity index 98% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/CHANGES.3.0.0-beta1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/CHANGELOG.3.0.0-beta1.md index 3ddf2ee93c1..9ddbb73e365 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/CHANGES.3.0.0-beta1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/CHANGELOG.3.0.0-beta1.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 3.0.0-beta1 - 2017-09-28 +## Release 3.0.0-beta1 - 2017-10-03 ### INCOMPATIBLE CHANGES: @@ -34,19 +34,19 @@ | [HADOOP-14260](https://issues.apache.org/jira/browse/HADOOP-14260) | Configuration.dumpConfiguration should redact sensitive information | Major | conf, security | Vihang Karajgaonkar | John Zhuge | | [HADOOP-14726](https://issues.apache.org/jira/browse/HADOOP-14726) | Mark FileStatus::isDir as final | Minor | fs | Chris Douglas | Chris Douglas | | [HDFS-12303](https://issues.apache.org/jira/browse/HDFS-12303) | Change default EC cell size to 1MB for better performance | Blocker | . 
| Wei Zhou | Wei Zhou | -| [HDFS-12258](https://issues.apache.org/jira/browse/HDFS-12258) | ec -listPolicies should list all policies in system, no matter it's enabled or disabled | Major | . | SammiChen | Wei Zhou | +| [HDFS-12258](https://issues.apache.org/jira/browse/HDFS-12258) | ec -listPolicies should list all policies in system, no matter it's enabled or disabled | Major | . | Sammi Chen | Wei Zhou | | [MAPREDUCE-6892](https://issues.apache.org/jira/browse/MAPREDUCE-6892) | Issues with the count of failed/killed tasks in the jhist file | Major | client, jobhistoryserver | Peter Bacsko | Peter Bacsko | | [HADOOP-14414](https://issues.apache.org/jira/browse/HADOOP-14414) | Calling maven-site-plugin directly for docs profile is unnecessary | Minor | . | Andras Bokor | Andras Bokor | | [HDFS-12218](https://issues.apache.org/jira/browse/HDFS-12218) | Rename split EC / replicated block metrics in BlockManager | Blocker | erasure-coding, metrics | Andrew Wang | Andrew Wang | | [HADOOP-14847](https://issues.apache.org/jira/browse/HADOOP-14847) | Remove Guava Supplier and change to java Supplier in AMRMClient and AMRMClientAysnc | Blocker | . | Bharat Viswanadham | Bharat Viswanadham | -| [HDFS-12414](https://issues.apache.org/jira/browse/HDFS-12414) | Ensure to use CLI command to enable/disable erasure coding policy | Major | . | SammiChen | SammiChen | +| [HDFS-12414](https://issues.apache.org/jira/browse/HDFS-12414) | Ensure to use CLI command to enable/disable erasure coding policy | Major | . | Sammi Chen | Sammi Chen | | [HDFS-12438](https://issues.apache.org/jira/browse/HDFS-12438) | Rename dfs.datanode.ec.reconstruction.stripedblock.threads.size to dfs.datanode.ec.reconstruction.threads | Major | . | Andrew Wang | Andrew Wang | | [HADOOP-14738](https://issues.apache.org/jira/browse/HADOOP-14738) | Remove S3N and obsolete bits of S3A; rework docs | Blocker | fs/s3 | Steve Loughran | Steve Loughran | -| [HDFS-7859](https://issues.apache.org/jira/browse/HDFS-7859) | Erasure Coding: Persist erasure coding policies in NameNode | Major | . | Kai Zheng | SammiChen | -| [HDFS-12395](https://issues.apache.org/jira/browse/HDFS-12395) | Support erasure coding policy operations in namenode edit log | Major | erasure-coding | SammiChen | SammiChen | +| [HDFS-7859](https://issues.apache.org/jira/browse/HDFS-7859) | Erasure Coding: Persist erasure coding policies in NameNode | Major | . | Kai Zheng | Sammi Chen | +| [HDFS-12395](https://issues.apache.org/jira/browse/HDFS-12395) | Support erasure coding policy operations in namenode edit log | Major | erasure-coding | Sammi Chen | Sammi Chen | | [HADOOP-14670](https://issues.apache.org/jira/browse/HADOOP-14670) | Increase minimum cmake version for all platforms | Major | build | Allen Wittenauer | Allen Wittenauer | -| [HDFS-12447](https://issues.apache.org/jira/browse/HDFS-12447) | Rename AddECPolicyResponse to AddErasureCodingPolicyResponse | Major | . | SammiChen | SammiChen | -| [HDFS-7337](https://issues.apache.org/jira/browse/HDFS-7337) | Configurable and pluggable erasure codec and policy | Critical | erasure-coding | Zhe Zhang | SammiChen | +| [HDFS-12447](https://issues.apache.org/jira/browse/HDFS-12447) | Rename AddECPolicyResponse to AddErasureCodingPolicyResponse | Major | . 
| Sammi Chen | Sammi Chen | +| [HDFS-7337](https://issues.apache.org/jira/browse/HDFS-7337) | Configurable and pluggable erasure codec and policy | Critical | erasure-coding | Zhe Zhang | Sammi Chen | ### IMPORTANT ISSUES: @@ -66,8 +66,10 @@ | [YARN-5355](https://issues.apache.org/jira/browse/YARN-5355) | YARN Timeline Service v.2: alpha 2 | Critical | timelineserver | Sangjin Lee | Vrushali C | | [HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345) | S3Guard: Improved Consistency for S3A | Major | fs/s3 | Chris Nauroth | Chris Nauroth | | [HADOOP-12077](https://issues.apache.org/jira/browse/HADOOP-12077) | Provide a multi-URI replication Inode for ViewFs | Major | fs | Gera Shegalov | Gera Shegalov | -| [HDFS-7877](https://issues.apache.org/jira/browse/HDFS-7877) | Support maintenance state for datanodes | Major | datanode, namenode | Ming Ma | Ming Ma | +| [HDFS-7877](https://issues.apache.org/jira/browse/HDFS-7877) | [Umbrella] Support maintenance state for datanodes | Major | datanode, namenode | Ming Ma | Ming Ma | | [YARN-2915](https://issues.apache.org/jira/browse/YARN-2915) | Enable YARN RM scale out via federation using multiple RM's | Major | nodemanager, resourcemanager | Sriram Rao | Subru Krishnan | +| [MAPREDUCE-6732](https://issues.apache.org/jira/browse/MAPREDUCE-6732) | mapreduce tasks for YARN Timeline Service v.2: alpha 2 | Major | . | Sangjin Lee | Vrushali C | +| [YARN-5220](https://issues.apache.org/jira/browse/YARN-5220) | Scheduling of OPPORTUNISTIC containers through YARN RM | Major | resourcemanager | Konstantinos Karanasos | Konstantinos Karanasos | ### IMPROVEMENTS: @@ -116,6 +118,7 @@ | [YARN-6802](https://issues.apache.org/jira/browse/YARN-6802) | Add Max AM Resource and AM Resource Usage to Leaf Queue View in FairScheduler WebUI | Major | fairscheduler | YunFan Zhou | YunFan Zhou | | [HDFS-12036](https://issues.apache.org/jira/browse/HDFS-12036) | Add audit log for some erasure coding operations | Major | namenode | Wei-Chiu Chuang | Huafeng Wang | | [HDFS-12264](https://issues.apache.org/jira/browse/HDFS-12264) | DataNode uses a deprecated method IoUtils#cleanup. | Major | . | Ajay Kumar | Ajay Kumar | +| [YARN-6757](https://issues.apache.org/jira/browse/YARN-6757) | Refactor the usage of yarn.nodemanager.linux-container-executor.cgroups.mount-path | Minor | nodemanager | Miklos Szegedi | Miklos Szegedi | | [YARN-6811](https://issues.apache.org/jira/browse/YARN-6811) | [ATS1.5] All history logs should be kept under its own User Directory. | Major | timelineclient, timelineserver | Rohith Sharma K S | Rohith Sharma K S | | [YARN-6879](https://issues.apache.org/jira/browse/YARN-6879) | TestLeafQueue.testDRFUserLimits() has commented out code | Trivial | capacity scheduler, test | Daniel Templeton | Angela Wang | | [MAPREDUCE-6923](https://issues.apache.org/jira/browse/MAPREDUCE-6923) | Optimize MapReduce Shuffle I/O for small partitions | Major | . 
| Robert Schmidtke | Robert Schmidtke | @@ -155,8 +158,7 @@ | [HDFS-12377](https://issues.apache.org/jira/browse/HDFS-12377) | Refactor TestReadStripedFileWithDecoding to avoid test timeouts | Major | erasure-coding | Andrew Wang | Andrew Wang | | [HADOOP-14827](https://issues.apache.org/jira/browse/HADOOP-14827) | Allow StopWatch to accept a Timer parameter for tests | Minor | common, test | Erik Krogen | Erik Krogen | | [HDFS-12131](https://issues.apache.org/jira/browse/HDFS-12131) | Add some of the FSNamesystem JMX values as metrics | Minor | hdfs, namenode | Erik Krogen | Erik Krogen | -| [HDFS-12402](https://issues.apache.org/jira/browse/HDFS-12402) | Refactor ErasureCodingPolicyManager and related codes | Major | erasure-coding | SammiChen | SammiChen | -| [YARN-6930](https://issues.apache.org/jira/browse/YARN-6930) | Admins should be able to explicitly enable specific LinuxContainerRuntime in the NodeManager | Major | nodemanager | Vinod Kumar Vavilapalli | Shane Kumpf | +| [HDFS-12402](https://issues.apache.org/jira/browse/HDFS-12402) | Refactor ErasureCodingPolicyManager and related codes | Major | erasure-coding | Sammi Chen | Sammi Chen | | [HADOOP-14844](https://issues.apache.org/jira/browse/HADOOP-14844) | Remove requirement to specify TenantGuid for MSI Token Provider | Major | fs/adl | Atul Sikaria | Atul Sikaria | | [YARN-6799](https://issues.apache.org/jira/browse/YARN-6799) | Remove the duplicated code in CGroupsHandlerImp.java | Trivial | nodemanager | Yufei Gu | weiyuan | | [HADOOP-14520](https://issues.apache.org/jira/browse/HADOOP-14520) | WASB: Block compaction for Azure Block Blobs | Major | fs/azure | Georgi Chalakov | Georgi Chalakov | @@ -175,7 +177,7 @@ | [HDFS-12441](https://issues.apache.org/jira/browse/HDFS-12441) | Suppress UnresolvedPathException in namenode log | Minor | . | Kihwal Lee | Kihwal Lee | | [HADOOP-13714](https://issues.apache.org/jira/browse/HADOOP-13714) | Tighten up our compatibility guidelines for Hadoop 3 | Blocker | documentation | Karthik Kambatla | Daniel Templeton | | [HDFS-12472](https://issues.apache.org/jira/browse/HDFS-12472) | Add JUNIT timeout to TestBlockStatsMXBean | Minor | . | Lei (Eddy) Xu | Bharat Viswanadham | -| [HDFS-12460](https://issues.apache.org/jira/browse/HDFS-12460) | Make addErasureCodingPolicy an idempotent operation | Major | erasure-coding | SammiChen | SammiChen | +| [HDFS-12460](https://issues.apache.org/jira/browse/HDFS-12460) | Make addErasureCodingPolicy an idempotent operation | Major | erasure-coding | Sammi Chen | Sammi Chen | | [HDFS-12479](https://issues.apache.org/jira/browse/HDFS-12479) | Some misuses of lock in DFSStripedOutputStream | Minor | erasure-coding | Huafeng Wang | Huafeng Wang | | [MAPREDUCE-6958](https://issues.apache.org/jira/browse/MAPREDUCE-6958) | Shuffle audit logger should log size of shuffle transfer | Minor | . | Jason Lowe | Jason Lowe | | [HDFS-12444](https://issues.apache.org/jira/browse/HDFS-12444) | Reduce runtime of TestWriteReadStripedFile | Major | erasure-coding, test | Andrew Wang | Huafeng Wang | @@ -185,6 +187,8 @@ | [HDFS-12530](https://issues.apache.org/jira/browse/HDFS-12530) | Processor argument in Offline Image Viewer should be case insensitive | Minor | tools | Hanisha Koneru | Hanisha Koneru | | [HDFS-12304](https://issues.apache.org/jira/browse/HDFS-12304) | Remove unused parameter from FsDatasetImpl#addVolume | Minor | . 
| Chen Liang | Chen Liang | | [YARN-65](https://issues.apache.org/jira/browse/YARN-65) | Reduce RM app memory footprint once app has completed | Major | resourcemanager | Jason Lowe | Manikandan R | +| [YARN-4879](https://issues.apache.org/jira/browse/YARN-4879) | Enhance Allocate Protocol to Identify Requests Explicitly | Major | applications, resourcemanager | Subru Krishnan | Subru Krishnan | +| [YARN-6930](https://issues.apache.org/jira/browse/YARN-6930) | Admins should be able to explicitly enable specific LinuxContainerRuntime in the NodeManager | Major | nodemanager | Vinod Kumar Vavilapalli | Shane Kumpf | ### BUG FIXES: @@ -222,12 +226,11 @@ | [YARN-6759](https://issues.apache.org/jira/browse/YARN-6759) | Fix TestRMRestart.testRMRestartWaitForPreviousAMToFinish failure | Major | . | Naganarasimha G R | Naganarasimha G R | | [YARN-3260](https://issues.apache.org/jira/browse/YARN-3260) | AM attempt fail to register before RM processes launch event | Critical | resourcemanager | Jason Lowe | Bibin A Chundatt | | [HDFS-12140](https://issues.apache.org/jira/browse/HDFS-12140) | Remove BPOfferService lock contention to get block pool id | Critical | datanode | Daryn Sharp | Daryn Sharp | -| [YARN-6625](https://issues.apache.org/jira/browse/YARN-6625) | yarn application -list returns a tracking URL for AM that doesn't work in secured and HA environment | Major | amrmproxy | Yufei Gu | Yufei Gu | | [HDFS-12112](https://issues.apache.org/jira/browse/HDFS-12112) | TestBlockManager#testBlockManagerMachinesArray sometimes fails with NPE | Minor | . | Wei-Chiu Chuang | Wei-Chiu Chuang | | [YARN-6714](https://issues.apache.org/jira/browse/YARN-6714) | IllegalStateException while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler | Major | . | Tao Yang | Tao Yang | | [MAPREDUCE-6889](https://issues.apache.org/jira/browse/MAPREDUCE-6889) | Add Job#close API to shutdown MR client services. | Major | . | Rohith Sharma K S | Rohith Sharma K S | | [MAPREDUCE-6910](https://issues.apache.org/jira/browse/MAPREDUCE-6910) | MapReduceTrackingUriPlugin can not return the right URI of history server with HTTPS | Major | jobhistoryserver | Lantao Jin | Lantao Jin | -| [HDFS-12154](https://issues.apache.org/jira/browse/HDFS-12154) | Incorrect javadoc description in StorageLocationChecker#check | Major | . | Nandakumar | Nandakumar | +| [HDFS-12154](https://issues.apache.org/jira/browse/HDFS-12154) | Incorrect javadoc description in StorageLocationChecker#check | Major | . | Nanda kumar | Nanda kumar | | [YARN-6798](https://issues.apache.org/jira/browse/YARN-6798) | Fix NM startup failure with old state store due to version mismatch | Major | nodemanager | Ray Chiang | Botong Huang | | [HADOOP-14637](https://issues.apache.org/jira/browse/HADOOP-14637) | GenericTestUtils.waitFor needs to check condition again after max wait time | Major | . | Daniel Templeton | Daniel Templeton | | [YARN-6819](https://issues.apache.org/jira/browse/YARN-6819) | Application report fails if app rejected due to nodesize | Major | . | Bibin A Chundatt | Bibin A Chundatt | @@ -244,7 +247,6 @@ | [YARN-6844](https://issues.apache.org/jira/browse/YARN-6844) | AMRMClientImpl.checkNodeLabelExpression() has wrong error message | Minor | . 
| Daniel Templeton | Manikandan R | | [YARN-6150](https://issues.apache.org/jira/browse/YARN-6150) | TestContainerManagerSecurity tests for Yarn Server are flakey | Major | test | Daniel Sturman | Daniel Sturman | | [YARN-6307](https://issues.apache.org/jira/browse/YARN-6307) | Refactor FairShareComparator#compare | Major | fairscheduler | Yufei Gu | Yufei Gu | -| [YARN-6102](https://issues.apache.org/jira/browse/YARN-6102) | RMActiveService context to be updated with new RMContext on failover | Critical | . | Ajith S | Rohith Sharma K S | | [HADOOP-14578](https://issues.apache.org/jira/browse/HADOOP-14578) | Bind IPC connections to kerberos UPN host for proxy users | Major | ipc | Daryn Sharp | Daryn Sharp | | [HDFS-11896](https://issues.apache.org/jira/browse/HDFS-11896) | Non-dfsUsed will be doubled on dead node re-registration | Blocker | . | Brahma Reddy Battula | Brahma Reddy Battula | | [HADOOP-14692](https://issues.apache.org/jira/browse/HADOOP-14692) | Upgrade Apache Rat | Trivial | build | Allen Wittenauer | Allen Wittenauer | @@ -260,13 +262,13 @@ | [HADOOP-14644](https://issues.apache.org/jira/browse/HADOOP-14644) | Increase max heap size of Maven javadoc plugin | Major | test | Andras Bokor | Andras Bokor | | [HADOOP-14343](https://issues.apache.org/jira/browse/HADOOP-14343) | Wrong pid file name in error message when starting secure daemon | Minor | . | Andras Bokor | Andras Bokor | | [MAPREDUCE-6921](https://issues.apache.org/jira/browse/MAPREDUCE-6921) | TestUmbilicalProtocolWithJobToken#testJobTokenRpc fails | Major | . | Sonia Garudi | Sonia Garudi | -| [HADOOP-14676](https://issues.apache.org/jira/browse/HADOOP-14676) | Wrong default value for "fs.df.interval" | Major | common, conf, fs | Konstantin Shvachko | xiangguang zheng | +| [HADOOP-14676](https://issues.apache.org/jira/browse/HADOOP-14676) | Wrong default value for "fs.df.interval" | Major | common, conf, fs | Konstantin Shvachko | Sherwood Zheng | | [HADOOP-14701](https://issues.apache.org/jira/browse/HADOOP-14701) | Configuration can log misleading warnings about an attempt to override final parameter | Major | conf | Andrew Sherman | Andrew Sherman | -| [YARN-5731](https://issues.apache.org/jira/browse/YARN-5731) | Preemption calculation is not accurate when reserved containers are present in queue. | Major | capacity scheduler | Sunil G | Wangda Tan | +| [YARN-5731](https://issues.apache.org/jira/browse/YARN-5731) | Preemption calculation is not accurate when reserved containers are present in queue. | Major | capacity scheduler | Sunil Govindan | Wangda Tan | | [HADOOP-14683](https://issues.apache.org/jira/browse/HADOOP-14683) | FileStatus.compareTo binary compatible issue | Blocker | . | Sergey Shelukhin | Akira Ajisaka | | [HDFS-12107](https://issues.apache.org/jira/browse/HDFS-12107) | FsDatasetImpl#removeVolumes floods the logs when removing the volume | Major | . 
| Haohui Mai | Kelvin Chu | | [HADOOP-14702](https://issues.apache.org/jira/browse/HADOOP-14702) | Fix formatting issue and regression caused by conversion from APT to Markdown | Minor | documentation | Doris Gu | Doris Gu | -| [YARN-6872](https://issues.apache.org/jira/browse/YARN-6872) | Ensure apps could run given NodeLabels are disabled post RM switchover/restart | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-6872](https://issues.apache.org/jira/browse/YARN-6872) | Ensure apps could run given NodeLabels are disabled post RM switchover/restart | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-12217](https://issues.apache.org/jira/browse/HDFS-12217) | HDFS snapshots doesn't capture all open files when one of the open files is deleted | Major | snapshots | Manoj Govindassamy | Manoj Govindassamy | | [YARN-6846](https://issues.apache.org/jira/browse/YARN-6846) | Nodemanager can fail to fully delete application local directories when applications are killed | Critical | nodemanager | Jason Lowe | Jason Lowe | | [YARN-6678](https://issues.apache.org/jira/browse/YARN-6678) | Handle IllegalStateException in Async Scheduling mode of CapacityScheduler | Major | capacityscheduler | Tao Yang | Tao Yang | @@ -280,7 +282,6 @@ | [HADOOP-14730](https://issues.apache.org/jira/browse/HADOOP-14730) | Support protobuf FileStatus in AdlFileSystem | Major | . | Vishwajeet Dusane | Chris Douglas | | [HDFS-12198](https://issues.apache.org/jira/browse/HDFS-12198) | Document missing namenode metrics that were added recently | Minor | documentation | Yiqun Lin | Yiqun Lin | | [HADOOP-14680](https://issues.apache.org/jira/browse/HADOOP-14680) | Azure: IndexOutOfBoundsException in BlockBlobInputStream | Minor | fs/azure | Rajesh Balamohan | Thomas Marquardt | -| [YARN-6757](https://issues.apache.org/jira/browse/YARN-6757) | Refactor the usage of yarn.nodemanager.linux-container-executor.cgroups.mount-path | Minor | nodemanager | Miklos Szegedi | Miklos Szegedi | | [MAPREDUCE-6927](https://issues.apache.org/jira/browse/MAPREDUCE-6927) | MR job should only set tracking url if history was successfully written | Major | . | Eric Badger | Eric Badger | | [YARN-6890](https://issues.apache.org/jira/browse/YARN-6890) | If UI is not secured, we allow user to kill other users' job even yarn cluster is secured. | Critical | . | Sumana Sathish | Junping Du | | [HDFS-10326](https://issues.apache.org/jira/browse/HDFS-10326) | Disable setting tcp socket send/receive buffers for write pipelines | Major | datanode, hdfs | Daryn Sharp | Daryn Sharp | @@ -336,14 +337,12 @@ | [YARN-6756](https://issues.apache.org/jira/browse/YARN-6756) | ContainerRequest#executionTypeRequest causes NPE | Critical | . | Jian He | Jian He | | [HDFS-12191](https://issues.apache.org/jira/browse/HDFS-12191) | Provide option to not capture the accessTime change of a file to snapshot if no other modification has been done to this file | Major | hdfs, namenode | Yongjun Zhang | Yongjun Zhang | | [YARN-6982](https://issues.apache.org/jira/browse/YARN-6982) | Potential issue on setting AMContainerSpec#tokenConf to null before app is completed | Major | . 
| Rohith Sharma K S | Manikandan R | -| [YARN-6868](https://issues.apache.org/jira/browse/YARN-6868) | Add test scope to certain entries in hadoop-yarn-server-resourcemanager pom.xml | Major | yarn | Ray Chiang | Ray Chiang | | [HDFS-12336](https://issues.apache.org/jira/browse/HDFS-12336) | Listing encryption zones still fails when deleted EZ is not a direct child of snapshottable directory | Minor | encryption, hdfs | Wellington Chevreuil | Wellington Chevreuil | | [HADOOP-14814](https://issues.apache.org/jira/browse/HADOOP-14814) | Fix incompatible API change on FsServerDefaults to HADOOP-14104 | Blocker | . | Junping Du | Junping Du | | [MAPREDUCE-6931](https://issues.apache.org/jira/browse/MAPREDUCE-6931) | Remove TestDFSIO "Total Throughput" calculation | Critical | benchmarks, test | Dennis Huo | Dennis Huo | | [YARN-7115](https://issues.apache.org/jira/browse/YARN-7115) | Move BoundedAppender to org.hadoop.yarn.util pacakge | Major | . | Jian He | Jian He | | [YARN-7077](https://issues.apache.org/jira/browse/YARN-7077) | TestAMSimulator and TestNMSimulator fail | Major | test | Akira Ajisaka | Akira Ajisaka | | [YARN-7116](https://issues.apache.org/jira/browse/YARN-7116) | CapacityScheduler Web UI: Queue's AM usage is always show on per-user's AM usage. | Major | capacity scheduler, webapp | Wangda Tan | Wangda Tan | -| [YARN-6726](https://issues.apache.org/jira/browse/YARN-6726) | Fix issues with docker commands executed by container-executor | Major | nodemanager | Shane Kumpf | Shane Kumpf | | [HADOOP-14364](https://issues.apache.org/jira/browse/HADOOP-14364) | refresh changelog/release notes with newer Apache Yetus build | Major | build, documentation | Allen Wittenauer | Allen Wittenauer | | [HDFS-12317](https://issues.apache.org/jira/browse/HDFS-12317) | HDFS metrics render error in the page of Github | Minor | documentation, metrics | Yiqun Lin | Yiqun Lin | | [HADOOP-14824](https://issues.apache.org/jira/browse/HADOOP-14824) | Update ADLS SDK to 2.2.2 for MSI fix | Major | fs/adl | Atul Sikaria | Atul Sikaria | @@ -359,7 +358,7 @@ | [HADOOP-14820](https://issues.apache.org/jira/browse/HADOOP-14820) | Wasb mkdirs security checks inconsistent with HDFS | Major | fs/azure | Sivaguru Sankaridurg | Sivaguru Sankaridurg | | [HDFS-12359](https://issues.apache.org/jira/browse/HDFS-12359) | Re-encryption should operate with minimum KMS ACL requirements. | Major | encryption | Xiao Chen | Xiao Chen | | [HDFS-11882](https://issues.apache.org/jira/browse/HDFS-11882) | Precisely calculate acked length of striped block groups in updatePipeline | Critical | erasure-coding, test | Akira Ajisaka | Andrew Wang | -| [HDFS-12392](https://issues.apache.org/jira/browse/HDFS-12392) | Writing striped file failed due to different cell size | Major | erasure-coding | SammiChen | SammiChen | +| [HDFS-12392](https://issues.apache.org/jira/browse/HDFS-12392) | Writing striped file failed due to different cell size | Major | erasure-coding | Sammi Chen | Sammi Chen | | [YARN-7164](https://issues.apache.org/jira/browse/YARN-7164) | TestAMRMClientOnRMRestart fails sporadically with bind address in use | Major | test | Jason Lowe | Jason Lowe | | [YARN-6992](https://issues.apache.org/jira/browse/YARN-6992) | Kill application button is visible even if the application is FINISHED in RM UI | Major | . | Sumana Sathish | Suma Shivaprasad | | [HDFS-12357](https://issues.apache.org/jira/browse/HDFS-12357) | Let NameNode to bypass external attribute provider for special user | Major | . 
| Yongjun Zhang | Yongjun Zhang | @@ -378,7 +377,6 @@ | [HADOOP-14867](https://issues.apache.org/jira/browse/HADOOP-14867) | Update HDFS Federation setup document, for incorrect property name for secondary name node http address | Major | . | Bharat Viswanadham | Bharat Viswanadham | | [YARN-4727](https://issues.apache.org/jira/browse/YARN-4727) | Unable to override the $HADOOP\_CONF\_DIR env variable for container | Major | nodemanager | Terence Yim | Jason Lowe | | [MAPREDUCE-6957](https://issues.apache.org/jira/browse/MAPREDUCE-6957) | shuffle hangs after a node manager connection timeout | Major | mrv2 | Jooseong Kim | Jooseong Kim | -| [YARN-7146](https://issues.apache.org/jira/browse/YARN-7146) | Many RM unit tests failing with FairScheduler | Major | test | Robert Kanter | Robert Kanter | | [HDFS-12457](https://issues.apache.org/jira/browse/HDFS-12457) | Revert HDFS-11156 Add new op GETFILEBLOCKLOCATIONS to WebHDFS REST API | Major | webhdfs | Andrew Wang | Andrew Wang | | [HDFS-12378](https://issues.apache.org/jira/browse/HDFS-12378) | TestClientProtocolForPipelineRecovery#testZeroByteBlockRecovery fails on trunk | Blocker | test | Xiao Chen | Lei (Eddy) Xu | | [HDFS-12456](https://issues.apache.org/jira/browse/HDFS-12456) | TestNamenodeMetrics.testSyncAndBlockReportMetric fails | Minor | hdfs, metrics | Bharat Viswanadham | Bharat Viswanadham | @@ -386,7 +384,7 @@ | [HDFS-12424](https://issues.apache.org/jira/browse/HDFS-12424) | Datatable sorting on the Datanode Information page in the Namenode UI is broken | Major | . | Shawna Martell | Shawna Martell | | [HADOOP-14853](https://issues.apache.org/jira/browse/HADOOP-14853) | hadoop-mapreduce-client-app is not a client module | Major | . | Haibo Chen | Haibo Chen | | [HDFS-12323](https://issues.apache.org/jira/browse/HDFS-12323) | NameNode terminates after full GC thinking QJM unresponsive if full GC is much longer than timeout | Major | namenode, qjm | Erik Krogen | Erik Krogen | -| [HDFS-10701](https://issues.apache.org/jira/browse/HDFS-10701) | TestDFSStripedOutputStreamWithFailure#testBlockTokenExpired occasionally fails | Major | erasure-coding | Wei-Chiu Chuang | SammiChen | +| [HDFS-10701](https://issues.apache.org/jira/browse/HDFS-10701) | TestDFSStripedOutputStreamWithFailure#testBlockTokenExpired occasionally fails | Major | erasure-coding | Wei-Chiu Chuang | Sammi Chen | | [YARN-6977](https://issues.apache.org/jira/browse/YARN-6977) | Node information is not provided for non am containers in RM logs | Major | capacity scheduler | Sumana Sathish | Suma Shivaprasad | | [YARN-7149](https://issues.apache.org/jira/browse/YARN-7149) | Cross-queue preemption sometimes starves an underserved queue | Major | capacity scheduler | Eric Payne | Eric Payne | | [YARN-7192](https://issues.apache.org/jira/browse/YARN-7192) | Add a pluggable StateMachine Listener that is notified of NM Container State changes | Major | . | Arun Suresh | Arun Suresh | @@ -394,7 +392,6 @@ | [MAPREDUCE-6960](https://issues.apache.org/jira/browse/MAPREDUCE-6960) | Shuffle Handler prints disk error stack traces for every read failure. | Major | . | Kuhu Shukla | Kuhu Shukla | | [HDFS-12480](https://issues.apache.org/jira/browse/HDFS-12480) | TestNameNodeMetrics#testTransactionAndCheckpointMetrics Fails in trunk | Blocker | test | Brahma Reddy Battula | Hanisha Koneru | | [HDFS-11799](https://issues.apache.org/jira/browse/HDFS-11799) | Introduce a config to allow setting up write pipeline with fewer nodes than replication factor | Major | . 
| Yongjun Zhang | Brahma Reddy Battula | -| [HDFS-12449](https://issues.apache.org/jira/browse/HDFS-12449) | TestReconstructStripedFile.testNNSendsErasureCodingTasks randomly cannot finish in 60s | Major | erasure-coding | SammiChen | SammiChen | | [HDFS-12437](https://issues.apache.org/jira/browse/HDFS-12437) | Fix test setup in TestLeaseRecoveryStriped | Major | erasure-coding, test | Arpit Agarwal | Andrew Wang | | [YARN-7196](https://issues.apache.org/jira/browse/YARN-7196) | Fix finicky TestContainerManager tests | Major | . | Arun Suresh | Arun Suresh | | [YARN-6771](https://issues.apache.org/jira/browse/YARN-6771) | Use classloader inside configuration class to make new classes | Major | . | Jongyoul Lee | Jongyoul Lee | @@ -410,15 +407,20 @@ | [MAPREDUCE-6967](https://issues.apache.org/jira/browse/MAPREDUCE-6967) | gridmix/SleepReducer should use Time.monotonicNow for measuring durations | Minor | . | Chetna Chaudhari | Chetna Chaudhari | | [YARN-7153](https://issues.apache.org/jira/browse/YARN-7153) | Remove duplicated code in AMRMClientAsyncImpl.java | Minor | client | Sen Zhao | Sen Zhao | | [HADOOP-14897](https://issues.apache.org/jira/browse/HADOOP-14897) | Loosen compatibility guidelines for native dependencies | Blocker | documentation, native | Chris Douglas | Daniel Templeton | -| [HADOOP-14903](https://issues.apache.org/jira/browse/HADOOP-14903) | Add json-smart explicitly to pom.xml | Major | common | Ray Chiang | Ray Chiang | | [YARN-7118](https://issues.apache.org/jira/browse/YARN-7118) | AHS REST API can return NullPointerException | Major | . | Prabhu Joseph | Billie Rinaldi | | [HDFS-12495](https://issues.apache.org/jira/browse/HDFS-12495) | TestPendingInvalidateBlock#testPendingDeleteUnknownBlocks fails intermittently | Major | . | Eric Badger | Eric Badger | | [MAPREDUCE-6968](https://issues.apache.org/jira/browse/MAPREDUCE-6968) | Staging directory erasure coding config property has a typo | Major | client | Jason Lowe | Jason Lowe | | [HADOOP-14822](https://issues.apache.org/jira/browse/HADOOP-14822) | hadoop-project/pom.xml is executable | Minor | . | Akira Ajisaka | Ajay Kumar | -| [YARN-7157](https://issues.apache.org/jira/browse/YARN-7157) | Add admin configuration to filter per-user's apps in secure cluster | Major | webapp | Sunil G | Sunil G | +| [YARN-7157](https://issues.apache.org/jira/browse/YARN-7157) | Add admin configuration to filter per-user's apps in secure cluster | Major | webapp | Sunil Govindan | Sunil Govindan | | [YARN-7253](https://issues.apache.org/jira/browse/YARN-7253) | Shared Cache Manager daemon command listed as admin subcmd in yarn script | Trivial | . 
| Chris Trezzo | Chris Trezzo | | [YARN-7257](https://issues.apache.org/jira/browse/YARN-7257) | AggregatedLogsBlock reports a bad 'end' value as a bad 'start' value | Major | log-aggregation | Jason Lowe | Jason Lowe | | [HDFS-12458](https://issues.apache.org/jira/browse/HDFS-12458) | TestReencryptionWithKMS fails regularly | Major | encryption, test | Konstantin Shvachko | Xiao Chen | +| [HDFS-10576](https://issues.apache.org/jira/browse/HDFS-10576) | DiskBalancer followup work items | Major | balancer & mover | Anu Engineer | Anu Engineer | +| [YARN-6625](https://issues.apache.org/jira/browse/YARN-6625) | yarn application -list returns a tracking URL for AM that doesn't work in secured and HA environment | Major | amrmproxy | Yufei Gu | Yufei Gu | +| [YARN-7146](https://issues.apache.org/jira/browse/YARN-7146) | Many RM unit tests failing with FairScheduler | Major | test | Robert Kanter | Robert Kanter | +| [YARN-4742](https://issues.apache.org/jira/browse/YARN-4742) | [Umbrella] Enhancements to Distributed Scheduling | Major | . | Arun Suresh | Arun Suresh | +| [YARN-6102](https://issues.apache.org/jira/browse/YARN-6102) | RMActiveService context to be updated with new RMContext on failover | Critical | . | Ajith S | Rohith Sharma K S | +| [HADOOP-14903](https://issues.apache.org/jira/browse/HADOOP-14903) | Add json-smart explicitly to pom.xml | Major | common | Ray Chiang | Ray Chiang | ### TESTS: @@ -568,12 +570,11 @@ | [HADOOP-14660](https://issues.apache.org/jira/browse/HADOOP-14660) | wasb: improve throughput by 34% when account limit exceeded | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | | [YARN-7006](https://issues.apache.org/jira/browse/YARN-7006) | [ATSv2 Security] Changes for authentication for CollectorNodemanagerProtocol | Major | timelineserver | Varun Saxena | Varun Saxena | | [YARN-6900](https://issues.apache.org/jira/browse/YARN-6900) | ZooKeeper based implementation of the FederationStateStore | Major | federation, nodemanager, resourcemanager | Subru Krishnan | Íñigo Goiri | -| [HDFS-11082](https://issues.apache.org/jira/browse/HDFS-11082) | Provide replicated EC policy to replicate files | Critical | erasure-coding | Rakesh R | SammiChen | +| [HDFS-11082](https://issues.apache.org/jira/browse/HDFS-11082) | Provide replicated EC policy to replicate files | Critical | erasure-coding | Rakesh R | Sammi Chen | | [YARN-6988](https://issues.apache.org/jira/browse/YARN-6988) | container-executor fails for docker when command length \> 4096 B | Major | yarn | Eric Badger | Eric Badger | | [YARN-7038](https://issues.apache.org/jira/browse/YARN-7038) | [Atsv2 Security] CollectorNodemanagerProtocol RPC interface doesn't work when service authorization is enabled | Major | . | Rohith Sharma K S | Varun Saxena | | [HADOOP-14769](https://issues.apache.org/jira/browse/HADOOP-14769) | WASB: delete recursive should not fail if a file is deleted | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | | [HADOOP-14398](https://issues.apache.org/jira/browse/HADOOP-14398) | Modify documents for the FileSystem Builder API | Major | fs | Lei (Eddy) Xu | Lei (Eddy) Xu | -| [YARN-6852](https://issues.apache.org/jira/browse/YARN-6852) | [YARN-6223] Native code changes to support isolate GPU devices by using CGroups | Major | . 
| Wangda Tan | Wangda Tan | | [YARN-7041](https://issues.apache.org/jira/browse/YARN-7041) | Nodemanager NPE running jobs with security off | Major | timelineserver | Aaron Gresch | Varun Saxena | | [YARN-6134](https://issues.apache.org/jira/browse/YARN-6134) | [ATSv2 Security] Regenerate delegation token for app just before token expires if app collector is active | Major | timelineserver | Varun Saxena | Varun Saxena | | [YARN-6979](https://issues.apache.org/jira/browse/YARN-6979) | Add flag to notify all types of container updates to NM via NodeHeartbeatResponse | Major | . | Arun Suresh | kartheek muthyala | @@ -585,16 +586,16 @@ | [YARN-5603](https://issues.apache.org/jira/browse/YARN-5603) | Metrics for Federation StateStore | Major | . | Subru Krishnan | Ellen Hui | | [YARN-6923](https://issues.apache.org/jira/browse/YARN-6923) | Metrics for Federation Router | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | | [HADOOP-14787](https://issues.apache.org/jira/browse/HADOOP-14787) | AliyunOSS: Implement the \`createNonRecursive\` operator | Major | fs, fs/oss | Genmao Yu | Genmao Yu | -| [HADOOP-14649](https://issues.apache.org/jira/browse/HADOOP-14649) | Update aliyun-sdk-oss version to 2.8.1 | Major | . | Ray Chiang | Genmao Yu | +| [HADOOP-14649](https://issues.apache.org/jira/browse/HADOOP-14649) | Update aliyun-sdk-oss version to 2.8.1 | Major | fs/oss | Ray Chiang | Genmao Yu | | [YARN-7047](https://issues.apache.org/jira/browse/YARN-7047) | Moving logging APIs over to slf4j in hadoop-yarn-server-nodemanager | Major | . | Yeliang Cang | Yeliang Cang | | [YARN-6876](https://issues.apache.org/jira/browse/YARN-6876) | Create an abstract log writer for extendability | Major | . | Xuan Gong | Xuan Gong | | [YARN-6386](https://issues.apache.org/jira/browse/YARN-6386) | Show decommissioning nodes in new YARN UI | Major | webapp | Elek, Marton | Elek, Marton | | [YARN-7010](https://issues.apache.org/jira/browse/YARN-7010) | Federation: routing REST invocations transparently to multiple RMs (part 2 - getApps) | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | -| [YARN-5219](https://issues.apache.org/jira/browse/YARN-5219) | When an export var command fails in launch\_container.sh, the full container launch should fail | Major | . | Hitesh Shah | Sunil G | +| [YARN-5219](https://issues.apache.org/jira/browse/YARN-5219) | When an export var command fails in launch\_container.sh, the full container launch should fail | Major | . | Hitesh Shah | Sunil Govindan | | [HADOOP-14802](https://issues.apache.org/jira/browse/HADOOP-14802) | Add support for using container saskeys for all accesses | Major | fs/azure | Sivaguru Sankaridurg | Sivaguru Sankaridurg | | [YARN-7094](https://issues.apache.org/jira/browse/YARN-7094) | Document the current known issue with server-side NM graceful decom | Blocker | graceful | Robert Kanter | Robert Kanter | | [YARN-7095](https://issues.apache.org/jira/browse/YARN-7095) | Federation: routing getNode/getNodes/getMetrics REST invocations transparently to multiple RMs | Major | . 
| Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | -| [YARN-6033](https://issues.apache.org/jira/browse/YARN-6033) | Add support for sections in container-executor configuration file | Major | nodemanager | Varun Vasudev | Varun Vasudev | +| [YARN-6726](https://issues.apache.org/jira/browse/YARN-6726) | Fix issues with docker commands executed by container-executor | Major | nodemanager | Shane Kumpf | Shane Kumpf | | [YARN-6877](https://issues.apache.org/jira/browse/YARN-6877) | Create an abstract log reader for extendability | Major | . | Xuan Gong | Xuan Gong | | [YARN-7071](https://issues.apache.org/jira/browse/YARN-7071) | Add vcores and number of containers in new YARN UI node heat map | Major | yarn-ui-v2 | Abdullah Yousufi | Abdullah Yousufi | | [YARN-7075](https://issues.apache.org/jira/browse/YARN-7075) | Better styling for donut charts in new YARN UI | Major | . | Da Ding | Da Ding | @@ -624,15 +625,17 @@ | [HDFS-12473](https://issues.apache.org/jira/browse/HDFS-12473) | Change hosts JSON file format | Major | . | Ming Ma | Ming Ma | | [HDFS-11035](https://issues.apache.org/jira/browse/HDFS-11035) | Better documentation for maintenace mode and upgrade domain | Major | datanode, documentation | Wei-Chiu Chuang | Ming Ma | | [YARN-4266](https://issues.apache.org/jira/browse/YARN-4266) | Allow users to enter containers as UID:GID pair instead of by username | Major | yarn | Sidharta Seethana | luhuichun | -| [MAPREDUCE-6947](https://issues.apache.org/jira/browse/MAPREDUCE-6947) | Moving logging APIs over to slf4j in hadoop-mapreduce-examples | Major | . | Gergely Novák | Gergely Novák | +| [MAPREDUCE-6947](https://issues.apache.org/jira/browse/MAPREDUCE-6947) | Moving logging APIs over to slf4j in hadoop-mapreduce-examples | Major | . | Gergely Novák | Gergely Novák | | [HADOOP-14799](https://issues.apache.org/jira/browse/HADOOP-14799) | Update nimbus-jose-jwt to 4.41.1 | Major | . | Ray Chiang | Ray Chiang | | [HADOOP-14892](https://issues.apache.org/jira/browse/HADOOP-14892) | MetricsSystemImpl should use Time.monotonicNow for measuring durations | Minor | . | Chetna Chaudhari | Chetna Chaudhari | | [HADOOP-14881](https://issues.apache.org/jira/browse/HADOOP-14881) | LoadGenerator should use Time.monotonicNow() to measure durations | Major | . | Chetna Chaudhari | Bharat Viswanadham | -| [HADOOP-14220](https://issues.apache.org/jira/browse/HADOOP-14220) | Enhance S3GuardTool with bucket-info and set-capacity commands, tests | Major | fs/s3 | Steve Loughran | Steve Loughran | | [HADOOP-14893](https://issues.apache.org/jira/browse/HADOOP-14893) | WritableRpcEngine should use Time.monotonicNow | Minor | . | Chetna Chaudhari | Chetna Chaudhari | | [HADOOP-14890](https://issues.apache.org/jira/browse/HADOOP-14890) | Move up to AWS SDK 1.11.199 | Blocker | build, fs/s3 | Steve Loughran | Steve Loughran | | [HDFS-12386](https://issues.apache.org/jira/browse/HDFS-12386) | Add fsserver defaults call to WebhdfsFileSystem. 
| Minor | webhdfs | Rushabh S Shah | Rushabh S Shah | | [YARN-6691](https://issues.apache.org/jira/browse/YARN-6691) | Update YARN daemon startup/shutdown scripts to include Router service | Major | nodemanager, resourcemanager | Subru Krishnan | Giovanni Matteo Fumarola | +| [HADOOP-14220](https://issues.apache.org/jira/browse/HADOOP-14220) | Enhance S3GuardTool with bucket-info and set-capacity commands, tests | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-6033](https://issues.apache.org/jira/browse/YARN-6033) | Add support for sections in container-executor configuration file | Major | nodemanager | Varun Vasudev | Varun Vasudev | +| [YARN-6852](https://issues.apache.org/jira/browse/YARN-6852) | [YARN-6223] Native code changes to support isolate GPU devices by using CGroups | Major | . | Wangda Tan | Wangda Tan | ### OTHER: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/RELEASENOTES.3.0.0-beta1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/RELEASENOTES.3.0.0-beta1.md index c7999334195..203654ea77d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/RELEASENOTES.3.0.0-beta1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0-beta1/RELEASENOTES.3.0.0-beta1.md @@ -374,4 +374,17 @@ This allows users to: \* define their own erasure coding policies thru an xml file and CLI command. The added policies will be persisted into fsimage. +--- + +* [YARN-2915](https://issues.apache.org/jira/browse/YARN-2915) | *Major* | **Enable YARN RM scale out via federation using multiple RM's** + +A federation-based approach to transparently scale a single YARN cluster to tens of thousands of nodes, by federating multiple YARN standalone clusters (sub-clusters). The applications running in this federated environment will see a single massive YARN cluster and will be able to schedule tasks on any node of the federated cluster. Under the hood, the federation system will negotiate with sub-clusters' ResourceManagers and provide resources to the application. The goal is to allow an individual job to “span” sub-clusters seamlessly. + + +--- + +* [YARN-5220](https://issues.apache.org/jira/browse/YARN-5220) | *Major* | **Scheduling of OPPORTUNISTIC containers through YARN RM** + +This extends the centralized YARN RM to enable the scheduling of OPPORTUNISTIC containers in a centralized fashion. +This way, users can use OPPORTUNISTIC containers to improve the cluster's utilization, without needing to enable distributed scheduling.
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/CHANGES.3.0.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/CHANGELOG.3.0.0.md similarity index 96% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/CHANGES.3.0.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/CHANGELOG.3.0.0.md index a0c59da696b..8365f0e737d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/CHANGES.3.0.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/CHANGELOG.3.0.0.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 3.0.0 - 2017-12-08 +## Release 3.0.0 - 2017-12-13 ### INCOMPATIBLE CHANGES: @@ -48,6 +48,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | +| [MAPREDUCE-6829](https://issues.apache.org/jira/browse/MAPREDUCE-6829) | Add peak memory usage counter for each task | Major | mrv2 | Yufei Gu | Miklos Szegedi | | [YARN-7045](https://issues.apache.org/jira/browse/YARN-7045) | Remove FSLeafQueue#addAppSchedulable | Major | fairscheduler | Yufei Gu | Sen Zhao | | [YARN-7240](https://issues.apache.org/jira/browse/YARN-7240) | Add more states and transitions to stabilize the NM Container state machine | Major | . | Arun Suresh | kartheek muthyala | | [HADOOP-14909](https://issues.apache.org/jira/browse/HADOOP-14909) | Fix the word of "erasure encoding" in the top page | Trivial | documentation | Takanobu Asanuma | Takanobu Asanuma | @@ -59,7 +60,7 @@ | [YARN-7207](https://issues.apache.org/jira/browse/YARN-7207) | Cache the RM proxy server address | Major | RM | Yufei Gu | Yufei Gu | | [HADOOP-14939](https://issues.apache.org/jira/browse/HADOOP-14939) | Update project release notes with HDFS-10467 for 3.0.0 | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-12573](https://issues.apache.org/jira/browse/HDFS-12573) | Divide the total block metrics into replica and ec | Major | erasure-coding, metrics, namenode | Takanobu Asanuma | Takanobu Asanuma | -| [HDFS-12553](https://issues.apache.org/jira/browse/HDFS-12553) | Add nameServiceId to QJournalProtocol | Major | qjm | Bharat Viswanadham | Bharat Viswanadham | +| [HDFS-12553](https://issues.apache.org/jira/browse/HDFS-12553) | Add nameServiceId to QJournalProtocol | Major | journal-node | Bharat Viswanadham | Bharat Viswanadham | | [HDFS-12603](https://issues.apache.org/jira/browse/HDFS-12603) | Enable async edit logging by default | Major | namenode | Andrew Wang | Andrew Wang | | [HDFS-12642](https://issues.apache.org/jira/browse/HDFS-12642) | Log block and datanode details in BlockRecoveryWorker | Major | datanode | Xiao Chen | Xiao Chen | | [HADOOP-14938](https://issues.apache.org/jira/browse/HADOOP-14938) | Configuration.updatingResource map should be initialized lazily | Major | . | Misha Dmitriev | Misha Dmitriev | @@ -69,7 +70,7 @@ | [HDFS-12619](https://issues.apache.org/jira/browse/HDFS-12619) | Do not catch and throw unchecked exceptions if IBRs fail to process | Minor | namenode | Wei-Chiu Chuang | Wei-Chiu Chuang | | [HADOOP-14771](https://issues.apache.org/jira/browse/HADOOP-14771) | hadoop-client does not include hadoop-yarn-client | Critical | common | Haibo Chen | Ajay Kumar | | [YARN-7359](https://issues.apache.org/jira/browse/YARN-7359) | TestAppManager.testQueueSubmitWithNoPermission() should be scheduler agnostic | Minor | . 
| Haibo Chen | Haibo Chen | -| [HDFS-12448](https://issues.apache.org/jira/browse/HDFS-12448) | Make sure user defined erasure coding policy ID will not overflow | Major | erasure-coding | SammiChen | Huafeng Wang | +| [HDFS-12448](https://issues.apache.org/jira/browse/HDFS-12448) | Make sure user defined erasure coding policy ID will not overflow | Major | erasure-coding | Sammi Chen | Huafeng Wang | | [HADOOP-14944](https://issues.apache.org/jira/browse/HADOOP-14944) | Add JvmMetrics to KMS | Major | kms | Xiao Chen | Xiao Chen | | [YARN-7261](https://issues.apache.org/jira/browse/YARN-7261) | Add debug message for better download latency monitoring | Major | nodemanager | Yufei Gu | Yufei Gu | | [YARN-7357](https://issues.apache.org/jira/browse/YARN-7357) | Several methods in TestZKRMStateStore.TestZKRMStateStoreTester.TestZKRMStateStoreInternal should have @Override annotations | Trivial | resourcemanager | Daniel Templeton | Sen Zhao | @@ -82,7 +83,6 @@ | [YARN-7262](https://issues.apache.org/jira/browse/YARN-7262) | Add a hierarchy into the ZKRMStateStore for delegation token znodes to prevent jute buffer overflow | Major | . | Robert Kanter | Robert Kanter | | [YARN-7397](https://issues.apache.org/jira/browse/YARN-7397) | Reduce lock contention in FairScheduler#getAppWeight() | Major | fairscheduler | Daniel Templeton | Daniel Templeton | | [HADOOP-14992](https://issues.apache.org/jira/browse/HADOOP-14992) | Upgrade Avro patch version | Major | build | Chris Douglas | Bharat Viswanadham | -| [YARN-5326](https://issues.apache.org/jira/browse/YARN-5326) | Support for recurring reservations in the YARN ReservationSystem | Major | resourcemanager | Subru Krishnan | Carlo Curino | | [YARN-6413](https://issues.apache.org/jira/browse/YARN-6413) | FileSystem based Yarn Registry implementation | Major | amrmproxy, api, resourcemanager | Ellen Hui | Ellen Hui | | [HDFS-12482](https://issues.apache.org/jira/browse/HDFS-12482) | Provide a configuration to adjust the weight of EC recovery tasks to adjust the speed of recovery | Minor | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HDFS-12744](https://issues.apache.org/jira/browse/HDFS-12744) | More logs when short-circuit read is failed and disabled | Major | datanode | Weiwei Yang | Weiwei Yang | @@ -98,6 +98,8 @@ | [HADOOP-14876](https://issues.apache.org/jira/browse/HADOOP-14876) | Create downstream developer docs from the compatibility guidelines | Critical | documentation | Daniel Templeton | Daniel Templeton | | [HADOOP-14112](https://issues.apache.org/jira/browse/HADOOP-14112) | Über-jira adl:// Azure Data Lake Phase I: Stabilization | Major | fs/adl | Steve Loughran | John Zhuge | | [HADOOP-15104](https://issues.apache.org/jira/browse/HADOOP-15104) | AliyunOSS: change the default value of max error retry | Major | fs/oss | wujinhu | wujinhu | +| [HADOOP-13204](https://issues.apache.org/jira/browse/HADOOP-13204) | Über-jira: S3a phase III: scale and tuning | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-14552](https://issues.apache.org/jira/browse/HADOOP-14552) | Über-jira: WASB client phase II: performance and testing | Major | fs/azure | Steve Loughran | Thomas Marquardt | ### BUG FIXES: @@ -149,7 +151,7 @@ | [HDFS-12612](https://issues.apache.org/jira/browse/HDFS-12612) | DFSStripedOutputStream#close will throw if called a second time with a failed streamer | Major | erasure-coding | Andrew Wang | Lei (Eddy) Xu | | [HADOOP-14958](https://issues.apache.org/jira/browse/HADOOP-14958) | CLONE - Fix source-level 
compatibility after HADOOP-11252 | Blocker | . | Junping Du | Junping Du | | [YARN-7294](https://issues.apache.org/jira/browse/YARN-7294) | TestSignalContainer#testSignalRequestDeliveryToNM fails intermittently with Fair scheduler | Major | . | Miklos Szegedi | Miklos Szegedi | -| [YARN-7170](https://issues.apache.org/jira/browse/YARN-7170) | Improve bower dependencies for YARN UI v2 | Critical | webapp | Sunil G | Sunil G | +| [YARN-7170](https://issues.apache.org/jira/browse/YARN-7170) | Improve bower dependencies for YARN UI v2 | Critical | webapp | Sunil Govindan | Sunil Govindan | | [YARN-7355](https://issues.apache.org/jira/browse/YARN-7355) | TestDistributedShell should be scheduler agnostic | Major | . | Haibo Chen | Haibo Chen | | [HDFS-12497](https://issues.apache.org/jira/browse/HDFS-12497) | Re-enable TestDFSStripedOutputStreamWithFailure tests | Major | erasure-coding | Andrew Wang | Huafeng Wang | | [HADOOP-14942](https://issues.apache.org/jira/browse/HADOOP-14942) | DistCp#cleanup() should check whether jobFS is null | Minor | . | Ted Yu | Andras Bokor | @@ -170,7 +172,7 @@ | [HADOOP-14986](https://issues.apache.org/jira/browse/HADOOP-14986) | Enforce JDK limitations | Major | build | Chris Douglas | Chris Douglas | | [HADOOP-14991](https://issues.apache.org/jira/browse/HADOOP-14991) | Add missing figures to Resource Estimator tool | Major | . | Subru Krishnan | Rui Li | | [YARN-7299](https://issues.apache.org/jira/browse/YARN-7299) | Fix TestDistributedScheduler | Major | . | Jason Lowe | Arun Suresh | -| [YARN-6747](https://issues.apache.org/jira/browse/YARN-6747) | TestFSAppStarvation.testPreemptionEnable fails intermittently | Major | . | Sunil G | Miklos Szegedi | +| [YARN-6747](https://issues.apache.org/jira/browse/YARN-6747) | TestFSAppStarvation.testPreemptionEnable fails intermittently | Major | . | Sunil Govindan | Miklos Szegedi | | [YARN-7336](https://issues.apache.org/jira/browse/YARN-7336) | Unsafe cast from long to int Resource.hashCode() method | Critical | resourcemanager | Daniel Templeton | Miklos Szegedi | | [YARN-7244](https://issues.apache.org/jira/browse/YARN-7244) | ShuffleHandler is not aware of disks that are added | Major | . | Kuhu Shukla | Kuhu Shukla | | [HADOOP-14990](https://issues.apache.org/jira/browse/HADOOP-14990) | Clean up jdiff xml files added for 2.8.2 release | Blocker | . | Subru Krishnan | Junping Du | @@ -180,7 +182,7 @@ | [HDFS-12219](https://issues.apache.org/jira/browse/HDFS-12219) | Javadoc for FSNamesystem#getMaxObjects is incorrect | Trivial | . | Erik Krogen | Erik Krogen | | [YARN-7412](https://issues.apache.org/jira/browse/YARN-7412) | test\_docker\_util.test\_check\_mount\_permitted() is failing | Critical | nodemanager | Haibo Chen | Eric Badger | | [MAPREDUCE-6999](https://issues.apache.org/jira/browse/MAPREDUCE-6999) | Fix typo "onf" in DynamicInputChunk.java | Trivial | . 
| fang zhenyi | fang zhenyi | -| [YARN-7364](https://issues.apache.org/jira/browse/YARN-7364) | Queue dash board in new YARN UI has incorrect values | Critical | webapp | Sunil G | Sunil G | +| [YARN-7364](https://issues.apache.org/jira/browse/YARN-7364) | Queue dash board in new YARN UI has incorrect values | Critical | webapp | Sunil Govindan | Sunil Govindan | | [YARN-7370](https://issues.apache.org/jira/browse/YARN-7370) | Preemption properties should be refreshable | Major | capacity scheduler, scheduler preemption | Eric Payne | Gergely Novák | | [YARN-7400](https://issues.apache.org/jira/browse/YARN-7400) | incorrect log preview displayed in jobhistory server ui | Major | yarn | Santhosh B Gowda | Xuan Gong | | [HADOOP-15013](https://issues.apache.org/jira/browse/HADOOP-15013) | Fix ResourceEstimator findbugs issues | Blocker | . | Allen Wittenauer | Arun Suresh | @@ -189,7 +191,7 @@ | [HDFS-12725](https://issues.apache.org/jira/browse/HDFS-12725) | BlockPlacementPolicyRackFaultTolerant fails with very uneven racks | Major | erasure-coding | Xiao Chen | Xiao Chen | | [YARN-5085](https://issues.apache.org/jira/browse/YARN-5085) | Add support for change of container ExecutionType | Major | . | Arun Suresh | Arun Suresh | | [YARN-7428](https://issues.apache.org/jira/browse/YARN-7428) | Add containerId to Localizer failed logs | Minor | nodemanager | Prabhu Joseph | Prabhu Joseph | -| [YARN-7410](https://issues.apache.org/jira/browse/YARN-7410) | Cleanup FixedValueResource to avoid dependency to ResourceUtils | Major | resourcemanager | Sunil G | Wangda Tan | +| [YARN-7410](https://issues.apache.org/jira/browse/YARN-7410) | Cleanup FixedValueResource to avoid dependency to ResourceUtils | Major | resourcemanager | Sunil Govindan | Wangda Tan | | [YARN-7360](https://issues.apache.org/jira/browse/YARN-7360) | TestRM.testNMTokenSentForNormalContainer() should be scheduler agnostic | Major | test | Haibo Chen | Haibo Chen | | [HADOOP-15018](https://issues.apache.org/jira/browse/HADOOP-15018) | Update JAVA\_HOME in create-release for Xenial Dockerfile | Blocker | build | Andrew Wang | Andrew Wang | | [HDFS-12788](https://issues.apache.org/jira/browse/HDFS-12788) | Reset the upload button when file upload fails | Critical | ui, webhdfs | Brahma Reddy Battula | Brahma Reddy Battula | @@ -201,7 +203,7 @@ | [YARN-7388](https://issues.apache.org/jira/browse/YARN-7388) | TestAMRestart should be scheduler agnostic | Major | . | Haibo Chen | Haibo Chen | | [YARN-7465](https://issues.apache.org/jira/browse/YARN-7465) | start-yarn.sh fails to start ResourceManager unless running as root | Blocker | . | Sean Mackrory | | | [HADOOP-8522](https://issues.apache.org/jira/browse/HADOOP-8522) | ResetableGzipOutputStream creates invalid gzip files when finish() and resetState() are used | Major | io | Mike Percy | Mike Percy | -| [YARN-7475](https://issues.apache.org/jira/browse/YARN-7475) | Fix Container log link in new YARN UI | Major | . | Sunil G | Sunil G | +| [YARN-7475](https://issues.apache.org/jira/browse/YARN-7475) | Fix Container log link in new YARN UI | Major | . | Sunil Govindan | Sunil Govindan | | [HADOOP-15036](https://issues.apache.org/jira/browse/HADOOP-15036) | Update LICENSE.txt for HADOOP-14840 | Major | . | Arun Suresh | Arun Suresh | | [YARN-6078](https://issues.apache.org/jira/browse/YARN-6078) | Containers stuck in Localizing state | Major | . 
| Jagadish | Billie Rinaldi | | [YARN-7469](https://issues.apache.org/jira/browse/YARN-7469) | Capacity Scheduler Intra-queue preemption: User can starve if newest app is exactly at user limit | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | @@ -213,14 +215,15 @@ | [YARN-7509](https://issues.apache.org/jira/browse/YARN-7509) | AsyncScheduleThread and ResourceCommitterService are still running after RM is transitioned to standby | Critical | . | Tao Yang | Tao Yang | | [YARN-7589](https://issues.apache.org/jira/browse/YARN-7589) | TestPBImplRecords fails with NullPointerException | Major | . | Jason Lowe | Daniel Templeton | | [HADOOP-15058](https://issues.apache.org/jira/browse/HADOOP-15058) | create-release site build outputs dummy shaded jars due to skipShade | Blocker | . | Andrew Wang | Andrew Wang | -| [YARN-7381](https://issues.apache.org/jira/browse/YARN-7381) | Enable the configuration: yarn.nodemanager.log-container-debug-info.enabled by default in yarn-default.xml | Critical | . | Xuan Gong | Xuan Gong | | [HDFS-12889](https://issues.apache.org/jira/browse/HDFS-12889) | Router UI is missing robots.txt file | Major | . | Bharat Viswanadham | Bharat Viswanadham | | [HDFS-12872](https://issues.apache.org/jira/browse/HDFS-12872) | EC Checksum broken when BlockAccessToken is enabled | Critical | erasure-coding | Xiao Chen | Xiao Chen | | [HDFS-11576](https://issues.apache.org/jira/browse/HDFS-11576) | Block recovery will fail indefinitely if recovery time \> heartbeat interval | Critical | datanode, hdfs, namenode | Lukas Majercak | Lukas Majercak | | [YARN-7607](https://issues.apache.org/jira/browse/YARN-7607) | Remove the trailing duplicated timestamp in container diagnostics message | Minor | nodemanager | Weiwei Yang | Weiwei Yang | | [HDFS-12840](https://issues.apache.org/jira/browse/HDFS-12840) | Creating a file with non-default EC policy in a EC zone is not correctly serialized in the editlog | Blocker | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | -| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" | Blocker | fs/oss | Chris Douglas | SammiChen | +| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" | Blocker | fs/oss | Chris Douglas | Sammi Chen | | [HADOOP-15059](https://issues.apache.org/jira/browse/HADOOP-15059) | 3.0 deployment cannot work with old version MR tar ball which breaks rolling upgrade | Blocker | security | Junping Du | Jason Lowe | +| [YARN-7591](https://issues.apache.org/jira/browse/YARN-7591) | NPE in async-scheduling mode of CapacityScheduler | Critical | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-7381](https://issues.apache.org/jira/browse/YARN-7381) | Enable the configuration: yarn.nodemanager.log-container-debug-info.enabled by default in yarn-default.xml | Critical | . | Xuan Gong | Xuan Gong | ### SUB-TASKS: @@ -254,23 +257,23 @@ | [YARN-5947](https://issues.apache.org/jira/browse/YARN-5947) | Create LeveldbConfigurationStore class using Leveldb as backing store | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-6322](https://issues.apache.org/jira/browse/YARN-6322) | Disable queue refresh when configuration mutation is enabled | Major | . 
| Jonathan Hung | Jonathan Hung | | [HDFS-11826](https://issues.apache.org/jira/browse/HDFS-11826) | Federation Namenode Heartbeat | Major | fs | Íñigo Goiri | Íñigo Goiri | -| [YARN-6788](https://issues.apache.org/jira/browse/YARN-6788) | Improve performance of resource profile branch | Blocker | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-6788](https://issues.apache.org/jira/browse/YARN-6788) | Improve performance of resource profile branch | Blocker | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-10880](https://issues.apache.org/jira/browse/HDFS-10880) | Federation Mount Table State Store internal API | Major | fs | Jason Kace | Íñigo Goiri | | [HDFS-10646](https://issues.apache.org/jira/browse/HDFS-10646) | Federation admin tool | Major | fs | Íñigo Goiri | Íñigo Goiri | | [YARN-6994](https://issues.apache.org/jira/browse/YARN-6994) | Remove last uses of Long from resource types code | Minor | resourcemanager | Daniel Templeton | Daniel Templeton | -| [YARN-6892](https://issues.apache.org/jira/browse/YARN-6892) | Improve API implementation in Resources and DominantResourceCalculator class | Major | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-6892](https://issues.apache.org/jira/browse/YARN-6892) | Improve API implementation in Resources and DominantResourceCalculator class | Major | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-11554](https://issues.apache.org/jira/browse/HDFS-11554) | [Documentation] Router-based federation documentation | Minor | fs | Íñigo Goiri | Íñigo Goiri | | [HDFS-12312](https://issues.apache.org/jira/browse/HDFS-12312) | Rebasing HDFS-10467 (2) | Major | . | Íñigo Goiri | Íñigo Goiri | | [YARN-6610](https://issues.apache.org/jira/browse/YARN-6610) | DominantResourceCalculator#getResourceAsValue dominant param is updated to handle multiple resources | Critical | resourcemanager | Daniel Templeton | Daniel Templeton | | [YARN-7030](https://issues.apache.org/jira/browse/YARN-7030) | Performance optimizations in Resource and ResourceUtils class | Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan | | [YARN-7042](https://issues.apache.org/jira/browse/YARN-7042) | Clean up unit tests after YARN-6610 | Major | test | Daniel Templeton | Daniel Templeton | -| [YARN-6789](https://issues.apache.org/jira/browse/YARN-6789) | Add Client API to get all supported resource types from RM | Major | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-6789](https://issues.apache.org/jira/browse/YARN-6789) | Add Client API to get all supported resource types from RM | Major | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-6781](https://issues.apache.org/jira/browse/YARN-6781) | ResourceUtils#initializeResourcesMap takes an unnecessary Map parameter | Minor | resourcemanager | Daniel Templeton | Yu-Tang Lin | | [HDFS-10631](https://issues.apache.org/jira/browse/HDFS-10631) | Federation State Store ZooKeeper implementation | Major | fs | Íñigo Goiri | Jason Kace | | [YARN-7067](https://issues.apache.org/jira/browse/YARN-7067) | Optimize ResourceType information display in UI | Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan | -| [YARN-7039](https://issues.apache.org/jira/browse/YARN-7039) | Fix javac and javadoc errors in YARN-3926 branch | Major | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-7039](https://issues.apache.org/jira/browse/YARN-7039) | Fix javac and javadoc errors in YARN-3926 branch | Major | nodemanager, 
resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-7024](https://issues.apache.org/jira/browse/YARN-7024) | Fix issues on recovery in LevelDB store | Major | . | Jonathan Hung | Jonathan Hung | -| [YARN-7093](https://issues.apache.org/jira/browse/YARN-7093) | Improve log message in ResourceUtils | Trivial | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-7093](https://issues.apache.org/jira/browse/YARN-7093) | Improve log message in ResourceUtils | Trivial | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-6933](https://issues.apache.org/jira/browse/YARN-6933) | ResourceUtils.DISALLOWED\_NAMES check is duplicated | Major | resourcemanager | Daniel Templeton | Manikandan R | | [YARN-5328](https://issues.apache.org/jira/browse/YARN-5328) | Plan/ResourceAllocation data structure enhancements required to support recurring reservations in ReservationSystem | Major | resourcemanager | Subru Krishnan | Subru Krishnan | | [HDFS-12384](https://issues.apache.org/jira/browse/HDFS-12384) | Fixing compilation issue with BanDuplicateClasses | Major | fs | Íñigo Goiri | Íñigo Goiri | @@ -308,17 +311,17 @@ | [HDFS-12541](https://issues.apache.org/jira/browse/HDFS-12541) | Extend TestSafeModeWithStripedFile with a random EC policy | Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma | | [YARN-7304](https://issues.apache.org/jira/browse/YARN-7304) | Merge YARN-5734 branch to branch-3.0 | Major | . | Xuan Gong | Xuan Gong | | [HDFS-12547](https://issues.apache.org/jira/browse/HDFS-12547) | Extend TestQuotaWithStripedBlocks with a random EC policy | Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma | -| [YARN-7205](https://issues.apache.org/jira/browse/YARN-7205) | Log improvements for the ResourceUtils | Major | nodemanager, resourcemanager | Jian He | Sunil G | +| [YARN-7205](https://issues.apache.org/jira/browse/YARN-7205) | Log improvements for the ResourceUtils | Major | nodemanager, resourcemanager | Jian He | Sunil Govindan | | [HDFS-12637](https://issues.apache.org/jira/browse/HDFS-12637) | Extend TestDistributedFileSystemWithECFile with a random EC policy | Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma | | [YARN-7275](https://issues.apache.org/jira/browse/YARN-7275) | NM Statestore cleanup for Container updates | Blocker | . | Arun Suresh | kartheek muthyala | | [YARN-7311](https://issues.apache.org/jira/browse/YARN-7311) | Fix TestRMWebServicesReservation parametrization for fair scheduler | Blocker | fairscheduler, reservation system | Yufei Gu | Yufei Gu | | [YARN-6546](https://issues.apache.org/jira/browse/YARN-6546) | SLS is slow while loading 10k queues | Major | scheduler-load-simulator | Yufei Gu | Yufei Gu | | [YARN-7345](https://issues.apache.org/jira/browse/YARN-7345) | GPU Isolation: Incorrect minor device numbers written to devices.deny file | Major | . | Jonathan Hung | Jonathan Hung | -| [YARN-7338](https://issues.apache.org/jira/browse/YARN-7338) | Support same origin policy for cross site scripting prevention. | Major | yarn-ui-v2 | Vrushali C | Sunil G | +| [YARN-7338](https://issues.apache.org/jira/browse/YARN-7338) | Support same origin policy for cross site scripting prevention. | Major | yarn-ui-v2 | Vrushali C | Sunil Govindan | | [HDFS-12620](https://issues.apache.org/jira/browse/HDFS-12620) | Backporting HDFS-10467 to branch-2 | Major | . 
| Íñigo Goiri | Íñigo Goiri | | [YARN-4090](https://issues.apache.org/jira/browse/YARN-4090) | Make Collections.sort() more efficient by caching resource usage | Major | fairscheduler | Xianyin Xin | Yufei Gu | | [YARN-7353](https://issues.apache.org/jira/browse/YARN-7353) | Docker permitted volumes don't properly check for directories | Major | yarn | Eric Badger | Eric Badger | -| [YARN-6984](https://issues.apache.org/jira/browse/YARN-6984) | DominantResourceCalculator.isAnyMajorResourceZero() should test all resources | Major | scheduler | Daniel Templeton | Sunil G | +| [YARN-6984](https://issues.apache.org/jira/browse/YARN-6984) | DominantResourceCalculator.isAnyMajorResourceZero() should test all resources | Major | scheduler | Daniel Templeton | Sunil Govindan | | [YARN-3661](https://issues.apache.org/jira/browse/YARN-3661) | Basic Federation UI | Major | nodemanager, resourcemanager | Giovanni Matteo Fumarola | Íñigo Goiri | | [YARN-4827](https://issues.apache.org/jira/browse/YARN-4827) | Document configuration of ReservationSystem for FairScheduler | Blocker | capacity scheduler | Subru Krishnan | Yufei Gu | | [YARN-5516](https://issues.apache.org/jira/browse/YARN-5516) | Add REST API for supporting recurring reservations | Major | resourcemanager | Sangeetha Abdu Jyothi | Sean Po | @@ -333,19 +336,19 @@ | [YARN-7289](https://issues.apache.org/jira/browse/YARN-7289) | Application lifetime does not work with FairScheduler | Major | resourcemanager | Miklos Szegedi | Miklos Szegedi | | [YARN-7286](https://issues.apache.org/jira/browse/YARN-7286) | Add support for docker to have no capabilities | Major | yarn | Eric Badger | Eric Badger | | [HDFS-11467](https://issues.apache.org/jira/browse/HDFS-11467) | Support ErasureCoding section in OIV XML/ReverseXML | Blocker | tools | Wei-Chiu Chuang | Huafeng Wang | -| [YARN-7307](https://issues.apache.org/jira/browse/YARN-7307) | Allow client/AM update supported resource types via YARN APIs | Blocker | nodemanager, resourcemanager | Wangda Tan | Sunil G | +| [YARN-7307](https://issues.apache.org/jira/browse/YARN-7307) | Allow client/AM update supported resource types via YARN APIs | Blocker | nodemanager, resourcemanager | Wangda Tan | Sunil Govindan | | [MAPREDUCE-6997](https://issues.apache.org/jira/browse/MAPREDUCE-6997) | Moving logging APIs over to slf4j in hadoop-mapreduce-client-hs | Major | . | Akira Ajisaka | Gergely Novák | | [MAPREDUCE-7001](https://issues.apache.org/jira/browse/MAPREDUCE-7001) | Moving logging APIs over to slf4j in hadoop-mapreduce-client-shuffle | Trivial | . | Jinjiang Ling | Jinjiang Ling | | [YARN-7166](https://issues.apache.org/jira/browse/YARN-7166) | Container REST endpoints should report resource types | Major | resourcemanager | Daniel Templeton | Daniel Templeton | | [YARN-7143](https://issues.apache.org/jira/browse/YARN-7143) | FileNotFound handling in ResourceUtils is inconsistent | Major | resourcemanager | Daniel Templeton | Daniel Templeton | | [YARN-7437](https://issues.apache.org/jira/browse/YARN-7437) | Rename PlacementSet and SchedulingPlacementSet | Major | . 
| Wangda Tan | Wangda Tan | -| [YARN-6909](https://issues.apache.org/jira/browse/YARN-6909) | Use LightWeightedResource when number of resource types more than two | Critical | resourcemanager | Daniel Templeton | Sunil G | +| [YARN-6909](https://issues.apache.org/jira/browse/YARN-6909) | Use LightWeightedResource when number of resource types more than two | Critical | resourcemanager | Daniel Templeton | Sunil Govindan | | [YARN-7406](https://issues.apache.org/jira/browse/YARN-7406) | Moving logging APIs over to slf4j in hadoop-yarn-api | Major | . | Yeliang Cang | Yeliang Cang | | [YARN-7442](https://issues.apache.org/jira/browse/YARN-7442) | [YARN-7069] Limit format of resource type name | Blocker | nodemanager, resourcemanager | Wangda Tan | Wangda Tan | | [YARN-7369](https://issues.apache.org/jira/browse/YARN-7369) | Improve the resource types docs | Major | docs | Daniel Templeton | Daniel Templeton | | [HADOOP-14993](https://issues.apache.org/jira/browse/HADOOP-14993) | AliyunOSS: Override listFiles and listLocatedStatus | Major | fs/oss | Genmao Yu | Genmao Yu | | [YARN-7430](https://issues.apache.org/jira/browse/YARN-7430) | Enable user re-mapping for Docker containers by default | Blocker | security, yarn | Eric Yang | Eric Yang | -| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | SammiChen | SammiChen | +| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | Sammi Chen | Sammi Chen | | [YARN-7541](https://issues.apache.org/jira/browse/YARN-7541) | Node updates don't update the maximum cluster capability for resources other than CPU and memory | Critical | resourcemanager | Daniel Templeton | Daniel Templeton | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/RELEASENOTES.3.0.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/RELEASENOTES.3.0.0.md index 2b77fbb71c9..4b12d12a64c 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/RELEASENOTES.3.0.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.0/RELEASENOTES.3.0.0.md @@ -94,13 +94,6 @@ ReconfigurationTaskStatus' API scope is reduced to LimitedPrivate, and its depen The first version of Resource Estimator service, a tool that captures the historical resource usage of an app and predicts its future resource requirement. ---- - -* [YARN-5326](https://issues.apache.org/jira/browse/YARN-5326) | *Major* | **Support for recurring reservations in the YARN ReservationSystem** - -Add native support for recurring reservations (good till cancelled) to enable periodic allocations of the same resources. 
- - --- * [MAPREDUCE-6983](https://issues.apache.org/jira/browse/MAPREDUCE-6983) | *Major* | **Moving logging APIs over to slf4j in hadoop-mapreduce-client-core** diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.1/CHANGES.3.0.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.1/CHANGELOG.3.0.1.md similarity index 92% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.1/CHANGES.3.0.1.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.1/CHANGELOG.3.0.1.md index d24a8f4c3e9..4beccf4a861 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.1/CHANGES.3.0.1.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.1/CHANGELOG.3.0.1.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 3.0.1 - 2018-03-16 +## Release 3.0.1 - 2018-03-25 ### INCOMPATIBLE CHANGES: @@ -65,7 +65,6 @@ | [HADOOP-15189](https://issues.apache.org/jira/browse/HADOOP-15189) | backport HADOOP-15039 to branch-2 and branch-3 | Blocker | . | Genmao Yu | Genmao Yu | | [HADOOP-15186](https://issues.apache.org/jira/browse/HADOOP-15186) | Allow Azure Data Lake SDK dependency version to be set on the command line | Major | build, fs/adl | Vishwajeet Dusane | Vishwajeet Dusane | | [HDFS-13092](https://issues.apache.org/jira/browse/HDFS-13092) | Reduce verbosity for ThrottledAsyncChecker.java:schedule | Minor | datanode | Mukul Kumar Singh | Mukul Kumar Singh | -| [HDFS-13062](https://issues.apache.org/jira/browse/HDFS-13062) | Provide support for JN to use separate journal disk per namespace | Major | . | Bharat Viswanadham | Bharat Viswanadham | | [HADOOP-15212](https://issues.apache.org/jira/browse/HADOOP-15212) | Add independent secret manager method for logging expired tokens | Major | security | Daryn Sharp | Daryn Sharp | | [YARN-7728](https://issues.apache.org/jira/browse/YARN-7728) | Expose container preemptions related information in Capacity Scheduler queue metrics | Major | . | Eric Payne | Eric Payne | | [MAPREDUCE-7048](https://issues.apache.org/jira/browse/MAPREDUCE-7048) | Uber AM can crash due to unknown task in statusUpdate | Major | mr-am | Peter Bacsko | Peter Bacsko | @@ -110,23 +109,23 @@ | [YARN-7662](https://issues.apache.org/jira/browse/YARN-7662) | [Atsv2] Define new set of configurations for reader and collectors to bind. | Major | . | Rohith Sharma K S | Rohith Sharma K S | | [HDFS-12845](https://issues.apache.org/jira/browse/HDFS-12845) | JournalNode Test failures | Major | journal-node | Bharat Viswanadham | Bharat Viswanadham | | [YARN-7466](https://issues.apache.org/jira/browse/YARN-7466) | ResourceRequest has a different default for allocationRequestId than Container | Major | . | Chandni Singh | Chandni Singh | -| [YARN-7674](https://issues.apache.org/jira/browse/YARN-7674) | Update Timeline Reader web app address in UI2 | Major | . | Rohith Sharma K S | Sunil G | +| [YARN-7674](https://issues.apache.org/jira/browse/YARN-7674) | Update Timeline Reader web app address in UI2 | Major | . | Rohith Sharma K S | Sunil Govindan | | [HDFS-12938](https://issues.apache.org/jira/browse/HDFS-12938) | TestErasureCodigCLI testAll failing consistently. | Major | erasure-coding, hdfs | Rushabh S Shah | Ajay Kumar | | [YARN-7542](https://issues.apache.org/jira/browse/YARN-7542) | Fix issue that causes some Running Opportunistic Containers to be recovered as PAUSED | Major | . 
| Arun Suresh | Sampada Dehankar | | [HDFS-12915](https://issues.apache.org/jira/browse/HDFS-12915) | Fix findbugs warning in INodeFile$HeaderFormat.getBlockLayoutRedundancy | Major | namenode | Wei-Chiu Chuang | Chris Douglas | | [HADOOP-15122](https://issues.apache.org/jira/browse/HADOOP-15122) | Lock down version of doxia-module-markdown plugin | Blocker | . | Elek, Marton | Elek, Marton | | [HADOOP-15143](https://issues.apache.org/jira/browse/HADOOP-15143) | NPE due to Invalid KerberosTicket in UGI | Major | . | Jitendra Nath Pandey | Mukul Kumar Singh | -| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications | Blocker | resourcemanager | Charan Hebri | Sunil G | +| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications | Blocker | resourcemanager | Charan Hebri | Sunil Govindan | | [MAPREDUCE-7028](https://issues.apache.org/jira/browse/MAPREDUCE-7028) | Concurrent task progress updates causing NPE in Application Master | Blocker | mr-am | Gergo Repas | Gergo Repas | | [YARN-7602](https://issues.apache.org/jira/browse/YARN-7602) | NM should reference the singleton JvmMetrics instance | Major | nodemanager | Haibo Chen | Haibo Chen | | [HDFS-12913](https://issues.apache.org/jira/browse/HDFS-12913) | TestDNFencingWithReplication.testFencingStress fix mini cluster not yet active issue | Major | . | Zsolt Venczel | Zsolt Venczel | | [HDFS-12860](https://issues.apache.org/jira/browse/HDFS-12860) | StripedBlockUtil#getRangesInternalBlocks throws exception for the block group size larger than 2GB | Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-7619](https://issues.apache.org/jira/browse/YARN-7619) | Max AM Resource value in Capacity Scheduler UI has to be refreshed for every user | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | -| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call | Major | webapp | Sunil G | Sunil G | +| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call | Major | webapp | Sunil Govindan | Sunil Govindan | | [HDFS-12985](https://issues.apache.org/jira/browse/HDFS-12985) | NameNode crashes during restart after an OpenForWrite file present in the Snapshot got deleted | Major | hdfs | Manoj Govindassamy | Manoj Govindassamy | | [YARN-7508](https://issues.apache.org/jira/browse/YARN-7508) | NPE in FiCaSchedulerApp when debug log enabled in async-scheduling mode | Major | capacityscheduler | Tao Yang | Tao Yang | -| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Minor | resourcemanager | lujie | lujie | -| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Minor | yarn | lujie | lujie | +| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Major | resourcemanager | lujie | lujie | +| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Major | yarn | lujie | lujie | | [HDFS-12994](https://issues.apache.org/jira/browse/HDFS-12994) | TestReconstructStripedFile.testNNSendsErasureCodingTasks fails due to socket timeout | Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-7665](https://issues.apache.org/jira/browse/YARN-7665) 
| Allow FS scheduler state dump to be turned on/off separately from FS debug log | Major | . | Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [HADOOP-15060](https://issues.apache.org/jira/browse/HADOOP-15060) | TestShellBasedUnixGroupsMapping.testFiniteGroupResolutionTime flaky | Major | . | Miklos Szegedi | Miklos Szegedi | @@ -144,6 +143,7 @@ | [HADOOP-15181](https://issues.apache.org/jira/browse/HADOOP-15181) | Typo in SecureMode.md | Trivial | documentation | Masahiro Tanaka | Masahiro Tanaka | | [YARN-7796](https://issues.apache.org/jira/browse/YARN-7796) | Container-executor fails with segfault on certain OS configurations | Major | nodemanager | Gergo Repas | Gergo Repas | | [YARN-7806](https://issues.apache.org/jira/browse/YARN-7806) | Distributed Shell should use timeline async api's | Major | distributed-shell | Sumana Sathish | Rohith Sharma K S | +| [HADOOP-15121](https://issues.apache.org/jira/browse/HADOOP-15121) | Encounter NullPointerException when using DecayRpcScheduler | Major | . | Tao Jie | Tao Jie | | [MAPREDUCE-7015](https://issues.apache.org/jira/browse/MAPREDUCE-7015) | Possible race condition in JHS if the job is not loaded | Major | jobhistoryserver | Peter Bacsko | Peter Bacsko | | [YARN-7737](https://issues.apache.org/jira/browse/YARN-7737) | prelaunch.err file not found exception on container failure | Major | . | Jonathan Hung | Keqiu Hu | | [HDFS-13063](https://issues.apache.org/jira/browse/HDFS-13063) | Fix the incorrect spelling in HDFSHighAvailabilityWithQJM.md | Trivial | documentation | Jianfei Jiang | Jianfei Jiang | @@ -160,24 +160,16 @@ | [HDFS-12897](https://issues.apache.org/jira/browse/HDFS-12897) | getErasureCodingPolicy should handle .snapshot dir better | Major | erasure-coding, hdfs, snapshots | Harshakiran Reddy | LiXin Ge | | [MAPREDUCE-7033](https://issues.apache.org/jira/browse/MAPREDUCE-7033) | Map outputs implicitly rely on permissive umask for shuffle | Critical | mrv2 | Jason Lowe | Jason Lowe | | [HDFS-12942](https://issues.apache.org/jira/browse/HDFS-12942) | Synchronization issue in FSDataSetImpl#moveBlock | Major | . | Ajay Kumar | Ajay Kumar | -| [HDFS-13048](https://issues.apache.org/jira/browse/HDFS-13048) | LowRedundancyReplicatedBlocks metric can be negative | Major | metrics | Akira Ajisaka | Akira Ajisaka | | [HDFS-13100](https://issues.apache.org/jira/browse/HDFS-13100) | Handle IllegalArgumentException when GETSERVERDEFAULTS is not implemented in webhdfs. | Critical | hdfs, webhdfs | Yongjun Zhang | Yongjun Zhang | | [YARN-7849](https://issues.apache.org/jira/browse/YARN-7849) | TestMiniYarnClusterNodeUtilization#testUpdateNodeUtilization fails due to heartbeat sync error | Major | test | Jason Lowe | Botong Huang | | [YARN-7801](https://issues.apache.org/jira/browse/YARN-7801) | AmFilterInitializer should addFilter after fill all parameters | Critical | . | Sumana Sathish | Wangda Tan | | [YARN-7890](https://issues.apache.org/jira/browse/YARN-7890) | NPE during container relaunch | Major | . | Billie Rinaldi | Jason Lowe | -| [YARN-7873](https://issues.apache.org/jira/browse/YARN-7873) | Revert YARN-6078 | Blocker | . | Billie Rinaldi | Billie Rinaldi | -| [HDFS-13115](https://issues.apache.org/jira/browse/HDFS-13115) | In getNumUnderConstructionBlocks(), ignore the inodeIds for which the inodes have been deleted | Major | . 
| Yongjun Zhang | Yongjun Zhang | | [HDFS-12935](https://issues.apache.org/jira/browse/HDFS-12935) | Get ambiguous result for DFSAdmin command in HA mode when only one namenode is up | Major | tools | Jianfei Jiang | Jianfei Jiang | | [HDFS-13120](https://issues.apache.org/jira/browse/HDFS-13120) | Snapshot diff could be corrupted after concat | Major | namenode, snapshots | Xiaoyu Yao | Xiaoyu Yao | -| [HDFS-10453](https://issues.apache.org/jira/browse/HDFS-10453) | ReplicationMonitor thread could stuck for long time due to the race between replication and delete of same file in a large cluster. | Major | namenode | He Xiaoqiao | He Xiaoqiao | | [HDFS-8693](https://issues.apache.org/jira/browse/HDFS-8693) | refreshNamenodes does not support adding a new standby to a running DN | Critical | datanode, ha | Jian Fang | Ajith S | | [MAPREDUCE-7052](https://issues.apache.org/jira/browse/MAPREDUCE-7052) | TestFixedLengthInputFormat#testFormatCompressedIn is flaky | Major | client, test | Peter Bacsko | Peter Bacsko | | [HDFS-13112](https://issues.apache.org/jira/browse/HDFS-13112) | Token expiration edits may cause log corruption or deadlock | Critical | namenode | Daryn Sharp | Daryn Sharp | -| [YARN-7937](https://issues.apache.org/jira/browse/YARN-7937) | Fix http method name in Cluster Application Timeout Update API example request | Minor | docs, documentation | Charan Hebri | Charan Hebri | -| [HADOOP-10571](https://issues.apache.org/jira/browse/HADOOP-10571) | Use Log.\*(Object, Throwable) overload to log exceptions | Major | . | Arpit Agarwal | Andras Bokor | -| [HDFS-13164](https://issues.apache.org/jira/browse/HDFS-13164) | File not closed if streamer fail with DSQuotaExceededException | Major | hdfs-client | Xiao Chen | Xiao Chen | -| [HDFS-13244](https://issues.apache.org/jira/browse/HDFS-13244) | Add stack, conf, metrics links to utilities dropdown in NN webUI | Major | . | Bharat Viswanadham | Bharat Viswanadham | -| [YARN-8022](https://issues.apache.org/jira/browse/YARN-8022) | ResourceManager UI cluster/app/\ page fails to render | Blocker | webapp | Tarun Parimi | Tarun Parimi | +| [YARN-7873](https://issues.apache.org/jira/browse/YARN-7873) | Revert YARN-6078 | Blocker | . | Billie Rinaldi | Billie Rinaldi | ### TESTS: @@ -185,6 +177,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | | [MAPREDUCE-7011](https://issues.apache.org/jira/browse/MAPREDUCE-7011) | TestClientDistributedCacheManager::testDetermineCacheVisibilities assumes all parent dirs set other exec | Trivial | . | Chris Douglas | Chris Douglas | +| [HADOOP-14696](https://issues.apache.org/jira/browse/HADOOP-14696) | parallel tests don't work for Windows | Minor | test | Allen Wittenauer | Allen Wittenauer | ### SUB-TASKS: @@ -194,7 +187,7 @@ | [HADOOP-14993](https://issues.apache.org/jira/browse/HADOOP-14993) | AliyunOSS: Override listFiles and listLocatedStatus | Major | fs/oss | Genmao Yu | Genmao Yu | | [YARN-6953](https://issues.apache.org/jira/browse/YARN-6953) | Clean up ResourceUtils.setMinimumAllocationForMandatoryResources() and setMaximumAllocationForMandatoryResources() | Minor | resourcemanager | Daniel Templeton | Manikandan R | | [HDFS-12801](https://issues.apache.org/jira/browse/HDFS-12801) | RBF: Set MountTableResolver as default file resolver | Minor | . 
| Íñigo Goiri | Íñigo Goiri | -| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | SammiChen | SammiChen | +| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | Sammi Chen | Sammi Chen | | [HDFS-12858](https://issues.apache.org/jira/browse/HDFS-12858) | RBF: Add router admin commands usage in HDFS commands reference doc | Minor | documentation | Yiqun Lin | Yiqun Lin | | [HDFS-12835](https://issues.apache.org/jira/browse/HDFS-12835) | RBF: Fix Javadoc parameter errors | Minor | . | Wei Yan | Wei Yan | | [YARN-6907](https://issues.apache.org/jira/browse/YARN-6907) | Node information page in the old web UI should report resource types | Major | resourcemanager | Daniel Templeton | Gergely Novák | @@ -215,7 +208,7 @@ | [YARN-6736](https://issues.apache.org/jira/browse/YARN-6736) | Consider writing to both ats v1 & v2 from RM for smoother upgrades | Major | timelineserver | Vrushali C | Aaron Gresch | | [HADOOP-15027](https://issues.apache.org/jira/browse/HADOOP-15027) | AliyunOSS: Support multi-thread pre-read to improve sequential read from Hadoop to Aliyun OSS performance | Major | fs/oss | wujinhu | wujinhu | | [HDFS-13028](https://issues.apache.org/jira/browse/HDFS-13028) | RBF: Fix spurious TestRouterRpc#testProxyGetStats | Minor | . | Íñigo Goiri | Íñigo Goiri | -| [YARN-5094](https://issues.apache.org/jira/browse/YARN-5094) | some YARN container events have timestamp of -1 | Critical | timelineserver | Sangjin Lee | Haibo Chen | +| [YARN-5094](https://issues.apache.org/jira/browse/YARN-5094) | some YARN container events have timestamp of -1 | Critical | . | Sangjin Lee | Haibo Chen | | [YARN-7782](https://issues.apache.org/jira/browse/YARN-7782) | Enable user re-mapping for Docker containers in yarn-default.xml | Blocker | security, yarn | Eric Yang | Eric Yang | | [HDFS-12772](https://issues.apache.org/jira/browse/HDFS-12772) | RBF: Federation Router State State Store internal API | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-13042](https://issues.apache.org/jira/browse/HDFS-13042) | RBF: Heartbeat Router State | Major | . | Íñigo Goiri | Íñigo Goiri | @@ -224,9 +217,6 @@ | [HDFS-13044](https://issues.apache.org/jira/browse/HDFS-13044) | RBF: Add a safe mode for the Router | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-13043](https://issues.apache.org/jira/browse/HDFS-13043) | RBF: Expose the state of the Routers in the federation | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-12997](https://issues.apache.org/jira/browse/HDFS-12997) | Move logging to slf4j in BlockPoolSliceStorage and Storage | Major | . | Ajay Kumar | Ajay Kumar | -| [HDFS-13068](https://issues.apache.org/jira/browse/HDFS-13068) | RBF: Add router admin option to manage safe mode | Major | . 
| Íñigo Goiri | Yiqun Lin | -| [HADOOP-15247](https://issues.apache.org/jira/browse/HADOOP-15247) | Move commons-net up to 3.6 | Minor | fs | Steve Loughran | Steve Loughran | -| [HADOOP-15090](https://issues.apache.org/jira/browse/HADOOP-15090) | Add ADL troubleshooting doc | Major | documentation, fs/adl | Steve Loughran | Steve Loughran | ### OTHER: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.2/CHANGES.3.0.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.2/CHANGELOG.3.0.2.md similarity index 97% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.2/CHANGES.3.0.2.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.2/CHANGELOG.3.0.2.md index 96953ee704f..9880babde0f 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.2/CHANGES.3.0.2.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.2/CHANGELOG.3.0.2.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 3.0.2 - 2018-04-13 +## Release 3.0.2 - 2018-04-21 diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/CHANGES.3.0.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/CHANGELOG.3.0.3.md similarity index 97% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/CHANGES.3.0.3.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/CHANGELOG.3.0.3.md index 48065430f0b..ebaea548fb2 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/CHANGES.3.0.3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/CHANGELOG.3.0.3.md @@ -24,6 +24,7 @@ | JIRA | Summary | Priority | Component | Reporter | Contributor | |:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-7190](https://issues.apache.org/jira/browse/YARN-7190) | Ensure only NM classpath in 2.x gets TSv2 related hbase jars, not the user classpath | Major | timelineclient, timelinereader, timelineserver | Vrushali C | Varun Saxena | | [HDFS-13099](https://issues.apache.org/jira/browse/HDFS-13099) | RBF: Use the ZooKeeper as the default State Store | Minor | documentation | Yiqun Lin | Yiqun Lin | @@ -53,7 +54,7 @@ | [HDFS-13225](https://issues.apache.org/jira/browse/HDFS-13225) | StripeReader#checkMissingBlocks() 's IOException info is incomplete | Major | erasure-coding, hdfs-client | lufei | lufei | | [HDFS-11394](https://issues.apache.org/jira/browse/HDFS-11394) | Support for getting erasure coding policy through WebHDFS#FileStatus | Major | erasure-coding, namenode | Kai Sasaki | Kai Sasaki | | [HADOOP-15311](https://issues.apache.org/jira/browse/HADOOP-15311) | HttpServer2 needs a way to configure the acceptor/selector count | Major | common | Erik Krogen | Erik Krogen | -| [HDFS-11600](https://issues.apache.org/jira/browse/HDFS-11600) | Refactor TestDFSStripedOutputStreamWithFailure test classes | Minor | erasure-coding, test | Andrew Wang | SammiChen | +| [HDFS-11600](https://issues.apache.org/jira/browse/HDFS-11600) | Refactor TestDFSStripedOutputStreamWithFailure test classes | Minor | erasure-coding, test | Andrew Wang | Sammi Chen | | [HDFS-12884](https://issues.apache.org/jira/browse/HDFS-12884) | BlockUnderConstructionFeature.truncateBlock should be of type BlockInfo | Major | namenode | Konstantin Shvachko | chencan | | [HADOOP-15334](https://issues.apache.org/jira/browse/HADOOP-15334) | Upgrade Maven surefire plugin | Major | build | Arpit Agarwal | Arpit Agarwal 
| | [HADOOP-15312](https://issues.apache.org/jira/browse/HADOOP-15312) | Undocumented KeyProvider configuration keys | Major | . | Wei-Chiu Chuang | LiXin Ge | @@ -189,6 +190,8 @@ | [HDFS-13618](https://issues.apache.org/jira/browse/HDFS-13618) | Fix TestDataNodeFaultInjector test failures on Windows | Major | test | Xiao Liang | Xiao Liang | | [HADOOP-15473](https://issues.apache.org/jira/browse/HADOOP-15473) | Configure serialFilter in KeyProvider to avoid UnrecoverableKeyException caused by JDK-8189997 | Critical | kms | Gabor Bota | Gabor Bota | | [YARN-8338](https://issues.apache.org/jira/browse/YARN-8338) | TimelineService V1.5 doesn't come up after HADOOP-15406 | Critical | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [HADOOP-9747](https://issues.apache.org/jira/browse/HADOOP-9747) | Reduce unnecessary UGI synchronization | Critical | security | Daryn Sharp | Daryn Sharp | +| [YARN-8518](https://issues.apache.org/jira/browse/YARN-8518) | test-container-executor test\_is\_empty() is broken | Major | . | Jim Brennan | Jim Brennan | ### TESTS: @@ -216,7 +219,6 @@ | [HDFS-13619](https://issues.apache.org/jira/browse/HDFS-13619) | TestAuditLoggerWithCommands fails on Windows | Minor | test | Anbang Hu | Anbang Hu | | [HDFS-13620](https://issues.apache.org/jira/browse/HDFS-13620) | Randomize the test directory path for TestHDFSFileSystemContract | Minor | . | Anbang Hu | Anbang Hu | | [HDFS-13591](https://issues.apache.org/jira/browse/HDFS-13591) | TestDFSShell#testSetrepLow fails on Windows | Minor | . | Anbang Hu | Anbang Hu | -| [HDFS-13632](https://issues.apache.org/jira/browse/HDFS-13632) | Randomize baseDir for MiniJournalCluster in MiniQJMHACluster for TestDFSAdminWithHA | Minor | . | Anbang Hu | Anbang Hu | ### SUB-TASKS: @@ -238,7 +240,7 @@ | [HDFS-13233](https://issues.apache.org/jira/browse/HDFS-13233) | RBF: MountTableResolver doesn't return the correct mount point of the given path | Major | hdfs | wangzhiyuan | wangzhiyuan | | [HADOOP-15277](https://issues.apache.org/jira/browse/HADOOP-15277) | remove .FluentPropertyBeanIntrospector from CLI operation log output | Minor | conf | Steve Loughran | Steve Loughran | | [HDFS-13212](https://issues.apache.org/jira/browse/HDFS-13212) | RBF: Fix router location cache issue | Major | federation, hdfs | Weiwei Wu | Weiwei Wu | -| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection | Minor | . | Wei Yan | Ekanth S | +| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection | Minor | . | Wei Yan | Ekanth Sethuramalingam | | [HDFS-13240](https://issues.apache.org/jira/browse/HDFS-13240) | RBF: Update some inaccurate document descriptions | Minor | . | Yiqun Lin | Yiqun Lin | | [HDFS-11399](https://issues.apache.org/jira/browse/HDFS-11399) | Many tests fails in Windows due to injecting disk failures | Major | . | Yiqun Lin | Yiqun Lin | | [HDFS-13241](https://issues.apache.org/jira/browse/HDFS-13241) | RBF: TestRouterSafemode failed if the port 8888 is in use | Major | hdfs, test | maobaolong | maobaolong | @@ -253,7 +255,7 @@ | [HADOOP-15262](https://issues.apache.org/jira/browse/HADOOP-15262) | AliyunOSS: move files under a directory in parallel when rename a directory | Major | fs/oss | wujinhu | wujinhu | | [HDFS-13215](https://issues.apache.org/jira/browse/HDFS-13215) | RBF: Move Router to its own module | Major | . 
| Íñigo Goiri | Wei Yan | | [HDFS-13250](https://issues.apache.org/jira/browse/HDFS-13250) | RBF: Router to manage requests across multiple subclusters | Major | . | Íñigo Goiri | Íñigo Goiri | -| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf | Minor | . | Íñigo Goiri | Ekanth S | +| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf | Minor | . | Íñigo Goiri | Ekanth Sethuramalingam | | [HDFS-12792](https://issues.apache.org/jira/browse/HDFS-12792) | RBF: Test Router-based federation using HDFSContract | Major | . | Íñigo Goiri | Íñigo Goiri | | [YARN-7986](https://issues.apache.org/jira/browse/YARN-7986) | ATSv2 REST API queries do not return results for uppercase application tags | Critical | . | Charan Hebri | Charan Hebri | | [HDFS-12512](https://issues.apache.org/jira/browse/HDFS-12512) | RBF: Add WebHDFS | Major | fs | Íñigo Goiri | Wei Yan | @@ -286,7 +288,7 @@ | [HDFS-13326](https://issues.apache.org/jira/browse/HDFS-13326) | RBF: Improve the interfaces to modify and view mount tables | Minor | . | Wei Yan | Gang Li | | [HDFS-13499](https://issues.apache.org/jira/browse/HDFS-13499) | RBF: Show disabled name services in the UI | Minor | . | Íñigo Goiri | Íñigo Goiri | | [YARN-8215](https://issues.apache.org/jira/browse/YARN-8215) | ATS v2 returns invalid YARN\_CONTAINER\_ALLOCATED\_HOST\_HTTP\_ADDRESS from NM | Critical | ATSv2 | Yesha Vora | Rohith Sharma K S | -| [HDFS-13508](https://issues.apache.org/jira/browse/HDFS-13508) | RBF: Normalize paths (automatically) when adding, updating, removing or listing mount table entries | Minor | . | Ekanth S | Ekanth S | +| [HDFS-13508](https://issues.apache.org/jira/browse/HDFS-13508) | RBF: Normalize paths (automatically) when adding, updating, removing or listing mount table entries | Minor | . | Ekanth Sethuramalingam | Ekanth Sethuramalingam | | [HDFS-13434](https://issues.apache.org/jira/browse/HDFS-13434) | RBF: Fix dead links in RBF document | Major | documentation | Akira Ajisaka | Chetna Chaudhari | | [YARN-8212](https://issues.apache.org/jira/browse/YARN-8212) | Pending backlog for async allocation threads should be configurable | Major | . | Weiwei Yang | Tao Yang | | [HDFS-13488](https://issues.apache.org/jira/browse/HDFS-13488) | RBF: Reject requests when a Router is overloaded | Major | . | Íñigo Goiri | Íñigo Goiri | @@ -297,6 +299,7 @@ | [YARN-8130](https://issues.apache.org/jira/browse/YARN-8130) | Race condition when container events are published for KILLED applications | Major | ATSv2 | Charan Hebri | Rohith Sharma K S | | [HADOOP-15498](https://issues.apache.org/jira/browse/HADOOP-15498) | TestHadoopArchiveLogs (#testGenerateScript, #testPrepareWorkingDir) fails on Windows | Minor | . | Anbang Hu | Anbang Hu | | [HADOOP-15497](https://issues.apache.org/jira/browse/HADOOP-15497) | TestTrash should use proper test path to avoid failing on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-4781](https://issues.apache.org/jira/browse/YARN-4781) | Support intra-queue preemption for fairness ordering policy. 
| Major | scheduler | Wangda Tan | Eric Payne | ### OTHER: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/RELEASENOTES.3.0.3.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/RELEASENOTES.3.0.3.md index 9f35dbe15e4..c31e478468a 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/RELEASENOTES.3.0.3.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.3/RELEASENOTES.3.0.3.md @@ -21,6 +21,13 @@ These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. +--- + +* [YARN-7190](https://issues.apache.org/jira/browse/YARN-7190) | *Major* | **Ensure only NM classpath in 2.x gets TSv2 related hbase jars, not the user classpath** + +Ensure only NM classpath in 2.x gets TSv2 related hbase jars, not the user classpath. + + --- * [HDFS-13099](https://issues.apache.org/jira/browse/HDFS-13099) | *Minor* | **RBF: Use the ZooKeeper as the default State Store** diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.4/CHANGELOG.3.0.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.4/CHANGELOG.3.0.4.md new file mode 100644 index 00000000000..c2979bdb148 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.4/CHANGELOG.3.0.4.md @@ -0,0 +1,189 @@ + + +# Apache Hadoop Changelog + +## Release 3.0.4 - Unreleased (as of 2018-09-02) + + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13448](https://issues.apache.org/jira/browse/HDFS-13448) | HDFS Block Placement - Ignore Locality for First Block Replica | Minor | block placement, hdfs-client | BELUGA BEHR | BELUGA BEHR | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15252](https://issues.apache.org/jira/browse/HADOOP-15252) | Checkstyle version is not compatible with IDEA's checkstyle plugin | Major | . | Andras Bokor | Andras Bokor | +| [HDFS-13602](https://issues.apache.org/jira/browse/HDFS-13602) | Add checkOperation(WRITE) checks in FSNamesystem | Major | ha, namenode | Erik Krogen | Chao Sun | +| [HDFS-13155](https://issues.apache.org/jira/browse/HDFS-13155) | BlockPlacementPolicyDefault.chooseTargetInOrder Not Checking Return Value for NULL | Minor | namenode | BELUGA BEHR | Zsolt Venczel | +| [HDFS-13659](https://issues.apache.org/jira/browse/HDFS-13659) | Add more test coverage for contentSummary for snapshottable path | Major | namenode, test | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-15499](https://issues.apache.org/jira/browse/HADOOP-15499) | Performance severe drop when running RawErasureCoderBenchmark with NativeRSRawErasureCoder | Major | . | Sammi Chen | Sammi Chen | +| [HDFS-13653](https://issues.apache.org/jira/browse/HDFS-13653) | Make dfs.client.failover.random.order a per nameservice configuration | Major | federation | Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [YARN-8394](https://issues.apache.org/jira/browse/YARN-8394) | Improve data locality documentation for Capacity Scheduler | Major | . 
| Weiwei Yang | Weiwei Yang | +| [HDFS-13641](https://issues.apache.org/jira/browse/HDFS-13641) | Add metrics for edit log tailing | Major | metrics | Chao Sun | Chao Sun | +| [HDFS-13686](https://issues.apache.org/jira/browse/HDFS-13686) | Add overall metrics for FSNamesystemLock | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [HDFS-13692](https://issues.apache.org/jira/browse/HDFS-13692) | StorageInfoDefragmenter floods log when compacting StorageInfo TreeSet | Minor | . | Yiqun Lin | Bharat Viswanadham | +| [HDFS-13703](https://issues.apache.org/jira/browse/HDFS-13703) | Avoid allocation of CorruptedBlocks hashmap when no corrupted blocks are hit | Major | performance | Todd Lipcon | Todd Lipcon | +| [HADOOP-15554](https://issues.apache.org/jira/browse/HADOOP-15554) | Improve JIT performance for Configuration parsing | Minor | conf, performance | Todd Lipcon | Todd Lipcon | +| [HDFS-13714](https://issues.apache.org/jira/browse/HDFS-13714) | Fix TestNameNodePrunesMissingStorages test failures on Windows | Major | hdfs, namenode, test | Lukas Majercak | Lukas Majercak | +| [HDFS-13712](https://issues.apache.org/jira/browse/HDFS-13712) | BlockReaderRemote.read() logging improvement | Minor | hdfs-client | Gergo Repas | Gergo Repas | +| [HDFS-13719](https://issues.apache.org/jira/browse/HDFS-13719) | Docs around dfs.image.transfer.timeout are misleading | Major | . | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15598](https://issues.apache.org/jira/browse/HADOOP-15598) | DataChecksum calculate checksum is contented on hashtable synchronization | Major | common | Prasanth Jayachandran | Prasanth Jayachandran | +| [HADOOP-15609](https://issues.apache.org/jira/browse/HADOOP-15609) | Retry KMS calls when SSLHandshakeException occurs | Major | common, kms | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15612](https://issues.apache.org/jira/browse/HADOOP-15612) | Improve exception when tfile fails to load LzoCodec | Major | . | Gera Shegalov | Gera Shegalov | +| [HDFS-11060](https://issues.apache.org/jira/browse/HDFS-11060) | make DEFAULT\_MAX\_CORRUPT\_FILEBLOCKS\_RETURNED configurable | Minor | hdfs | Lantao Jin | Lantao Jin | +| [HDFS-13727](https://issues.apache.org/jira/browse/HDFS-13727) | Log full stack trace if DiskBalancer exits with an unhandled exception | Minor | diskbalancer | Stephen O'Donnell | Gabor Bota | +| [YARN-8155](https://issues.apache.org/jira/browse/YARN-8155) | Improve ATSv2 client logging in RM and NM publisher | Major | . 
| Rohith Sharma K S | Abhishek Modi | +| [HDFS-13728](https://issues.apache.org/jira/browse/HDFS-13728) | Disk Balancer should not fail if volume usage is greater than capacity | Minor | diskbalancer | Stephen O'Donnell | Stephen O'Donnell | +| [YARN-8568](https://issues.apache.org/jira/browse/YARN-8568) | Replace the deprecated zk-address property in the HA config example in ResourceManagerHA.md | Minor | yarn | Antal Bálint Steinbach | Antal Bálint Steinbach | +| [HDFS-13814](https://issues.apache.org/jira/browse/HDFS-13814) | Remove super user privilege requirement for NameNode.getServiceStatus | Minor | namenode | Chao Sun | Chao Sun | +| [YARN-8559](https://issues.apache.org/jira/browse/YARN-8559) | Expose mutable-conf scheduler's configuration in RM /scheduler-conf endpoint | Major | resourcemanager | Anna Savarin | Weiwei Yang | +| [HDFS-13813](https://issues.apache.org/jira/browse/HDFS-13813) | Exit NameNode if dangling child inode is detected when saving FsImage | Major | hdfs, namenode | Siyao Meng | Siyao Meng | +| [HDFS-13821](https://issues.apache.org/jira/browse/HDFS-13821) | RBF: Add dfs.federation.router.mount-table.cache.enable so that users can disable cache | Major | hdfs | Fei Hui | Fei Hui | +| [HDFS-13831](https://issues.apache.org/jira/browse/HDFS-13831) | Make block increment deletion number configurable | Major | . | Yiqun Lin | Ryan Wu | +| [YARN-8051](https://issues.apache.org/jira/browse/YARN-8051) | TestRMEmbeddedElector#testCallbackSynchronization is flakey | Major | test | Robert Kanter | Robert Kanter | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-12857](https://issues.apache.org/jira/browse/HDFS-12857) | StoragePolicyAdmin should support schema based path | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [YARN-7835](https://issues.apache.org/jira/browse/YARN-7835) | [Atsv2] Race condition in NM while publishing events if second attempt is launched on the same node | Critical | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-7773](https://issues.apache.org/jira/browse/YARN-7773) | YARN Federation used Mysql as state store throw exception, Unknown column 'homeSubCluster' in 'field list' | Blocker | federation | Yiran Wu | Yiran Wu | +| [HDFS-13636](https://issues.apache.org/jira/browse/HDFS-13636) | Cross-Site Scripting vulnerability in HttpServer2 | Major | . | Haibo Yan | Haibo Yan | +| [HDFS-13339](https://issues.apache.org/jira/browse/HDFS-13339) | Volume reference can't be released and may lead to deadlock when DataXceiver does a check volume | Critical | datanode | liaoyuxiangqin | Zsolt Venczel | +| [YARN-8382](https://issues.apache.org/jira/browse/YARN-8382) | cgroup file leak in NM | Major | nodemanager | Hu Ziqian | Hu Ziqian | +| [HDFS-13545](https://issues.apache.org/jira/browse/HDFS-13545) | "guarded" is misspelled as "gaurded" in FSPermissionChecker.java | Trivial | documentation | Jianchao Jia | Jianchao Jia | +| [MAPREDUCE-7103](https://issues.apache.org/jira/browse/MAPREDUCE-7103) | Fix TestHistoryViewerPrinter on windows due to a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15217](https://issues.apache.org/jira/browse/HADOOP-15217) | FsUrlConnection does not handle paths with spaces | Major | fs | Joseph Fourny | Zsolt Venczel | +| [HDFS-12950](https://issues.apache.org/jira/browse/HDFS-12950) | [oiv] ls will fail in secure cluster | Major | . 
| Brahma Reddy Battula | Wei-Chiu Chuang | +| [YARN-8359](https://issues.apache.org/jira/browse/YARN-8359) | Exclude containermanager.linux test classes on Windows | Major | . | Giovanni Matteo Fumarola | Jason Lowe | +| [HDFS-13664](https://issues.apache.org/jira/browse/HDFS-13664) | Refactor ConfiguredFailoverProxyProvider to make inheritance easier | Minor | hdfs-client | Chao Sun | Chao Sun | +| [HDFS-12670](https://issues.apache.org/jira/browse/HDFS-12670) | can't renew HDFS tokens with only the hdfs client jar | Critical | . | Thomas Graves | Arpit Agarwal | +| [HDFS-13667](https://issues.apache.org/jira/browse/HDFS-13667) | Typo: Marking all "datandoes" as stale | Trivial | namenode | Wei-Chiu Chuang | Nanda kumar | +| [YARN-8405](https://issues.apache.org/jira/browse/YARN-8405) | RM zk-state-store.parent-path ACLs has been changed since HADOOP-14773 | Major | . | Rohith Sharma K S | Íñigo Goiri | +| [YARN-8404](https://issues.apache.org/jira/browse/YARN-8404) | Timeline event publish need to be async to avoid Dispatcher thread leak in case ATS is down | Blocker | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13673](https://issues.apache.org/jira/browse/HDFS-13673) | TestNameNodeMetrics fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13676](https://issues.apache.org/jira/browse/HDFS-13676) | TestEditLogRace fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13174](https://issues.apache.org/jira/browse/HDFS-13174) | hdfs mover -p /path times out after 20 min | Major | balancer & mover | Istvan Fajth | Istvan Fajth | +| [HADOOP-15523](https://issues.apache.org/jira/browse/HADOOP-15523) | Shell command timeout given is in seconds whereas it is taken as millisec while scheduling | Major | . | Bilwa S T | Bilwa S T | +| [HDFS-13682](https://issues.apache.org/jira/browse/HDFS-13682) | Cannot create encryption zone after KMS auth token expires | Critical | encryption, kms, namenode | Xiao Chen | Xiao Chen | +| [YARN-8444](https://issues.apache.org/jira/browse/YARN-8444) | NodeResourceMonitor crashes on bad swapFree value | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8443](https://issues.apache.org/jira/browse/YARN-8443) | Total #VCores in cluster metrics is wrong when CapacityScheduler reserved some containers | Major | webapp | Tao Yang | Tao Yang | +| [YARN-8457](https://issues.apache.org/jira/browse/YARN-8457) | Compilation is broken with -Pyarn-ui | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-8401](https://issues.apache.org/jira/browse/YARN-8401) | [UI2] new ui is not accessible with out internet connection | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8451](https://issues.apache.org/jira/browse/YARN-8451) | Multiple NM heartbeat thread created when a slow NM resync with RM | Major | nodemanager | Botong Huang | Botong Huang | +| [HADOOP-15548](https://issues.apache.org/jira/browse/HADOOP-15548) | Randomize local dirs | Minor | . | Jim Brennan | Jim Brennan | +| [HDFS-13702](https://issues.apache.org/jira/browse/HDFS-13702) | Remove HTrace hooks from DFSClient to reduce CPU usage | Major | performance | Todd Lipcon | Todd Lipcon | +| [HDFS-13635](https://issues.apache.org/jira/browse/HDFS-13635) | Incorrect message when block is not found | Major | datanode | Wei-Chiu Chuang | Gabor Bota | +| [HADOOP-15571](https://issues.apache.org/jira/browse/HADOOP-15571) | Multiple FileContexts created with the same configuration object should be allowed to have different umask | Critical | . 
| Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [HDFS-13121](https://issues.apache.org/jira/browse/HDFS-13121) | NPE when request file descriptors when SC read | Minor | hdfs-client | Gang Xie | Zsolt Venczel | +| [YARN-6265](https://issues.apache.org/jira/browse/YARN-6265) | yarn.resourcemanager.fail-fast is used inconsistently | Major | resourcemanager | Daniel Templeton | Yuanbo Liu | +| [YARN-8473](https://issues.apache.org/jira/browse/YARN-8473) | Containers being launched as app tears down can leave containers in NEW state | Major | nodemanager | Jason Lowe | Jason Lowe | +| [HDFS-13723](https://issues.apache.org/jira/browse/HDFS-13723) | Occasional "Should be different group" error in TestRefreshUserMappings#testGroupMappingRefresh | Major | security, test | Siyao Meng | Siyao Meng | +| [HDFS-12837](https://issues.apache.org/jira/browse/HDFS-12837) | Intermittent failure in TestReencryptionWithKMS | Major | encryption, test | Surendra Singh Lilhore | Xiao Chen | +| [HDFS-13729](https://issues.apache.org/jira/browse/HDFS-13729) | Fix broken links to RBF documentation | Minor | documentation | jwhitter | Gabor Bota | +| [YARN-8515](https://issues.apache.org/jira/browse/YARN-8515) | container-executor can crash with SIGPIPE after nodemanager restart | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8421](https://issues.apache.org/jira/browse/YARN-8421) | when moving app, activeUsers is increased, even though app does not have outstanding request | Major | . | kyungwan nam | | +| [HDFS-13524](https://issues.apache.org/jira/browse/HDFS-13524) | Occasional "All datanodes are bad" error in TestLargeBlock#testLargeBlockSize | Major | . | Wei-Chiu Chuang | Siyao Meng | +| [HADOOP-15610](https://issues.apache.org/jira/browse/HADOOP-15610) | Hadoop Docker Image Pip Install Fails | Critical | . | Jack Bearden | Jack Bearden | +| [HADOOP-15614](https://issues.apache.org/jira/browse/HADOOP-15614) | TestGroupsCaching.testExceptionOnBackgroundRefreshHandled reliably fails | Major | . | Kihwal Lee | Weiwei Yang | +| [YARN-8548](https://issues.apache.org/jira/browse/YARN-8548) | AllocationRespose proto setNMToken initBuilder not done | Major | . | Bibin A Chundatt | Bilwa S T | +| [YARN-7748](https://issues.apache.org/jira/browse/YARN-7748) | TestContainerResizing.testIncreaseContainerUnreservedWhenApplicationCompleted fails due to multiple container fail events | Major | capacityscheduler | Haibo Chen | Weiwei Yang | +| [YARN-8577](https://issues.apache.org/jira/browse/YARN-8577) | Fix the broken anchor in SLS site-doc | Minor | documentation | Weiwei Yang | Weiwei Yang | +| [YARN-4606](https://issues.apache.org/jira/browse/YARN-4606) | CapacityScheduler: applications could get starved because computation of #activeUsers considers pending apps | Critical | capacity scheduler, capacityscheduler | Karam Singh | Manikandan R | +| [HDFS-13765](https://issues.apache.org/jira/browse/HDFS-13765) | Fix javadoc for FSDirMkdirOp#createParentDirectories | Minor | documentation | Lokesh Jain | Lokesh Jain | +| [YARN-8434](https://issues.apache.org/jira/browse/YARN-8434) | Update federation documentation of Nodemanager configurations | Minor | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8558](https://issues.apache.org/jira/browse/YARN-8558) | NM recovery level db not cleaned up properly on container finish | Critical | . 
| Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-15637](https://issues.apache.org/jira/browse/HADOOP-15637) | LocalFs#listLocatedStatus does not filter out hidden .crc files | Minor | fs | Erik Krogen | Erik Krogen | +| [YARN-8397](https://issues.apache.org/jira/browse/YARN-8397) | Potential thread leak in ActivitiesManager | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-6966](https://issues.apache.org/jira/browse/YARN-6966) | NodeManager metrics may return wrong negative values when NM restart | Major | . | Yang Wang | Szilard Nemeth | +| [HDFS-13786](https://issues.apache.org/jira/browse/HDFS-13786) | EC: Display erasure coding policy for sub-directories is not working | Major | erasure-coding | Souryakanta Dwivedy | Ayush Saxena | +| [YARN-8331](https://issues.apache.org/jira/browse/YARN-8331) | Race condition in NM container launched after done | Major | . | Yang Wang | Pradeep Ambati | +| [HADOOP-15638](https://issues.apache.org/jira/browse/HADOOP-15638) | KMS Accept Queue Size default changed from 500 to 128 in Hadoop 3.x | Major | kms | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-13738](https://issues.apache.org/jira/browse/HDFS-13738) | fsck -list-corruptfileblocks has infinite loop if user is not privileged. | Major | tools | Wei-Chiu Chuang | Yuen-Kuei Hsueh | +| [HDFS-13758](https://issues.apache.org/jira/browse/HDFS-13758) | DatanodeManager should throw exception if it has BlockRecoveryCommand but the block is not under construction | Major | namenode | Wei-Chiu Chuang | chencan | +| [YARN-8614](https://issues.apache.org/jira/browse/YARN-8614) | Fix few annotation typos in YarnConfiguration | Trivial | . | Sen Zhao | Sen Zhao | +| [HDFS-13746](https://issues.apache.org/jira/browse/HDFS-13746) | Still occasional "Should be different group" failure in TestRefreshUserMappings#testGroupMappingRefresh | Major | . | Siyao Meng | Siyao Meng | +| [HDFS-10240](https://issues.apache.org/jira/browse/HDFS-10240) | Race between close/recoverLease leads to missing block | Major | . | zhouyingchao | Jinglun | +| [YARN-8612](https://issues.apache.org/jira/browse/YARN-8612) | Fix NM Collector Service Port issue in YarnConfiguration | Major | ATSv2 | Prabha Manepalli | Prabha Manepalli | +| [HDFS-13747](https://issues.apache.org/jira/browse/HDFS-13747) | Statistic for list\_located\_status is incremented incorrectly by listStatusIterator | Minor | hdfs-client | Todd Lipcon | Antal Mihalyi | +| [HADOOP-15674](https://issues.apache.org/jira/browse/HADOOP-15674) | Test failure TestSSLHttpServer.testExcludedCiphers with TLS\_ECDHE\_RSA\_WITH\_AES\_128\_CBC\_SHA256 cipher suite | Major | common | Gabor Bota | Szilard Nemeth | +| [YARN-8640](https://issues.apache.org/jira/browse/YARN-8640) | Restore previous state in container-executor after failure | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8679](https://issues.apache.org/jira/browse/YARN-8679) | [ATSv2] If HBase cluster is down for long time, high chances that NM ContainerManager dispatcher get blocked | Major | . | Rohith Sharma K S | Wangda Tan | +| [HADOOP-14314](https://issues.apache.org/jira/browse/HADOOP-14314) | The OpenSolaris taxonomy link is dead in InterfaceClassification.md | Major | documentation | Daniel Templeton | Rui Gao | +| [YARN-8649](https://issues.apache.org/jira/browse/YARN-8649) | NPE in localizer hearbeat processing if a container is killed while localizing | Major | . 
| lujie | lujie | +| [YARN-8719](https://issues.apache.org/jira/browse/YARN-8719) | Typo correction for yarn configuration in OpportunisticContainers(federation) docs | Major | documentation, federation | Y. SREENIVASULU REDDY | Y. SREENIVASULU REDDY | +| [HDFS-13731](https://issues.apache.org/jira/browse/HDFS-13731) | ReencryptionUpdater fails with ConcurrentModificationException during processCheckpoints | Major | encryption | Xiao Chen | Zsolt Venczel | +| [HADOOP-15705](https://issues.apache.org/jira/browse/HADOOP-15705) | Typo in the definition of "stable" in the interface classification | Minor | . | Daniel Templeton | Daniel Templeton | +| [HDFS-13863](https://issues.apache.org/jira/browse/HDFS-13863) | FsDatasetImpl should log DiskOutOfSpaceException | Major | hdfs | Fei Hui | Fei Hui | +| [HADOOP-15698](https://issues.apache.org/jira/browse/HADOOP-15698) | KMS log4j is not initialized properly at startup | Major | kms | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15706](https://issues.apache.org/jira/browse/HADOOP-15706) | Typo in compatibility doc: SHOUD -\> SHOULD | Trivial | . | Daniel Templeton | Laszlo Kollar | +| [HDFS-13027](https://issues.apache.org/jira/browse/HDFS-13027) | Handle possible NPEs due to deleted blocks in race condition | Major | namenode | Vinayakumar B | Vinayakumar B | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13632](https://issues.apache.org/jira/browse/HDFS-13632) | Randomize baseDir for MiniJournalCluster in MiniQJMHACluster for TestDFSAdminWithHA | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13651](https://issues.apache.org/jira/browse/HDFS-13651) | TestReencryptionHandler fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [MAPREDUCE-7102](https://issues.apache.org/jira/browse/MAPREDUCE-7102) | Fix TestJavaSerialization for Windows due a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [MAPREDUCE-7105](https://issues.apache.org/jira/browse/MAPREDUCE-7105) | Fix TestNativeCollectorOnlyHandler.testOnCall on Windows because of the path format | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-13652](https://issues.apache.org/jira/browse/HDFS-13652) | Randomize baseDir for MiniDFSCluster in TestBlockScanner | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13649](https://issues.apache.org/jira/browse/HDFS-13649) | Randomize baseDir for MiniDFSCluster in TestReconstructStripedFile and TestReconstructStripedFileWithRandomECPolicy | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13650](https://issues.apache.org/jira/browse/HDFS-13650) | Randomize baseDir for MiniDFSCluster in TestDFSStripedInputStream and TestDFSStripedInputStreamWithRandomECPolicy | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8370](https://issues.apache.org/jira/browse/YARN-8370) | Some Node Manager tests fail on Windows due to improper path/file separator | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8422](https://issues.apache.org/jira/browse/YARN-8422) | TestAMSimulator failing with NPE | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15532](https://issues.apache.org/jira/browse/HADOOP-15532) | TestBasicDiskValidator fails with NoSuchFileException | Minor | . | Íñigo Goiri | Giovanni Matteo Fumarola | +| [HDFS-13563](https://issues.apache.org/jira/browse/HDFS-13563) | TestDFSAdminWithHA times out on Windows | Minor | . 
| Anbang Hu | Lukas Majercak | +| [HDFS-13681](https://issues.apache.org/jira/browse/HDFS-13681) | Fix TestStartup.testNNFailToStartOnReadOnlyNNDir test failure on Windows | Major | test | Xiao Liang | Xiao Liang | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-12978](https://issues.apache.org/jira/browse/HDFS-12978) | Fine-grained locking while consuming journal stream. | Major | namenode | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-13637](https://issues.apache.org/jira/browse/HDFS-13637) | RBF: Router fails when threadIndex (in ConnectionPool) wraps around Integer.MIN\_VALUE | Critical | federation | CR Hota | CR Hota | +| [HDFS-13281](https://issues.apache.org/jira/browse/HDFS-13281) | Namenode#createFile should be /.reserved/raw/ aware. | Critical | encryption | Rushabh S Shah | Rushabh S Shah | +| [YARN-4677](https://issues.apache.org/jira/browse/YARN-4677) | RMNodeResourceUpdateEvent update from scheduler can lead to race condition | Major | graceful, resourcemanager, scheduler | Brook Zhou | Wilfred Spiegelenburg | +| [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks | Minor | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [HADOOP-15529](https://issues.apache.org/jira/browse/HADOOP-15529) | ContainerLaunch#testInvalidEnvVariableSubstitutionType is not supported in Windows | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15533](https://issues.apache.org/jira/browse/HADOOP-15533) | Make WASB listStatus messages consistent | Trivial | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [HADOOP-15458](https://issues.apache.org/jira/browse/HADOOP-15458) | TestLocalFileSystem#testFSOutputStreamBuilder fails on Windows | Minor | test | Xiao Liang | Xiao Liang | +| [HDFS-13726](https://issues.apache.org/jira/browse/HDFS-13726) | RBF: Fix RBF configuration links | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13475](https://issues.apache.org/jira/browse/HDFS-13475) | RBF: Admin cannot enforce Router enter SafeMode | Major | . | Wei Yan | Chao Sun | +| [HDFS-13733](https://issues.apache.org/jira/browse/HDFS-13733) | RBF: Add Web UI configurations and descriptions to RBF document | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13743](https://issues.apache.org/jira/browse/HDFS-13743) | RBF: Router throws NullPointerException due to the invalid initialization of MountTableResolver | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13750](https://issues.apache.org/jira/browse/HDFS-13750) | RBF: Router ID in RouterRpcClient is always null | Major | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8129](https://issues.apache.org/jira/browse/YARN-8129) | Improve error message for invalid value in fields attribute | Minor | ATSv2 | Charan Hebri | Abhishek Modi | +| [HDFS-13848](https://issues.apache.org/jira/browse/HDFS-13848) | Refactor NameNode failover proxy providers | Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13788](https://issues.apache.org/jira/browse/HDFS-13788) | Update EC documentation about rack fault tolerance | Major | documentation, erasure-coding | Xiao Chen | Kitti Nanasi | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.4/RELEASENOTES.3.0.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.4/RELEASENOTES.3.0.4.md new file mode 100644 index 00000000000..c9d01aecdce --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.0.4/RELEASENOTES.3.0.4.md @@ -0,0 +1,50 @@ + + +# Apache Hadoop 3.0.4 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-15252](https://issues.apache.org/jira/browse/HADOOP-15252) | *Major* | **Checkstyle version is not compatible with IDEA's checkstyle plugin** + +Updated checkstyle to 8.8 and updated maven-checkstyle-plugin to 3.0.0. + + +--- + +* [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | *Minor* | **Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks** + +WASB: Fix Spark process hang at shutdown due to use of non-daemon threads by updating Azure Storage Java SDK to 7.0 + + +--- + +* [HDFS-13174](https://issues.apache.org/jira/browse/HDFS-13174) | *Major* | **hdfs mover -p /path times out after 20 min** + +Mover could have failed after 20+ minutes if a block move between two DataNodes had been enqueued for that long, due to an internal constant that was introduced for Balancer but affected Mover as well. +After the patch, the internal constant can be configured with the dfs.balancer.max-iteration-time parameter and affects only the Balancer. The default is 20 minutes. + + +--- + +* [HADOOP-15638](https://issues.apache.org/jira/browse/HADOOP-15638) | *Major* | **KMS Accept Queue Size default changed from 500 to 128 in Hadoop 3.x** + +Restore the KMS accept queue size to 500 in Hadoop 3.x, making it the same as in Hadoop 2.x. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGES.3.1.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGELOG.3.1.0.md similarity index 96% rename from hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGES.3.1.0.md rename to hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGELOG.3.1.0.md index 3ccbae4147d..dff4a7e5723 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGES.3.1.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGELOG.3.1.0.md @@ -18,7 +18,7 @@ --> # Apache Hadoop Changelog -## Release 3.1.0 - 2018-03-30 +## Release 3.1.0 - 2018-04-06 ### INCOMPATIBLE CHANGES: @@ -54,10 +54,15 @@ | [HDFS-206](https://issues.apache.org/jira/browse/HDFS-206) | Support for head in FSShell | Minor | . 
| Olga Natkovich | Gabor Bota | | [YARN-5079](https://issues.apache.org/jira/browse/YARN-5079) | [Umbrella] Native YARN framework layer for services and beyond | Major | . | Vinod Kumar Vavilapalli | | | [YARN-4757](https://issues.apache.org/jira/browse/YARN-4757) | [Umbrella] Simplified discovery of services via DNS mechanisms | Major | . | Vinod Kumar Vavilapalli | | -| [HADOOP-13786](https://issues.apache.org/jira/browse/HADOOP-13786) | Add S3A committer for zero-rename commits to S3 endpoints | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-13786](https://issues.apache.org/jira/browse/HADOOP-13786) | Add S3A committers for zero-rename commits to S3 endpoints | Major | fs/s3 | Steve Loughran | Steve Loughran | | [HDFS-9806](https://issues.apache.org/jira/browse/HDFS-9806) | Allow HDFS block replicas to be provided by an external storage system | Major | . | Chris Douglas | | | [YARN-6592](https://issues.apache.org/jira/browse/YARN-6592) | [Umbrella] Rich placement constraints in YARN | Major | . | Konstantinos Karanasos | | | [HDFS-12998](https://issues.apache.org/jira/browse/HDFS-12998) | SnapshotDiff - Provide an iterator-based listing API for calculating snapshotDiff | Major | snapshots | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-5764](https://issues.apache.org/jira/browse/YARN-5764) | NUMA awareness support for launching containers | Major | nodemanager, yarn | Olasoji | Devaraj K | +| [YARN-5983](https://issues.apache.org/jira/browse/YARN-5983) | [Umbrella] Support for FPGA as a Resource in YARN | Major | yarn | Zhankun Tang | Zhankun Tang | +| [YARN-6223](https://issues.apache.org/jira/browse/YARN-6223) | [Umbrella] Natively support GPU configuration/discovery/scheduling/isolation on YARN | Major | . | Wangda Tan | Wangda Tan | +| [HADOOP-14898](https://issues.apache.org/jira/browse/HADOOP-14898) | Create official Docker images for development and testing features | Major | . | Elek, Marton | Elek, Marton | +| [HDFS-13553](https://issues.apache.org/jira/browse/HDFS-13553) | RBF: Support global quota | Major | . | Íñigo Goiri | Yiqun Lin | ### IMPROVEMENTS: @@ -213,7 +218,7 @@ | [HADOOP-15311](https://issues.apache.org/jira/browse/HADOOP-15311) | HttpServer2 needs a way to configure the acceptor/selector count | Major | common | Erik Krogen | Erik Krogen | | [HDFS-13235](https://issues.apache.org/jira/browse/HDFS-13235) | DiskBalancer: Update Documentation to add newly added options | Major | diskbalancer, documentation | Bharat Viswanadham | Bharat Viswanadham | | [HDFS-336](https://issues.apache.org/jira/browse/HDFS-336) | dfsadmin -report should report number of blocks from datanode | Minor | . 
| Lohit Vijayarenu | Bharat Viswanadham | -| [HDFS-11600](https://issues.apache.org/jira/browse/HDFS-11600) | Refactor TestDFSStripedOutputStreamWithFailure test classes | Minor | test | Andrew Wang | SammiChen | +| [HDFS-11600](https://issues.apache.org/jira/browse/HDFS-11600) | Refactor TestDFSStripedOutputStreamWithFailure test classes | Minor | erasure-coding, test | Andrew Wang | Sammi Chen | | [HDFS-13257](https://issues.apache.org/jira/browse/HDFS-13257) | Code cleanup: INode never throws QuotaExceededException | Major | namenode | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze | | [HDFS-13275](https://issues.apache.org/jira/browse/HDFS-13275) | Adding log for BlockPoolManager#refreshNamenodes failures | Minor | datanode | Xiaoyu Yao | Ajay Kumar | | [HDFS-13246](https://issues.apache.org/jira/browse/HDFS-13246) | FileInputStream redundant closes in readReplicasFromCache | Minor | datanode | liaoyuxiangqin | liaoyuxiangqin | @@ -227,6 +232,10 @@ | [YARN-7623](https://issues.apache.org/jira/browse/YARN-7623) | Fix the CapacityScheduler Queue configuration documentation | Major | . | Arun Suresh | Jonathan Hung | | [HDFS-13314](https://issues.apache.org/jira/browse/HDFS-13314) | NameNode should optionally exit if it detects FsImage corruption | Major | namenode | Arpit Agarwal | Arpit Agarwal | | [YARN-8076](https://issues.apache.org/jira/browse/YARN-8076) | Support to specify application tags in distributed shell | Major | distributed-shell | Weiwei Yang | Weiwei Yang | +| [HADOOP-14831](https://issues.apache.org/jira/browse/HADOOP-14831) | Über-jira: S3a phase IV: Hadoop 3.1 features | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-5881](https://issues.apache.org/jira/browse/YARN-5881) | [Umbrella] Enable configuration of queue capacity in terms of absolute resources | Major | . | Sean Po | Sunil Govindan | +| [HADOOP-14841](https://issues.apache.org/jira/browse/HADOOP-14841) | Kms client should disconnect if unable to get output stream from connection. | Major | kms | Xiao Chen | Rushabh S Shah | +| [HDFS-13493](https://issues.apache.org/jira/browse/HDFS-13493) | Reduce the HttpServer2 thread count on DataNodes | Major | datanode | Erik Krogen | Erik Krogen | ### BUG FIXES: @@ -270,7 +279,7 @@ | [YARN-7118](https://issues.apache.org/jira/browse/YARN-7118) | AHS REST API can return NullPointerException | Major | . | Prabhu Joseph | Billie Rinaldi | | [HDFS-12495](https://issues.apache.org/jira/browse/HDFS-12495) | TestPendingInvalidateBlock#testPendingDeleteUnknownBlocks fails intermittently | Major | . | Eric Badger | Eric Badger | | [HADOOP-14822](https://issues.apache.org/jira/browse/HADOOP-14822) | hadoop-project/pom.xml is executable | Minor | . | Akira Ajisaka | Ajay Kumar | -| [YARN-7157](https://issues.apache.org/jira/browse/YARN-7157) | Add admin configuration to filter per-user's apps in secure cluster | Major | webapp | Sunil G | Sunil G | +| [YARN-7157](https://issues.apache.org/jira/browse/YARN-7157) | Add admin configuration to filter per-user's apps in secure cluster | Major | webapp | Sunil Govindan | Sunil Govindan | | [YARN-7257](https://issues.apache.org/jira/browse/YARN-7257) | AggregatedLogsBlock reports a bad 'end' value as a bad 'start' value | Major | log-aggregation | Jason Lowe | Jason Lowe | | [YARN-7084](https://issues.apache.org/jira/browse/YARN-7084) | TestSchedulingMonitor#testRMStarts fails sporadically | Major | . 
| Jason Lowe | Jason Lowe | | [HDFS-12271](https://issues.apache.org/jira/browse/HDFS-12271) | Incorrect statement in Downgrade section of HDFS Rolling Upgrade document | Minor | documentation | Nanda kumar | Nanda kumar | @@ -302,7 +311,7 @@ | [HADOOP-14977](https://issues.apache.org/jira/browse/HADOOP-14977) | Xenial dockerfile needs ant in main build for findbugs | Trivial | build, test | Allen Wittenauer | Akira Ajisaka | | [HDFS-12579](https://issues.apache.org/jira/browse/HDFS-12579) | JournalNodeSyncer should use fromUrl field of EditLogManifestResponse to construct servlet Url | Major | . | Hanisha Koneru | Hanisha Koneru | | [YARN-7375](https://issues.apache.org/jira/browse/YARN-7375) | Possible NPE in RMWebapp when HA is enabled and the active RM fails | Major | . | Chandni Singh | Chandni Singh | -| [YARN-6747](https://issues.apache.org/jira/browse/YARN-6747) | TestFSAppStarvation.testPreemptionEnable fails intermittently | Major | . | Sunil G | Miklos Szegedi | +| [YARN-6747](https://issues.apache.org/jira/browse/YARN-6747) | TestFSAppStarvation.testPreemptionEnable fails intermittently | Major | . | Sunil Govindan | Miklos Szegedi | | [YARN-7336](https://issues.apache.org/jira/browse/YARN-7336) | Unsafe cast from long to int Resource.hashCode() method | Critical | resourcemanager | Daniel Templeton | Miklos Szegedi | | [HADOOP-14990](https://issues.apache.org/jira/browse/HADOOP-14990) | Clean up jdiff xml files added for 2.8.2 release | Blocker | . | Subru Krishnan | Junping Du | | [HADOOP-14980](https://issues.apache.org/jira/browse/HADOOP-14980) | [JDK9] Upgrade maven-javadoc-plugin to 3.0.0-M1 | Minor | build | ligongyi | ligongyi | @@ -370,11 +379,11 @@ | [HADOOP-14985](https://issues.apache.org/jira/browse/HADOOP-14985) | Remove subversion related code from VersionInfoMojo.java | Minor | build | Akira Ajisaka | Ajay Kumar | | [YARN-7586](https://issues.apache.org/jira/browse/YARN-7586) | Application Placement should be done before ACL checks in ResourceManager | Blocker | . | Suma Shivaprasad | Suma Shivaprasad | | [HDFS-11751](https://issues.apache.org/jira/browse/HDFS-11751) | DFSZKFailoverController daemon exits with wrong status code | Major | auto-failover | Doris Gu | Bharat Viswanadham | -| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" | Blocker | fs/oss | Chris Douglas | SammiChen | +| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" | Blocker | fs/oss | Chris Douglas | Sammi Chen | | [HADOOP-15098](https://issues.apache.org/jira/browse/HADOOP-15098) | TestClusterTopology#testChooseRandom fails intermittently | Major | test | Zsolt Venczel | Zsolt Venczel | | [YARN-7608](https://issues.apache.org/jira/browse/YARN-7608) | Incorrect sTarget column causing DataTable warning on RM application and scheduler web page | Major | resourcemanager, webapp | Weiwei Yang | Gergely Novák | | [HDFS-12891](https://issues.apache.org/jira/browse/HDFS-12891) | Do not invalidate blocks if toInvalidate is empty | Major | . 
| Zsolt Venczel | Zsolt Venczel | -| [YARN-7635](https://issues.apache.org/jira/browse/YARN-7635) | TestRMWebServicesSchedulerActivities fails in trunk | Major | test | Sunil G | Sunil G | +| [YARN-7635](https://issues.apache.org/jira/browse/YARN-7635) | TestRMWebServicesSchedulerActivities fails in trunk | Major | test | Sunil Govindan | Sunil Govindan | | [HDFS-12833](https://issues.apache.org/jira/browse/HDFS-12833) | Distcp : Update the usage of delete option for dependency with update and overwrite option | Minor | distcp, hdfs | Harshakiran Reddy | usharani | | [YARN-7647](https://issues.apache.org/jira/browse/YARN-7647) | NM print inappropriate error log when node-labels is enabled | Minor | . | Yang Wang | Yang Wang | | [YARN-7536](https://issues.apache.org/jira/browse/YARN-7536) | em-table improvement for better filtering in new YARN UI | Minor | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm | @@ -390,7 +399,7 @@ | [HDFS-12930](https://issues.apache.org/jira/browse/HDFS-12930) | Remove the extra space in HdfsImageViewer.md | Trivial | documentation | Yiqun Lin | Rahul Pathak | | [YARN-7662](https://issues.apache.org/jira/browse/YARN-7662) | [Atsv2] Define new set of configurations for reader and collectors to bind. | Major | . | Rohith Sharma K S | Rohith Sharma K S | | [YARN-7466](https://issues.apache.org/jira/browse/YARN-7466) | ResourceRequest has a different default for allocationRequestId than Container | Major | . | Chandni Singh | Chandni Singh | -| [YARN-7674](https://issues.apache.org/jira/browse/YARN-7674) | Update Timeline Reader web app address in UI2 | Major | . | Rohith Sharma K S | Sunil G | +| [YARN-7674](https://issues.apache.org/jira/browse/YARN-7674) | Update Timeline Reader web app address in UI2 | Major | . | Rohith Sharma K S | Sunil Govindan | | [YARN-7577](https://issues.apache.org/jira/browse/YARN-7577) | Unit Fail: TestAMRestart#testPreemptedAMRestartOnRMRestart | Major | . | Miklos Szegedi | Miklos Szegedi | | [HDFS-12949](https://issues.apache.org/jira/browse/HDFS-12949) | Fix findbugs warning in ImageWriter.java | Major | . | Akira Ajisaka | Akira Ajisaka | | [HDFS-12938](https://issues.apache.org/jira/browse/HDFS-12938) | TestErasureCodigCLI testAll failing consistently. | Major | erasure-coding, hdfs | Rushabh S Shah | Ajay Kumar | @@ -405,7 +414,7 @@ | [HADOOP-15155](https://issues.apache.org/jira/browse/HADOOP-15155) | Error in javadoc of ReconfigurableBase#reconfigureProperty | Minor | . 
| Ajay Kumar | Ajay Kumar | | [YARN-7585](https://issues.apache.org/jira/browse/YARN-7585) | NodeManager should go unhealthy when state store throws DBException | Major | nodemanager | Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [YARN-6894](https://issues.apache.org/jira/browse/YARN-6894) | RM Apps API returns only active apps when query parameter queue used | Minor | resourcemanager, restapi | Grant Sohn | Gergely Novák | -| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications | Blocker | resourcemanager | Charan Hebri | Sunil G | +| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications | Blocker | resourcemanager | Charan Hebri | Sunil Govindan | | [MAPREDUCE-7028](https://issues.apache.org/jira/browse/MAPREDUCE-7028) | Concurrent task progress updates causing NPE in Application Master | Blocker | mr-am | Gergo Repas | Gergo Repas | | [YARN-7602](https://issues.apache.org/jira/browse/YARN-7602) | NM should reference the singleton JvmMetrics instance | Major | nodemanager | Haibo Chen | Haibo Chen | | [HADOOP-15093](https://issues.apache.org/jira/browse/HADOOP-15093) | Deprecation of yarn.resourcemanager.zk-address is undocumented | Major | documentation | Namit Maheshwari | Ajay Kumar | @@ -416,13 +425,13 @@ | [HDFS-12860](https://issues.apache.org/jira/browse/HDFS-12860) | StripedBlockUtil#getRangesInternalBlocks throws exception for the block group size larger than 2GB | Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-7619](https://issues.apache.org/jira/browse/YARN-7619) | Max AM Resource value in Capacity Scheduler UI has to be refreshed for every user | Major | capacity scheduler, yarn | Eric Payne | Eric Payne | | [YARN-7645](https://issues.apache.org/jira/browse/YARN-7645) | TestContainerResourceUsage#testUsageAfterAMRestartWithMultipleContainers is flakey with FairScheduler | Major | test | Robert Kanter | Robert Kanter | -| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call | Major | webapp | Sunil G | Sunil G | +| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call | Major | webapp | Sunil Govindan | Sunil Govindan | | [HDFS-12985](https://issues.apache.org/jira/browse/HDFS-12985) | NameNode crashes during restart after an OpenForWrite file present in the Snapshot got deleted | Major | hdfs | Manoj Govindassamy | Manoj Govindassamy | | [YARN-4227](https://issues.apache.org/jira/browse/YARN-4227) | Ignore expired containers from removed nodes in FairScheduler | Critical | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [YARN-7718](https://issues.apache.org/jira/browse/YARN-7718) | DistributedShell failed to specify resource other than memory/vcores from container\_resources | Critical | . 
| Wangda Tan | Wangda Tan | | [YARN-7508](https://issues.apache.org/jira/browse/YARN-7508) | NPE in FiCaSchedulerApp when debug log enabled in async-scheduling mode | Major | capacityscheduler | Tao Yang | Tao Yang | -| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Minor | resourcemanager | lujie | lujie | -| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Minor | yarn | lujie | lujie | +| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl:Invalid event: START at KILLED | Major | resourcemanager | lujie | lujie | +| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING | Major | yarn | lujie | lujie | | [HDFS-12994](https://issues.apache.org/jira/browse/HDFS-12994) | TestReconstructStripedFile.testNNSendsErasureCodingTasks fails due to socket timeout | Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | | [YARN-7665](https://issues.apache.org/jira/browse/YARN-7665) | Allow FS scheduler state dump to be turned on/off separately from FS debug log | Major | . | Wilfred Spiegelenburg | Wilfred Spiegelenburg | | [YARN-7689](https://issues.apache.org/jira/browse/YARN-7689) | TestRMContainerAllocator fails after YARN-6124 | Major | scheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | @@ -446,7 +455,7 @@ | [YARN-7750](https://issues.apache.org/jira/browse/YARN-7750) | [UI2] Render time related fields in all pages to the browser timezone | Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm | | [YARN-7740](https://issues.apache.org/jira/browse/YARN-7740) | Fix logging for destroy yarn service cli when app does not exist and some minor bugs | Major | yarn-native-services | Yesha Vora | Jian He | | [YARN-7139](https://issues.apache.org/jira/browse/YARN-7139) | FairScheduler: finished applications are always restored to default queue | Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | -| [YARN-7753](https://issues.apache.org/jira/browse/YARN-7753) | [UI2] Application logs has to be pulled from ATS 1.5 instead of ATS2 | Major | yarn-ui-v2 | Sunil G | Sunil G | +| [YARN-7753](https://issues.apache.org/jira/browse/YARN-7753) | [UI2] Meta information about Application logs has to be pulled from ATS 1.5 instead of ATS2 | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | | [HADOOP-14788](https://issues.apache.org/jira/browse/HADOOP-14788) | Credentials readTokenStorageFile to stop wrapping IOEs in IOEs | Minor | security | Steve Loughran | Ajay Kumar | | [HDFS-13039](https://issues.apache.org/jira/browse/HDFS-13039) | StripedBlockReader#createBlockReader leaks socket on IOException | Critical | datanode, erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | | [HADOOP-15181](https://issues.apache.org/jira/browse/HADOOP-15181) | Typo in SecureMode.md | Trivial | documentation | Masahiro Tanaka | Masahiro Tanaka | @@ -457,6 +466,7 @@ | [YARN-7749](https://issues.apache.org/jira/browse/YARN-7749) | [UI2] GPU information tab in left hand side disappears when we click other tabs below | Major | . 
| Sumana Sathish | Vasudevan Skm | | [YARN-7806](https://issues.apache.org/jira/browse/YARN-7806) | Distributed Shell should use timeline async api's | Major | distributed-shell | Sumana Sathish | Rohith Sharma K S | | [HDFS-13023](https://issues.apache.org/jira/browse/HDFS-13023) | Journal Sync does not work on a secure cluster | Major | journal-node | Namit Maheshwari | Bharat Viswanadham | +| [HADOOP-15121](https://issues.apache.org/jira/browse/HADOOP-15121) | Encounter NullPointerException when using DecayRpcScheduler | Major | . | Tao Jie | Tao Jie | | [MAPREDUCE-7015](https://issues.apache.org/jira/browse/MAPREDUCE-7015) | Possible race condition in JHS if the job is not loaded | Major | jobhistoryserver | Peter Bacsko | Peter Bacsko | | [YARN-7737](https://issues.apache.org/jira/browse/YARN-7737) | prelaunch.err file not found exception on container failure | Major | . | Jonathan Hung | Keqiu Hu | | [YARN-7777](https://issues.apache.org/jira/browse/YARN-7777) | Fix user name format in YARN Registry DNS name | Major | . | Jian He | Jian He | @@ -472,10 +482,9 @@ | [HDFS-12974](https://issues.apache.org/jira/browse/HDFS-12974) | Exception message is not printed when creating an encryption zone fails with AuthorizationException | Minor | encryption | fang zhenyi | fang zhenyi | | [YARN-7698](https://issues.apache.org/jira/browse/YARN-7698) | A misleading variable's name in ApplicationAttemptEventDispatcher | Minor | resourcemanager | Jinjiang Ling | Jinjiang Ling | | [YARN-7790](https://issues.apache.org/jira/browse/YARN-7790) | Improve Capacity Scheduler Async Scheduling to better handle node failures | Critical | . | Sumana Sathish | Wangda Tan | -| [MAPREDUCE-7036](https://issues.apache.org/jira/browse/MAPREDUCE-7036) | ASF License warning in hadoop-mapreduce-client | Minor | test | Takanobu Asanuma | Takanobu Asanuma | | [HDFS-12528](https://issues.apache.org/jira/browse/HDFS-12528) | Add an option to not disable short-circuit reads on failures | Major | hdfs-client, performance | Andre Araujo | Xiao Chen | -| [YARN-7861](https://issues.apache.org/jira/browse/YARN-7861) | [UI2] Logs page shows duplicated containers with ATS | Major | yarn-ui-v2 | Sunil G | Sunil G | -| [YARN-7828](https://issues.apache.org/jira/browse/YARN-7828) | Clicking on yarn service should take to component tab | Major | yarn-ui-v2 | Yesha Vora | Sunil G | +| [YARN-7861](https://issues.apache.org/jira/browse/YARN-7861) | [UI2] Logs page shows duplicated containers with ATS | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-7828](https://issues.apache.org/jira/browse/YARN-7828) | Clicking on yarn service should take to component tab | Major | yarn-ui-v2 | Yesha Vora | Sunil Govindan | | [HDFS-13061](https://issues.apache.org/jira/browse/HDFS-13061) | SaslDataTransferClient#checkTrustAndSend should not trust a partially trusted channel | Major | . | Xiaoyu Yao | Ajay Kumar | | [HDFS-13060](https://issues.apache.org/jira/browse/HDFS-13060) | Adding a BlacklistBasedTrustedChannelResolver for TrustedChannelResolver | Major | datanode, security | Xiaoyu Yao | Ajay Kumar | | [HDFS-12897](https://issues.apache.org/jira/browse/HDFS-12897) | getErasureCodingPolicy should handle .snapshot dir better | Major | erasure-coding, hdfs, snapshots | Harshakiran Reddy | LiXin Ge | @@ -490,17 +499,17 @@ | [YARN-7801](https://issues.apache.org/jira/browse/YARN-7801) | AmFilterInitializer should addFilter after fill all parameters | Critical | . 
| Sumana Sathish | Wangda Tan | | [YARN-7889](https://issues.apache.org/jira/browse/YARN-7889) | Missing kerberos token when check for RM REST API availability | Major | yarn-native-services | Eric Yang | Eric Yang | | [YARN-7850](https://issues.apache.org/jira/browse/YARN-7850) | [UI2] Log Aggregation status to be displayed in Application Page | Major | yarn-ui-v2 | Yesha Vora | Gergely Novák | -| [YARN-7866](https://issues.apache.org/jira/browse/YARN-7866) | [UI2] Error to be displayed correctly while accessing kerberized cluster without kinit | Major | yarn-ui-v2 | Sumana Sathish | Sunil G | +| [YARN-7866](https://issues.apache.org/jira/browse/YARN-7866) | [UI2] Error to be displayed correctly while accessing kerberized cluster without kinit | Major | yarn-ui-v2 | Sumana Sathish | Sunil Govindan | | [YARN-7890](https://issues.apache.org/jira/browse/YARN-7890) | NPE during container relaunch | Major | . | Billie Rinaldi | Jason Lowe | | [HDFS-11701](https://issues.apache.org/jira/browse/HDFS-11701) | NPE from Unresolved Host causes permanent DFSInputStream failures | Major | hdfs-client | James Moore | Lokesh Jain | | [HDFS-13115](https://issues.apache.org/jira/browse/HDFS-13115) | In getNumUnderConstructionBlocks(), ignore the inodeIds for which the inodes have been deleted | Major | . | Yongjun Zhang | Yongjun Zhang | | [HDFS-12935](https://issues.apache.org/jira/browse/HDFS-12935) | Get ambiguous result for DFSAdmin command in HA mode when only one namenode is up | Major | tools | Jianfei Jiang | Jianfei Jiang | -| [YARN-7827](https://issues.apache.org/jira/browse/YARN-7827) | Stop and Delete Yarn Service from RM UI fails with HTTP ERROR 404 | Critical | yarn-ui-v2 | Yesha Vora | Sunil G | +| [YARN-7827](https://issues.apache.org/jira/browse/YARN-7827) | Stop and Delete Yarn Service from RM UI fails with HTTP ERROR 404 | Critical | yarn-ui-v2 | Yesha Vora | Sunil Govindan | | [HDFS-13120](https://issues.apache.org/jira/browse/HDFS-13120) | Snapshot diff could be corrupted after concat | Major | namenode, snapshots | Xiaoyu Yao | Xiaoyu Yao | | [YARN-7909](https://issues.apache.org/jira/browse/YARN-7909) | YARN service REST API returns charset=null when kerberos enabled | Major | yarn-native-services | Eric Yang | Eric Yang | | [HDFS-13130](https://issues.apache.org/jira/browse/HDFS-13130) | Log object instance get incorrectly in SlowDiskTracker | Minor | . | Jianfei Jiang | Jianfei Jiang | | [YARN-7906](https://issues.apache.org/jira/browse/YARN-7906) | Fix mvn site fails with error: Multiple sources of package comments found for package "o.a.h.y.client.api.impl" | Blocker | build, documentation | Akira Ajisaka | Akira Ajisaka | -| [YARN-5848](https://issues.apache.org/jira/browse/YARN-5848) | Remove unnecessary public/crossdomain.xml from YARN UIv2 sub project | Blocker | yarn-ui-v2 | Allen Wittenauer | Sunil G | +| [YARN-5848](https://issues.apache.org/jira/browse/YARN-5848) | Remove unnecessary public/crossdomain.xml from YARN UIv2 sub project | Blocker | yarn-ui-v2 | Allen Wittenauer | Sunil Govindan | | [YARN-7697](https://issues.apache.org/jira/browse/YARN-7697) | NM goes down with OOM due to leak in log-aggregation | Blocker | . | Santhosh B Gowda | Xuan Gong | | [YARN-7739](https://issues.apache.org/jira/browse/YARN-7739) | DefaultAMSProcessor should properly check customized resource types against minimum/maximum allocation | Blocker | . 
| Wangda Tan | Wangda Tan | | [HDFS-10453](https://issues.apache.org/jira/browse/HDFS-10453) | ReplicationMonitor thread could stuck for long time due to the race between replication and delete of same file in a large cluster. | Major | namenode | He Xiaoqiao | He Xiaoqiao | @@ -571,7 +580,6 @@ | [HDFS-12156](https://issues.apache.org/jira/browse/HDFS-12156) | TestFSImage fails without -Pnative | Major | test | Akira Ajisaka | Akira Ajisaka | | [HDFS-13271](https://issues.apache.org/jira/browse/HDFS-13271) | WebHDFS: Add constructor in SnapshottableDirectoryStatus with HdfsFileStatus as argument | Major | webhdfs | Lokesh Jain | Lokesh Jain | | [HDFS-13239](https://issues.apache.org/jira/browse/HDFS-13239) | Fix non-empty dir warning message when setting default EC policy | Minor | . | Hanisha Koneru | Bharat Viswanadham | -| [HADOOP-15308](https://issues.apache.org/jira/browse/HADOOP-15308) | TestConfiguration fails on Windows because of paths | Major | . | Íñigo Goiri | Xiao Liang | | [YARN-8022](https://issues.apache.org/jira/browse/YARN-8022) | ResourceManager UI cluster/app/\ page fails to render | Blocker | webapp | Tarun Parimi | Tarun Parimi | | [HDFS-13249](https://issues.apache.org/jira/browse/HDFS-13249) | Document webhdfs support for getting snapshottable directory list | Major | documentation, webhdfs | Lokesh Jain | Lokesh Jain | | [MAPREDUCE-7064](https://issues.apache.org/jira/browse/MAPREDUCE-7064) | Flaky test TestTaskAttempt#testReducerCustomResourceTypes | Major | client, test | Peter Bacsko | Peter Bacsko | @@ -592,14 +600,20 @@ | [HDFS-13195](https://issues.apache.org/jira/browse/HDFS-13195) | DataNode conf page cannot display the current value after reconfig | Minor | datanode | maobaolong | maobaolong | | [HADOOP-14067](https://issues.apache.org/jira/browse/HADOOP-14067) | VersionInfo should load version-info.properties from its own classloader | Major | common | Thejas M Nair | Thejas M Nair | | [YARN-8063](https://issues.apache.org/jira/browse/YARN-8063) | DistributedShellTimelinePlugin wrongly check for entityId instead of entityType | Major | . | Rohith Sharma K S | Rohith Sharma K S | -| [YARN-8062](https://issues.apache.org/jira/browse/YARN-8062) | yarn rmadmin -getGroups returns group from which the user has been removed | Critical | . | Sumana Sathish | Sunil G | -| [YARN-8068](https://issues.apache.org/jira/browse/YARN-8068) | Application Priority field causes NPE in app timeline publish when Hadoop 2.7 based clients to 2.8+ | Blocker | yarn | Sunil G | Sunil G | -| [YARN-7794](https://issues.apache.org/jira/browse/YARN-7794) | SLSRunner is not loading timeline service jars causing failure | Blocker | scheduler-load-simulator | Sunil G | Yufei Gu | +| [YARN-8062](https://issues.apache.org/jira/browse/YARN-8062) | yarn rmadmin -getGroups returns group from which the user has been removed | Critical | . | Sumana Sathish | Sunil Govindan | +| [YARN-8068](https://issues.apache.org/jira/browse/YARN-8068) | Application Priority field causes NPE in app timeline publish when Hadoop 2.7 based clients to 2.8+ | Blocker | yarn | Sunil Govindan | Sunil Govindan | | [YARN-8075](https://issues.apache.org/jira/browse/YARN-8075) | DShell does not fail when we ask more GPUs than available even though AM throws 'InvalidResourceRequestException' | Major | . | Sumana Sathish | Wangda Tan | -| [YARN-6629](https://issues.apache.org/jira/browse/YARN-6629) | NPE occurred when container allocation proposal is applied but its resource requests are removed before | Critical | . 
| Tao Yang | Tao Yang | | [HADOOP-15320](https://issues.apache.org/jira/browse/HADOOP-15320) | Remove customized getFileBlockLocations for hadoop-azure and hadoop-azure-datalake | Major | fs/adl, fs/azure | shanyu zhao | shanyu zhao | | [YARN-8085](https://issues.apache.org/jira/browse/YARN-8085) | ResourceProfilesManager should be set in RMActiveServiceContext | Blocker | capacityscheduler | Tao Yang | Tao Yang | | [YARN-8086](https://issues.apache.org/jira/browse/YARN-8086) | ManagedParentQueue with no leaf queues cause JS error in new UI | Blocker | . | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-6629](https://issues.apache.org/jira/browse/YARN-6629) | NPE occurred when container allocation proposal is applied but its resource requests are removed before | Critical | . | Tao Yang | Tao Yang | +| [MAPREDUCE-7036](https://issues.apache.org/jira/browse/MAPREDUCE-7036) | ASF License warning in hadoop-mapreduce-client | Minor | test | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-7527](https://issues.apache.org/jira/browse/YARN-7527) | Over-allocate node resource in async-scheduling mode of CapacityScheduler | Major | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-7598](https://issues.apache.org/jira/browse/YARN-7598) | Document how to use classpath isolation for aux-services in YARN | Major | . | Xuan Gong | Xuan Gong | +| [HDFS-13136](https://issues.apache.org/jira/browse/HDFS-13136) | Avoid taking FSN lock while doing group member lookup for FSD permission check | Major | namenode | Xiaoyu Yao | Xiaoyu Yao | +| [HADOOP-15450](https://issues.apache.org/jira/browse/HADOOP-15450) | Avoid fsync storm triggered by DiskChecker and handle disk full situation | Blocker | . | Kihwal Lee | Arpit Agarwal | +| [YARN-8346](https://issues.apache.org/jira/browse/YARN-8346) | Upgrading to 3.1 kills running containers with error "Opportunistic container queue is full" | Blocker | . | Rohith Sharma K S | Jason Lowe | +| [HADOOP-9747](https://issues.apache.org/jira/browse/HADOOP-9747) | Reduce unnecessary UGI synchronization | Critical | security | Daryn Sharp | Daryn Sharp | ### TESTS: @@ -611,6 +625,7 @@ | [HADOOP-15117](https://issues.apache.org/jira/browse/HADOOP-15117) | open(PathHandle) contract test should be exhaustive for default options | Major | . | Chris Douglas | Chris Douglas | | [HDFS-13106](https://issues.apache.org/jira/browse/HDFS-13106) | Need to exercise all HDFS APIs for EC | Major | hdfs | Haibo Yan | Haibo Yan | | [HDFS-13107](https://issues.apache.org/jira/browse/HDFS-13107) | Add Mover Cli Unit Tests for Federated cluster | Major | balancer & mover, test | Bharat Viswanadham | Bharat Viswanadham | +| [HDFS-13550](https://issues.apache.org/jira/browse/HDFS-13550) | TestDebugAdmin#testComputeMetaCommand fails on Windows | Minor | . | Anbang Hu | Anbang Hu | ### SUB-TASKS: @@ -649,30 +664,30 @@ | [YARN-5947](https://issues.apache.org/jira/browse/YARN-5947) | Create LeveldbConfigurationStore class using Leveldb as backing store | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-6322](https://issues.apache.org/jira/browse/YARN-6322) | Disable queue refresh when configuration mutation is enabled | Major | . | Jonathan Hung | Jonathan Hung | | [YARN-6593](https://issues.apache.org/jira/browse/YARN-6593) | [API] Introduce Placement Constraint object | Major | . 
| Konstantinos Karanasos | Konstantinos Karanasos | -| [YARN-6788](https://issues.apache.org/jira/browse/YARN-6788) | Improve performance of resource profile branch | Blocker | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-6788](https://issues.apache.org/jira/browse/YARN-6788) | Improve performance of resource profile branch | Blocker | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [HDFS-12091](https://issues.apache.org/jira/browse/HDFS-12091) | [READ] Check that the replicas served from a {{ProvidedVolumeImpl}} belong to the correct external storage | Major | . | Virajith Jalaparti | Virajith Jalaparti | | [HDFS-12093](https://issues.apache.org/jira/browse/HDFS-12093) | [READ] Share remoteFS between ProvidedReplica instances. | Major | . | Ewan Higgs | Virajith Jalaparti | -| [YARN-6471](https://issues.apache.org/jira/browse/YARN-6471) | Support to add min/max resource configuration for a queue | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-6471](https://issues.apache.org/jira/browse/YARN-6471) | Support to add min/max resource configuration for a queue | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-6935](https://issues.apache.org/jira/browse/YARN-6935) | ResourceProfilesManagerImpl.parseResource() has no need of the key parameter | Major | resourcemanager | Daniel Templeton | Manikandan R | | [HDFS-12289](https://issues.apache.org/jira/browse/HDFS-12289) | [READ] HDFS-12091 breaks the tests for provided block reads | Major | . | Virajith Jalaparti | Virajith Jalaparti | | [YARN-6994](https://issues.apache.org/jira/browse/YARN-6994) | Remove last uses of Long from resource types code | Minor | resourcemanager | Daniel Templeton | Daniel Templeton | -| [YARN-6892](https://issues.apache.org/jira/browse/YARN-6892) | Improve API implementation in Resources and DominantResourceCalculator class | Major | nodemanager, resourcemanager | Sunil G | Sunil G | -| [YARN-6908](https://issues.apache.org/jira/browse/YARN-6908) | ResourceProfilesManagerImpl is missing @Overrides on methods | Minor | resourcemanager | Daniel Templeton | Sunil G | +| [YARN-6892](https://issues.apache.org/jira/browse/YARN-6892) | Improve API implementation in Resources and DominantResourceCalculator class | Major | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | +| [YARN-6908](https://issues.apache.org/jira/browse/YARN-6908) | ResourceProfilesManagerImpl is missing @Overrides on methods | Minor | resourcemanager | Daniel Templeton | Sunil Govindan | | [YARN-6610](https://issues.apache.org/jira/browse/YARN-6610) | DominantResourceCalculator#getResourceAsValue dominant param is updated to handle multiple resources | Critical | resourcemanager | Daniel Templeton | Daniel Templeton | | [YARN-7030](https://issues.apache.org/jira/browse/YARN-7030) | Performance optimizations in Resource and ResourceUtils class | Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan | | [YARN-7042](https://issues.apache.org/jira/browse/YARN-7042) | Clean up unit tests after YARN-6610 | Major | test | Daniel Templeton | Daniel Templeton | -| [YARN-6789](https://issues.apache.org/jira/browse/YARN-6789) | Add Client API to get all supported resource types from RM | Major | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-6789](https://issues.apache.org/jira/browse/YARN-6789) | Add Client API to get all supported resource types from RM | Major | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | 
[YARN-6781](https://issues.apache.org/jira/browse/YARN-6781) | ResourceUtils#initializeResourcesMap takes an unnecessary Map parameter | Minor | resourcemanager | Daniel Templeton | Yu-Tang Lin | | [YARN-7043](https://issues.apache.org/jira/browse/YARN-7043) | Cleanup ResourceProfileManager | Critical | . | Wangda Tan | Wangda Tan | | [YARN-7067](https://issues.apache.org/jira/browse/YARN-7067) | Optimize ResourceType information display in UI | Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan | -| [YARN-7039](https://issues.apache.org/jira/browse/YARN-7039) | Fix javac and javadoc errors in YARN-3926 branch | Major | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-7039](https://issues.apache.org/jira/browse/YARN-7039) | Fix javac and javadoc errors in YARN-3926 branch | Major | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-7024](https://issues.apache.org/jira/browse/YARN-7024) | Fix issues on recovery in LevelDB store | Major | . | Jonathan Hung | Jonathan Hung | -| [YARN-7093](https://issues.apache.org/jira/browse/YARN-7093) | Improve log message in ResourceUtils | Trivial | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-7093](https://issues.apache.org/jira/browse/YARN-7093) | Improve log message in ResourceUtils | Trivial | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-7075](https://issues.apache.org/jira/browse/YARN-7075) | Better styling for donut charts in new YARN UI | Major | . | Da Ding | Da Ding | | [HADOOP-14103](https://issues.apache.org/jira/browse/HADOOP-14103) | Sort out hadoop-aws contract-test-options.xml | Minor | fs/s3, test | Steve Loughran | John Zhuge | | [YARN-6933](https://issues.apache.org/jira/browse/YARN-6933) | ResourceUtils.DISALLOWED\_NAMES check is duplicated | Major | resourcemanager | Daniel Templeton | Manikandan R | | [YARN-5328](https://issues.apache.org/jira/browse/YARN-5328) | Plan/ResourceAllocation data structure enhancements required to support recurring reservations in ReservationSystem | Major | resourcemanager | Subru Krishnan | Subru Krishnan | -| [YARN-7056](https://issues.apache.org/jira/browse/YARN-7056) | Document Resource Profiles feature | Major | nodemanager, resourcemanager | Sunil G | Sunil G | +| [YARN-7056](https://issues.apache.org/jira/browse/YARN-7056) | Document Resource Profiles feature | Major | nodemanager, resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-7144](https://issues.apache.org/jira/browse/YARN-7144) | Log Aggregation controller should not swallow the exceptions when it calls closeWriter and closeReader. | Major | . | Xuan Gong | Xuan Gong | | [YARN-7104](https://issues.apache.org/jira/browse/YARN-7104) | Improve Nodes Heatmap in new YARN UI with better color coding | Major | . | Da Ding | Da Ding | | [YARN-6600](https://issues.apache.org/jira/browse/YARN-6600) | Introduce default and max lifetime of application at LeafQueue level | Major | capacity scheduler | Rohith Sharma K S | Rohith Sharma K S | @@ -710,21 +725,21 @@ | [YARN-7296](https://issues.apache.org/jira/browse/YARN-7296) | convertToProtoFormat(Resource r) is not setting for all resource types | Major | . 
| lovekesh bansal | lovekesh bansal | | [HADOOP-14913](https://issues.apache.org/jira/browse/HADOOP-14913) | Sticky bit implementation for rename() operation in Azure WASB | Major | fs, fs/azure | Varada Hemeswari | Varada Hemeswari | | [YARN-6620](https://issues.apache.org/jira/browse/YARN-6620) | Add support in NodeManager to isolate GPU devices by using CGroups | Major | . | Wangda Tan | Wangda Tan | -| [YARN-7205](https://issues.apache.org/jira/browse/YARN-7205) | Log improvements for the ResourceUtils | Major | nodemanager, resourcemanager | Jian He | Sunil G | -| [YARN-7180](https://issues.apache.org/jira/browse/YARN-7180) | Remove class ResourceType | Major | resourcemanager, scheduler | Yufei Gu | Sunil G | +| [YARN-7205](https://issues.apache.org/jira/browse/YARN-7205) | Log improvements for the ResourceUtils | Major | nodemanager, resourcemanager | Jian He | Sunil Govindan | +| [YARN-7180](https://issues.apache.org/jira/browse/YARN-7180) | Remove class ResourceType | Major | resourcemanager, scheduler | Yufei Gu | Sunil Govindan | | [HADOOP-14935](https://issues.apache.org/jira/browse/HADOOP-14935) | Azure: POSIX permissions are taking effect in access() method even when authorization is enabled | Major | fs/azure | Santhosh G Nayak | Santhosh G Nayak | -| [YARN-7254](https://issues.apache.org/jira/browse/YARN-7254) | UI and metrics changes related to absolute resource configuration | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-7254](https://issues.apache.org/jira/browse/YARN-7254) | UI and metrics changes related to absolute resource configuration | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-7311](https://issues.apache.org/jira/browse/YARN-7311) | Fix TestRMWebServicesReservation parametrization for fair scheduler | Blocker | fairscheduler, reservation system | Yufei Gu | Yufei Gu | | [HDFS-12605](https://issues.apache.org/jira/browse/HDFS-12605) | [READ] TestNameNodeProvidedImplementation#testProvidedDatanodeFailures fails after rebase | Major | . | Virajith Jalaparti | Virajith Jalaparti | | [YARN-7345](https://issues.apache.org/jira/browse/YARN-7345) | GPU Isolation: Incorrect minor device numbers written to devices.deny file | Major | . | Jonathan Hung | Jonathan Hung | -| [YARN-7338](https://issues.apache.org/jira/browse/YARN-7338) | Support same origin policy for cross site scripting prevention. | Major | yarn-ui-v2 | Vrushali C | Sunil G | +| [YARN-7338](https://issues.apache.org/jira/browse/YARN-7338) | Support same origin policy for cross site scripting prevention. 
| Major | yarn-ui-v2 | Vrushali C | Sunil Govindan | | [YARN-4090](https://issues.apache.org/jira/browse/YARN-4090) | Make Collections.sort() more efficient by caching resource usage | Major | fairscheduler | Xianyin Xin | Yufei Gu | -| [YARN-6984](https://issues.apache.org/jira/browse/YARN-6984) | DominantResourceCalculator.isAnyMajorResourceZero() should test all resources | Major | scheduler | Daniel Templeton | Sunil G | +| [YARN-6984](https://issues.apache.org/jira/browse/YARN-6984) | DominantResourceCalculator.isAnyMajorResourceZero() should test all resources | Major | scheduler | Daniel Templeton | Sunil Govindan | | [YARN-4827](https://issues.apache.org/jira/browse/YARN-4827) | Document configuration of ReservationSystem for FairScheduler | Blocker | capacity scheduler | Subru Krishnan | Yufei Gu | | [YARN-5516](https://issues.apache.org/jira/browse/YARN-5516) | Add REST API for supporting recurring reservations | Major | resourcemanager | Sangeetha Abdu Jyothi | Sean Po | | [MAPREDUCE-6977](https://issues.apache.org/jira/browse/MAPREDUCE-6977) | Moving logging APIs over to slf4j in hadoop-mapreduce-client-common | Major | client | Jinjiang Ling | Jinjiang Ling | | [YARN-6505](https://issues.apache.org/jira/browse/YARN-6505) | Define the strings used in SLS JSON input file format | Major | scheduler-load-simulator | Yufei Gu | Gergely Novák | -| [YARN-7332](https://issues.apache.org/jira/browse/YARN-7332) | Compute effectiveCapacity per each resource vector | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-7332](https://issues.apache.org/jira/browse/YARN-7332) | Compute effectiveCapacity per each resource vector | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-7224](https://issues.apache.org/jira/browse/YARN-7224) | Support GPU isolation for docker container | Major | . | Wangda Tan | Wangda Tan | | [YARN-7374](https://issues.apache.org/jira/browse/YARN-7374) | Improve performance of DRF comparisons for resource types in fair scheduler | Critical | fairscheduler | Daniel Templeton | Daniel Templeton | | [YARN-6927](https://issues.apache.org/jira/browse/YARN-6927) | Add support for individual resource types requests in MapReduce | Major | resourcemanager | Daniel Templeton | Gergo Repas | @@ -733,7 +748,7 @@ | [YARN-7289](https://issues.apache.org/jira/browse/YARN-7289) | Application lifetime does not work with FairScheduler | Major | resourcemanager | Miklos Szegedi | Miklos Szegedi | | [YARN-7392](https://issues.apache.org/jira/browse/YARN-7392) | Render cluster information on new YARN web ui | Major | webapp | Vasudevan Skm | Vasudevan Skm | | [HDFS-11902](https://issues.apache.org/jira/browse/HDFS-11902) | [READ] Merge BlockFormatProvider and FileRegionProvider. | Major | . | Virajith Jalaparti | Virajith Jalaparti | -| [YARN-7307](https://issues.apache.org/jira/browse/YARN-7307) | Allow client/AM update supported resource types via YARN APIs | Blocker | nodemanager, resourcemanager | Wangda Tan | Sunil G | +| [YARN-7307](https://issues.apache.org/jira/browse/YARN-7307) | Allow client/AM update supported resource types via YARN APIs | Blocker | nodemanager, resourcemanager | Wangda Tan | Sunil Govindan | | [HDFS-12607](https://issues.apache.org/jira/browse/HDFS-12607) | [READ] Even one dead datanode with PROVIDED storage results in ProvidedStorageInfo being marked as FAILED | Major | . 
| Virajith Jalaparti | Virajith Jalaparti | | [YARN-7394](https://issues.apache.org/jira/browse/YARN-7394) | Merge code paths for Reservation/Plan queues and Auto Created queues | Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad | | [HDFS-12671](https://issues.apache.org/jira/browse/HDFS-12671) | [READ] Test NameNode restarts when PROVIDED is configured | Major | . | Virajith Jalaparti | Virajith Jalaparti | @@ -747,38 +762,38 @@ | [YARN-7442](https://issues.apache.org/jira/browse/YARN-7442) | [YARN-7069] Limit format of resource type name | Blocker | nodemanager, resourcemanager | Wangda Tan | Wangda Tan | | [YARN-7369](https://issues.apache.org/jira/browse/YARN-7369) | Improve the resource types docs | Major | docs | Daniel Templeton | Daniel Templeton | | [YARN-6595](https://issues.apache.org/jira/browse/YARN-6595) | [API] Add Placement Constraints at the application level | Major | . | Konstantinos Karanasos | Arun Suresh | -| [YARN-7411](https://issues.apache.org/jira/browse/YARN-7411) | Inter-Queue preemption's computeFixpointAllocation need to handle absolute resources while computing normalizedGuarantee | Major | resourcemanager | Sunil G | Sunil G | +| [YARN-7411](https://issues.apache.org/jira/browse/YARN-7411) | Inter-Queue preemption's computeFixpointAllocation need to handle absolute resources while computing normalizedGuarantee | Major | resourcemanager | Sunil Govindan | Sunil Govindan | | [YARN-7488](https://issues.apache.org/jira/browse/YARN-7488) | Make ServiceClient.getAppId method public to return ApplicationId for a service name | Major | . | Gour Saha | Gour Saha | | [HADOOP-14993](https://issues.apache.org/jira/browse/HADOOP-14993) | AliyunOSS: Override listFiles and listLocatedStatus | Major | fs/oss | Genmao Yu | Genmao Yu | | [YARN-6953](https://issues.apache.org/jira/browse/YARN-6953) | Clean up ResourceUtils.setMinimumAllocationForMandatoryResources() and setMaximumAllocationForMandatoryResources() | Minor | resourcemanager | Daniel Templeton | Manikandan R | | [HDFS-12775](https://issues.apache.org/jira/browse/HDFS-12775) | [READ] Fix reporting of Provided volumes | Major | . | Virajith Jalaparti | Virajith Jalaparti | -| [YARN-7482](https://issues.apache.org/jira/browse/YARN-7482) | Max applications calculation per queue has to be retrospected with absolute resource support | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-7482](https://issues.apache.org/jira/browse/YARN-7482) | Max applications calculation per queue has to be retrospected with absolute resource support | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-7486](https://issues.apache.org/jira/browse/YARN-7486) | Race condition in service AM that can cause NPE | Major | . | Jian He | Jian He | | [YARN-7503](https://issues.apache.org/jira/browse/YARN-7503) | Configurable heap size / JVM opts in service AM | Major | . 
| Jonathan Hung | Jonathan Hung | | [YARN-7419](https://issues.apache.org/jira/browse/YARN-7419) | CapacityScheduler: Allow auto leaf queue creation after queue mapping | Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad | -| [YARN-7483](https://issues.apache.org/jira/browse/YARN-7483) | CapacityScheduler test cases cleanup post YARN-5881 | Major | test | Sunil G | Sunil G | +| [YARN-7483](https://issues.apache.org/jira/browse/YARN-7483) | CapacityScheduler test cases cleanup post YARN-5881 | Major | test | Sunil Govindan | Sunil Govindan | | [HDFS-12801](https://issues.apache.org/jira/browse/HDFS-12801) | RBF: Set MountTableResolver as default file resolver | Minor | . | Íñigo Goiri | Íñigo Goiri | | [YARN-7430](https://issues.apache.org/jira/browse/YARN-7430) | Enable user re-mapping for Docker containers by default | Blocker | security, yarn | Eric Yang | Eric Yang | | [YARN-7218](https://issues.apache.org/jira/browse/YARN-7218) | ApiServer REST API naming convention /ws/v1 is already used in Hadoop v2 | Major | api, applications | Eric Yang | Eric Yang | | [YARN-7448](https://issues.apache.org/jira/browse/YARN-7448) | [API] Add SchedulingRequest to the AllocateRequest | Major | . | Arun Suresh | Panagiotis Garefalakis | | [YARN-7529](https://issues.apache.org/jira/browse/YARN-7529) | TestYarnNativeServices#testRecoverComponentsAfterRMRestart() fails intermittently | Major | . | Chandni Singh | Chandni Singh | | [YARN-6128](https://issues.apache.org/jira/browse/YARN-6128) | Add support for AMRMProxy HA | Major | amrmproxy, nodemanager | Subru Krishnan | Botong Huang | -| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | SammiChen | SammiChen | +| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include that & Hadoop version information to oss server | Major | fs, fs/oss | Sammi Chen | Sammi Chen | | [HDFS-12778](https://issues.apache.org/jira/browse/HDFS-12778) | [READ] Report multiple locations for PROVIDED blocks | Major | . | Virajith Jalaparti | Virajith Jalaparti | | [YARN-5534](https://issues.apache.org/jira/browse/YARN-5534) | Allow user provided Docker volume mount list | Major | yarn | luhuichun | Shane Kumpf | | [YARN-7330](https://issues.apache.org/jira/browse/YARN-7330) | Add support to show GPU in UI including metrics | Blocker | . 
| Wangda Tan | Wangda Tan | -| [YARN-7538](https://issues.apache.org/jira/browse/YARN-7538) | Fix performance regression introduced by Capacity Scheduler absolute min/max resource refactoring | Major | capacity scheduler | Sunil G | Sunil G | -| [YARN-7544](https://issues.apache.org/jira/browse/YARN-7544) | Use queue-path.capacity/maximum-capacity to specify CapacityScheduler absolute min/max resources | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-7538](https://issues.apache.org/jira/browse/YARN-7538) | Fix performance regression introduced by Capacity Scheduler absolute min/max resource refactoring | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | +| [YARN-7544](https://issues.apache.org/jira/browse/YARN-7544) | Use queue-path.capacity/maximum-capacity to specify CapacityScheduler absolute min/max resources | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [YARN-6168](https://issues.apache.org/jira/browse/YARN-6168) | Restarted RM may not inform AM about all existing containers | Major | . | Billie Rinaldi | Chandni Singh | | [HDFS-12809](https://issues.apache.org/jira/browse/HDFS-12809) | [READ] Fix the randomized selection of locations in {{ProvidedBlocksBuilder}}. | Major | . | Virajith Jalaparti | Virajith Jalaparti | | [HDFS-12858](https://issues.apache.org/jira/browse/HDFS-12858) | RBF: Add router admin commands usage in HDFS commands reference doc | Minor | documentation | Yiqun Lin | Yiqun Lin | -| [YARN-7564](https://issues.apache.org/jira/browse/YARN-7564) | Cleanup to fix checkstyle issues of YARN-5881 branch | Minor | . | Sunil G | Sunil G | +| [YARN-7564](https://issues.apache.org/jira/browse/YARN-7564) | Cleanup to fix checkstyle issues of YARN-5881 branch | Minor | . | Sunil Govindan | Sunil Govindan | | [YARN-7480](https://issues.apache.org/jira/browse/YARN-7480) | Render tooltips on columns where text is clipped in new YARN UI | Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm | -| [YARN-7575](https://issues.apache.org/jira/browse/YARN-7575) | NPE in scheduler UI when max-capacity is not configured | Major | capacity scheduler | Eric Payne | Sunil G | -| [YARN-7533](https://issues.apache.org/jira/browse/YARN-7533) | Documentation for absolute resource support in Capacity Scheduler | Major | capacity scheduler | Sunil G | Sunil G | +| [YARN-7575](https://issues.apache.org/jira/browse/YARN-7575) | NPE in scheduler UI when max-capacity is not configured | Major | capacity scheduler | Eric Payne | Sunil Govindan | +| [YARN-7533](https://issues.apache.org/jira/browse/YARN-7533) | Documentation for absolute resource support in Capacity Scheduler | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | | [HDFS-12835](https://issues.apache.org/jira/browse/HDFS-12835) | RBF: Fix Javadoc parameter errors | Minor | . 
| Wei Yan | Wei Yan | | [YARN-7541](https://issues.apache.org/jira/browse/YARN-7541) | Node updates don't update the maximum cluster capability for resources other than CPU and memory | Critical | resourcemanager | Daniel Templeton | Daniel Templeton | -| [YARN-7573](https://issues.apache.org/jira/browse/YARN-7573) | Gpu Information page could be empty for nodes without GPU | Major | webapp, yarn-ui-v2 | Sunil G | Sunil G | +| [YARN-7573](https://issues.apache.org/jira/browse/YARN-7573) | Gpu Information page could be empty for nodes without GPU | Major | webapp, yarn-ui-v2 | Sunil Govindan | Sunil Govindan | | [HDFS-12685](https://issues.apache.org/jira/browse/HDFS-12685) | [READ] FsVolumeImpl exception when scanning Provided storage volume | Major | . | Ewan Higgs | Virajith Jalaparti | | [HDFS-12665](https://issues.apache.org/jira/browse/HDFS-12665) | [AliasMap] Create a version of the AliasMap that runs in memory in the Namenode (leveldb) | Major | . | Ewan Higgs | Ewan Higgs | | [YARN-7487](https://issues.apache.org/jira/browse/YARN-7487) | Ensure volume to include GPU base libraries after created by plugin | Major | . | Wangda Tan | Wangda Tan | @@ -841,7 +856,7 @@ | [YARN-7682](https://issues.apache.org/jira/browse/YARN-7682) | Expose canSatisfyConstraints utility function to validate a placement against a constraint | Major | . | Arun Suresh | Panagiotis Garefalakis | | [HDFS-12988](https://issues.apache.org/jira/browse/HDFS-12988) | RBF: Mount table entries not properly updated in the local cache | Major | . | Íñigo Goiri | Íñigo Goiri | | [YARN-7557](https://issues.apache.org/jira/browse/YARN-7557) | It should be possible to specify resource types in the fair scheduler increment value | Critical | fairscheduler | Daniel Templeton | Gergo Repas | -| [YARN-7666](https://issues.apache.org/jira/browse/YARN-7666) | Introduce scheduler specific environment variable support in ApplicationSubmissionContext for better scheduling placement configurations | Major | . | Sunil G | Sunil G | +| [YARN-7666](https://issues.apache.org/jira/browse/YARN-7666) | Introduce scheduler specific environment variable support in ApplicationSubmissionContext for better scheduling placement configurations | Major | . | Sunil Govindan | Sunil Govindan | | [YARN-7242](https://issues.apache.org/jira/browse/YARN-7242) | Support to specify values of different resource types in DistributedShell for easier testing | Critical | nodemanager, resourcemanager | Wangda Tan | Gergely Novák | | [YARN-7704](https://issues.apache.org/jira/browse/YARN-7704) | Document improvement for registry dns | Major | . | Jian He | Jian He | | [HADOOP-15161](https://issues.apache.org/jira/browse/HADOOP-15161) | s3a: Stream and common statistics missing from metrics | Major | . | Sean Mackrory | Sean Mackrory | @@ -866,10 +881,11 @@ | [YARN-7745](https://issues.apache.org/jira/browse/YARN-7745) | Allow DistributedShell to take a placement specification for containers it wants to launch | Major | . | Arun Suresh | Arun Suresh | | [HDFS-12973](https://issues.apache.org/jira/browse/HDFS-12973) | RBF: Document global quota supporting in federation | Major | . | Yiqun Lin | Yiqun Lin | | [HDFS-13028](https://issues.apache.org/jira/browse/HDFS-13028) | RBF: Fix spurious TestRouterRpc#testProxyGetStats | Minor | . 
| Íñigo Goiri | Íñigo Goiri | -| [YARN-5094](https://issues.apache.org/jira/browse/YARN-5094) | some YARN container events have timestamp of -1 | Critical | timelineserver | Sangjin Lee | Haibo Chen | +| [YARN-5094](https://issues.apache.org/jira/browse/YARN-5094) | some YARN container events have timestamp of -1 | Critical | . | Sangjin Lee | Haibo Chen | | [MAPREDUCE-6995](https://issues.apache.org/jira/browse/MAPREDUCE-6995) | Uploader tool for Distributed Cache Deploy documentation | Major | . | Miklos Szegedi | Miklos Szegedi | | [YARN-7774](https://issues.apache.org/jira/browse/YARN-7774) | Miscellaneous fixes to the PlacementProcessor | Blocker | . | Arun Suresh | Arun Suresh | | [YARN-7763](https://issues.apache.org/jira/browse/YARN-7763) | Allow Constraints specified in the SchedulingRequest to override application level constraints | Blocker | . | Wangda Tan | Weiwei Yang | +| [YARN-7729](https://issues.apache.org/jira/browse/YARN-7729) | Add support for setting the PID namespace mode | Major | nodemanager | Shane Kumpf | Billie Rinaldi | | [YARN-7788](https://issues.apache.org/jira/browse/YARN-7788) | Factor out management of temp tags from AllocationTagsManager | Major | . | Arun Suresh | Arun Suresh | | [YARN-7779](https://issues.apache.org/jira/browse/YARN-7779) | Display allocation tags in RM web UI and expose same through REST API | Major | RM | Weiwei Yang | Weiwei Yang | | [YARN-7782](https://issues.apache.org/jira/browse/YARN-7782) | Enable user re-mapping for Docker containers in yarn-default.xml | Blocker | security, yarn | Eric Yang | Eric Yang | @@ -878,15 +894,14 @@ | [HDFS-12772](https://issues.apache.org/jira/browse/HDFS-12772) | RBF: Federation Router State State Store internal API | Major | . | Íñigo Goiri | Íñigo Goiri | | [YARN-7783](https://issues.apache.org/jira/browse/YARN-7783) | Add validation step to ensure constraints are not violated due to order in which a request is processed | Blocker | . | Arun Suresh | Arun Suresh | | [YARN-7807](https://issues.apache.org/jira/browse/YARN-7807) | Assume intra-app anti-affinity as default for scheduling request inside AppPlacementAllocator | Blocker | . | Wangda Tan | Wangda Tan | -| [YARN-7795](https://issues.apache.org/jira/browse/YARN-7795) | Fix jenkins issues of YARN-6592 branch | Blocker | . | Sunil G | Sunil G | -| [YARN-7810](https://issues.apache.org/jira/browse/YARN-7810) | TestDockerContainerRuntime test failures due to UID lookup of a non-existent user | Major | . | Shane Kumpf | Shane Kumpf | +| [YARN-7795](https://issues.apache.org/jira/browse/YARN-7795) | Fix jenkins issues of YARN-6592 branch | Blocker | . | Sunil Govindan | Sunil Govindan | | [HDFS-13042](https://issues.apache.org/jira/browse/HDFS-13042) | RBF: Heartbeat Router State | Major | . | Íñigo Goiri | Íñigo Goiri | | [YARN-7798](https://issues.apache.org/jira/browse/YARN-7798) | Refactor SLS Reservation Creation | Minor | . | Young Chen | Young Chen | | [HDFS-13049](https://issues.apache.org/jira/browse/HDFS-13049) | RBF: Inconsistent Router OPTS config in branch-2 and branch-3 | Minor | . | Wei Yan | Wei Yan | | [YARN-7814](https://issues.apache.org/jira/browse/YARN-7814) | Remove automatic mounting of the cgroups root directory into Docker containers | Major | . 
| Shane Kumpf | Shane Kumpf | | [YARN-7784](https://issues.apache.org/jira/browse/YARN-7784) | Fix Cluster metrics when placement processor is enabled | Major | metrics, RM | Weiwei Yang | Arun Suresh | | [YARN-6597](https://issues.apache.org/jira/browse/YARN-6597) | Add RMContainer recovery test to verify tag population in the AllocationTagsManager | Major | . | Konstantinos Karanasos | Panagiotis Garefalakis | -| [YARN-7817](https://issues.apache.org/jira/browse/YARN-7817) | Add Resource reference to RM's NodeInfo object so REST API can get non memory/vcore resource usages. | Major | . | Sumana Sathish | Sunil G | +| [YARN-7817](https://issues.apache.org/jira/browse/YARN-7817) | Add Resource reference to RM's NodeInfo object so REST API can get non memory/vcore resource usages. | Major | . | Sumana Sathish | Sunil Govindan | | [YARN-7797](https://issues.apache.org/jira/browse/YARN-7797) | Docker host network can not obtain IP address for RegistryDNS | Major | nodemanager | Eric Yang | Eric Yang | | [HDFS-12574](https://issues.apache.org/jira/browse/HDFS-12574) | Add CryptoInputStream to WebHdfsFileSystem read call. | Major | encryption, kms, webhdfs | Rushabh S Shah | Rushabh S Shah | | [YARN-5148](https://issues.apache.org/jira/browse/YARN-5148) | [UI2] Add page to new YARN UI to view server side configurations/logs/JVM-metrics | Major | webapp, yarn-ui-v2 | Wangda Tan | Kai Sasaki | @@ -902,7 +917,7 @@ | [YARN-7839](https://issues.apache.org/jira/browse/YARN-7839) | Modify PlacementAlgorithm to Check node capacity before placing request on node | Major | . | Arun Suresh | Panagiotis Garefalakis | | [YARN-7868](https://issues.apache.org/jira/browse/YARN-7868) | Provide improved error message when YARN service is disabled | Major | yarn-native-services | Eric Yang | Eric Yang | | [YARN-7778](https://issues.apache.org/jira/browse/YARN-7778) | Merging of placement constraints defined at different levels | Major | . | Konstantinos Karanasos | Weiwei Yang | -| [YARN-7860](https://issues.apache.org/jira/browse/YARN-7860) | Fix UT failure TestRMWebServiceAppsNodelabel#testAppsRunning | Major | . | Weiwei Yang | Sunil G | +| [YARN-7860](https://issues.apache.org/jira/browse/YARN-7860) | Fix UT failure TestRMWebServiceAppsNodelabel#testAppsRunning | Major | . | Weiwei Yang | Sunil Govindan | | [YARN-7516](https://issues.apache.org/jira/browse/YARN-7516) | Security check for trusted docker image | Major | . | Eric Yang | Eric Yang | | [YARN-7815](https://issues.apache.org/jira/browse/YARN-7815) | Make the YARN mounts added to Docker containers more restrictive | Major | . | Shane Kumpf | Shane Kumpf | | [HADOOP-15214](https://issues.apache.org/jira/browse/HADOOP-15214) | Make Hadoop compatible with Guava 21.0 | Minor | . | Igor Dvorzhak | Igor Dvorzhak | @@ -937,7 +952,7 @@ | [YARN-7446](https://issues.apache.org/jira/browse/YARN-7446) | Docker container privileged mode and --user flag contradict each other | Major | . | Eric Yang | Eric Yang | | [YARN-7954](https://issues.apache.org/jira/browse/YARN-7954) | Component status stays "Ready" when yarn service is stopped | Major | . | Yesha Vora | Gour Saha | | [YARN-7955](https://issues.apache.org/jira/browse/YARN-7955) | Calling stop on an already stopped service says "Successfully stopped service" | Major | . 
| Gour Saha | Gour Saha | -| [YARN-7637](https://issues.apache.org/jira/browse/YARN-7637) | GPU volume creation command fails when work preserving is disabled at NM | Critical | nodemanager | Sunil G | Zian Chen | +| [YARN-7637](https://issues.apache.org/jira/browse/YARN-7637) | GPU volume creation command fails when work preserving is disabled at NM | Critical | nodemanager | Sunil Govindan | Zian Chen | | [HADOOP-15274](https://issues.apache.org/jira/browse/HADOOP-15274) | Move hadoop-openstack to slf4j | Minor | fs/swift | Steve Loughran | fang zhenyi | | [HADOOP-14652](https://issues.apache.org/jira/browse/HADOOP-14652) | Update metrics-core version to 3.2.4 | Major | . | Ray Chiang | Ray Chiang | | [HDFS-1686](https://issues.apache.org/jira/browse/HDFS-1686) | Federation: Add more Balancer tests with federation setting | Minor | balancer & mover, test | Tsz Wo Nicholas Sze | Bharat Viswanadham | @@ -959,7 +974,7 @@ | [HADOOP-15277](https://issues.apache.org/jira/browse/HADOOP-15277) | remove .FluentPropertyBeanIntrospector from CLI operation log output | Minor | conf | Steve Loughran | Steve Loughran | | [HADOOP-15293](https://issues.apache.org/jira/browse/HADOOP-15293) | TestLogLevel fails on Java 9 | Major | test | Akira Ajisaka | Takanobu Asanuma | | [HDFS-13212](https://issues.apache.org/jira/browse/HDFS-13212) | RBF: Fix router location cache issue | Major | federation, hdfs | Weiwei Wu | Weiwei Wu | -| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection | Minor | . | Wei Yan | Ekanth S | +| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection | Minor | . | Wei Yan | Ekanth Sethuramalingam | | [HDFS-13240](https://issues.apache.org/jira/browse/HDFS-13240) | RBF: Update some inaccurate document descriptions | Minor | . | Yiqun Lin | Yiqun Lin | | [YARN-7523](https://issues.apache.org/jira/browse/YARN-7523) | Introduce description and version field in Service record | Critical | . | Gour Saha | Chandni Singh | | [HADOOP-15297](https://issues.apache.org/jira/browse/HADOOP-15297) | Make S3A etag =\> checksum feature optional | Blocker | fs/s3 | Steve Loughran | Steve Loughran | @@ -972,19 +987,19 @@ | [HDFS-12587](https://issues.apache.org/jira/browse/HDFS-12587) | Use Parameterized tests in TestBlockInfoStriped and TestLowRedundancyBlockQueues to test all EC policies | Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma | | [YARN-5015](https://issues.apache.org/jira/browse/YARN-5015) | Support sliding window retry capability for container restart | Major | nodemanager | Varun Vasudev | Chandni Singh | | [YARN-7657](https://issues.apache.org/jira/browse/YARN-7657) | Queue Mapping could provide options to provide 'user' specific auto-created queues under a specified group parent queue | Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad | +| [HADOOP-15308](https://issues.apache.org/jira/browse/HADOOP-15308) | TestConfiguration fails on Windows because of paths | Major | test | Íñigo Goiri | Xiao Liang | | [HDFS-12773](https://issues.apache.org/jira/browse/HDFS-12773) | RBF: Improve State Store FS implementation | Major | . 
| Íñigo Goiri | Íñigo Goiri | | [HADOOP-15294](https://issues.apache.org/jira/browse/HADOOP-15294) | TestUGILoginFromKeytab fails on Java9 | Major | security | Takanobu Asanuma | Takanobu Asanuma | | [YARN-7999](https://issues.apache.org/jira/browse/YARN-7999) | Docker launch fails when user private filecache directory is missing | Major | . | Eric Yang | Jason Lowe | | [HDFS-13198](https://issues.apache.org/jira/browse/HDFS-13198) | RBF: RouterHeartbeatService throws out CachedStateStore related exceptions when starting router | Minor | . | Wei Yan | Wei Yan | | [HADOOP-15278](https://issues.apache.org/jira/browse/HADOOP-15278) | log s3a at info | Major | fs/s3 | Steve Loughran | Steve Loughran | | [HDFS-13224](https://issues.apache.org/jira/browse/HDFS-13224) | RBF: Resolvers to support mount points across multiple subclusters | Major | . | Íñigo Goiri | Íñigo Goiri | -| [YARN-8027](https://issues.apache.org/jira/browse/YARN-8027) | Setting hostname of docker container breaks for --net=host in docker 1.13 | Major | yarn | Jim Brennan | Jim Brennan | | [HDFS-13215](https://issues.apache.org/jira/browse/HDFS-13215) | RBF: Move Router to its own module | Major | . | Íñigo Goiri | Wei Yan | | [YARN-8053](https://issues.apache.org/jira/browse/YARN-8053) | Add hadoop-distcp in exclusion in hbase-server dependencies for timelineservice-hbase packages. | Major | . | Rohith Sharma K S | Rohith Sharma K S | | [HDFS-13250](https://issues.apache.org/jira/browse/HDFS-13250) | RBF: Router to manage requests across multiple subclusters | Major | . | Íñigo Goiri | Íñigo Goiri | | [HDFS-11190](https://issues.apache.org/jira/browse/HDFS-11190) | [READ] Namenode support for data stored in external stores. | Major | . | Virajith Jalaparti | Virajith Jalaparti | | [HDFS-10675](https://issues.apache.org/jira/browse/HDFS-10675) | [READ] Datanode support to read from external stores. | Major | . | Virajith Jalaparti | Virajith Jalaparti | -| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf | Minor | . | Íñigo Goiri | Ekanth S | +| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf | Minor | . | Íñigo Goiri | Ekanth Sethuramalingam | | [HDFS-12792](https://issues.apache.org/jira/browse/HDFS-12792) | RBF: Test Router-based federation using HDFSContract | Major | . | Íñigo Goiri | Íñigo Goiri | | [YARN-7581](https://issues.apache.org/jira/browse/YARN-7581) | HBase filters are not constructed correctly in ATSv2 | Major | ATSv2 | Haibo Chen | Haibo Chen | | [YARN-7986](https://issues.apache.org/jira/browse/YARN-7986) | ATSv2 REST API queries do not return results for uppercase application tags | Critical | . | Charan Hebri | Charan Hebri | @@ -996,6 +1011,11 @@ | [YARN-8010](https://issues.apache.org/jira/browse/YARN-8010) | Add config in FederationRMFailoverProxy to not bypass facade cache when failing over | Minor | . | Botong Huang | Botong Huang | | [HDFS-13347](https://issues.apache.org/jira/browse/HDFS-13347) | RBF: Cache datanode reports | Minor | . | Íñigo Goiri | Íñigo Goiri | | [YARN-8069](https://issues.apache.org/jira/browse/YARN-8069) | Clean up example hostnames | Major | . | Billie Rinaldi | Billie Rinaldi | +| [HDFS-13364](https://issues.apache.org/jira/browse/HDFS-13364) | RBF: Support NamenodeProtocol in the Router | Major | . 
| Íñigo Goiri | Íñigo Goiri | +| [HADOOP-14651](https://issues.apache.org/jira/browse/HADOOP-14651) | Update okhttp version to 2.7.5 | Major | fs/adl | Ray Chiang | Ray Chiang | +| [YARN-8027](https://issues.apache.org/jira/browse/YARN-8027) | Setting hostname of docker container breaks for --net=host in docker 1.13 | Major | yarn | Jim Brennan | Jim Brennan | +| [YARN-7810](https://issues.apache.org/jira/browse/YARN-7810) | TestDockerContainerRuntime test failures due to UID lookup of a non-existent user | Major | . | Shane Kumpf | Shane Kumpf | +| [HADOOP-15497](https://issues.apache.org/jira/browse/HADOOP-15497) | TestTrash should use proper test path to avoid failing on Windows | Minor | . | Anbang Hu | Anbang Hu | ### OTHER: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/RELEASENOTES.3.1.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/RELEASENOTES.3.1.0.md index 9e3c65d7682..5eb9aa2c528 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/RELEASENOTES.3.1.0.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/RELEASENOTES.3.1.0.md @@ -196,4 +196,11 @@ HBase integration module was mixed up with for hbase-server and hbase-client dep The HADOOP\_CONF\_DIR environment variable is no longer unconditionally inherited by containers even if it does not appear in the nodemanager whitelist variables specified by the yarn.nodemanager.env-whitelist property. If the whitelist property has been modified from the default to not include HADOOP\_CONF\_DIR yet containers need it to be inherited from the nodemanager's environment then the whitelist settings need to be updated to include HADOOP\_CONF\_DIR. +--- + +* [HDFS-13553](https://issues.apache.org/jira/browse/HDFS-13553) | *Major* | **RBF: Support global quota** + +Federation supports and controls global quota at the mount table level. + +In a federated environment, a folder can be spread across multiple subclusters. The Router aggregates the quota usage queried from these subclusters and uses the aggregated value for quota verification.
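The HDFS-13553 release note above only describes the behaviour in prose, so a minimal usage sketch may help. The dfs.federation.router.quota.enable property and the dfsrouteradmin -setQuota / -clrQuota options shown below are taken from the HDFS Router-based federation documentation rather than from this patch, so treat the exact names and paths as assumptions to verify against the shipped docs.

```bash
# Minimal sketch of the HDFS-13553 "RBF: Support global quota" workflow.
# Property and command options are assumptions taken from the RBF docs.

# 1) Enable quota handling on the Router (hdfs-site.xml on the Router hosts):
#      dfs.federation.router.quota.enable = true

# 2) Set a global quota on a mount table entry; the Router aggregates the
#    usage reported by every subcluster backing /data before verification:
hdfs dfsrouteradmin -setQuota /data -nsQuota 100000 -ssQuota 1073741824

# 3) Clear the global quota when it is no longer needed:
hdfs dfsrouteradmin -clrQuota /data
```

Because a mount point can span several subclusters, the name-space and storage-space limits apply to the aggregated usage across those subclusters, not to any single one.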
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.1/CHANGELOG.3.1.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.1/CHANGELOG.3.1.1.md new file mode 100644 index 00000000000..7e3deddb798 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.1/CHANGELOG.3.1.1.md @@ -0,0 +1,502 @@ + + +# Apache Hadoop Changelog + +## Release 3.1.1 - 2018-08-07 + + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-14667](https://issues.apache.org/jira/browse/HADOOP-14667) | Flexible Visual Studio support | Major | build | Allen Wittenauer | Allen Wittenauer | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13056](https://issues.apache.org/jira/browse/HDFS-13056) | Expose file-level composite CRCs in HDFS which are comparable across different instances/layouts | Major | datanode, distcp, erasure-coding, federation, hdfs | Dennis Huo | Dennis Huo | +| [HDFS-13283](https://issues.apache.org/jira/browse/HDFS-13283) | Percentage based Reserved Space Calculation for DataNode | Major | datanode, hdfs | Lukas Majercak | Lukas Majercak | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8028](https://issues.apache.org/jira/browse/YARN-8028) | Support authorizeUserAccessToQueue in RMWebServices | Major | . | Wangda Tan | Wangda Tan | +| [HADOOP-15332](https://issues.apache.org/jira/browse/HADOOP-15332) | Fix typos in hadoop-aws markdown docs | Minor | . | Gabor Bota | Gabor Bota | +| [HADOOP-15330](https://issues.apache.org/jira/browse/HADOOP-15330) | Remove jdk1.7 profile from hadoop-annotations module | Minor | . | Akira Ajisaka | fang zhenyi | +| [HADOOP-15342](https://issues.apache.org/jira/browse/HADOOP-15342) | Update ADLS connector to use the current SDK version (2.2.7) | Major | fs/adl | Atul Sikaria | Atul Sikaria | +| [YARN-1151](https://issues.apache.org/jira/browse/YARN-1151) | Ability to configure auxiliary services from HDFS-based JAR files | Major | nodemanager | john lilley | Xuan Gong | +| [HDFS-13418](https://issues.apache.org/jira/browse/HDFS-13418) | NetworkTopology should be configurable when enable DFSNetworkTopology | Major | . | Tao Jie | Tao Jie | +| [HDFS-13439](https://issues.apache.org/jira/browse/HDFS-13439) | Add test case for read block operation when it is moved | Major | . | Ajay Kumar | Ajay Kumar | +| [HDFS-13462](https://issues.apache.org/jira/browse/HDFS-13462) | Add BIND\_HOST configuration for JournalNode's HTTP and RPC Servers | Major | hdfs, journal-node | Lukas Majercak | Lukas Majercak | +| [YARN-8140](https://issues.apache.org/jira/browse/YARN-8140) | Improve log message when launch cmd is ran for stopped yarn service | Major | yarn-native-services | Yesha Vora | Eric Yang | +| [MAPREDUCE-7086](https://issues.apache.org/jira/browse/MAPREDUCE-7086) | Add config to allow FileInputFormat to ignore directories when recursive=false | Major | . 
| Sergey Shelukhin | Sergey Shelukhin | +| [HDFS-12981](https://issues.apache.org/jira/browse/HDFS-12981) | renameSnapshot a Non-Existent snapshot to itself should throw error | Minor | hdfs | Sailesh Patel | Kitti Nanasi | +| [YARN-8239](https://issues.apache.org/jira/browse/YARN-8239) | [UI2] Clicking on Node Manager UI under AM container info / App Attempt page goes to old RM UI | Major | yarn-ui-v2 | Sumana Sathish | Sunil Govindan | +| [YARN-8260](https://issues.apache.org/jira/browse/YARN-8260) | [UI2] Per-application tracking URL is no longer available in YARN UI2 | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8201](https://issues.apache.org/jira/browse/YARN-8201) | Skip stacktrace of few exception from ClientRMService | Minor | . | Bibin A Chundatt | Bilwa S T | +| [HADOOP-15441](https://issues.apache.org/jira/browse/HADOOP-15441) | Log kms url and token service at debug level. | Minor | . | Wei-Chiu Chuang | Gabor Bota | +| [HDFS-13544](https://issues.apache.org/jira/browse/HDFS-13544) | Improve logging for JournalNode in federated cluster | Major | federation, hdfs | Hanisha Koneru | Hanisha Koneru | +| [YARN-8249](https://issues.apache.org/jira/browse/YARN-8249) | Few REST api's in RMWebServices are missing static user check | Critical | webapp, yarn | Sunil Govindan | Sunil Govindan | +| [HDFS-13512](https://issues.apache.org/jira/browse/HDFS-13512) | WebHdfs getFileStatus doesn't return ecPolicy | Major | . | Ajay Kumar | Ajay Kumar | +| [HADOOP-15250](https://issues.apache.org/jira/browse/HADOOP-15250) | Split-DNS MultiHomed Server Network Cluster Network IPC Client Bind Addr Wrong | Critical | ipc, net | Greg Senia | Ajay Kumar | +| [HDFS-13589](https://issues.apache.org/jira/browse/HDFS-13589) | Add dfsAdmin command to query if "upgrade" is finalized | Major | hdfs | Hanisha Koneru | Hanisha Koneru | +| [HADOOP-15486](https://issues.apache.org/jira/browse/HADOOP-15486) | Make NetworkTopology#netLock fair | Major | net | Nanda kumar | Nanda kumar | +| [YARN-8213](https://issues.apache.org/jira/browse/YARN-8213) | Add Capacity Scheduler performance metrics | Critical | capacityscheduler, metrics | Weiwei Yang | Weiwei Yang | +| [HDFS-13628](https://issues.apache.org/jira/browse/HDFS-13628) | Update Archival Storage doc for Provided Storage | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-15449](https://issues.apache.org/jira/browse/HADOOP-15449) | Increase default timeout of ZK session to avoid frequent NameNode failover | Critical | common | Karthik Palanisamy | Karthik Palanisamy | +| [YARN-8333](https://issues.apache.org/jira/browse/YARN-8333) | Load balance YARN services using RegistryDNS multiple A records | Major | yarn-native-services | Eric Yang | Eric Yang | +| [HDFS-13602](https://issues.apache.org/jira/browse/HDFS-13602) | Add checkOperation(WRITE) checks in FSNamesystem | Major | ha, namenode | Erik Krogen | Chao Sun | +| [HDFS-13155](https://issues.apache.org/jira/browse/HDFS-13155) | BlockPlacementPolicyDefault.chooseTargetInOrder Not Checking Return Value for NULL | Minor | namenode | BELUGA BEHR | Zsolt Venczel | +| [YARN-8389](https://issues.apache.org/jira/browse/YARN-8389) | Improve the description of machine-list property in Federation docs | Major | documentation, federation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13511](https://issues.apache.org/jira/browse/HDFS-13511) | Provide specialized exception when block length cannot be obtained | Major | . 
| Ted Yu | Gabor Bota | +| [HDFS-13659](https://issues.apache.org/jira/browse/HDFS-13659) | Add more test coverage for contentSummary for snapshottable path | Major | namenode, test | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-8400](https://issues.apache.org/jira/browse/YARN-8400) | Fix typos in YARN Federation documentation page | Trivial | . | Bibin A Chundatt | Giovanni Matteo Fumarola | +| [HADOOP-15499](https://issues.apache.org/jira/browse/HADOOP-15499) | Performance severe drop when running RawErasureCoderBenchmark with NativeRSRawErasureCoder | Major | . | Sammi Chen | Sammi Chen | +| [HDFS-13653](https://issues.apache.org/jira/browse/HDFS-13653) | Make dfs.client.failover.random.order a per nameservice configuration | Major | federation | Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [YARN-8394](https://issues.apache.org/jira/browse/YARN-8394) | Improve data locality documentation for Capacity Scheduler | Major | . | Weiwei Yang | Weiwei Yang | +| [HDFS-13641](https://issues.apache.org/jira/browse/HDFS-13641) | Add metrics for edit log tailing | Major | metrics | Chao Sun | Chao Sun | +| [HDFS-13686](https://issues.apache.org/jira/browse/HDFS-13686) | Add overall metrics for FSNamesystemLock | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [HDFS-13692](https://issues.apache.org/jira/browse/HDFS-13692) | StorageInfoDefragmenter floods log when compacting StorageInfo TreeSet | Minor | . | Yiqun Lin | Bharat Viswanadham | +| [YARN-8214](https://issues.apache.org/jira/browse/YARN-8214) | Change default RegistryDNS port | Major | . | Billie Rinaldi | Billie Rinaldi | +| [HDFS-13703](https://issues.apache.org/jira/browse/HDFS-13703) | Avoid allocation of CorruptedBlocks hashmap when no corrupted blocks are hit | Major | performance | Todd Lipcon | Todd Lipcon | +| [HADOOP-15554](https://issues.apache.org/jira/browse/HADOOP-15554) | Improve JIT performance for Configuration parsing | Minor | conf, performance | Todd Lipcon | Todd Lipcon | +| [HDFS-13714](https://issues.apache.org/jira/browse/HDFS-13714) | Fix TestNameNodePrunesMissingStorages test failures on Windows | Major | hdfs, namenode, test | Lukas Majercak | Lukas Majercak | +| [HDFS-13712](https://issues.apache.org/jira/browse/HDFS-13712) | BlockReaderRemote.read() logging improvement | Minor | hdfs-client | Gergo Repas | Gergo Repas | +| [YARN-8302](https://issues.apache.org/jira/browse/YARN-8302) | ATS v2 should handle HBase connection issue properly | Major | ATSv2 | Yesha Vora | Billie Rinaldi | +| [HDFS-13674](https://issues.apache.org/jira/browse/HDFS-13674) | Improve documentation on Metrics | Minor | documentation, metrics | Chao Sun | Chao Sun | +| [HDFS-13719](https://issues.apache.org/jira/browse/HDFS-13719) | Docs around dfs.image.transfer.timeout are misleading | Major | . | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15598](https://issues.apache.org/jira/browse/HADOOP-15598) | DataChecksum calculate checksum is contented on hashtable synchronization | Major | common | Prasanth Jayachandran | Prasanth Jayachandran | +| [YARN-8501](https://issues.apache.org/jira/browse/YARN-8501) | Reduce complexity of RMWebServices' getApps method | Major | restapi | Szilard Nemeth | Szilard Nemeth | +| [YARN-8155](https://issues.apache.org/jira/browse/YARN-8155) | Improve ATSv2 client logging in RM and NM publisher | Major | . | Rohith Sharma K S | Abhishek Modi | +| [HADOOP-15476](https://issues.apache.org/jira/browse/HADOOP-15476) | fix logging for split-dns multihome | Major | . 
| Ajay Kumar | Ajay Kumar | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8040](https://issues.apache.org/jira/browse/YARN-8040) | [UI2] New YARN UI webapp does not respect current pathname for REST api | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [HADOOP-15062](https://issues.apache.org/jira/browse/HADOOP-15062) | TestCryptoStreamsWithOpensslAesCtrCryptoCodec fails on Debian 9 | Major | . | Miklos Szegedi | Miklos Szegedi | +| [HDFS-11043](https://issues.apache.org/jira/browse/HDFS-11043) | TestWebHdfsTimeouts fails | Major | webhdfs | Andrew Wang | Chao Sun | +| [HADOOP-15331](https://issues.apache.org/jira/browse/HADOOP-15331) | Fix a race condition causing parsing error of java.io.BufferedInputStream in class org.apache.hadoop.conf.Configuration | Major | common | Miklos Szegedi | Miklos Szegedi | +| [HDFS-11900](https://issues.apache.org/jira/browse/HDFS-11900) | Hedged reads thread pool creation not synchronized | Major | hdfs-client | John Zhuge | John Zhuge | +| [YARN-8032](https://issues.apache.org/jira/browse/YARN-8032) | Yarn service should expose failuresValidityInterval to users and use it for launching containers | Major | . | Chandni Singh | Chandni Singh | +| [YARN-8043](https://issues.apache.org/jira/browse/YARN-8043) | Add the exception message for failed launches running under LCE | Major | . | Shane Kumpf | Shane Kumpf | +| [YARN-7734](https://issues.apache.org/jira/browse/YARN-7734) | YARN-5418 breaks TestContainerLogsPage.testContainerLogPageAccess | Major | . | Miklos Szegedi | Tao Yang | +| [HDFS-13087](https://issues.apache.org/jira/browse/HDFS-13087) | Snapshotted encryption zone information should be immutable | Major | encryption | LiXin Ge | LiXin Ge | +| [HADOOP-12862](https://issues.apache.org/jira/browse/HADOOP-12862) | LDAP Group Mapping over SSL can not specify trust store | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-15317](https://issues.apache.org/jira/browse/HADOOP-15317) | Improve NetworkTopology chooseRandom's loop | Major | . 
| Xiao Chen | Xiao Chen | +| [HADOOP-15355](https://issues.apache.org/jira/browse/HADOOP-15355) | TestCommonConfigurationFields is broken by HADOOP-15312 | Major | test | Konstantin Shvachko | LiXin Ge | +| [YARN-7764](https://issues.apache.org/jira/browse/YARN-7764) | Findbugs warning: Resource#getResources may expose internal representation | Major | api | Weiwei Yang | Weiwei Yang | +| [YARN-8106](https://issues.apache.org/jira/browse/YARN-8106) | Update LogAggregationIndexedFileController to use readFull instead read to avoid IOException while loading log meta | Critical | log-aggregation | Prabhu Joseph | Prabhu Joseph | +| [YARN-8115](https://issues.apache.org/jira/browse/YARN-8115) | [UI2] URL data like nodeHTTPAddress must be encoded in UI before using to access NM | Major | yarn-ui-v2 | Sunil Govindan | Sreenath Somarajapuram | +| [HDFS-13350](https://issues.apache.org/jira/browse/HDFS-13350) | Negative legacy block ID will confuse Erasure Coding to be considered as striped block | Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | +| [YARN-8119](https://issues.apache.org/jira/browse/YARN-8119) | [UI2] Timeline Server address' url scheme should be removed while accessing via KNOX | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8083](https://issues.apache.org/jira/browse/YARN-8083) | [UI2] All YARN related configurations are paged together in conf page | Major | yarn-ui-v2 | Zoltan Haindrich | Gergely Novák | +| [HADOOP-15366](https://issues.apache.org/jira/browse/HADOOP-15366) | Add a helper shutdown routine in HadoopExecutor to ensure clean shutdown | Minor | . | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-7905](https://issues.apache.org/jira/browse/YARN-7905) | Parent directory permission incorrect during public localization | Critical | . | Bibin A Chundatt | Bilwa S T | +| [HADOOP-15374](https://issues.apache.org/jira/browse/HADOOP-15374) | Add links of the new features of 3.1.0 to the top page | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-7804](https://issues.apache.org/jira/browse/YARN-7804) | Refresh action on Grid view page should not be redirected to graph view | Major | yarn-ui-v2 | Yesha Vora | Gergely Novák | +| [HDFS-13420](https://issues.apache.org/jira/browse/HDFS-13420) | License header is displayed in ArchivalStorage/MemoryStorage html pages | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-13328](https://issues.apache.org/jira/browse/HDFS-13328) | Abstract ReencryptionHandler recursive logic in separate class. | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HADOOP-15357](https://issues.apache.org/jira/browse/HADOOP-15357) | Configuration.getPropsWithPrefix no longer does variable substitution | Major | . | Jim Brennan | Jim Brennan | +| [YARN-7984](https://issues.apache.org/jira/browse/YARN-7984) | Delete registry entries from ZK on ServiceClient stop and clean up stop/destroy behavior | Critical | yarn-native-services | Billie Rinaldi | Billie Rinaldi | +| [YARN-8133](https://issues.apache.org/jira/browse/YARN-8133) | Doc link broken for yarn-service from overview page. | Blocker | yarn-native-services | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8116](https://issues.apache.org/jira/browse/YARN-8116) | Nodemanager fails with NumberFormatException: For input string: "" | Critical | . 
| Yesha Vora | Chandni Singh | +| [MAPREDUCE-7062](https://issues.apache.org/jira/browse/MAPREDUCE-7062) | Update mapreduce.job.tags description for making use for ATSv2 purpose. | Major | . | Charan Hebri | Charan Hebri | +| [YARN-8073](https://issues.apache.org/jira/browse/YARN-8073) | TimelineClientImpl doesn't honor yarn.timeline-service.versions configuration | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8127](https://issues.apache.org/jira/browse/YARN-8127) | Resource leak when async scheduling is enabled | Critical | . | Weiwei Yang | Tao Yang | +| [HDFS-13427](https://issues.apache.org/jira/browse/HDFS-13427) | Fix the section titles of transparent encryption document | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-7101](https://issues.apache.org/jira/browse/HDFS-7101) | Potential null dereference in DFSck#doWork() | Minor | . | Ted Yu | skrho | +| [HDFS-13426](https://issues.apache.org/jira/browse/HDFS-13426) | Fix javadoc in FsDatasetAsyncDiskService#removeVolume | Minor | hdfs | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-8120](https://issues.apache.org/jira/browse/YARN-8120) | JVM can crash with SIGSEGV when exiting due to custom leveldb logger | Major | nodemanager, resourcemanager | Jason Lowe | Jason Lowe | +| [YARN-8147](https://issues.apache.org/jira/browse/YARN-8147) | TestClientRMService#testGetApplications sporadically fails | Major | test | Jason Lowe | Jason Lowe | +| [HDFS-13436](https://issues.apache.org/jira/browse/HDFS-13436) | Fix javadoc of package-info.java | Major | documentation | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-15379](https://issues.apache.org/jira/browse/HADOOP-15379) | Make IrqHandler.bind() public | Minor | util | Steve Loughran | Ajay Kumar | +| [YARN-8154](https://issues.apache.org/jira/browse/YARN-8154) | Fix missing titles in PlacementConstraints document | Minor | documentation | Akira Ajisaka | Weiwei Yang | +| [YARN-8153](https://issues.apache.org/jira/browse/YARN-8153) | Guaranteed containers always stay in SCHEDULED on NM after restart | Major | . | Yang Wang | Yang Wang | +| [HADOOP-14970](https://issues.apache.org/jira/browse/HADOOP-14970) | MiniHadoopClusterManager doesn't respect lack of format option | Minor | . | Erik Krogen | Erik Krogen | +| [HDFS-13438](https://issues.apache.org/jira/browse/HDFS-13438) | Fix javadoc in FsVolumeList#removeVolume | Minor | . | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-8142](https://issues.apache.org/jira/browse/YARN-8142) | yarn service application stops when AM is killed with SIGTERM | Major | yarn-native-services | Yesha Vora | Billie Rinaldi | +| [MAPREDUCE-7077](https://issues.apache.org/jira/browse/MAPREDUCE-7077) | Pipe mapreduce job fails with Permission denied for jobTokenPassword | Critical | . | Yesha Vora | Akira Ajisaka | +| [HDFS-13330](https://issues.apache.org/jira/browse/HDFS-13330) | ShortCircuitCache#fetchOrCreate never retries | Major | . | Wei-Chiu Chuang | Gabor Bota | +| [YARN-8156](https://issues.apache.org/jira/browse/YARN-8156) | Increase the default value of yarn.timeline-service.app-collector.linger-period.ms | Major | . 
| Rohith Sharma K S | Charan Hebri | +| [YARN-8165](https://issues.apache.org/jira/browse/YARN-8165) | Incorrect queue name logging in AbstractContainerAllocator | Trivial | capacityscheduler | Weiwei Yang | Weiwei Yang | +| [HDFS-12828](https://issues.apache.org/jira/browse/HDFS-12828) | OIV ReverseXML Processor fails with escaped characters | Critical | hdfs | Erik Krogen | Erik Krogen | +| [HADOOP-15391](https://issues.apache.org/jira/browse/HADOOP-15391) | Add missing css file in hadoop-aws, hadoop-aliyun, hadoop-azure and hadoop-azure-datalake modules | Major | documentation | Yiqun Lin | Yiqun Lin | +| [YARN-8171](https://issues.apache.org/jira/browse/YARN-8171) | [UI2] AM Node link from attempt page should not redirect to new tab | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8145](https://issues.apache.org/jira/browse/YARN-8145) | yarn rmadmin -getGroups doesn't return updated groups for user | Major | . | Sumana Sathish | Sunil Govindan | +| [HDFS-13463](https://issues.apache.org/jira/browse/HDFS-13463) | Fix javadoc in FsDatasetImpl#checkAndUpdate | Minor | datanode | Shashikant Banerjee | Shashikant Banerjee | +| [HDFS-13464](https://issues.apache.org/jira/browse/HDFS-13464) | Fix javadoc in FsVolumeList#handleVolumeFailures | Minor | documentation | Shashikant Banerjee | Shashikant Banerjee | +| [HADOOP-15396](https://issues.apache.org/jira/browse/HADOOP-15396) | Some java source files are executable | Minor | . | Akira Ajisaka | Shashikant Banerjee | +| [YARN-6827](https://issues.apache.org/jira/browse/YARN-6827) | [ATS1/1.5] NPE exception while publishing recovering applications into ATS during RM restart. | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8182](https://issues.apache.org/jira/browse/YARN-8182) | [UI2] Proxy- Clicking on nodes under Nodes HeatMap gives 401 error | Critical | . | Sumana Sathish | Sunil Govindan | +| [YARN-8189](https://issues.apache.org/jira/browse/YARN-8189) | [UI2] Nodes page column headers are half truncated | Major | . | Sunil Govindan | Sunil Govindan | +| [YARN-7830](https://issues.apache.org/jira/browse/YARN-7830) | [UI2] Post selecting grid view in Attempt page, attempt info page should also be opened with grid view | Major | yarn-ui-v2 | Yesha Vora | Gergely Novák | +| [YARN-7786](https://issues.apache.org/jira/browse/YARN-7786) | NullPointerException while launching ApplicationMaster | Major | . 
| lujie | lujie | +| [HDFS-10183](https://issues.apache.org/jira/browse/HDFS-10183) | Prevent race condition during class initialization | Minor | fs | Pavel Avgustinov | Pavel Avgustinov | +| [HDFS-13388](https://issues.apache.org/jira/browse/HDFS-13388) | RequestHedgingProxyProvider calls multiple configured NNs all the time | Major | hdfs-client | Jinglun | Jinglun | +| [YARN-7956](https://issues.apache.org/jira/browse/YARN-7956) | [UI2] Avoid duplicating Components link under Services/\/Components | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [HDFS-13433](https://issues.apache.org/jira/browse/HDFS-13433) | webhdfs requests can be routed incorrectly in federated cluster | Critical | webhdfs | Arpit Agarwal | Arpit Agarwal | +| [HDFS-13408](https://issues.apache.org/jira/browse/HDFS-13408) | MiniDFSCluster to support being built on randomized base directory | Major | test | Xiao Liang | Xiao Liang | +| [HDFS-13356](https://issues.apache.org/jira/browse/HDFS-13356) | Balancer:Set default value of minBlockSize to 10mb | Major | balancer & mover | Bharat Viswanadham | Bharat Viswanadham | +| [HADOOP-15390](https://issues.apache.org/jira/browse/HADOOP-15390) | Yarn RM logs flooded by DelegationTokenRenewer trying to renew KMS tokens | Critical | . | Xiao Chen | Xiao Chen | +| [HDFS-13336](https://issues.apache.org/jira/browse/HDFS-13336) | Test cases of TestWriteToReplica failed in windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8196](https://issues.apache.org/jira/browse/YARN-8196) | yarn.webapp.api-service.enable should be highlighted in the quickstart | Trivial | documentation | Davide Vergari | Billie Rinaldi | +| [YARN-8183](https://issues.apache.org/jira/browse/YARN-8183) | Fix ConcurrentModificationException inside RMAppAttemptMetrics#convertAtomicLongMaptoLongMap | Critical | yarn | Sumana Sathish | Suma Shivaprasad | +| [YARN-8188](https://issues.apache.org/jira/browse/YARN-8188) | RM Nodes UI data table index for sorting column need to be corrected post Application tags display | Major | resourcemanager, webapp | Weiwei Yang | Weiwei Yang | +| [HADOOP-15411](https://issues.apache.org/jira/browse/HADOOP-15411) | AuthenticationFilter should use Configuration.getPropsWithPrefix instead of iterator | Critical | . | Suma Shivaprasad | Suma Shivaprasad | +| [MAPREDUCE-7042](https://issues.apache.org/jira/browse/MAPREDUCE-7042) | Killed MR job data does not move to mapreduce.jobhistory.done-dir when ATS v2 is enabled | Major | . | Yesha Vora | Xuan Gong | +| [YARN-8205](https://issues.apache.org/jira/browse/YARN-8205) | Application State is not updated to ATS if AM launching is delayed. | Critical | . 
| Sumana Sathish | Rohith Sharma K S | +| [YARN-8004](https://issues.apache.org/jira/browse/YARN-8004) | Add unit tests for inter queue preemption for dominant resource calculator | Critical | yarn | Sumana Sathish | Zian Chen | +| [YARN-8208](https://issues.apache.org/jira/browse/YARN-8208) | Add log statement for Docker client configuration file at INFO level | Minor | yarn-native-services | Yesha Vora | Yesha Vora | +| [YARN-8211](https://issues.apache.org/jira/browse/YARN-8211) | Yarn registry dns log finds BufferUnderflowException on port ping | Major | yarn-native-services | Yesha Vora | Eric Yang | +| [YARN-8221](https://issues.apache.org/jira/browse/YARN-8221) | RMWebServices also need to honor yarn.resourcemanager.display.per-user-apps | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-8210](https://issues.apache.org/jira/browse/YARN-8210) | AMRMClient logging on every heartbeat to track updation of AM RM token causes too many log lines to be generated in AM logs | Major | yarn | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-8005](https://issues.apache.org/jira/browse/YARN-8005) | Add unit tests for queue priority with dominant resource calculator | Critical | . | Sumana Sathish | Zian Chen | +| [YARN-8225](https://issues.apache.org/jira/browse/YARN-8225) | YARN precommit build failing in TestPlacementConstraintTransformations | Critical | . | Billie Rinaldi | Shane Kumpf | +| [HDFS-13509](https://issues.apache.org/jira/browse/HDFS-13509) | Bug fix for breakHardlinks() of ReplicaInfo/LocalReplica, and fix TestFileAppend failures on Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8187](https://issues.apache.org/jira/browse/YARN-8187) | [UI2] Individual Node page does not contain breadcrumb trail | Critical | yarn-ui-v2 | Sumana Sathish | Zian Chen | +| [YARN-7799](https://issues.apache.org/jira/browse/YARN-7799) | YARN Service dependency follow up work | Critical | client, resourcemanager | Gour Saha | Billie Rinaldi | +| [MAPREDUCE-7073](https://issues.apache.org/jira/browse/MAPREDUCE-7073) | Optimize TokenCache#obtainTokensForNamenodesInternal | Major | . | Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-15406](https://issues.apache.org/jira/browse/HADOOP-15406) | hadoop-nfs dependencies for mockito and junit are not test scope | Major | nfs | Jason Lowe | Jason Lowe | +| [YARN-6385](https://issues.apache.org/jira/browse/YARN-6385) | Fix checkstyle warnings in TestFileSystemApplicationHistoryStore | Minor | . | Yiqun Lin | Yiqun Lin | +| [YARN-8222](https://issues.apache.org/jira/browse/YARN-8222) | Fix potential NPE when gets RMApp from RM context | Critical | . | Tao Yang | Tao Yang | +| [YARN-8209](https://issues.apache.org/jira/browse/YARN-8209) | NPE in DeletionService | Critical | . | Chandni Singh | Eric Badger | +| [HDFS-13481](https://issues.apache.org/jira/browse/HDFS-13481) | TestRollingFileSystemSinkWithHdfs#testFlushThread: test failed intermittently | Major | hdfs | Gabor Bota | Gabor Bota | +| [YARN-8217](https://issues.apache.org/jira/browse/YARN-8217) | RmAuthenticationFilterInitializer /TimelineAuthenticationFilterInitializer should use Configuration.getPropsWithPrefix instead of iterator | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-7818](https://issues.apache.org/jira/browse/YARN-7818) | Remove privileged operation warnings during container launch for the ContainerRuntimes | Major | . 
| Yesha Vora | Shane Kumpf | +| [YARN-8223](https://issues.apache.org/jira/browse/YARN-8223) | ClassNotFoundException when auxiliary service is loaded from HDFS | Blocker | . | Charan Hebri | Zian Chen | +| [YARN-8079](https://issues.apache.org/jira/browse/YARN-8079) | Support static and archive unmodified local resources in service AM | Critical | . | Wangda Tan | Suma Shivaprasad | +| [YARN-8025](https://issues.apache.org/jira/browse/YARN-8025) | UsersManangers#getComputedResourceLimitForActiveUsers throws NPE due to preComputedActiveUserLimit is empty | Major | yarn | Jiandan Yang | Tao Yang | +| [YARN-8251](https://issues.apache.org/jira/browse/YARN-8251) | [UI2] Clicking on Application link at the header goes to Diagnostics Tab instead of AppAttempt Tab | Major | yarn-ui-v2 | Sumana Sathish | Yesha Vora | +| [YARN-8232](https://issues.apache.org/jira/browse/YARN-8232) | RMContainer lost queue name when RM HA happens | Major | resourcemanager | Hu Ziqian | Hu Ziqian | +| [YARN-7894](https://issues.apache.org/jira/browse/YARN-7894) | Improve ATS response for DS\_CONTAINER when container launch fails | Major | timelineserver | Charan Hebri | Chandni Singh | +| [YARN-8264](https://issues.apache.org/jira/browse/YARN-8264) | [UI2 GPU] GPU Info tab disappears if we click any sub link under List of Applications or List of Containers | Major | . | Sumana Sathish | Sunil Govindan | +| [HDFS-13537](https://issues.apache.org/jira/browse/HDFS-13537) | TestHdfsHelper does not generate jceks path properly for relative path in Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8202](https://issues.apache.org/jira/browse/YARN-8202) | DefaultAMSProcessor should properly check units of requested custom resource types against minimum/maximum allocation | Blocker | . | Szilard Nemeth | Szilard Nemeth | +| [HADOOP-15446](https://issues.apache.org/jira/browse/HADOOP-15446) | WASB: PageBlobInputStream.skip breaks HBASE replication | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [YARN-7003](https://issues.apache.org/jira/browse/YARN-7003) | DRAINING state of queues is not recovered after RM restart | Major | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-8244](https://issues.apache.org/jira/browse/YARN-8244) | TestContainerSchedulerQueuing.testStartMultipleContainers failed | Major | . | Miklos Szegedi | Jim Brennan | +| [YARN-8265](https://issues.apache.org/jira/browse/YARN-8265) | Service AM should retrieve new IP for docker container relaunched by NM | Critical | yarn-native-services | Eric Yang | Billie Rinaldi | +| [YARN-8271](https://issues.apache.org/jira/browse/YARN-8271) | [UI2] Improve labeling of certain tables | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-8288](https://issues.apache.org/jira/browse/YARN-8288) | Fix wrong number of table columns in Resource Model doc | Major | . | Weiwei Yang | Weiwei Yang | +| [HDFS-13539](https://issues.apache.org/jira/browse/HDFS-13539) | DFSStripedInputStream NPE when reportCheckSumFailure | Major | . 
| Xiao Chen | Xiao Chen | +| [YARN-8266](https://issues.apache.org/jira/browse/YARN-8266) | [UI2] Clicking on application from cluster view should redirect to application attempt page | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-8166](https://issues.apache.org/jira/browse/YARN-8166) | [UI2] Service page header links are broken | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-8236](https://issues.apache.org/jira/browse/YARN-8236) | Invalid kerberos principal file name cause NPE in native service | Critical | yarn-native-services | Sunil Govindan | Gour Saha | +| [YARN-8278](https://issues.apache.org/jira/browse/YARN-8278) | DistributedScheduling is not working in HA | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-15442](https://issues.apache.org/jira/browse/HADOOP-15442) | ITestS3AMetrics.testMetricsRegister can't know metrics source's name | Major | fs/s3, metrics | Sean Mackrory | Sean Mackrory | +| [YARN-8300](https://issues.apache.org/jira/browse/YARN-8300) | Fix NPE in DefaultUpgradeComponentsFinder | Major | yarn | Suma Shivaprasad | Suma Shivaprasad | +| [HDFS-13581](https://issues.apache.org/jira/browse/HDFS-13581) | DN UI logs link is broken when https is enabled | Minor | datanode | Namit Maheshwari | Shashikant Banerjee | +| [YARN-8128](https://issues.apache.org/jira/browse/YARN-8128) | Document better the per-node per-app file limit in YARN log aggregation | Major | . | Xuan Gong | Xuan Gong | +| [YARN-8293](https://issues.apache.org/jira/browse/YARN-8293) | In YARN Services UI, "User Name for service" should be completely removed in secure clusters | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8141](https://issues.apache.org/jira/browse/YARN-8141) | YARN Native Service: Respect YARN\_CONTAINER\_RUNTIME\_DOCKER\_LOCAL\_RESOURCE\_MOUNTS specified in service spec | Critical | yarn-native-services | Wangda Tan | Chandni Singh | +| [YARN-8296](https://issues.apache.org/jira/browse/YARN-8296) | Update YarnServiceApi documentation and yarn service UI code to remove references to unique\_component\_support | Major | yarn-native-services, yarn-ui-v2 | Suma Shivaprasad | Suma Shivaprasad | +| [HDFS-13586](https://issues.apache.org/jira/browse/HDFS-13586) | Fsync fails on directories on Windows | Critical | datanode, hdfs | Lukas Majercak | Lukas Majercak | +| [HADOOP-15478](https://issues.apache.org/jira/browse/HADOOP-15478) | WASB: hflush() and hsync() regression | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [YARN-8179](https://issues.apache.org/jira/browse/YARN-8179) | Preemption does not happen due to natural\_termination\_factor when DRF is used | Major | . | kyungwan nam | kyungwan nam | +| [HADOOP-15450](https://issues.apache.org/jira/browse/HADOOP-15450) | Avoid fsync storm triggered by DiskChecker and handle disk full situation | Blocker | . | Kihwal Lee | Arpit Agarwal | +| [YARN-8290](https://issues.apache.org/jira/browse/YARN-8290) | SystemMetricsPublisher.appACLsUpdated should be invoked after application information is published to ATS to avoid "User is not set in the application report" Exception | Critical | . | Yesha Vora | Eric Yang | +| [YARN-8332](https://issues.apache.org/jira/browse/YARN-8332) | Incorrect min/max allocation property name in resource types doc | Critical | documentation | Weiwei Yang | Weiwei Yang | +| [HDFS-13601](https://issues.apache.org/jira/browse/HDFS-13601) | Optimize ByteString conversions in PBHelper | Major | . 
| Andrew Wang | Andrew Wang | +| [HDFS-13540](https://issues.apache.org/jira/browse/HDFS-13540) | DFSStripedInputStream should only allocate new buffers when reading | Major | . | Xiao Chen | Xiao Chen | +| [YARN-8297](https://issues.apache.org/jira/browse/YARN-8297) | Incorrect ATS Url used for Wire encrypted cluster | Blocker | yarn-ui-v2 | Yesha Vora | Sunil Govindan | +| [HDFS-13588](https://issues.apache.org/jira/browse/HDFS-13588) | Fix TestFsDatasetImpl test failures on Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8310](https://issues.apache.org/jira/browse/YARN-8310) | Handle old NMTokenIdentifier, AMRMTokenIdentifier, and ContainerTokenIdentifier formats | Major | . | Robert Kanter | Robert Kanter | +| [YARN-8344](https://issues.apache.org/jira/browse/YARN-8344) | Missing nm.stop() in TestNodeManagerResync to fix testKillContainersOnResync | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8327](https://issues.apache.org/jira/browse/YARN-8327) | Fix TestAggregatedLogFormat#testReadAcontainerLogs1 on Windows | Major | log-aggregation | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-13611](https://issues.apache.org/jira/browse/HDFS-13611) | Unsafe use of Text as a ConcurrentHashMap key in PBHelperClient | Major | . | Andrew Wang | Andrew Wang | +| [YARN-8316](https://issues.apache.org/jira/browse/YARN-8316) | Diagnostic message should improve when yarn service fails to launch due to ATS unavailability | Major | yarn-native-services | Yesha Vora | Billie Rinaldi | +| [YARN-8357](https://issues.apache.org/jira/browse/YARN-8357) | Yarn Service: NPE when service is saved first and then started. | Critical | . | Chandni Singh | Chandni Singh | +| [HDFS-13618](https://issues.apache.org/jira/browse/HDFS-13618) | Fix TestDataNodeFaultInjector test failures on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HADOOP-15473](https://issues.apache.org/jira/browse/HADOOP-15473) | Configure serialFilter in KeyProvider to avoid UnrecoverableKeyException caused by JDK-8189997 | Critical | kms | Gabor Bota | Gabor Bota | +| [YARN-8292](https://issues.apache.org/jira/browse/YARN-8292) | Fix the dominant resource preemption cannot happen when some of the resource vector becomes negative | Critical | yarn | Sumana Sathish | Wangda Tan | +| [YARN-8338](https://issues.apache.org/jira/browse/YARN-8338) | TimelineService V1.5 doesn't come up after HADOOP-15406 | Critical | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [YARN-8339](https://issues.apache.org/jira/browse/YARN-8339) | Service AM should localize static/archive resource types to container working directory instead of 'resources' | Critical | yarn-native-services | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-8369](https://issues.apache.org/jira/browse/YARN-8369) | Javadoc build failed due to "bad use of '\>'" | Critical | build, docs | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8362](https://issues.apache.org/jira/browse/YARN-8362) | Number of remaining retries are updated twice after a container failure in NM | Critical | . | Chandni Singh | Chandni Singh | +| [YARN-8377](https://issues.apache.org/jira/browse/YARN-8377) | Javadoc build failed in hadoop-yarn-server-nodemanager | Critical | build, docs | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8368](https://issues.apache.org/jira/browse/YARN-8368) | yarn app start cli should print applicationId | Critical | . 
| Yesha Vora | Rohith Sharma K S | +| [YARN-8350](https://issues.apache.org/jira/browse/YARN-8350) | NPE in service AM related to placement policy | Critical | yarn-native-services | Billie Rinaldi | Gour Saha | +| [YARN-8367](https://issues.apache.org/jira/browse/YARN-8367) | Fix NPE in SingleConstraintAppPlacementAllocator when placement constraint in SchedulingRequest is null | Major | scheduler | Gour Saha | Weiwei Yang | +| [YARN-8197](https://issues.apache.org/jira/browse/YARN-8197) | Tracking URL in the app state does not get redirected to MR ApplicationMaster for Running applications | Critical | yarn | Sumana Sathish | Sunil Govindan | +| [YARN-8308](https://issues.apache.org/jira/browse/YARN-8308) | Yarn service app fails due to issues with Renew Token | Major | yarn-native-services | Yesha Vora | Gour Saha | +| [HDFS-13636](https://issues.apache.org/jira/browse/HDFS-13636) | Cross-Site Scripting vulnerability in HttpServer2 | Major | . | Haibo Yan | Haibo Yan | +| [YARN-7962](https://issues.apache.org/jira/browse/YARN-7962) | Race Condition When Stopping DelegationTokenRenewer causes RM crash during failover | Critical | resourcemanager | BELUGA BEHR | BELUGA BEHR | +| [YARN-8372](https://issues.apache.org/jira/browse/YARN-8372) | Distributed shell app master should not release containers when shutdown if keep-container is true | Critical | distributed-shell | Charan Hebri | Suma Shivaprasad | +| [YARN-8319](https://issues.apache.org/jira/browse/YARN-8319) | More YARN pages need to honor yarn.resourcemanager.display.per-user-apps | Major | webapp | Vinod Kumar Vavilapalli | Sunil Govindan | +| [MAPREDUCE-7097](https://issues.apache.org/jira/browse/MAPREDUCE-7097) | MapReduce JHS should honor yarn.webapp.filter-entity-list-by-user | Major | . | Vinod Kumar Vavilapalli | Sunil Govindan | +| [YARN-8276](https://issues.apache.org/jira/browse/YARN-8276) | [UI2] After version field became mandatory, form-based submission of new YARN service doesn't work | Critical | yarn-ui-v2 | Gergely Novák | Gergely Novák | +| [HDFS-13339](https://issues.apache.org/jira/browse/HDFS-13339) | Volume reference can't be released and may lead to deadlock when DataXceiver does a check volume | Critical | datanode | liaoyuxiangqin | Zsolt Venczel | +| [YARN-8382](https://issues.apache.org/jira/browse/YARN-8382) | cgroup file leak in NM | Major | nodemanager | Hu Ziqian | Hu Ziqian | +| [YARN-8365](https://issues.apache.org/jira/browse/YARN-8365) | Revisit the record type used by Registry DNS for upstream resolution | Major | yarn-native-services | Shane Kumpf | Shane Kumpf | +| [HDFS-13545](https://issues.apache.org/jira/browse/HDFS-13545) | "guarded" is misspelled as "gaurded" in FSPermissionChecker.java | Trivial | documentation | Jianchao Jia | Jianchao Jia | +| [YARN-8396](https://issues.apache.org/jira/browse/YARN-8396) | Click on an individual container continuously spins and doesn't load the page | Blocker | . | Charan Hebri | Sunil Govindan | +| [MAPREDUCE-7103](https://issues.apache.org/jira/browse/MAPREDUCE-7103) | Fix TestHistoryViewerPrinter on windows due to a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15217](https://issues.apache.org/jira/browse/HADOOP-15217) | FsUrlConnection does not handle paths with spaces | Major | fs | Joseph Fourny | Zsolt Venczel | +| [HDFS-12950](https://issues.apache.org/jira/browse/HDFS-12950) | [oiv] ls will fail in secure cluster | Major | . 
| Brahma Reddy Battula | Wei-Chiu Chuang | +| [YARN-8386](https://issues.apache.org/jira/browse/YARN-8386) | App log can not be viewed from Logs tab in secure cluster | Critical | yarn-ui-v2 | Yesha Vora | Sunil Govindan | +| [YARN-8359](https://issues.apache.org/jira/browse/YARN-8359) | Exclude containermanager.linux test classes on Windows | Major | . | Giovanni Matteo Fumarola | Jason Lowe | +| [HDFS-13664](https://issues.apache.org/jira/browse/HDFS-13664) | Refactor ConfiguredFailoverProxyProvider to make inheritance easier | Minor | hdfs-client | Chao Sun | Chao Sun | +| [HDFS-12670](https://issues.apache.org/jira/browse/HDFS-12670) | can't renew HDFS tokens with only the hdfs client jar | Critical | . | Thomas Graves | Arpit Agarwal | +| [HDFS-13667](https://issues.apache.org/jira/browse/HDFS-13667) | Typo: Marking all "datandoes" as stale | Trivial | namenode | Wei-Chiu Chuang | Nanda kumar | +| [YARN-8413](https://issues.apache.org/jira/browse/YARN-8413) | Flow activity page is failing with "Timeline server failed with an error" | Major | yarn-ui-v2 | Yesha Vora | Sunil Govindan | +| [YARN-8405](https://issues.apache.org/jira/browse/YARN-8405) | RM zk-state-store.parent-path ACLs has been changed since HADOOP-14773 | Major | . | Rohith Sharma K S | Íñigo Goiri | +| [YARN-8419](https://issues.apache.org/jira/browse/YARN-8419) | [UI2] User cannot submit a new service as submit button is always disabled | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [MAPREDUCE-7108](https://issues.apache.org/jira/browse/MAPREDUCE-7108) | TestFileOutputCommitter fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [MAPREDUCE-7101](https://issues.apache.org/jira/browse/MAPREDUCE-7101) | Add config parameter to allow JHS to alway scan user dir irrespective of modTime | Critical | . | Wangda Tan | Thomas Marquardt | +| [HADOOP-15527](https://issues.apache.org/jira/browse/HADOOP-15527) | loop until TIMEOUT before sending kill -9 | Major | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [YARN-8404](https://issues.apache.org/jira/browse/YARN-8404) | Timeline event publish need to be async to avoid Dispatcher thread leak in case ATS is down | Blocker | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8410](https://issues.apache.org/jira/browse/YARN-8410) | Registry DNS lookup fails to return for CNAMEs | Major | yarn-native-services | Shane Kumpf | Shane Kumpf | +| [HDFS-13675](https://issues.apache.org/jira/browse/HDFS-13675) | Speed up TestDFSAdminWithHA | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [HDFS-13673](https://issues.apache.org/jira/browse/HDFS-13673) | TestNameNodeMetrics fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13676](https://issues.apache.org/jira/browse/HDFS-13676) | TestEditLogRace fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13174](https://issues.apache.org/jira/browse/HDFS-13174) | hdfs mover -p /path times out after 20 min | Major | balancer & mover | Istvan Fajth | Istvan Fajth | +| [HADOOP-15523](https://issues.apache.org/jira/browse/HADOOP-15523) | Shell command timeout given is in seconds whereas it is taken as millisec while scheduling | Major | . 
| Bilwa S T | Bilwa S T | +| [HDFS-13682](https://issues.apache.org/jira/browse/HDFS-13682) | Cannot create encryption zone after KMS auth token expires | Critical | encryption, kms, namenode | Xiao Chen | Xiao Chen | +| [YARN-8445](https://issues.apache.org/jira/browse/YARN-8445) | YARN native service doesn't allow service name equals to component name | Major | . | Chandni Singh | Chandni Singh | +| [YARN-8444](https://issues.apache.org/jira/browse/YARN-8444) | NodeResourceMonitor crashes on bad swapFree value | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8326](https://issues.apache.org/jira/browse/YARN-8326) | Yarn 3.0 seems runs slower than Yarn 2.6 | Major | yarn | Hsin-Liang Huang | Shane Kumpf | +| [YARN-8443](https://issues.apache.org/jira/browse/YARN-8443) | Total #VCores in cluster metrics is wrong when CapacityScheduler reserved some containers | Major | webapp | Tao Yang | Tao Yang | +| [YARN-8457](https://issues.apache.org/jira/browse/YARN-8457) | Compilation is broken with -Pyarn-ui | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-8464](https://issues.apache.org/jira/browse/YARN-8464) | Async scheduling thread could be interrupted when there are no NodeManagers in cluster | Blocker | capacity scheduler | Charan Hebri | Sunil Govindan | +| [YARN-8423](https://issues.apache.org/jira/browse/YARN-8423) | GPU does not get released even though the application gets killed. | Critical | yarn | Sumana Sathish | Sunil Govindan | +| [YARN-8401](https://issues.apache.org/jira/browse/YARN-8401) | [UI2] new ui is not accessible with out internet connection | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [HDFS-13705](https://issues.apache.org/jira/browse/HDFS-13705) | The native ISA-L library loading failure should be made warning rather than an error message | Minor | erasure-coding | Nilotpal Nandi | Shashikant Banerjee | +| [YARN-8409](https://issues.apache.org/jira/browse/YARN-8409) | ActiveStandbyElectorBasedElectorService is failing with NPE | Major | . | Yesha Vora | Chandni Singh | +| [YARN-8379](https://issues.apache.org/jira/browse/YARN-8379) | Improve balancing resources in already satisfied queues by using Capacity Scheduler preemption | Major | . | Wangda Tan | Zian Chen | +| [YARN-8455](https://issues.apache.org/jira/browse/YARN-8455) | Add basic ACL check for all ATS v2 REST APIs | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8469](https://issues.apache.org/jira/browse/YARN-8469) | [UI2] URL needs to be trimmed to handle index.html redirection while accessing via knox | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8451](https://issues.apache.org/jira/browse/YARN-8451) | Multiple NM heartbeat thread created when a slow NM resync with RM | Major | nodemanager | Botong Huang | Botong Huang | +| [HADOOP-15548](https://issues.apache.org/jira/browse/HADOOP-15548) | Randomize local dirs | Minor | . | Jim Brennan | Jim Brennan | +| [HADOOP-15574](https://issues.apache.org/jira/browse/HADOOP-15574) | Suppress build error if there are no docs after excluding private annotations | Major | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13702](https://issues.apache.org/jira/browse/HDFS-13702) | Remove HTrace hooks from DFSClient to reduce CPU usage | Major | performance | Todd Lipcon | Todd Lipcon | +| [HDFS-13635](https://issues.apache.org/jira/browse/HDFS-13635) | Incorrect message when block is not found | Major | datanode | Wei-Chiu Chuang | Gabor Bota | +| [YARN-8415](https://issues.apache.org/jira/browse/YARN-8415) | TimelineWebServices.getEntity should throw ForbiddenException instead of 404 when ACL checks fail | Major | . | Sumana Sathish | Suma Shivaprasad | +| [HDFS-13715](https://issues.apache.org/jira/browse/HDFS-13715) | diskbalancer does not work if one of the blockpools are empty on a Federated cluster | Major | diskbalancer | Namit Maheshwari | Bharat Viswanadham | +| [YARN-8459](https://issues.apache.org/jira/browse/YARN-8459) | Improve Capacity Scheduler logs to debug invalid states | Major | capacity scheduler | Wangda Tan | Wangda Tan | +| [HADOOP-15571](https://issues.apache.org/jira/browse/HADOOP-15571) | Multiple FileContexts created with the same configuration object should be allowed to have different umask | Critical | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [HDFS-13121](https://issues.apache.org/jira/browse/HDFS-13121) | NPE when request file descriptors when SC read | Minor | hdfs-client | Gang Xie | Zsolt Venczel | +| [YARN-6265](https://issues.apache.org/jira/browse/YARN-6265) | yarn.resourcemanager.fail-fast is used inconsistently | Major | resourcemanager | Daniel Templeton | Yuanbo Liu | +| [YARN-8473](https://issues.apache.org/jira/browse/YARN-8473) | Containers being launched as app tears down can leave containers in NEW state | Major | nodemanager | Jason Lowe | Jason Lowe | +| [YARN-8512](https://issues.apache.org/jira/browse/YARN-8512) | ATSv2 entities are not published to HBase from second attempt onwards | Major | . | Yesha Vora | Rohith Sharma K S | +| [YARN-8491](https://issues.apache.org/jira/browse/YARN-8491) | TestServiceCLI#testEnableFastLaunch fail when umask is 077 | Major | . | K G Bakthavachalam | K G Bakthavachalam | +| [HADOOP-15541](https://issues.apache.org/jira/browse/HADOOP-15541) | AWS SDK can mistake stream timeouts for EOF and throw SdkClientExceptions | Major | fs/s3 | Sean Mackrory | Sean Mackrory | +| [HDFS-13723](https://issues.apache.org/jira/browse/HDFS-13723) | Occasional "Should be different group" error in TestRefreshUserMappings#testGroupMappingRefresh | Major | security, test | Siyao Meng | Siyao Meng | +| [HDFS-12837](https://issues.apache.org/jira/browse/HDFS-12837) | Intermittent failure in TestReencryptionWithKMS | Major | encryption, test | Surendra Singh Lilhore | Xiao Chen | +| [HDFS-13729](https://issues.apache.org/jira/browse/HDFS-13729) | Fix broken links to RBF documentation | Minor | documentation | jwhitter | Gabor Bota | +| [YARN-8518](https://issues.apache.org/jira/browse/YARN-8518) | test-container-executor test\_is\_empty() is broken | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8515](https://issues.apache.org/jira/browse/YARN-8515) | container-executor can crash with SIGPIPE after nodemanager restart | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8421](https://issues.apache.org/jira/browse/YARN-8421) | when moving app, activeUsers is increased, even though app does not have outstanding request | Major | . 
| kyungwan nam | | +| [YARN-8511](https://issues.apache.org/jira/browse/YARN-8511) | When AM releases a container, RM removes allocation tags before it is released by NM | Major | capacity scheduler | Weiwei Yang | Weiwei Yang | +| [HDFS-13524](https://issues.apache.org/jira/browse/HDFS-13524) | Occasional "All datanodes are bad" error in TestLargeBlock#testLargeBlockSize | Major | . | Wei-Chiu Chuang | Siyao Meng | +| [YARN-8538](https://issues.apache.org/jira/browse/YARN-8538) | Fix valgrind leak check on container executor | Major | . | Billie Rinaldi | Billie Rinaldi | +| [HADOOP-15610](https://issues.apache.org/jira/browse/HADOOP-15610) | Hadoop Docker Image Pip Install Fails | Critical | . | Jack Bearden | Jack Bearden | +| [HADOOP-15614](https://issues.apache.org/jira/browse/HADOOP-15614) | TestGroupsCaching.testExceptionOnBackgroundRefreshHandled reliably fails | Major | . | Kihwal Lee | Weiwei Yang | +| [MAPREDUCE-7118](https://issues.apache.org/jira/browse/MAPREDUCE-7118) | Distributed cache conflicts breaks backwards compatability | Blocker | mrv2 | Jason Lowe | Jason Lowe | +| [YARN-8528](https://issues.apache.org/jira/browse/YARN-8528) | Final states in ContainerAllocation might be modified externally causing unexpected allocation results | Major | capacity scheduler | Xintong Song | Xintong Song | +| [YARN-8541](https://issues.apache.org/jira/browse/YARN-8541) | RM startup failure on recovery after user deletion | Blocker | resourcemanager | yimeng | Bibin A Chundatt | +| [HADOOP-15593](https://issues.apache.org/jira/browse/HADOOP-15593) | UserGroupInformation TGT renewer throws NPE | Blocker | security | Wei-Chiu Chuang | Gabor Bota | +| [HDFS-13765](https://issues.apache.org/jira/browse/HDFS-13765) | Fix javadoc for FSDirMkdirOp#createParentDirectories | Minor | documentation | Lokesh Jain | Lokesh Jain | +| [YARN-8508](https://issues.apache.org/jira/browse/YARN-8508) | On NodeManager container gets cleaned up before its pid file is created | Critical | . | Sumana Sathish | Chandni Singh | +| [YARN-8434](https://issues.apache.org/jira/browse/YARN-8434) | Update federation documentation of Nodemanager configurations | Minor | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8591](https://issues.apache.org/jira/browse/YARN-8591) | [ATSv2] NPE while checking for entity acl in non-secure cluster | Major | timelinereader, timelineserver | Akhil PB | Rohith Sharma K S | +| [YARN-8558](https://issues.apache.org/jira/browse/YARN-8558) | NM recovery level db not cleaned up properly on container finish | Critical | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8418](https://issues.apache.org/jira/browse/YARN-8418) | App local logs could leaked if log aggregation fails to initialize for the app | Critical | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8522](https://issues.apache.org/jira/browse/YARN-8522) | Application fails with InvalidResourceRequestException | Critical | . | Yesha Vora | Zian Chen | +| [YARN-8606](https://issues.apache.org/jira/browse/YARN-8606) | Opportunistic scheduling does not work post RM failover | Blocker | . 
| Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8600](https://issues.apache.org/jira/browse/YARN-8600) | RegistryDNS hang when remote lookup does not reply | Critical | yarn | Eric Yang | Eric Yang | +| [YARN-8603](https://issues.apache.org/jira/browse/YARN-8603) | [UI2] Latest run application should be listed first in the RM UI | Major | yarn-ui-v2 | Sumana Sathish | Akhil PB | +| [YARN-8608](https://issues.apache.org/jira/browse/YARN-8608) | [UI2] No information available per application appAttempt about 'Total Outstanding Resource Requests' | Major | yarn-ui-v2 | Sumana Sathish | Akhil PB | +| [YARN-8399](https://issues.apache.org/jira/browse/YARN-8399) | NodeManager is giving 403 GSS exception post upgrade to 3.1 in secure mode | Major | timelineservice | Sunil Govindan | Sunil Govindan | +| [HDFS-13746](https://issues.apache.org/jira/browse/HDFS-13746) | Still occasional "Should be different group" failure in TestRefreshUserMappings#testGroupMappingRefresh | Major | . | Siyao Meng | Siyao Meng | +| [HADOOP-14314](https://issues.apache.org/jira/browse/HADOOP-14314) | The OpenSolaris taxonomy link is dead in InterfaceClassification.md | Major | documentation | Daniel Templeton | Rui Gao | + + +### TESTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [MAPREDUCE-7066](https://issues.apache.org/jira/browse/MAPREDUCE-7066) | TestQueue fails on Java9 | Major | test | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-15313](https://issues.apache.org/jira/browse/HADOOP-15313) | TestKMS should close providers | Major | kms, test | Xiao Chen | Xiao Chen | +| [HDFS-13129](https://issues.apache.org/jira/browse/HDFS-13129) | Add a test for DfsAdmin refreshSuperUserGroupsConfiguration | Minor | namenode | Mukul Kumar Singh | Mukul Kumar Singh | +| [HDFS-13503](https://issues.apache.org/jira/browse/HDFS-13503) | Fix TestFsck test failures on Windows | Major | hdfs | Xiao Liang | Xiao Liang | +| [HDFS-13315](https://issues.apache.org/jira/browse/HDFS-13315) | Add a test for the issue reported in HDFS-11481 which is fixed by HDFS-10997. | Major | . | Yongjun Zhang | Yongjun Zhang | +| [HDFS-13542](https://issues.apache.org/jira/browse/HDFS-13542) | TestBlockManager#testNeededReplicationWhileAppending fails due to improper cluster shutdown in TestBlockManager#testBlockManagerMachinesArray on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13551](https://issues.apache.org/jira/browse/HDFS-13551) | TestMiniDFSCluster#testClusterSetStorageCapacity does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-11700](https://issues.apache.org/jira/browse/HDFS-11700) | TestHDFSServerPorts#testBackupNodePorts doesn't pass on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13548](https://issues.apache.org/jira/browse/HDFS-13548) | TestResolveHdfsSymlink#testFcResolveAfs fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13567](https://issues.apache.org/jira/browse/HDFS-13567) | TestNameNodeMetrics#testGenerateEDEKTime,TestNameNodeMetrics#testResourceCheck should use a different cluster basedir | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13557](https://issues.apache.org/jira/browse/HDFS-13557) | TestDFSAdmin#testListOpenFiles fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13559](https://issues.apache.org/jira/browse/HDFS-13559) | TestBlockScanner does not close TestContext properly | Minor | . 
| Anbang Hu | Anbang Hu | +| [HDFS-13570](https://issues.apache.org/jira/browse/HDFS-13570) | TestQuotaByStorageType,TestQuota,TestDFSOutputStream fail on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13558](https://issues.apache.org/jira/browse/HDFS-13558) | TestDatanodeHttpXFrame does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13554](https://issues.apache.org/jira/browse/HDFS-13554) | TestDatanodeRegistration#testForcedRegistration does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13556](https://issues.apache.org/jira/browse/HDFS-13556) | TestNestedEncryptionZones does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13560](https://issues.apache.org/jira/browse/HDFS-13560) | Insufficient system resources exist to complete the requested service for some tests on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13592](https://issues.apache.org/jira/browse/HDFS-13592) | TestNameNodePrunesMissingStorages#testNameNodePrunesUnreportedStorages does not shut down cluster properly | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13593](https://issues.apache.org/jira/browse/HDFS-13593) | TestBlockReaderLocalLegacy#testBlockReaderLocalLegacyWithAppend fails on Windows | Minor | test | Anbang Hu | Anbang Hu | +| [HDFS-13587](https://issues.apache.org/jira/browse/HDFS-13587) | TestQuorumJournalManager fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13619](https://issues.apache.org/jira/browse/HDFS-13619) | TestAuditLoggerWithCommands fails on Windows | Minor | test | Anbang Hu | Anbang Hu | +| [HDFS-13620](https://issues.apache.org/jira/browse/HDFS-13620) | Randomize the test directory path for TestHDFSFileSystemContract | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13627](https://issues.apache.org/jira/browse/HDFS-13627) | TestErasureCodingExerciseAPIs fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13591](https://issues.apache.org/jira/browse/HDFS-13591) | TestDFSShell#testSetrepLow fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13632](https://issues.apache.org/jira/browse/HDFS-13632) | Randomize baseDir for MiniJournalCluster in MiniQJMHACluster for TestDFSAdminWithHA | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13629](https://issues.apache.org/jira/browse/HDFS-13629) | Some tests in TestDiskBalancerCommand fail on Windows due to MiniDFSCluster path conflict and improper path usage | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13631](https://issues.apache.org/jira/browse/HDFS-13631) | TestDFSAdmin#testCheckNumOfBlocksInReportCommand should use a separate MiniDFSCluster path | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13651](https://issues.apache.org/jira/browse/HDFS-13651) | TestReencryptionHandler fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13648](https://issues.apache.org/jira/browse/HDFS-13648) | Fix TestGetConf#testGetJournalNodes on Windows due to a mismatch line separator | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [MAPREDUCE-7102](https://issues.apache.org/jira/browse/MAPREDUCE-7102) | Fix TestJavaSerialization for Windows due a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [MAPREDUCE-7105](https://issues.apache.org/jira/browse/MAPREDUCE-7105) | Fix TestNativeCollectorOnlyHandler.testOnCall on Windows because of the path format | Minor | . 
| Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-13652](https://issues.apache.org/jira/browse/HDFS-13652) | Randomize baseDir for MiniDFSCluster in TestBlockScanner | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13649](https://issues.apache.org/jira/browse/HDFS-13649) | Randomize baseDir for MiniDFSCluster in TestReconstructStripedFile and TestReconstructStripedFileWithRandomECPolicy | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13650](https://issues.apache.org/jira/browse/HDFS-13650) | Randomize baseDir for MiniDFSCluster in TestDFSStripedInputStream and TestDFSStripedInputStreamWithRandomECPolicy | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8370](https://issues.apache.org/jira/browse/YARN-8370) | Some Node Manager tests fail on Windows due to improper path/file separator | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8422](https://issues.apache.org/jira/browse/YARN-8422) | TestAMSimulator failing with NPE | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15532](https://issues.apache.org/jira/browse/HADOOP-15532) | TestBasicDiskValidator fails with NoSuchFileException | Minor | . | Íñigo Goiri | Giovanni Matteo Fumarola | +| [HDFS-13563](https://issues.apache.org/jira/browse/HDFS-13563) | TestDFSAdminWithHA times out on Windows | Minor | . | Anbang Hu | Lukas Majercak | +| [HDFS-13681](https://issues.apache.org/jira/browse/HDFS-13681) | Fix TestStartup.testNNFailToStartOnReadOnlyNNDir test failure on Windows | Major | test | Xiao Liang | Xiao Liang | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8002](https://issues.apache.org/jira/browse/YARN-8002) | Support NOT\_SELF and ALL namespace types for allocation tag | Major | resourcemanager | Weiwei Yang | Weiwei Yang | +| [HDFS-13289](https://issues.apache.org/jira/browse/HDFS-13289) | RBF: TestConnectionManager#testCleanup() test case need correction | Minor | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-8013](https://issues.apache.org/jira/browse/YARN-8013) | Support application tags when defining application namespaces for placement constraints | Major | . | Weiwei Yang | Weiwei Yang | +| [YARN-6936](https://issues.apache.org/jira/browse/YARN-6936) | [Atsv2] Retrospect storing entities into sub application table from client perspective | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13353](https://issues.apache.org/jira/browse/HDFS-13353) | RBF: TestRouterWebHDFSContractCreate failed | Major | test | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8107](https://issues.apache.org/jira/browse/YARN-8107) | Give an informative message when incorrect format is used in ATSv2 filter attributes | Major | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8110](https://issues.apache.org/jira/browse/YARN-8110) | AMRMProxy recover should catch for all throwable to avoid premature exit | Major | . | Botong Huang | Botong Huang | +| [YARN-8048](https://issues.apache.org/jira/browse/YARN-8048) | Support auto-spawning of admin configured services during bootstrap of rm/apiserver | Major | . 
| Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13402](https://issues.apache.org/jira/browse/HDFS-13402) | RBF: Fix java doc for StateStoreFileSystemImpl | Minor | hdfs | Yiran Wu | Yiran Wu | +| [YARN-7574](https://issues.apache.org/jira/browse/YARN-7574) | Add support for Node Labels on Auto Created Leaf Queue Template | Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad | +| [HDFS-13410](https://issues.apache.org/jira/browse/HDFS-13410) | RBF: Support federation with no subclusters | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13384](https://issues.apache.org/jira/browse/HDFS-13384) | RBF: Improve timeout RPC call mechanism | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-15376](https://issues.apache.org/jira/browse/HADOOP-15376) | Remove double semi colons on imports that make Clover fall over. | Minor | . | Ewan Higgs | Ewan Higgs | +| [YARN-7973](https://issues.apache.org/jira/browse/YARN-7973) | Support ContainerRelaunch for Docker containers | Major | . | Shane Kumpf | Shane Kumpf | +| [YARN-7941](https://issues.apache.org/jira/browse/YARN-7941) | Transitive dependencies for component are not resolved | Major | . | Rohith Sharma K S | Billie Rinaldi | +| [HADOOP-15346](https://issues.apache.org/jira/browse/HADOOP-15346) | S3ARetryPolicy for 400/BadArgument to be "fail" | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [HDFS-13045](https://issues.apache.org/jira/browse/HDFS-13045) | RBF: Improve error message returned from subcluster | Minor | . | Wei Yan | Íñigo Goiri | +| [HDFS-13428](https://issues.apache.org/jira/browse/HDFS-13428) | RBF: Remove LinkedList From StateStoreFileImpl.java | Trivial | federation | BELUGA BEHR | BELUGA BEHR | +| [HDFS-13386](https://issues.apache.org/jira/browse/HDFS-13386) | RBF: Wrong date information in list file(-ls) result | Minor | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-7221](https://issues.apache.org/jira/browse/YARN-7221) | Add security check for privileged docker container | Major | security | Eric Yang | Eric Yang | +| [YARN-7936](https://issues.apache.org/jira/browse/YARN-7936) | Add default service AM Xmx | Major | . | Jian He | Jian He | +| [YARN-8018](https://issues.apache.org/jira/browse/YARN-8018) | Yarn Service Upgrade: Add support for initiating service upgrade | Major | . | Chandni Singh | Chandni Singh | +| [HADOOP-14999](https://issues.apache.org/jira/browse/HADOOP-14999) | AliyunOSS: provide one asynchronous multi-part based uploading mechanism | Major | fs/oss | Genmao Yu | Genmao Yu | +| [YARN-7142](https://issues.apache.org/jira/browse/YARN-7142) | Support placement policy in yarn native services | Major | yarn-native-services | Billie Rinaldi | Gour Saha | +| [YARN-8138](https://issues.apache.org/jira/browse/YARN-8138) | Add unit test to validate queue priority preemption works under node partition. | Minor | . | Charan Hebri | Zian Chen | +| [YARN-8060](https://issues.apache.org/jira/browse/YARN-8060) | Create default readiness check for service components | Major | yarn-native-services | Billie Rinaldi | Billie Rinaldi | +| [HDFS-13435](https://issues.apache.org/jira/browse/HDFS-13435) | RBF: Improve the error loggings for printing the stack trace | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-8126](https://issues.apache.org/jira/browse/YARN-8126) | Support auto-spawning of admin configured services during bootstrap of RM | Major | . 
| Rohith Sharma K S | Rohith Sharma K S | +| [YARN-7996](https://issues.apache.org/jira/browse/YARN-7996) | Allow user supplied Docker client configurations with YARN native services | Major | . | Shane Kumpf | Shane Kumpf | +| [HDFS-13466](https://issues.apache.org/jira/browse/HDFS-13466) | RBF: Add more router-related information to the UI | Minor | . | Wei Yan | Wei Yan | +| [YARN-5888](https://issues.apache.org/jira/browse/YARN-5888) | [UI2] Improve unit tests for new YARN UI | Minor | yarn-ui-v2 | Akhil PB | Akhil PB | +| [HDFS-13453](https://issues.apache.org/jira/browse/HDFS-13453) | RBF: getMountPointDates should fetch latest subdir time/date when parent dir is not present but /parent/child dirs are present in mount table | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-8111](https://issues.apache.org/jira/browse/YARN-8111) | Simplify PlacementConstraints API by removing allocationTagToIntraApp | Minor | resourcemanager | Weiwei Yang | Weiwei Yang | +| [YARN-8064](https://issues.apache.org/jira/browse/YARN-8064) | Docker ".cmd" files should not be put in hadoop.tmp.dir | Critical | . | Eric Badger | Eric Badger | +| [HDFS-13478](https://issues.apache.org/jira/browse/HDFS-13478) | RBF: Disabled Nameservice store API | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-8177](https://issues.apache.org/jira/browse/YARN-8177) | Documentation changes for auto creation of Leaf Queues with node label | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [HDFS-13490](https://issues.apache.org/jira/browse/HDFS-13490) | RBF: Fix setSafeMode in the Router | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13484](https://issues.apache.org/jira/browse/HDFS-13484) | RBF: Disable Nameservices from the federation | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-7939](https://issues.apache.org/jira/browse/YARN-7939) | Yarn Service Upgrade: add support to upgrade a component instance | Major | . | Chandni Singh | Chandni Singh | +| [HDFS-13326](https://issues.apache.org/jira/browse/HDFS-13326) | RBF: Improve the interfaces to modify and view mount tables | Minor | . | Wei Yan | Gang Li | +| [YARN-8122](https://issues.apache.org/jira/browse/YARN-8122) | Component health threshold monitor | Major | . | Gour Saha | Gour Saha | +| [HDFS-13499](https://issues.apache.org/jira/browse/HDFS-13499) | RBF: Show disabled name services in the UI | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-8215](https://issues.apache.org/jira/browse/YARN-8215) | ATS v2 returns invalid YARN\_CONTAINER\_ALLOCATED\_HOST\_HTTP\_ADDRESS from NM | Critical | ATSv2 | Yesha Vora | Rohith Sharma K S | +| [YARN-8152](https://issues.apache.org/jira/browse/YARN-8152) | Add chart in SLS to illustrate the throughput of the scheduler | Major | scheduler-load-simulator | Weiwei Yang | Tao Yang | +| [YARN-8204](https://issues.apache.org/jira/browse/YARN-8204) | Yarn Service Upgrade: Add a flag to disable upgrade | Major | . | Chandni Singh | Chandni Singh | +| [YARN-7781](https://issues.apache.org/jira/browse/YARN-7781) | Update YARN-Services-Examples.md to be in sync with the latest code | Major | . | Gour Saha | Gour Saha | +| [HDFS-13508](https://issues.apache.org/jira/browse/HDFS-13508) | RBF: Normalize paths (automatically) when adding, updating, removing or listing mount table entries | Minor | . 
| Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [HDFS-13434](https://issues.apache.org/jira/browse/HDFS-13434) | RBF: Fix dead links in RBF document | Major | documentation | Akira Ajisaka | Chetna Chaudhari | +| [YARN-8195](https://issues.apache.org/jira/browse/YARN-8195) | Fix constraint cardinality check in the presence of multiple target allocation tags | Critical | . | Weiwei Yang | Weiwei Yang | +| [YARN-8228](https://issues.apache.org/jira/browse/YARN-8228) | Docker does not support hostnames greater than 64 characters | Critical | yarn-native-services | Yesha Vora | Shane Kumpf | +| [YARN-8212](https://issues.apache.org/jira/browse/YARN-8212) | Pending backlog for async allocation threads should be configurable | Major | . | Weiwei Yang | Tao Yang | +| [YARN-2674](https://issues.apache.org/jira/browse/YARN-2674) | Distributed shell AM may re-launch containers if RM work preserving restart happens | Major | applications, resourcemanager | Chun Chen | Shane Kumpf | +| [HDFS-13488](https://issues.apache.org/jira/browse/HDFS-13488) | RBF: Reject requests when a Router is overloaded | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-8113](https://issues.apache.org/jira/browse/YARN-8113) | Update placement constraints doc with application namespaces and inter-app constraints | Major | documentation | Weiwei Yang | Weiwei Yang | +| [YARN-8194](https://issues.apache.org/jira/browse/YARN-8194) | Exception when reinitializing a container using LinuxContainerExecutor | Blocker | . | Chandni Singh | Chandni Singh | +| [YARN-7961](https://issues.apache.org/jira/browse/YARN-7961) | Improve status response when yarn application is destroyed | Major | yarn-native-services | Yesha Vora | Gour Saha | +| [HDFS-13525](https://issues.apache.org/jira/browse/HDFS-13525) | RBF: Add unit test TestStateStoreDisabledNameservice | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-5151](https://issues.apache.org/jira/browse/YARN-5151) | [UI2] Support kill application from new YARN UI | Major | . | Wangda Tan | Gergely Novák | +| [YARN-8253](https://issues.apache.org/jira/browse/YARN-8253) | HTTPS Ats v2 api call fails with "bad HTTP parsed" | Critical | ATSv2 | Yesha Vora | Charan Hebri | +| [YARN-8207](https://issues.apache.org/jira/browse/YARN-8207) | Docker container launch use popen have risk of shell expansion | Blocker | yarn-native-services | Eric Yang | Eric Yang | +| [YARN-8261](https://issues.apache.org/jira/browse/YARN-8261) | Docker container launch fails due to .cmd file creation failure | Blocker | . | Eric Badger | Jason Lowe | +| [HADOOP-15454](https://issues.apache.org/jira/browse/HADOOP-15454) | TestRollingFileSystemSinkWithLocal fails on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HDFS-13346](https://issues.apache.org/jira/browse/HDFS-13346) | RBF: Fix synchronization of router quota and nameservice quota | Major | . 
| liuhongtong | Yiqun Lin | +| [YARN-8243](https://issues.apache.org/jira/browse/YARN-8243) | Flex down should remove instance with largest component instance ID first | Critical | yarn-native-services | Gour Saha | Gour Saha | +| [YARN-7654](https://issues.apache.org/jira/browse/YARN-7654) | Support ENTRY\_POINT for docker container | Blocker | yarn | Eric Yang | Eric Yang | +| [YARN-8247](https://issues.apache.org/jira/browse/YARN-8247) | Incorrect HTTP status code returned by ATSv2 for non-whitelisted users | Critical | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8130](https://issues.apache.org/jira/browse/YARN-8130) | Race condition when container events are published for KILLED applications | Major | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8081](https://issues.apache.org/jira/browse/YARN-8081) | Yarn Service Upgrade: Add support to upgrade a component | Major | . | Chandni Singh | Chandni Singh | +| [YARN-8284](https://issues.apache.org/jira/browse/YARN-8284) | get\_docker\_command refactoring | Minor | . | Jason Lowe | Eric Badger | +| [HADOOP-15469](https://issues.apache.org/jira/browse/HADOOP-15469) | S3A directory committer commit job fails if \_temporary directory created under dest | Major | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-8206](https://issues.apache.org/jira/browse/YARN-8206) | Sending a kill does not immediately kill docker containers | Major | . | Eric Badger | Eric Badger | +| [YARN-7960](https://issues.apache.org/jira/browse/YARN-7960) | Add no-new-privileges flag to docker run | Major | . | Eric Badger | Eric Badger | +| [YARN-7530](https://issues.apache.org/jira/browse/YARN-7530) | hadoop-yarn-services-api should be part of hadoop-yarn-services | Blocker | yarn-native-services | Eric Yang | Chandni Singh | +| [YARN-6919](https://issues.apache.org/jira/browse/YARN-6919) | Add default volume mount list | Major | yarn | Eric Badger | Eric Badger | +| [HADOOP-15498](https://issues.apache.org/jira/browse/HADOOP-15498) | TestHadoopArchiveLogs (#testGenerateScript, #testPrepareWorkingDir) fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8329](https://issues.apache.org/jira/browse/YARN-8329) | Docker client configuration can still be set incorrectly | Major | . | Shane Kumpf | Shane Kumpf | +| [HDFS-12978](https://issues.apache.org/jira/browse/HDFS-12978) | Fine-grained locking while consuming journal stream. | Major | namenode | Konstantin Shvachko | Konstantin Shvachko | +| [YARN-8384](https://issues.apache.org/jira/browse/YARN-8384) | stdout.txt, stderr.txt logs of a launched docker container is coming with primary group of submit user instead of hadoop | Critical | yarn-native-services | Sunil Govindan | Eric Yang | +| [YARN-8349](https://issues.apache.org/jira/browse/YARN-8349) | Remove YARN registry entries when a service is killed by the RM | Critical | yarn-native-services | Shane Kumpf | Billie Rinaldi | +| [HDFS-13637](https://issues.apache.org/jira/browse/HDFS-13637) | RBF: Router fails when threadIndex (in ConnectionPool) wraps around Integer.MIN\_VALUE | Critical | federation | CR Hota | CR Hota | +| [YARN-8342](https://issues.apache.org/jira/browse/YARN-8342) | Using docker image from a non-privileged registry, the launch\_command is not honored | Critical | . | Wangda Tan | Eric Yang | +| [HDFS-13281](https://issues.apache.org/jira/browse/HDFS-13281) | Namenode#createFile should be /.reserved/raw/ aware. 
| Critical | encryption | Rushabh S Shah | Rushabh S Shah | +| [YARN-4677](https://issues.apache.org/jira/browse/YARN-4677) | RMNodeResourceUpdateEvent update from scheduler can lead to race condition | Major | graceful, resourcemanager, scheduler | Brook Zhou | Wilfred Spiegelenburg | +| [HADOOP-15137](https://issues.apache.org/jira/browse/HADOOP-15137) | ClassNotFoundException: org.apache.hadoop.yarn.server.api.DistributedSchedulingAMProtocol when using hadoop-client-minicluster | Major | . | Jeff Zhang | Bharat Viswanadham | +| [HDFS-13547](https://issues.apache.org/jira/browse/HDFS-13547) | Add ingress port based sasl resolver | Major | security | Chen Liang | Chen Liang | +| [HADOOP-15514](https://issues.apache.org/jira/browse/HADOOP-15514) | NoClassDefFoundError for TimelineCollectorManager when starting MiniYARNCluster | Major | . | Jeff Zhang | Rohith Sharma K S | +| [HADOOP-15516](https://issues.apache.org/jira/browse/HADOOP-15516) | Add test cases to cover FileUtil#readLink | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks | Minor | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [HADOOP-15529](https://issues.apache.org/jira/browse/HADOOP-15529) | ContainerLaunch#testInvalidEnvVariableSubstitutionType is not supported in Windows | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8411](https://issues.apache.org/jira/browse/YARN-8411) | Enable stopped system services to be started during RM start | Critical | . | Billie Rinaldi | Billie Rinaldi | +| [YARN-8259](https://issues.apache.org/jira/browse/YARN-8259) | Revisit liveliness checks for Docker containers | Blocker | . | Shane Kumpf | Shane Kumpf | +| [HADOOP-15533](https://issues.apache.org/jira/browse/HADOOP-15533) | Make WASB listStatus messages consistent | Trivial | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [HADOOP-15458](https://issues.apache.org/jira/browse/HADOOP-15458) | TestLocalFileSystem#testFSOutputStreamBuilder fails on Windows | Minor | test | Xiao Liang | Xiao Liang | +| [YARN-8465](https://issues.apache.org/jira/browse/YARN-8465) | Dshell docker container gets marked as lost after NM restart | Major | yarn-native-services | Yesha Vora | Shane Kumpf | +| [YARN-8485](https://issues.apache.org/jira/browse/YARN-8485) | Priviledged container app launch is failing intermittently | Major | yarn-native-services | Yesha Vora | Eric Yang | +| [HDFS-13528](https://issues.apache.org/jira/browse/HDFS-13528) | RBF: If a directory exceeds quota limit then quota usage is not refreshed for other mount entries | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HDFS-13710](https://issues.apache.org/jira/browse/HDFS-13710) | RBF: setQuota and getQuotaUsage should check the dfs.federation.router.quota.enable | Major | federation, hdfs | yanghuafeng | yanghuafeng | +| [HADOOP-15384](https://issues.apache.org/jira/browse/HADOOP-15384) | distcp numListstatusThreads option doesn't get to -delete scan | Major | tools/distcp | Steve Loughran | Steve Loughran | +| [HDFS-13726](https://issues.apache.org/jira/browse/HDFS-13726) | RBF: Fix RBF configuration links | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13475](https://issues.apache.org/jira/browse/HDFS-13475) | RBF: Admin cannot enforce Router enter SafeMode | Major | . 
| Wei Yan | Chao Sun | +| [HDFS-13733](https://issues.apache.org/jira/browse/HDFS-13733) | RBF: Add Web UI configurations and descriptions to RBF document | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8301](https://issues.apache.org/jira/browse/YARN-8301) | Yarn Service Upgrade: Add documentation | Critical | . | Chandni Singh | Chandni Singh | +| [YARN-8546](https://issues.apache.org/jira/browse/YARN-8546) | Resource leak caused by a reserved container being released more than once under async scheduling | Major | capacity scheduler | Weiwei Yang | Tao Yang | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8091](https://issues.apache.org/jira/browse/YARN-8091) | Revisit checkUserAccessToQueue RM REST API | Critical | . | Wangda Tan | Wangda Tan | +| [YARN-8274](https://issues.apache.org/jira/browse/YARN-8274) | Docker command error during container relaunch | Critical | . | Billie Rinaldi | Jason Lowe | +| [YARN-8080](https://issues.apache.org/jira/browse/YARN-8080) | YARN native service should support component restart policy | Critical | . | Wangda Tan | Suma Shivaprasad | +| [HADOOP-15483](https://issues.apache.org/jira/browse/HADOOP-15483) | Upgrade jquery to version 3.3.1 | Major | . | Lokesh Jain | Lokesh Jain | +| [YARN-8506](https://issues.apache.org/jira/browse/YARN-8506) | Make GetApplicationsRequestPBImpl thread safe | Critical | . | Wangda Tan | Wangda Tan | + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.1/RELEASENOTES.3.1.1.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.1/RELEASENOTES.3.1.1.md new file mode 100644 index 00000000000..9331231e41a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.1/RELEASENOTES.3.1.1.md @@ -0,0 +1,81 @@ + + +# Apache Hadoop 3.1.1 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-14667](https://issues.apache.org/jira/browse/HADOOP-14667) | *Major* | **Flexible Visual Studio support** + + + +This change updates the Microsoft Windows build directions to be more flexible with regards to Visual Studio compiler versions: + +* Any version of Visual Studio 2010 Pro or higher may be used. +* MSBuild Solution files are converted to the version of VS at build time +* Example command file to set command paths prior to using maven so that conversion works + +Additionally, Snappy and ISA-L that use bin as the location of the DLL will now be recognized without having to set their respective lib paths if the prefix is set. + +Note to contributors: + +It is very important that solutions for any patches remain at the VS 2010-level. + + +--- + +* [HADOOP-15446](https://issues.apache.org/jira/browse/HADOOP-15446) | *Major* | **WASB: PageBlobInputStream.skip breaks HBASE replication** + +WASB: Bug fix to support non-sequential page blob reads. Required for HBASE replication. + + +--- + +* [HADOOP-15478](https://issues.apache.org/jira/browse/HADOOP-15478) | *Major* | **WASB: hflush() and hsync() regression** + +WASB: Bug fix for recent regression in hflush() and hsync(). + + +--- + +* [HDFS-13589](https://issues.apache.org/jira/browse/HDFS-13589) | *Major* | **Add dfsAdmin command to query if "upgrade" is finalized** + +New command is added to dfsadmin. +hdfs dfsadmin [-upgrade [query \| finalize] +1. 
-upgrade query gives the upgradeStatus +2. -upgrade finalize is equivalent to -finalizeUpgrade. + + +--- + +* [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | *Minor* | **Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks** + +WASB: Fix Spark process hang at shutdown due to use of non-daemon threads by updating Azure Storage Java SDK to 7.0 + + +--- + +* [HDFS-13174](https://issues.apache.org/jira/browse/HDFS-13174) | *Major* | **hdfs mover -p /path times out after 20 min** + +Mover could have fail after 20+ minutes if a block move was enqueued for this long, between two DataNodes due to an internal constant that was introduced for Balancer, but affected Mover as well. +The internal constant can be configured with the dfs.balancer.max-iteration-time parameter after the patch, and affects only the Balancer. Default is 20 minutes. + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.2/CHANGELOG.3.1.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.2/CHANGELOG.3.1.2.md new file mode 100644 index 00000000000..b0b655f2a79 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.2/CHANGELOG.3.1.2.md @@ -0,0 +1,158 @@ + + +# Apache Hadoop Changelog + +## Release 3.1.2 - Unreleased (as of 2018-09-02) + + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13448](https://issues.apache.org/jira/browse/HDFS-13448) | HDFS Block Placement - Ignore Locality for First Block Replica | Minor | block placement, hdfs-client | BELUGA BEHR | BELUGA BEHR | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-15609](https://issues.apache.org/jira/browse/HADOOP-15609) | Retry KMS calls when SSLHandshakeException occurs | Major | common, kms | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15612](https://issues.apache.org/jira/browse/HADOOP-15612) | Improve exception when tfile fails to load LzoCodec | Major | . | Gera Shegalov | Gera Shegalov | +| [HDFS-11060](https://issues.apache.org/jira/browse/HDFS-11060) | make DEFAULT\_MAX\_CORRUPT\_FILEBLOCKS\_RETURNED configurable | Minor | hdfs | Lantao Jin | Lantao Jin | +| [HDFS-13727](https://issues.apache.org/jira/browse/HDFS-13727) | Log full stack trace if DiskBalancer exits with an unhandled exception | Minor | diskbalancer | Stephen O'Donnell | Gabor Bota | +| [YARN-8584](https://issues.apache.org/jira/browse/YARN-8584) | Several typos in Log Aggregation related classes | Minor | . | Szilard Nemeth | Szilard Nemeth | +| [HDFS-13728](https://issues.apache.org/jira/browse/HDFS-13728) | Disk Balancer should not fail if volume usage is greater than capacity | Minor | diskbalancer | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-13447](https://issues.apache.org/jira/browse/HDFS-13447) | Fix Typos - Node Not Chosen | Trivial | namenode | BELUGA BEHR | BELUGA BEHR | +| [YARN-8601](https://issues.apache.org/jira/browse/YARN-8601) | Print ExecutionType in Container report CLI | Minor | . 
| Bilwa S T | Bilwa S T | +| [HDFS-13658](https://issues.apache.org/jira/browse/HDFS-13658) | Expose HighestPriorityLowRedundancy blocks statistics | Major | hdfs | Kitti Nanasi | Kitti Nanasi | +| [YARN-8568](https://issues.apache.org/jira/browse/YARN-8568) | Replace the deprecated zk-address property in the HA config example in ResourceManagerHA.md | Minor | yarn | Antal Bálint Steinbach | Antal Bálint Steinbach | +| [HDFS-13735](https://issues.apache.org/jira/browse/HDFS-13735) | Make QJM HTTP URL connection timeout configurable | Minor | qjm | Chao Sun | Chao Sun | +| [HDFS-13814](https://issues.apache.org/jira/browse/HDFS-13814) | Remove super user privilege requirement for NameNode.getServiceStatus | Minor | namenode | Chao Sun | Chao Sun | +| [YARN-8559](https://issues.apache.org/jira/browse/YARN-8559) | Expose mutable-conf scheduler's configuration in RM /scheduler-conf endpoint | Major | resourcemanager | Anna Savarin | Weiwei Yang | +| [HDFS-13813](https://issues.apache.org/jira/browse/HDFS-13813) | Exit NameNode if dangling child inode is detected when saving FsImage | Major | hdfs, namenode | Siyao Meng | Siyao Meng | +| [HADOOP-14212](https://issues.apache.org/jira/browse/HADOOP-14212) | Expose SecurityEnabled boolean field in JMX for other services besides NameNode | Minor | . | Ray Burgemeestre | Adam Antal | +| [HDFS-13217](https://issues.apache.org/jira/browse/HDFS-13217) | Audit log all EC policy names during addErasureCodingPolicies | Major | erasure-coding | liaoyuxiangqin | liaoyuxiangqin | +| [HDFS-13732](https://issues.apache.org/jira/browse/HDFS-13732) | ECAdmin should print the policy name when an EC policy is set | Trivial | erasure-coding, tools | Soumyapn | Zsolt Venczel | +| [HADOOP-9214](https://issues.apache.org/jira/browse/HADOOP-9214) | Create a new touch command to allow modifying atime and mtime | Minor | tools | Brian Burton | Hrishikesh Gadre | +| [YARN-8242](https://issues.apache.org/jira/browse/YARN-8242) | YARN NM: OOM error while reading back the state store on recovery | Critical | yarn | Kanwaljeet Sachdev | Pradeep Ambati | +| [HDFS-13821](https://issues.apache.org/jira/browse/HDFS-13821) | RBF: Add dfs.federation.router.mount-table.cache.enable so that users can disable cache | Major | hdfs | Fei Hui | Fei Hui | +| [HDFS-13861](https://issues.apache.org/jira/browse/HDFS-13861) | RBF: Illegal Router Admin command leads to printing usage for all commands | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-13831](https://issues.apache.org/jira/browse/HDFS-13831) | Make block increment deletion number configurable | Major | . 
| Yiqun Lin | Ryan Wu | +| [YARN-8051](https://issues.apache.org/jira/browse/YARN-8051) | TestRMEmbeddedElector#testCallbackSynchronization is flakey | Major | test | Robert Kanter | Robert Kanter | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-7773](https://issues.apache.org/jira/browse/YARN-7773) | YARN Federation used Mysql as state store throw exception, Unknown column 'homeSubCluster' in 'field list' | Blocker | federation | Yiran Wu | Yiran Wu | +| [YARN-8426](https://issues.apache.org/jira/browse/YARN-8426) | Upgrade jquery-ui to 1.12.1 in YARN | Major | webapp | Sunil Govindan | Sunil Govindan | +| [HDFS-13721](https://issues.apache.org/jira/browse/HDFS-13721) | NPE in DataNode due to uninitialized DiskBalancer | Major | datanode, diskbalancer | Xiao Chen | Xiao Chen | +| [YARN-8360](https://issues.apache.org/jira/browse/YARN-8360) | Yarn service conflict between restart policy and NM configuration | Critical | yarn | Chandni Singh | Suma Shivaprasad | +| [YARN-8380](https://issues.apache.org/jira/browse/YARN-8380) | Support bind propagation options for mounts in docker runtime | Major | . | Billie Rinaldi | Billie Rinaldi | +| [YARN-8544](https://issues.apache.org/jira/browse/YARN-8544) | [DS] AM registration fails when hadoop authorization is enabled | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8548](https://issues.apache.org/jira/browse/YARN-8548) | AllocationRespose proto setNMToken initBuilder not done | Major | . | Bibin A Chundatt | Bilwa S T | +| [YARN-7748](https://issues.apache.org/jira/browse/YARN-7748) | TestContainerResizing.testIncreaseContainerUnreservedWhenApplicationCompleted fails due to multiple container fail events | Major | capacityscheduler | Haibo Chen | Weiwei Yang | +| [YARN-8577](https://issues.apache.org/jira/browse/YARN-8577) | Fix the broken anchor in SLS site-doc | Minor | documentation | Weiwei Yang | Weiwei Yang | +| [YARN-4606](https://issues.apache.org/jira/browse/YARN-4606) | CapacityScheduler: applications could get starved because computation of #activeUsers considers pending apps | Critical | capacity scheduler, capacityscheduler | Karam Singh | Manikandan R | +| [YARN-8330](https://issues.apache.org/jira/browse/YARN-8330) | Avoid publishing reserved container to ATS from RM | Critical | yarn-native-services | Yesha Vora | Suma Shivaprasad | +| [YARN-8429](https://issues.apache.org/jira/browse/YARN-8429) | Improve diagnostic message when artifact is not set properly | Major | . | Yesha Vora | Gour Saha | +| [YARN-8571](https://issues.apache.org/jira/browse/YARN-8571) | Validate service principal format prior to launching yarn service | Major | security, yarn | Eric Yang | Eric Yang | +| [HADOOP-15637](https://issues.apache.org/jira/browse/HADOOP-15637) | LocalFs#listLocatedStatus does not filter out hidden .crc files | Minor | fs | Erik Krogen | Erik Krogen | +| [YARN-8579](https://issues.apache.org/jira/browse/YARN-8579) | New AM attempt could not retrieve previous attempt component data | Critical | . | Yesha Vora | Gour Saha | +| [YARN-8397](https://issues.apache.org/jira/browse/YARN-8397) | Potential thread leak in ActivitiesManager | Major | . 
| Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8595](https://issues.apache.org/jira/browse/YARN-8595) | [UI2] Container diagnostic information is missing from container page | Major | yarn-ui-v2 | Akhil PB | Akhil PB | +| [YARN-8403](https://issues.apache.org/jira/browse/YARN-8403) | Nodemanager logs failed to download file with INFO level | Major | yarn | Eric Yang | Eric Yang | +| [YARN-8610](https://issues.apache.org/jira/browse/YARN-8610) | Yarn Service Upgrade: Typo in Error message | Major | . | Chandni Singh | Chandni Singh | +| [YARN-8593](https://issues.apache.org/jira/browse/YARN-8593) | Add RM web service endpoint to get user information | Major | resourcemanager | Akhil PB | Akhil PB | +| [YARN-8594](https://issues.apache.org/jira/browse/YARN-8594) | [UI2] Display current logged in user | Major | . | Akhil PB | Akhil PB | +| [YARN-8592](https://issues.apache.org/jira/browse/YARN-8592) | [UI2] rmip:port/ui2 endpoint shows a blank page in windows OS and Chrome browser | Major | . | Akhil S Naik | Akhil PB | +| [YARN-8318](https://issues.apache.org/jira/browse/YARN-8318) | [UI2] IP address in component page shows N/A | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-6966](https://issues.apache.org/jira/browse/YARN-6966) | NodeManager metrics may return wrong negative values when NM restart | Major | . | Yang Wang | Szilard Nemeth | +| [YARN-8620](https://issues.apache.org/jira/browse/YARN-8620) | [UI2] YARN Services UI new submission failures are not debuggable | Major | yarn-ui-v2 | Akhil PB | Akhil PB | +| [YARN-8615](https://issues.apache.org/jira/browse/YARN-8615) | [UI2] Resource Usage tab shows only memory related info. No info available for vcores/gpu. | Major | yarn-ui-v2 | Sumana Sathish | Akhil PB | +| [HDFS-13792](https://issues.apache.org/jira/browse/HDFS-13792) | Fix FSN read/write lock metrics name | Trivial | documentation, metrics | Chao Sun | Chao Sun | +| [YARN-8629](https://issues.apache.org/jira/browse/YARN-8629) | Container cleanup fails while trying to delete Cgroups | Critical | . | Yesha Vora | Suma Shivaprasad | +| [YARN-8407](https://issues.apache.org/jira/browse/YARN-8407) | Container launch exception in AM log should be printed in ERROR level | Major | . | Yesha Vora | Yesha Vora | +| [HDFS-13799](https://issues.apache.org/jira/browse/HDFS-13799) | TestEditLogTailer#testTriggersLogRollsForAllStandbyNN fails due to missing synchronization between rollEditsRpcExecutor and tailerThread shutdown | Minor | ha | Hrishikesh Gadre | Hrishikesh Gadre | +| [HDFS-13786](https://issues.apache.org/jira/browse/HDFS-13786) | EC: Display erasure coding policy for sub-directories is not working | Major | erasure-coding | Souryakanta Dwivedy | Ayush Saxena | +| [HDFS-13785](https://issues.apache.org/jira/browse/HDFS-13785) | EC: "removePolicy" is not working for built-in/system Erasure Code policies | Minor | documentation, erasure-coding | Souryakanta Dwivedy | Ayush Saxena | +| [YARN-8633](https://issues.apache.org/jira/browse/YARN-8633) | Update DataTables version in yarn-common in line with JQuery 3 upgrade | Major | yarn | Akhil PB | Akhil PB | +| [YARN-8331](https://issues.apache.org/jira/browse/YARN-8331) | Race condition in NM container launched after done | Major | . 
| Yang Wang | Pradeep Ambati | +| [YARN-8521](https://issues.apache.org/jira/browse/YARN-8521) | NPE in AllocationTagsManager when a container is removed more than once | Major | resourcemanager | Weiwei Yang | Weiwei Yang | +| [YARN-8575](https://issues.apache.org/jira/browse/YARN-8575) | Avoid committing allocation proposal to unavailable nodes in async scheduling | Major | capacityscheduler | Tao Yang | Tao Yang | +| [HDFS-13668](https://issues.apache.org/jira/browse/HDFS-13668) | FSPermissionChecker may throws AIOOE when check inode permission | Major | namenode | He Xiaoqiao | He Xiaoqiao | +| [HADOOP-15638](https://issues.apache.org/jira/browse/HADOOP-15638) | KMS Accept Queue Size default changed from 500 to 128 in Hadoop 3.x | Major | kms | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-13823](https://issues.apache.org/jira/browse/HDFS-13823) | NameNode UI : "Utilities -\> Browse the file system -\> open a file -\> Head the file" is not working | Major | ui | Nanda kumar | Nanda kumar | +| [HDFS-13738](https://issues.apache.org/jira/browse/HDFS-13738) | fsck -list-corruptfileblocks has infinite loop if user is not privileged. | Major | tools | Wei-Chiu Chuang | Yuen-Kuei Hsueh | +| [HDFS-13758](https://issues.apache.org/jira/browse/HDFS-13758) | DatanodeManager should throw exception if it has BlockRecoveryCommand but the block is not under construction | Major | namenode | Wei-Chiu Chuang | chencan | +| [YARN-8614](https://issues.apache.org/jira/browse/YARN-8614) | Fix few annotation typos in YarnConfiguration | Trivial | . | Sen Zhao | Sen Zhao | +| [HDFS-13819](https://issues.apache.org/jira/browse/HDFS-13819) | TestDirectoryScanner#testDirectoryScannerInFederatedCluster is flaky | Minor | hdfs | Daniel Templeton | Daniel Templeton | +| [YARN-8656](https://issues.apache.org/jira/browse/YARN-8656) | container-executor should not write cgroup tasks files for docker containers | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8474](https://issues.apache.org/jira/browse/YARN-8474) | sleeper service fails to launch with "Authentication Required" | Critical | yarn | Sumana Sathish | Billie Rinaldi | +| [YARN-8667](https://issues.apache.org/jira/browse/YARN-8667) | Cleanup symlinks when container restarted by NM to solve issue "find: File system loop detected;" for tar ball artifacts. | Critical | . | Rohith Sharma K S | Chandni Singh | +| [HDFS-10240](https://issues.apache.org/jira/browse/HDFS-10240) | Race between close/recoverLease leads to missing block | Major | . | zhouyingchao | Jinglun | +| [HADOOP-15655](https://issues.apache.org/jira/browse/HADOOP-15655) | Enhance KMS client retry behavior | Critical | kms | Kitti Nanasi | Kitti Nanasi | +| [YARN-8612](https://issues.apache.org/jira/browse/YARN-8612) | Fix NM Collector Service Port issue in YarnConfiguration | Major | ATSv2 | Prabha Manepalli | Prabha Manepalli | +| [HDFS-13747](https://issues.apache.org/jira/browse/HDFS-13747) | Statistic for list\_located\_status is incremented incorrectly by listStatusIterator | Minor | hdfs-client | Todd Lipcon | Antal Mihalyi | +| [HADOOP-15674](https://issues.apache.org/jira/browse/HADOOP-15674) | Test failure TestSSLHttpServer.testExcludedCiphers with TLS\_ECDHE\_RSA\_WITH\_AES\_128\_CBC\_SHA256 cipher suite | Major | common | Gabor Bota | Szilard Nemeth | +| [YARN-8640](https://issues.apache.org/jira/browse/YARN-8640) | Restore previous state in container-executor after failure | Major | . 
| Jim Brennan | Jim Brennan | +| [YARN-8679](https://issues.apache.org/jira/browse/YARN-8679) | [ATSv2] If HBase cluster is down for long time, high chances that NM ContainerManager dispatcher get blocked | Major | . | Rohith Sharma K S | Wangda Tan | +| [HDFS-13772](https://issues.apache.org/jira/browse/HDFS-13772) | Erasure coding: Unnecessary NameNode Logs displaying for Enabling/Disabling Erasure coding policies which are already enabled/disabled | Trivial | erasure-coding | Souryakanta Dwivedy | Ayush Saxena | +| [YARN-8649](https://issues.apache.org/jira/browse/YARN-8649) | NPE in localizer hearbeat processing if a container is killed while localizing | Major | . | lujie | lujie | +| [YARN-8719](https://issues.apache.org/jira/browse/YARN-8719) | Typo correction for yarn configuration in OpportunisticContainers(federation) docs | Major | documentation, federation | Y. SREENIVASULU REDDY | Y. SREENIVASULU REDDY | +| [YARN-8675](https://issues.apache.org/jira/browse/YARN-8675) | Setting hostname of docker container breaks with "host" networking mode for Apps which do not run as a YARN service | Major | . | Yesha Vora | Suma Shivaprasad | +| [HDFS-13858](https://issues.apache.org/jira/browse/HDFS-13858) | RBF: Add check to have single valid argument to safemode command | Major | federation | Soumyapn | Ayush Saxena | +| [HDFS-13731](https://issues.apache.org/jira/browse/HDFS-13731) | ReencryptionUpdater fails with ConcurrentModificationException during processCheckpoints | Major | encryption | Xiao Chen | Zsolt Venczel | +| [YARN-8723](https://issues.apache.org/jira/browse/YARN-8723) | Fix a typo in CS init error message when resource calculator is not correctly set | Minor | . | Weiwei Yang | Abhishek Modi | +| [HADOOP-15705](https://issues.apache.org/jira/browse/HADOOP-15705) | Typo in the definition of "stable" in the interface classification | Minor | . | Daniel Templeton | Daniel Templeton | +| [HDFS-13863](https://issues.apache.org/jira/browse/HDFS-13863) | FsDatasetImpl should log DiskOutOfSpaceException | Major | hdfs | Fei Hui | Fei Hui | +| [HADOOP-15698](https://issues.apache.org/jira/browse/HADOOP-15698) | KMS log4j is not initialized properly at startup | Major | kms | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15680](https://issues.apache.org/jira/browse/HADOOP-15680) | ITestNativeAzureFileSystemConcurrencyLive times out | Major | . | Andras Bokor | Andras Bokor | +| [HADOOP-15706](https://issues.apache.org/jira/browse/HADOOP-15706) | Typo in compatibility doc: SHOUD -\> SHOULD | Trivial | . | Daniel Templeton | Laszlo Kollar | +| [HDFS-13027](https://issues.apache.org/jira/browse/HDFS-13027) | Handle possible NPEs due to deleted blocks in race condition | Major | namenode | Vinayakumar B | Vinayakumar B | + + +### SUB-TASKS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HDFS-13743](https://issues.apache.org/jira/browse/HDFS-13743) | RBF: Router throws NullPointerException due to the invalid initialization of MountTableResolver | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13583](https://issues.apache.org/jira/browse/HDFS-13583) | RBF: Router admin clrQuota is not synchronized with nameservice | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-8263](https://issues.apache.org/jira/browse/YARN-8263) | DockerClient still touches hadoop.tmp.dir | Minor | . 
| Jason Lowe | Craig Condit | +| [YARN-8287](https://issues.apache.org/jira/browse/YARN-8287) | Update documentation and yarn-default related to the Docker runtime | Minor | . | Shane Kumpf | Craig Condit | +| [YARN-8624](https://issues.apache.org/jira/browse/YARN-8624) | Cleanup ENTRYPOINT documentation | Minor | . | Craig Condit | Craig Condit | +| [YARN-8136](https://issues.apache.org/jira/browse/YARN-8136) | Add version attribute to site doc examples and quickstart | Major | site | Gour Saha | Eric Yang | +| [YARN-8588](https://issues.apache.org/jira/browse/YARN-8588) | Logging improvements for better debuggability | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-8520](https://issues.apache.org/jira/browse/YARN-8520) | Document best practice for user management | Major | documentation, yarn | Eric Yang | Eric Yang | +| [HDFS-13750](https://issues.apache.org/jira/browse/HDFS-13750) | RBF: Router ID in RouterRpcClient is always null | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8129](https://issues.apache.org/jira/browse/YARN-8129) | Improve error message for invalid value in fields attribute | Minor | ATSv2 | Charan Hebri | Abhishek Modi | +| [HDFS-13848](https://issues.apache.org/jira/browse/HDFS-13848) | Refactor NameNode failover proxy providers | Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | +| [HDFS-13634](https://issues.apache.org/jira/browse/HDFS-13634) | RBF: Configurable value in xml for async connection request queue size. | Major | federation | CR Hota | CR Hota | +| [YARN-8642](https://issues.apache.org/jira/browse/YARN-8642) | Add support for tmpfs mounts with the Docker runtime | Major | . | Shane Kumpf | Craig Condit | +| [HADOOP-15107](https://issues.apache.org/jira/browse/HADOOP-15107) | Stabilize/tune S3A committers; review correctness & docs | Blocker | fs/s3 | Steve Loughran | Steve Loughran | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8545](https://issues.apache.org/jira/browse/YARN-8545) | YARN native service should return container if launch failed | Critical | . | Wangda Tan | Chandni Singh | +| [HDFS-13788](https://issues.apache.org/jira/browse/HDFS-13788) | Update EC documentation about rack fault tolerance | Major | documentation, erasure-coding | Xiao Chen | Kitti Nanasi | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.2/RELEASENOTES.3.1.2.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.2/RELEASENOTES.3.1.2.md new file mode 100644 index 00000000000..7bf877624cc --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.2/RELEASENOTES.3.1.2.md @@ -0,0 +1,28 @@ + + +# Apache Hadoop 3.1.2 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. + + +--- + +* [HADOOP-15638](https://issues.apache.org/jira/browse/HADOOP-15638) | *Major* | **KMS Accept Queue Size default changed from 500 to 128 in Hadoop 3.x** + +Restore the KMS accept queue size to 500 in Hadoop 3.x, making it the same as in Hadoop 2.x. 
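
For reference, the HDFS-13589 entry in the 3.1.1 release notes above describes a new `hdfs dfsadmin -upgrade` subcommand. The sketch below is derived only from that note and is not part of the generated release files; it assumes an HDFS 3.1.1+ cluster and a user with HDFS administrator privileges, and the command output is not specified by the note.

```bash
# Minimal usage sketch of the dfsadmin "-upgrade" subcommand added by HDFS-13589.
# Assumes an HDFS 3.1.1+ cluster and an administrative (superuser) account.

# Report whether the NameNode upgrade has been finalized (the "upgradeStatus").
hdfs dfsadmin -upgrade query

# Finalize the upgrade; per the release note this is equivalent to the
# pre-existing "hdfs dfsadmin -finalizeUpgrade" command.
hdfs dfsadmin -upgrade finalize
```
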
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.0/CHANGELOG.3.2.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.0/CHANGELOG.3.2.0.md new file mode 100644 index 00000000000..1f4fcf23f9f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.0/CHANGELOG.3.2.0.md @@ -0,0 +1,881 @@ + + +# Apache Hadoop Changelog + +## Release 3.2.0 - Unreleased (as of 2018-09-02) + +### INCOMPATIBLE CHANGES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-6257](https://issues.apache.org/jira/browse/YARN-6257) | CapacityScheduler REST API produces incorrect JSON - JSON object operationsInfo contains deplicate key | Minor | capacityscheduler | Tao Yang | Tao Yang | +| [HADOOP-15146](https://issues.apache.org/jira/browse/HADOOP-15146) | Remove DataOutputByteBuffer | Minor | common | BELUGA BEHR | BELUGA BEHR | +| [YARN-8191](https://issues.apache.org/jira/browse/YARN-8191) | Fair scheduler: queue deletion without RM restart | Major | fairscheduler | Gergo Repas | Gergo Repas | +| [HADOOP-15495](https://issues.apache.org/jira/browse/HADOOP-15495) | Upgrade common-lang version to 3.7 in hadoop-common-project and hadoop-tools | Major | . | Takanobu Asanuma | Takanobu Asanuma | + + +### IMPORTANT ISSUES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [HADOOP-14667](https://issues.apache.org/jira/browse/HADOOP-14667) | Flexible Visual Studio support | Major | build | Allen Wittenauer | Allen Wittenauer | + + +### NEW FEATURES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-5764](https://issues.apache.org/jira/browse/YARN-5764) | NUMA awareness support for launching containers | Major | nodemanager, yarn | Olasoji | Devaraj K | +| [HDFS-13056](https://issues.apache.org/jira/browse/HDFS-13056) | Expose file-level composite CRCs in HDFS which are comparable across different instances/layouts | Major | datanode, distcp, erasure-coding, federation, hdfs | Dennis Huo | Dennis Huo | +| [HDFS-13283](https://issues.apache.org/jira/browse/HDFS-13283) | Percentage based Reserved Space Calculation for DataNode | Major | datanode, hdfs | Lukas Majercak | Lukas Majercak | +| [HDFS-13448](https://issues.apache.org/jira/browse/HDFS-13448) | HDFS Block Placement - Ignore Locality for First Block Replica | Minor | block placement, hdfs-client | BELUGA BEHR | BELUGA BEHR | +| [YARN-7812](https://issues.apache.org/jira/browse/YARN-7812) | Improvements to Rich Placement Constraints in YARN | Major | . | Arun Suresh | | + + +### IMPROVEMENTS: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8028](https://issues.apache.org/jira/browse/YARN-8028) | Support authorizeUserAccessToQueue in RMWebServices | Major | . | Wangda Tan | Wangda Tan | +| [HADOOP-15332](https://issues.apache.org/jira/browse/HADOOP-15332) | Fix typos in hadoop-aws markdown docs | Minor | . | Gabor Bota | Gabor Bota | +| [HADOOP-15330](https://issues.apache.org/jira/browse/HADOOP-15330) | Remove jdk1.7 profile from hadoop-annotations module | Minor | . | Akira Ajisaka | fang zhenyi | +| [HADOOP-15295](https://issues.apache.org/jira/browse/HADOOP-15295) | Remove redundant logging related to tags from Configuration | Major | . 
| Ajay Kumar | Ajay Kumar | +| [HADOOP-15339](https://issues.apache.org/jira/browse/HADOOP-15339) | Support additional key/value propereties in JMX bean registration | Major | common | Elek, Marton | Elek, Marton | +| [YARN-8077](https://issues.apache.org/jira/browse/YARN-8077) | The vmemLimit parameter in ContainersMonitorImpl#isProcessTreeOverLimit is confusing | Trivial | nodemanager | Sen Zhao | Sen Zhao | +| [HDFS-13357](https://issues.apache.org/jira/browse/HDFS-13357) | Improve AclException message "Invalid ACL: only directories may have a default ACL." | Minor | . | Wei-Chiu Chuang | Gabor Bota | +| [HADOOP-15342](https://issues.apache.org/jira/browse/HADOOP-15342) | Update ADLS connector to use the current SDK version (2.2.7) | Major | fs/adl | Atul Sikaria | Atul Sikaria | +| [YARN-8082](https://issues.apache.org/jira/browse/YARN-8082) | Include LocalizedResource size information in the NM download log for localization | Minor | . | Kuhu Shukla | Kuhu Shukla | +| [YARN-1151](https://issues.apache.org/jira/browse/YARN-1151) | Ability to configure auxiliary services from HDFS-based JAR files | Major | nodemanager | john lilley | Xuan Gong | +| [HDFS-13363](https://issues.apache.org/jira/browse/HDFS-13363) | Record file path when FSDirAclOp throws AclException | Minor | . | Wei-Chiu Chuang | Gabor Bota | +| [MAPREDUCE-7069](https://issues.apache.org/jira/browse/MAPREDUCE-7069) | Add ability to specify user environment variables individually | Major | . | Jim Brennan | Jim Brennan | +| [HDFS-13418](https://issues.apache.org/jira/browse/HDFS-13418) | NetworkTopology should be configurable when enable DFSNetworkTopology | Major | . | Tao Jie | Tao Jie | +| [HDFS-13439](https://issues.apache.org/jira/browse/HDFS-13439) | Add test case for read block operation when it is moved | Major | . | Ajay Kumar | Ajay Kumar | +| [HDFS-13462](https://issues.apache.org/jira/browse/HDFS-13462) | Add BIND\_HOST configuration for JournalNode's HTTP and RPC Servers | Major | hdfs, journal-node | Lukas Majercak | Lukas Majercak | +| [HADOOP-15393](https://issues.apache.org/jira/browse/HADOOP-15393) | Upgrade the version of commons-lang3 to 3.7 | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-7966](https://issues.apache.org/jira/browse/YARN-7966) | Remove method AllocationConfiguration#getQueueAcl and related unit tests | Minor | fairscheduler | Yufei Gu | Sen Zhao | +| [YARN-8169](https://issues.apache.org/jira/browse/YARN-8169) | Cleanup RackResolver.java | Minor | yarn | BELUGA BEHR | BELUGA BEHR | +| [YARN-8185](https://issues.apache.org/jira/browse/YARN-8185) | Improve log in DirectoryCollection constructor | Major | nodemanager | Yufei Gu | Yufei Gu | +| [HDFS-13468](https://issues.apache.org/jira/browse/HDFS-13468) | Add erasure coding metrics into ReadStatistics | Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | +| [YARN-8140](https://issues.apache.org/jira/browse/YARN-8140) | Improve log message when launch cmd is ran for stopped yarn service | Major | yarn-native-services | Yesha Vora | Eric Yang | +| [MAPREDUCE-7086](https://issues.apache.org/jira/browse/MAPREDUCE-7086) | Add config to allow FileInputFormat to ignore directories when recursive=false | Major | . 
| Sergey Shelukhin | Sergey Shelukhin | +| [HADOOP-15377](https://issues.apache.org/jira/browse/HADOOP-15377) | Improve debug messages in MetricsConfig.java | Minor | common | BELUGA BEHR | BELUGA BEHR | +| [HADOOP-15382](https://issues.apache.org/jira/browse/HADOOP-15382) | Log kinit output in credential renewal thread | Minor | security | Wei-Chiu Chuang | Gabor Bota | +| [YARN-8163](https://issues.apache.org/jira/browse/YARN-8163) | Add support for Node Labels in opportunistic scheduling. | Major | . | Abhishek Modi | Abhishek Modi | +| [HDFS-5926](https://issues.apache.org/jira/browse/HDFS-5926) | Documentation should clarify dfs.datanode.du.reserved impact from reserved disk capacity | Minor | documentation | Alexander Fahlke | Gabor Bota | +| [MAPREDUCE-7093](https://issues.apache.org/jira/browse/MAPREDUCE-7093) | Use assertEquals instead of assertTrue(a == b) in TestMapReduceJobControlWithMocks | Minor | test | Akira Ajisaka | Abhishek Modi | +| [HDFS-12981](https://issues.apache.org/jira/browse/HDFS-12981) | renameSnapshot a Non-Existent snapshot to itself should throw error | Minor | hdfs | Sailesh Patel | Kitti Nanasi | +| [YARN-8239](https://issues.apache.org/jira/browse/YARN-8239) | [UI2] Clicking on Node Manager UI under AM container info / App Attempt page goes to old RM UI | Major | yarn-ui-v2 | Sumana Sathish | Sunil Govindan | +| [YARN-8260](https://issues.apache.org/jira/browse/YARN-8260) | [UI2] Per-application tracking URL is no longer available in YARN UI2 | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8201](https://issues.apache.org/jira/browse/YARN-8201) | Skip stacktrace of few exception from ClientRMService | Minor | . | Bibin A Chundatt | Bilwa S T | +| [HADOOP-15354](https://issues.apache.org/jira/browse/HADOOP-15354) | hadoop-aliyun & hadoop-azure modules to mark hadoop-common as provided | Major | build, fs/azure, fs/oss | Steve Loughran | Steve Loughran | +| [YARN-3610](https://issues.apache.org/jira/browse/YARN-3610) | FairScheduler: Add steady-fair-shares to the REST API documentation | Major | documentation, fairscheduler | Karthik Kambatla | Ray Chiang | +| [HADOOP-15441](https://issues.apache.org/jira/browse/HADOOP-15441) | Log kms url and token service at debug level. | Minor | . | Wei-Chiu Chuang | Gabor Bota | +| [HDFS-13544](https://issues.apache.org/jira/browse/HDFS-13544) | Improve logging for JournalNode in federated cluster | Major | federation, hdfs | Hanisha Koneru | Hanisha Koneru | +| [YARN-8249](https://issues.apache.org/jira/browse/YARN-8249) | Few REST api's in RMWebServices are missing static user check | Critical | webapp, yarn | Sunil Govindan | Sunil Govindan | +| [YARN-8123](https://issues.apache.org/jira/browse/YARN-8123) | Skip compiling old hamlet package when the Java version is 10 or upper | Major | webapp | Akira Ajisaka | Dinesh Chitlangia | +| [HDFS-13512](https://issues.apache.org/jira/browse/HDFS-13512) | WebHdfs getFileStatus doesn't return ecPolicy | Major | . | Ajay Kumar | Ajay Kumar | +| [HADOOP-15250](https://issues.apache.org/jira/browse/HADOOP-15250) | Split-DNS MultiHomed Server Network Cluster Network IPC Client Bind Addr Wrong | Critical | ipc, net | Greg Senia | Ajay Kumar | +| [HADOOP-15154](https://issues.apache.org/jira/browse/HADOOP-15154) | Abstract new method assertCapability for StreamCapabilities testing | Minor | test | Xiao Chen | Zsolt Venczel | +| [HADOOP-15457](https://issues.apache.org/jira/browse/HADOOP-15457) | Add Security-Related HTTP Response Header in WEBUIs. | Major | . 
| Kanwaljeet Sachdev | Kanwaljeet Sachdev | +| [HDFS-13589](https://issues.apache.org/jira/browse/HDFS-13589) | Add dfsAdmin command to query if "upgrade" is finalized | Major | hdfs | Hanisha Koneru | Hanisha Koneru | +| [HADOOP-15486](https://issues.apache.org/jira/browse/HADOOP-15486) | Make NetworkTopology#netLock fair | Major | net | Nanda kumar | Nanda kumar | +| [HDFS-13493](https://issues.apache.org/jira/browse/HDFS-13493) | Reduce the HttpServer2 thread count on DataNodes | Major | datanode | Erik Krogen | Erik Krogen | +| [HDFS-13598](https://issues.apache.org/jira/browse/HDFS-13598) | Reduce unnecessary byte-to-string transform operation in INodesInPath#toString | Minor | . | Yiqun Lin | Gabor Bota | +| [YARN-8213](https://issues.apache.org/jira/browse/YARN-8213) | Add Capacity Scheduler performance metrics | Critical | capacityscheduler, metrics | Weiwei Yang | Weiwei Yang | +| [HADOOP-15477](https://issues.apache.org/jira/browse/HADOOP-15477) | Make unjar in RunJar overrideable | Trivial | . | Johan Gustavsson | Johan Gustavsson | +| [HDFS-13628](https://issues.apache.org/jira/browse/HDFS-13628) | Update Archival Storage doc for Provided Storage | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-15449](https://issues.apache.org/jira/browse/HADOOP-15449) | Increase default timeout of ZK session to avoid frequent NameNode failover | Critical | common | Karthik Palanisamy | Karthik Palanisamy | +| [MAPREDUCE-7098](https://issues.apache.org/jira/browse/MAPREDUCE-7098) | Upgrade common-langs version to 3.7 in hadoop-mapreduce-project | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8333](https://issues.apache.org/jira/browse/YARN-8333) | Load balance YARN services using RegistryDNS multiple A records | Major | yarn-native-services | Eric Yang | Eric Yang | +| [HDFS-13440](https://issues.apache.org/jira/browse/HDFS-13440) | Log HDFS file name when client fails to connect | Trivial | . | Wei-Chiu Chuang | Gabor Bota | +| [HADOOP-14783](https://issues.apache.org/jira/browse/HADOOP-14783) | [KMS] Add missing configuration properties into kms-default.xml | Minor | kms | Wei-Chiu Chuang | Chetna Chaudhari | +| [HDFS-13602](https://issues.apache.org/jira/browse/HDFS-13602) | Add checkOperation(WRITE) checks in FSNamesystem | Major | ha, namenode | Erik Krogen | Chao Sun | +| [HDFS-13155](https://issues.apache.org/jira/browse/HDFS-13155) | BlockPlacementPolicyDefault.chooseTargetInOrder Not Checking Return Value for NULL | Minor | namenode | BELUGA BEHR | Zsolt Venczel | +| [YARN-8389](https://issues.apache.org/jira/browse/YARN-8389) | Improve the description of machine-list property in Federation docs | Major | documentation, federation | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-15507](https://issues.apache.org/jira/browse/HADOOP-15507) | Add MapReduce counters about EC bytes read | Major | . | Xiao Chen | Xiao Chen | +| [HDFS-13511](https://issues.apache.org/jira/browse/HDFS-13511) | Provide specialized exception when block length cannot be obtained | Major | . 
| Ted Yu | Gabor Bota | +| [HADOOP-15512](https://issues.apache.org/jira/browse/HADOOP-15512) | clean up Shell from JDK7 workarounds | Minor | common | Steve Loughran | Zsolt Venczel | +| [HDFS-13659](https://issues.apache.org/jira/browse/HDFS-13659) | Add more test coverage for contentSummary for snapshottable path | Major | namenode, test | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [YARN-6677](https://issues.apache.org/jira/browse/YARN-6677) | Preempt opportunistic containers when root container cgroup goes over memory limit | Major | nodemanager | Haibo Chen | Haibo Chen | +| [YARN-8400](https://issues.apache.org/jira/browse/YARN-8400) | Fix typos in YARN Federation documentation page | Trivial | . | Bibin A Chundatt | Giovanni Matteo Fumarola | +| [HADOOP-15499](https://issues.apache.org/jira/browse/HADOOP-15499) | Performance severe drop when running RawErasureCoderBenchmark with NativeRSRawErasureCoder | Major | . | Sammi Chen | Sammi Chen | +| [YARN-8322](https://issues.apache.org/jira/browse/YARN-8322) | Change log level when there is an IOException when the allocation file is loaded | Minor | fairscheduler | Haibo Chen | Szilard Nemeth | +| [YARN-8321](https://issues.apache.org/jira/browse/YARN-8321) | AllocationFileLoaderService. getAllocationFile() should be declared as VisibleForTest | Trivial | fairscheduler | Haibo Chen | Szilard Nemeth | +| [HDFS-13653](https://issues.apache.org/jira/browse/HDFS-13653) | Make dfs.client.failover.random.order a per nameservice configuration | Major | federation | Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [YARN-8363](https://issues.apache.org/jira/browse/YARN-8363) | Upgrade commons-lang version to 3.7 in hadoop-yarn-project | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8325](https://issues.apache.org/jira/browse/YARN-8325) | Miscellaneous QueueManager code clean up | Minor | fairscheduler | Haibo Chen | Szilard Nemeth | +| [YARN-8394](https://issues.apache.org/jira/browse/YARN-8394) | Improve data locality documentation for Capacity Scheduler | Major | . | Weiwei Yang | Weiwei Yang | +| [HDFS-13641](https://issues.apache.org/jira/browse/HDFS-13641) | Add metrics for edit log tailing | Major | metrics | Chao Sun | Chao Sun | +| [HDFS-13582](https://issues.apache.org/jira/browse/HDFS-13582) | Improve backward compatibility for HDFS-13176 (WebHdfs file path gets truncated when having semicolon (;) inside) | Major | . | Zsolt Venczel | Zsolt Venczel | +| [HDFS-13686](https://issues.apache.org/jira/browse/HDFS-13686) | Add overall metrics for FSNamesystemLock | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [HDFS-13621](https://issues.apache.org/jira/browse/HDFS-13621) | Upgrade common-lang version to 3.7 in hadoop-hdfs-project | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [MAPREDUCE-7063](https://issues.apache.org/jira/browse/MAPREDUCE-7063) | Fix log level inconsistency in CombineFileInputFormat.java | Minor | client | BELUGA BEHR | Vidura Bhathiya Mudalige | +| [YARN-8440](https://issues.apache.org/jira/browse/YARN-8440) | Typo in YarnConfiguration javadoc: "Miniumum request grant-able.." | Trivial | . | Szilard Nemeth | Szilard Nemeth | +| [YARN-7449](https://issues.apache.org/jira/browse/YARN-7449) | Split up class TestYarnClient to TestYarnClient and TestYarnClientImpl | Minor | client, yarn | Yufei Gu | Szilard Nemeth | +| [YARN-8442](https://issues.apache.org/jira/browse/YARN-8442) | Strange characters and missing spaces in FairScheduler documentation | Major | . 
| Szilard Nemeth | Szilard Nemeth | +| [YARN-8441](https://issues.apache.org/jira/browse/YARN-8441) | Typo in CSQueueUtils local variable names: queueGuranteedResource | Trivial | resourcemanager | Szilard Nemeth | Szilard Nemeth | +| [MAPREDUCE-7113](https://issues.apache.org/jira/browse/MAPREDUCE-7113) | Typos in test names in TestTaskAttempt: "testAppDiognostic" | Minor | . | Szilard Nemeth | Szilard Nemeth | +| [HADOOP-15551](https://issues.apache.org/jira/browse/HADOOP-15551) | Avoid use of Java8 streams in Configuration.addTags | Major | performance | Todd Lipcon | Todd Lipcon | +| [HDFS-13692](https://issues.apache.org/jira/browse/HDFS-13692) | StorageInfoDefragmenter floods log when compacting StorageInfo TreeSet | Minor | . | Yiqun Lin | Bharat Viswanadham | +| [YARN-8214](https://issues.apache.org/jira/browse/YARN-8214) | Change default RegistryDNS port | Major | . | Billie Rinaldi | Billie Rinaldi | +| [YARN-8461](https://issues.apache.org/jira/browse/YARN-8461) | Support strict memory control on individual container with elastic control memory mechanism | Major | nodemanager | Haibo Chen | Haibo Chen | +| [HADOOP-14313](https://issues.apache.org/jira/browse/HADOOP-14313) | Replace/improve Hadoop's byte[] comparator | Major | common | Vikas Vishwakarma | Vikas Vishwakarma | +| [HDFS-13703](https://issues.apache.org/jira/browse/HDFS-13703) | Avoid allocation of CorruptedBlocks hashmap when no corrupted blocks are hit | Major | performance | Todd Lipcon | Todd Lipcon | +| [HADOOP-15554](https://issues.apache.org/jira/browse/HADOOP-15554) | Improve JIT performance for Configuration parsing | Minor | conf, performance | Todd Lipcon | Todd Lipcon | +| [HDFS-13536](https://issues.apache.org/jira/browse/HDFS-13536) | [PROVIDED Storage] HA for InMemoryAliasMap | Major | . | Virajith Jalaparti | Virajith Jalaparti | +| [HDFS-13714](https://issues.apache.org/jira/browse/HDFS-13714) | Fix TestNameNodePrunesMissingStorages test failures on Windows | Major | hdfs, namenode, test | Lukas Majercak | Lukas Majercak | +| [HDFS-13712](https://issues.apache.org/jira/browse/HDFS-13712) | BlockReaderRemote.read() logging improvement | Minor | hdfs-client | Gergo Repas | Gergo Repas | +| [YARN-8302](https://issues.apache.org/jira/browse/YARN-8302) | ATS v2 should handle HBase connection issue properly | Major | ATSv2 | Yesha Vora | Billie Rinaldi | +| [HDFS-13719](https://issues.apache.org/jira/browse/HDFS-13719) | Docs around dfs.image.transfer.timeout are misleading | Major | . | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15591](https://issues.apache.org/jira/browse/HADOOP-15591) | KMSClientProvider should log KMS DT acquisition at INFO level | Minor | kms | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15581](https://issues.apache.org/jira/browse/HADOOP-15581) | Set default jetty log level to INFO in KMS | Major | . | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15568](https://issues.apache.org/jira/browse/HADOOP-15568) | fix some typos in the .sh comments | Trivial | bin | Steve Loughran | Steve Loughran | +| [YARN-8502](https://issues.apache.org/jira/browse/YARN-8502) | Use path strings consistently for webservice endpoints in RMWebServices | Major | . | Szilard Nemeth | Szilard Nemeth | +| [HADOOP-15531](https://issues.apache.org/jira/browse/HADOOP-15531) | Use commons-text instead of commons-lang in some classes to fix deprecation warnings | Major | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-15598](https://issues.apache.org/jira/browse/HADOOP-15598) | DataChecksum calculate checksum is contented on hashtable synchronization | Major | common | Prasanth Jayachandran | Prasanth Jayachandran | +| [YARN-8524](https://issues.apache.org/jira/browse/YARN-8524) | Single parameter Resource / LightWeightResource constructor looks confusing | Major | api | Szilard Nemeth | Szilard Nemeth | +| [YARN-8361](https://issues.apache.org/jira/browse/YARN-8361) | Change App Name Placement Rule to use App Name instead of App Id for configuration | Major | yarn | Zian Chen | Zian Chen | +| [HDFS-13690](https://issues.apache.org/jira/browse/HDFS-13690) | Improve error message when creating encryption zone while KMS is unreachable | Minor | encryption, hdfs, kms | Kitti Nanasi | Kitti Nanasi | +| [YARN-8501](https://issues.apache.org/jira/browse/YARN-8501) | Reduce complexity of RMWebServices' getApps method | Major | restapi | Szilard Nemeth | Szilard Nemeth | +| [YARN-7300](https://issues.apache.org/jira/browse/YARN-7300) | DiskValidator is not used in LocalDirAllocator | Major | . | Haibo Chen | Szilard Nemeth | +| [HADOOP-15596](https://issues.apache.org/jira/browse/HADOOP-15596) | Stack trace should not be printed out when running hadoop key commands | Minor | common | Kitti Nanasi | Kitti Nanasi | +| [YARN-7133](https://issues.apache.org/jira/browse/YARN-7133) | Clean up lock-try order in fair scheduler | Major | fairscheduler | Daniel Templeton | Szilard Nemeth | +| [HDFS-13761](https://issues.apache.org/jira/browse/HDFS-13761) | Add toString Method to AclFeature Class | Minor | . | Shweta | Shweta | +| [HADOOP-15609](https://issues.apache.org/jira/browse/HADOOP-15609) | Retry KMS calls when SSLHandshakeException occurs | Major | common, kms | Kitti Nanasi | Kitti Nanasi | +| [HADOOP-15612](https://issues.apache.org/jira/browse/HADOOP-15612) | Improve exception when tfile fails to load LzoCodec | Major | . | Gera Shegalov | Gera Shegalov | +| [HDFS-11060](https://issues.apache.org/jira/browse/HDFS-11060) | make DEFAULT\_MAX\_CORRUPT\_FILEBLOCKS\_RETURNED configurable | Minor | hdfs | Lantao Jin | Lantao Jin | +| [HADOOP-15611](https://issues.apache.org/jira/browse/HADOOP-15611) | Log more details for FairCallQueue | Minor | . | Ryan Wu | Ryan Wu | +| [HDFS-13727](https://issues.apache.org/jira/browse/HDFS-13727) | Log full stack trace if DiskBalancer exits with an unhandled exception | Minor | diskbalancer | Stephen O'Donnell | Gabor Bota | +| [YARN-8517](https://issues.apache.org/jira/browse/YARN-8517) | getContainer and getContainers ResourceManager REST API methods are not documented | Major | resourcemanager | Szilard Nemeth | Antal Bálint Steinbach | +| [YARN-8566](https://issues.apache.org/jira/browse/YARN-8566) | Add diagnostic message for unschedulable containers | Major | resourcemanager | Szilard Nemeth | Szilard Nemeth | +| [YARN-8584](https://issues.apache.org/jira/browse/YARN-8584) | Several typos in Log Aggregation related classes | Minor | . | Szilard Nemeth | Szilard Nemeth | +| [YARN-8155](https://issues.apache.org/jira/browse/YARN-8155) | Improve ATSv2 client logging in RM and NM publisher | Major | . | Rohith Sharma K S | Abhishek Modi | +| [HADOOP-15476](https://issues.apache.org/jira/browse/HADOOP-15476) | fix logging for split-dns multihome | Major | . 
| Ajay Kumar | Ajay Kumar | +| [YARN-7948](https://issues.apache.org/jira/browse/YARN-7948) | Enable fair scheduler to refresh maximum allocation for multiple resource types | Major | fairscheduler | Yufei Gu | Szilard Nemeth | +| [HDFS-13796](https://issues.apache.org/jira/browse/HDFS-13796) | Allow verbosity of InMemoryLevelDBAliasMapServer to be configurable | Trivial | . | Virajith Jalaparti | Virajith Jalaparti | +| [YARN-8626](https://issues.apache.org/jira/browse/YARN-8626) | Create HomePolicyManager that sends all the requests to the home subcluster | Minor | . | Giovanni Matteo Fumarola | Íñigo Goiri | +| [HDFS-13728](https://issues.apache.org/jira/browse/HDFS-13728) | Disk Balancer should not fail if volume usage is greater than capacity | Minor | diskbalancer | Stephen O'Donnell | Stephen O'Donnell | +| [HDFS-13447](https://issues.apache.org/jira/browse/HDFS-13447) | Fix Typos - Node Not Chosen | Trivial | namenode | BELUGA BEHR | BELUGA BEHR | +| [YARN-8601](https://issues.apache.org/jira/browse/YARN-8601) | Print ExecutionType in Container report CLI | Minor | . | Bilwa S T | Bilwa S T | +| [HDFS-13658](https://issues.apache.org/jira/browse/HDFS-13658) | Expose HighestPriorityLowRedundancy blocks statistics | Major | hdfs | Kitti Nanasi | Kitti Nanasi | +| [YARN-8568](https://issues.apache.org/jira/browse/YARN-8568) | Replace the deprecated zk-address property in the HA config example in ResourceManagerHA.md | Minor | yarn | Antal Bálint Steinbach | Antal Bálint Steinbach | +| [HDFS-13735](https://issues.apache.org/jira/browse/HDFS-13735) | Make QJM HTTP URL connection timeout configurable | Minor | qjm | Chao Sun | Chao Sun | +| [YARN-4946](https://issues.apache.org/jira/browse/YARN-4946) | RM should not consider an application as COMPLETED when log aggregation is not in a terminal state | Major | log-aggregation | Robert Kanter | Szilard Nemeth | +| [HDFS-13814](https://issues.apache.org/jira/browse/HDFS-13814) | Remove super user privilege requirement for NameNode.getServiceStatus | Minor | namenode | Chao Sun | Chao Sun | +| [YARN-8559](https://issues.apache.org/jira/browse/YARN-8559) | Expose mutable-conf scheduler's configuration in RM /scheduler-conf endpoint | Major | resourcemanager | Anna Savarin | Weiwei Yang | +| [HDFS-13813](https://issues.apache.org/jira/browse/HDFS-13813) | Exit NameNode if dangling child inode is detected when saving FsImage | Major | hdfs, namenode | Siyao Meng | Siyao Meng | +| [HADOOP-14212](https://issues.apache.org/jira/browse/HADOOP-14212) | Expose SecurityEnabled boolean field in JMX for other services besides NameNode | Minor | . | Ray Burgemeestre | Adam Antal | +| [HDFS-13217](https://issues.apache.org/jira/browse/HDFS-13217) | Audit log all EC policy names during addErasureCodingPolicies | Major | erasure-coding | liaoyuxiangqin | liaoyuxiangqin | +| [HDFS-13732](https://issues.apache.org/jira/browse/HDFS-13732) | ECAdmin should print the policy name when an EC policy is set | Trivial | erasure-coding, tools | Soumyapn | Zsolt Venczel | +| [HDFS-13829](https://issues.apache.org/jira/browse/HDFS-13829) | Remove redundant condition judgement in DirectoryScanner#scan | Minor | datanode | liaoyuxiangqin | liaoyuxiangqin | +| [HDFS-13822](https://issues.apache.org/jira/browse/HDFS-13822) | speedup libhdfs++ build (enable parallel build) | Minor | . 
| Pradeep Ambati | Allen Wittenauer | +| [HADOOP-9214](https://issues.apache.org/jira/browse/HADOOP-9214) | Create a new touch command to allow modifying atime and mtime | Minor | tools | Brian Burton | Hrishikesh Gadre | +| [YARN-8242](https://issues.apache.org/jira/browse/YARN-8242) | YARN NM: OOM error while reading back the state store on recovery | Critical | yarn | Kanwaljeet Sachdev | Pradeep Ambati | +| [YARN-8683](https://issues.apache.org/jira/browse/YARN-8683) | Support to display pending scheduling requests in RM app attempt page | Major | webapp | Tao Yang | Tao Yang | +| [HDFS-13821](https://issues.apache.org/jira/browse/HDFS-13821) | RBF: Add dfs.federation.router.mount-table.cache.enable so that users can disable cache | Major | hdfs | Fei Hui | Fei Hui | +| [HDFS-13861](https://issues.apache.org/jira/browse/HDFS-13861) | RBF: Illegal Router Admin command leads to printing usage for all commands | Major | . | Ayush Saxena | Ayush Saxena | +| [HDFS-13831](https://issues.apache.org/jira/browse/HDFS-13831) | Make block increment deletion number configurable | Major | . | Yiqun Lin | Ryan Wu | +| [YARN-8051](https://issues.apache.org/jira/browse/YARN-8051) | TestRMEmbeddedElector#testCallbackSynchronization is flakey | Major | test | Robert Kanter | Robert Kanter | + + +### BUG FIXES: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8040](https://issues.apache.org/jira/browse/YARN-8040) | [UI2] New YARN UI webapp does not respect current pathname for REST api | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [HADOOP-15062](https://issues.apache.org/jira/browse/HADOOP-15062) | TestCryptoStreamsWithOpensslAesCtrCryptoCodec fails on Debian 9 | Major | . | Miklos Szegedi | Miklos Szegedi | +| [HDFS-11043](https://issues.apache.org/jira/browse/HDFS-11043) | TestWebHdfsTimeouts fails | Major | webhdfs | Andrew Wang | Chao Sun | +| [HADOOP-15331](https://issues.apache.org/jira/browse/HADOOP-15331) | Fix a race condition causing parsing error of java.io.BufferedInputStream in class org.apache.hadoop.conf.Configuration | Major | common | Miklos Szegedi | Miklos Szegedi | +| [HDFS-11900](https://issues.apache.org/jira/browse/HDFS-11900) | Hedged reads thread pool creation not synchronized | Major | hdfs-client | John Zhuge | John Zhuge | +| [YARN-8032](https://issues.apache.org/jira/browse/YARN-8032) | Yarn service should expose failuresValidityInterval to users and use it for launching containers | Major | . | Chandni Singh | Chandni Singh | +| [YARN-8043](https://issues.apache.org/jira/browse/YARN-8043) | Add the exception message for failed launches running under LCE | Major | . | Shane Kumpf | Shane Kumpf | +| [MAPREDUCE-6441](https://issues.apache.org/jira/browse/MAPREDUCE-6441) | Improve temporary directory name generation in LocalDistributedCacheManager for concurrent processes | Major | . | William Watson | Haibo Chen | +| [HADOOP-15299](https://issues.apache.org/jira/browse/HADOOP-15299) | Bump Hadoop's Jackson 2 dependency 2.9.x | Major | . | Sean Mackrory | Sean Mackrory | +| [YARN-7734](https://issues.apache.org/jira/browse/YARN-7734) | YARN-5418 breaks TestContainerLogsPage.testContainerLogPageAccess | Major | . 
| Miklos Szegedi | Tao Yang | +| [HDFS-13087](https://issues.apache.org/jira/browse/HDFS-13087) | Snapshotted encryption zone information should be immutable | Major | encryption | LiXin Ge | LiXin Ge | +| [HADOOP-12862](https://issues.apache.org/jira/browse/HADOOP-12862) | LDAP Group Mapping over SSL can not specify trust store | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HADOOP-15352](https://issues.apache.org/jira/browse/HADOOP-15352) | Fix default local maven repository path in create-release script | Minor | scripts | Elek, Marton | Elek, Marton | +| [HADOOP-15317](https://issues.apache.org/jira/browse/HADOOP-15317) | Improve NetworkTopology chooseRandom's loop | Major | . | Xiao Chen | Xiao Chen | +| [HADOOP-15355](https://issues.apache.org/jira/browse/HADOOP-15355) | TestCommonConfigurationFields is broken by HADOOP-15312 | Major | test | Konstantin Shvachko | LiXin Ge | +| [YARN-7764](https://issues.apache.org/jira/browse/YARN-7764) | Findbugs warning: Resource#getResources may expose internal representation | Major | api | Weiwei Yang | Weiwei Yang | +| [YARN-8115](https://issues.apache.org/jira/browse/YARN-8115) | [UI2] URL data like nodeHTTPAddress must be encoded in UI before using to access NM | Major | yarn-ui-v2 | Sunil Govindan | Sreenath Somarajapuram | +| [HADOOP-14855](https://issues.apache.org/jira/browse/HADOOP-14855) | Hadoop scripts may errantly believe a daemon is still running, preventing it from starting | Major | scripts | Aaron T. Myers | Robert Kanter | +| [HDFS-13350](https://issues.apache.org/jira/browse/HDFS-13350) | Negative legacy block ID will confuse Erasure Coding to be considered as striped block | Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu | +| [YARN-8119](https://issues.apache.org/jira/browse/YARN-8119) | [UI2] Timeline Server address' url scheme should be removed while accessing via KNOX | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [HDFS-13176](https://issues.apache.org/jira/browse/HDFS-13176) | WebHdfs file path gets truncated when having semicolon (;) inside | Major | webhdfs | Zsolt Venczel | Zsolt Venczel | +| [YARN-8083](https://issues.apache.org/jira/browse/YARN-8083) | [UI2] All YARN related configurations are paged together in conf page | Major | yarn-ui-v2 | Zoltan Haindrich | Gergely Novák | +| [HDFS-13292](https://issues.apache.org/jira/browse/HDFS-13292) | Crypto command should give proper exception when trying to set key on existing EZ directory | Major | hdfs, kms | Harshakiran Reddy | Ranith Sardar | +| [HADOOP-15366](https://issues.apache.org/jira/browse/HADOOP-15366) | Add a helper shutdown routine in HadoopExecutor to ensure clean shutdown | Minor | . | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-7905](https://issues.apache.org/jira/browse/YARN-7905) | Parent directory permission incorrect during public localization | Critical | . 
| Bibin A Chundatt | Bilwa S T | +| [HADOOP-15328](https://issues.apache.org/jira/browse/HADOOP-15328) | Fix the typo in HttpAuthentication.md | Minor | common | fang zhenyi | fang zhenyi | +| [HADOOP-15374](https://issues.apache.org/jira/browse/HADOOP-15374) | Add links of the new features of 3.1.0 to the top page | Major | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-7804](https://issues.apache.org/jira/browse/YARN-7804) | Refresh action on Grid view page should not be redirected to graph view | Major | yarn-ui-v2 | Yesha Vora | Gergely Novák | +| [HDFS-13420](https://issues.apache.org/jira/browse/HDFS-13420) | License header is displayed in ArchivalStorage/MemoryStorage html pages | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-13328](https://issues.apache.org/jira/browse/HDFS-13328) | Abstract ReencryptionHandler recursive logic in separate class. | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HADOOP-15340](https://issues.apache.org/jira/browse/HADOOP-15340) | Provide meaningful RPC server name for RpcMetrics | Major | common | Elek, Marton | Elek, Marton | +| [HADOOP-15357](https://issues.apache.org/jira/browse/HADOOP-15357) | Configuration.getPropsWithPrefix no longer does variable substitution | Major | . | Jim Brennan | Jim Brennan | +| [YARN-7984](https://issues.apache.org/jira/browse/YARN-7984) | Delete registry entries from ZK on ServiceClient stop and clean up stop/destroy behavior | Critical | yarn-native-services | Billie Rinaldi | Billie Rinaldi | +| [YARN-8133](https://issues.apache.org/jira/browse/YARN-8133) | Doc link broken for yarn-service from overview page. | Blocker | yarn-native-services | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8116](https://issues.apache.org/jira/browse/YARN-8116) | Nodemanager fails with NumberFormatException: For input string: "" | Critical | . | Yesha Vora | Chandni Singh | +| [MAPREDUCE-7062](https://issues.apache.org/jira/browse/MAPREDUCE-7062) | Update mapreduce.job.tags description for making use for ATSv2 purpose. | Major | . | Charan Hebri | Charan Hebri | +| [YARN-8073](https://issues.apache.org/jira/browse/YARN-8073) | TimelineClientImpl doesn't honor yarn.timeline-service.versions configuration | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8127](https://issues.apache.org/jira/browse/YARN-8127) | Resource leak when async scheduling is enabled | Critical | . | Weiwei Yang | Tao Yang | +| [HADOOP-12502](https://issues.apache.org/jira/browse/HADOOP-12502) | SetReplication OutOfMemoryError | Major | . | Philipp Schuegerl | Vinayakumar B | +| [HDFS-13427](https://issues.apache.org/jira/browse/HDFS-13427) | Fix the section titles of transparent encryption document | Minor | documentation | Akira Ajisaka | Akira Ajisaka | +| [HDFS-7101](https://issues.apache.org/jira/browse/HDFS-7101) | Potential null dereference in DFSck#doWork() | Minor | . 
| Ted Yu | skrho | +| [HDFS-13426](https://issues.apache.org/jira/browse/HDFS-13426) | Fix javadoc in FsDatasetAsyncDiskService#removeVolume | Minor | hdfs | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-8120](https://issues.apache.org/jira/browse/YARN-8120) | JVM can crash with SIGSEGV when exiting due to custom leveldb logger | Major | nodemanager, resourcemanager | Jason Lowe | Jason Lowe | +| [YARN-8147](https://issues.apache.org/jira/browse/YARN-8147) | TestClientRMService#testGetApplications sporadically fails | Major | test | Jason Lowe | Jason Lowe | +| [HDFS-13436](https://issues.apache.org/jira/browse/HDFS-13436) | Fix javadoc of package-info.java | Major | documentation | Akira Ajisaka | Akira Ajisaka | +| [HADOOP-15379](https://issues.apache.org/jira/browse/HADOOP-15379) | Make IrqHandler.bind() public | Minor | util | Steve Loughran | Ajay Kumar | +| [YARN-8154](https://issues.apache.org/jira/browse/YARN-8154) | Fix missing titles in PlacementConstraints document | Minor | documentation | Akira Ajisaka | Weiwei Yang | +| [YARN-8153](https://issues.apache.org/jira/browse/YARN-8153) | Guaranteed containers always stay in SCHEDULED on NM after restart | Major | . | Yang Wang | Yang Wang | +| [HADOOP-14970](https://issues.apache.org/jira/browse/HADOOP-14970) | MiniHadoopClusterManager doesn't respect lack of format option | Minor | . | Erik Krogen | Erik Krogen | +| [HDFS-13438](https://issues.apache.org/jira/browse/HDFS-13438) | Fix javadoc in FsVolumeList#removeVolume | Minor | . | Shashikant Banerjee | Shashikant Banerjee | +| [YARN-8142](https://issues.apache.org/jira/browse/YARN-8142) | yarn service application stops when AM is killed with SIGTERM | Major | yarn-native-services | Yesha Vora | Billie Rinaldi | +| [MAPREDUCE-7077](https://issues.apache.org/jira/browse/MAPREDUCE-7077) | Pipe mapreduce job fails with Permission denied for jobTokenPassword | Critical | . | Yesha Vora | Akira Ajisaka | +| [HDFS-13330](https://issues.apache.org/jira/browse/HDFS-13330) | ShortCircuitCache#fetchOrCreate never retries | Major | . | Wei-Chiu Chuang | Gabor Bota | +| [YARN-8156](https://issues.apache.org/jira/browse/YARN-8156) | Increase the default value of yarn.timeline-service.app-collector.linger-period.ms | Major | . | Rohith Sharma K S | Charan Hebri | +| [HADOOP-15369](https://issues.apache.org/jira/browse/HADOOP-15369) | Avoid usage of ${project.version} in parent poms | Major | build | Elek, Marton | Elek, Marton | +| [YARN-8162](https://issues.apache.org/jira/browse/YARN-8162) | Remove Method DirectoryCollection#verifyDirUsingMkdir | Major | nodemanager | Yufei Gu | Yufei Gu | +| [YARN-7773](https://issues.apache.org/jira/browse/YARN-7773) | YARN Federation used Mysql as state store throw exception, Unknown column 'homeSubCluster' in 'field list' | Blocker | federation | Yiran Wu | Yiran Wu | +| [YARN-8165](https://issues.apache.org/jira/browse/YARN-8165) | Incorrect queue name logging in AbstractContainerAllocator | Trivial | capacityscheduler | Weiwei Yang | Weiwei Yang | +| [YARN-8164](https://issues.apache.org/jira/browse/YARN-8164) | Fix a potential NPE in AbstractSchedulerPlanFollower | Major | . | lujie | lujie | +| [YARN-7088](https://issues.apache.org/jira/browse/YARN-7088) | Add application launch time to Resource Manager REST API | Major | . | Abdullah Yousufi | Kanwaljeet Sachdev | +| [YARN-8096](https://issues.apache.org/jira/browse/YARN-8096) | Wrong condition in AmIpFilter#getProxyAddresses() to update the proxy IP list | Major | . 
| Oleksandr Shevchenko | Oleksandr Shevchenko | +| [HDFS-12828](https://issues.apache.org/jira/browse/HDFS-12828) | OIV ReverseXML Processor fails with escaped characters | Critical | hdfs | Erik Krogen | Erik Krogen | +| [HADOOP-15391](https://issues.apache.org/jira/browse/HADOOP-15391) | Add missing css file in hadoop-aws, hadoop-aliyun, hadoop-azure and hadoop-azure-datalake modules | Major | documentation | Yiqun Lin | Yiqun Lin | +| [YARN-8171](https://issues.apache.org/jira/browse/YARN-8171) | [UI2] AM Node link from attempt page should not redirect to new tab | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8145](https://issues.apache.org/jira/browse/YARN-8145) | yarn rmadmin -getGroups doesn't return updated groups for user | Major | . | Sumana Sathish | Sunil Govindan | +| [HDFS-13463](https://issues.apache.org/jira/browse/HDFS-13463) | Fix javadoc in FsDatasetImpl#checkAndUpdate | Minor | datanode | Shashikant Banerjee | Shashikant Banerjee | +| [HDFS-13464](https://issues.apache.org/jira/browse/HDFS-13464) | Fix javadoc in FsVolumeList#handleVolumeFailures | Minor | documentation | Shashikant Banerjee | Shashikant Banerjee | +| [HADOOP-15396](https://issues.apache.org/jira/browse/HADOOP-15396) | Some java source files are executable | Minor | . | Akira Ajisaka | Shashikant Banerjee | +| [YARN-6827](https://issues.apache.org/jira/browse/YARN-6827) | [ATS1/1.5] NPE exception while publishing recovering applications into ATS during RM restart. | Major | resourcemanager | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8182](https://issues.apache.org/jira/browse/YARN-8182) | [UI2] Proxy- Clicking on nodes under Nodes HeatMap gives 401 error | Critical | . | Sumana Sathish | Sunil Govindan | +| [YARN-8189](https://issues.apache.org/jira/browse/YARN-8189) | [UI2] Nodes page column headers are half truncated | Major | . | Sunil Govindan | Sunil Govindan | +| [YARN-7830](https://issues.apache.org/jira/browse/YARN-7830) | [UI2] Post selecting grid view in Attempt page, attempt info page should also be opened with grid view | Major | yarn-ui-v2 | Yesha Vora | Gergely Novák | +| [YARN-7786](https://issues.apache.org/jira/browse/YARN-7786) | NullPointerException while launching ApplicationMaster | Major | . | lujie | lujie | +| [HDFS-10183](https://issues.apache.org/jira/browse/HDFS-10183) | Prevent race condition during class initialization | Minor | fs | Pavel Avgustinov | Pavel Avgustinov | +| [HDFS-13055](https://issues.apache.org/jira/browse/HDFS-13055) | Aggregate usage statistics from datanodes | Major | . | Ajay Kumar | Ajay Kumar | +| [HDFS-13388](https://issues.apache.org/jira/browse/HDFS-13388) | RequestHedgingProxyProvider calls multiple configured NNs all the time | Major | hdfs-client | Jinglun | Jinglun | +| [YARN-7956](https://issues.apache.org/jira/browse/YARN-7956) | [UI2] Avoid duplicating Components link under Services/\/Components | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [HDFS-13408](https://issues.apache.org/jira/browse/HDFS-13408) | MiniDFSCluster to support being built on randomized base directory | Major | test | Xiao Liang | Xiao Liang | +| [HDFS-13356](https://issues.apache.org/jira/browse/HDFS-13356) | Balancer:Set default value of minBlockSize to 10mb | Major | balancer & mover | Bharat Viswanadham | Bharat Viswanadham | +| [HADOOP-15390](https://issues.apache.org/jira/browse/HADOOP-15390) | Yarn RM logs flooded by DelegationTokenRenewer trying to renew KMS tokens | Critical | . 
| Xiao Chen | Xiao Chen | +| [HDFS-13336](https://issues.apache.org/jira/browse/HDFS-13336) | Test cases of TestWriteToReplica failed in windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-7598](https://issues.apache.org/jira/browse/YARN-7598) | Document how to use classpath isolation for aux-services in YARN | Major | . | Xuan Gong | Xuan Gong | +| [YARN-8196](https://issues.apache.org/jira/browse/YARN-8196) | yarn.webapp.api-service.enable should be highlighted in the quickstart | Trivial | documentation | Davide Vergari | Billie Rinaldi | +| [YARN-8183](https://issues.apache.org/jira/browse/YARN-8183) | Fix ConcurrentModificationException inside RMAppAttemptMetrics#convertAtomicLongMaptoLongMap | Critical | yarn | Sumana Sathish | Suma Shivaprasad | +| [HADOOP-15402](https://issues.apache.org/jira/browse/HADOOP-15402) | Prevent double logout of UGI's LoginContext | Major | security | Daryn Sharp | Daryn Sharp | +| [YARN-8188](https://issues.apache.org/jira/browse/YARN-8188) | RM Nodes UI data table index for sorting column need to be corrected post Application tags display | Major | resourcemanager, webapp | Weiwei Yang | Weiwei Yang | +| [HADOOP-15411](https://issues.apache.org/jira/browse/HADOOP-15411) | AuthenticationFilter should use Configuration.getPropsWithPrefix instead of iterator | Critical | . | Suma Shivaprasad | Suma Shivaprasad | +| [MAPREDUCE-7042](https://issues.apache.org/jira/browse/MAPREDUCE-7042) | Killed MR job data does not move to mapreduce.jobhistory.done-dir when ATS v2 is enabled | Major | . | Yesha Vora | Xuan Gong | +| [YARN-8205](https://issues.apache.org/jira/browse/YARN-8205) | Application State is not updated to ATS if AM launching is delayed. | Critical | . | Sumana Sathish | Rohith Sharma K S | +| [YARN-8004](https://issues.apache.org/jira/browse/YARN-8004) | Add unit tests for inter queue preemption for dominant resource calculator | Critical | yarn | Sumana Sathish | Zian Chen | +| [YARN-8208](https://issues.apache.org/jira/browse/YARN-8208) | Add log statement for Docker client configuration file at INFO level | Minor | yarn-native-services | Yesha Vora | Yesha Vora | +| [YARN-8211](https://issues.apache.org/jira/browse/YARN-8211) | Yarn registry dns log finds BufferUnderflowException on port ping | Major | yarn-native-services | Yesha Vora | Eric Yang | +| [MAPREDUCE-7072](https://issues.apache.org/jira/browse/MAPREDUCE-7072) | mapred job -history prints duplicate counter in human output | Major | client | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [YARN-8221](https://issues.apache.org/jira/browse/YARN-8221) | RMWebServices also need to honor yarn.resourcemanager.display.per-user-apps | Major | webapp | Sunil Govindan | Sunil Govindan | +| [YARN-8210](https://issues.apache.org/jira/browse/YARN-8210) | AMRMClient logging on every heartbeat to track updation of AM RM token causes too many log lines to be generated in AM logs | Major | yarn | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-8005](https://issues.apache.org/jira/browse/YARN-8005) | Add unit tests for queue priority with dominant resource calculator | Critical | . | Sumana Sathish | Zian Chen | +| [YARN-8225](https://issues.apache.org/jira/browse/YARN-8225) | YARN precommit build failing in TestPlacementConstraintTransformations | Critical | . | Billie Rinaldi | Shane Kumpf | +| [HDFS-13509](https://issues.apache.org/jira/browse/HDFS-13509) | Bug fix for breakHardlinks() of ReplicaInfo/LocalReplica, and fix TestFileAppend failures on Windows | Major | . 
| Xiao Liang | Xiao Liang | +| [YARN-8187](https://issues.apache.org/jira/browse/YARN-8187) | [UI2] Individual Node page does not contain breadcrumb trail | Critical | yarn-ui-v2 | Sumana Sathish | Zian Chen | +| [YARN-7799](https://issues.apache.org/jira/browse/YARN-7799) | YARN Service dependency follow up work | Critical | client, resourcemanager | Gour Saha | Billie Rinaldi | +| [MAPREDUCE-7073](https://issues.apache.org/jira/browse/MAPREDUCE-7073) | Optimize TokenCache#obtainTokensForNamenodesInternal | Major | . | Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-15406](https://issues.apache.org/jira/browse/HADOOP-15406) | hadoop-nfs dependencies for mockito and junit are not test scope | Major | nfs | Jason Lowe | Jason Lowe | +| [YARN-6385](https://issues.apache.org/jira/browse/YARN-6385) | Fix checkstyle warnings in TestFileSystemApplicationHistoryStore | Minor | . | Yiqun Lin | Yiqun Lin | +| [YARN-8222](https://issues.apache.org/jira/browse/YARN-8222) | Fix potential NPE when gets RMApp from RM context | Critical | . | Tao Yang | Tao Yang | +| [HADOOP-12071](https://issues.apache.org/jira/browse/HADOOP-12071) | conftest is not documented | Minor | documentation | Kengo Seki | Kengo Seki | +| [YARN-8209](https://issues.apache.org/jira/browse/YARN-8209) | NPE in DeletionService | Critical | . | Chandni Singh | Eric Badger | +| [HDFS-13481](https://issues.apache.org/jira/browse/HDFS-13481) | TestRollingFileSystemSinkWithHdfs#testFlushThread: test failed intermittently | Major | hdfs | Gabor Bota | Gabor Bota | +| [HADOOP-15434](https://issues.apache.org/jira/browse/HADOOP-15434) | Upgrade to ADLS SDK that exposes current timeout | Major | . | Sean Mackrory | Sean Mackrory | +| [YARN-8217](https://issues.apache.org/jira/browse/YARN-8217) | RmAuthenticationFilterInitializer /TimelineAuthenticationFilterInitializer should use Configuration.getPropsWithPrefix instead of iterator | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-7818](https://issues.apache.org/jira/browse/YARN-7818) | Remove privileged operation warnings during container launch for the ContainerRuntimes | Major | . | Yesha Vora | Shane Kumpf | +| [YARN-8223](https://issues.apache.org/jira/browse/YARN-8223) | ClassNotFoundException when auxiliary service is loaded from HDFS | Blocker | . | Charan Hebri | Zian Chen | +| [YARN-8079](https://issues.apache.org/jira/browse/YARN-8079) | Support static and archive unmodified local resources in service AM | Critical | . | Wangda Tan | Suma Shivaprasad | +| [YARN-8025](https://issues.apache.org/jira/browse/YARN-8025) | UsersManangers#getComputedResourceLimitForActiveUsers throws NPE due to preComputedActiveUserLimit is empty | Major | yarn | Jiandan Yang | Tao Yang | +| [YARN-8251](https://issues.apache.org/jira/browse/YARN-8251) | [UI2] Clicking on Application link at the header goes to Diagnostics Tab instead of AppAttempt Tab | Major | yarn-ui-v2 | Sumana Sathish | Yesha Vora | +| [YARN-8232](https://issues.apache.org/jira/browse/YARN-8232) | RMContainer lost queue name when RM HA happens | Major | resourcemanager | Hu Ziqian | Hu Ziqian | +| [YARN-7894](https://issues.apache.org/jira/browse/YARN-7894) | Improve ATS response for DS\_CONTAINER when container launch fails | Major | timelineserver | Charan Hebri | Chandni Singh | +| [YARN-8264](https://issues.apache.org/jira/browse/YARN-8264) | [UI2 GPU] GPU Info tab disappears if we click any sub link under List of Applications or List of Containers | Major | . 
| Sumana Sathish | Sunil Govindan | +| [HDFS-13136](https://issues.apache.org/jira/browse/HDFS-13136) | Avoid taking FSN lock while doing group member lookup for FSD permission check | Major | namenode | Xiaoyu Yao | Xiaoyu Yao | +| [HDFS-13537](https://issues.apache.org/jira/browse/HDFS-13537) | TestHdfsHelper does not generate jceks path properly for relative path in Windows | Major | . | Xiao Liang | Xiao Liang | +| [MAPREDUCE-7095](https://issues.apache.org/jira/browse/MAPREDUCE-7095) | Race conditions in closing FadvisedChunkedFile | Minor | . | Miklos Szegedi | Miklos Szegedi | +| [HADOOP-15446](https://issues.apache.org/jira/browse/HADOOP-15446) | WASB: PageBlobInputStream.skip breaks HBASE replication | Major | fs/azure | Thomas Marquardt | Thomas Marquardt | +| [YARN-7715](https://issues.apache.org/jira/browse/YARN-7715) | Support NM promotion/demotion of running containers. | Major | . | Arun Suresh | Miklos Szegedi | +| [YARN-7003](https://issues.apache.org/jira/browse/YARN-7003) | DRAINING state of queues is not recovered after RM restart | Major | capacityscheduler | Tao Yang | Tao Yang | +| [YARN-8268](https://issues.apache.org/jira/browse/YARN-8268) | Fair scheduler: reservable queue is configured both as parent and leaf queue | Major | fairscheduler | Gergo Repas | Gergo Repas | +| [YARN-8244](https://issues.apache.org/jira/browse/YARN-8244) | TestContainerSchedulerQueuing.testStartMultipleContainers failed | Major | . | Miklos Szegedi | Jim Brennan | +| [YARN-8265](https://issues.apache.org/jira/browse/YARN-8265) | Service AM should retrieve new IP for docker container relaunched by NM | Critical | yarn-native-services | Eric Yang | Billie Rinaldi | +| [YARN-8271](https://issues.apache.org/jira/browse/YARN-8271) | [UI2] Improve labeling of certain tables | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-8288](https://issues.apache.org/jira/browse/YARN-8288) | Fix wrong number of table columns in Resource Model doc | Major | . | Weiwei Yang | Weiwei Yang | +| [HDFS-13539](https://issues.apache.org/jira/browse/HDFS-13539) | DFSStripedInputStream NPE when reportCheckSumFailure | Major | . | Xiao Chen | Xiao Chen | +| [YARN-8266](https://issues.apache.org/jira/browse/YARN-8266) | [UI2] Clicking on application from cluster view should redirect to application attempt page | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-8166](https://issues.apache.org/jira/browse/YARN-8166) | [UI2] Service page header links are broken | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-8236](https://issues.apache.org/jira/browse/YARN-8236) | Invalid kerberos principal file name cause NPE in native service | Critical | yarn-native-services | Sunil Govindan | Gour Saha | +| [YARN-8278](https://issues.apache.org/jira/browse/YARN-8278) | DistributedScheduling is not working in HA | Blocker | . 
| Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-15466](https://issues.apache.org/jira/browse/HADOOP-15466) | Correct units in adl.http.timeout | Major | fs/adl | Sean Mackrory | Sean Mackrory | +| [YARN-8300](https://issues.apache.org/jira/browse/YARN-8300) | Fix NPE in DefaultUpgradeComponentsFinder | Major | yarn | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-8071](https://issues.apache.org/jira/browse/YARN-8071) | Add ability to specify nodemanager environment variables individually | Major | yarn | Jim Brennan | Jim Brennan | +| [HDFS-13581](https://issues.apache.org/jira/browse/HDFS-13581) | DN UI logs link is broken when https is enabled | Minor | datanode | Namit Maheshwari | Shashikant Banerjee | +| [MAPREDUCE-7094](https://issues.apache.org/jira/browse/MAPREDUCE-7094) | LocalDistributedCacheManager leaves classloaders open, which leaks FDs | Major | . | Adam Szita | Adam Szita | +| [YARN-8128](https://issues.apache.org/jira/browse/YARN-8128) | Document better the per-node per-app file limit in YARN log aggregation | Major | . | Xuan Gong | Xuan Gong | +| [YARN-8293](https://issues.apache.org/jira/browse/YARN-8293) | In YARN Services UI, "User Name for service" should be completely removed in secure clusters | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8141](https://issues.apache.org/jira/browse/YARN-8141) | YARN Native Service: Respect YARN\_CONTAINER\_RUNTIME\_DOCKER\_LOCAL\_RESOURCE\_MOUNTS specified in service spec | Critical | yarn-native-services | Wangda Tan | Chandni Singh | +| [YARN-8296](https://issues.apache.org/jira/browse/YARN-8296) | Update YarnServiceApi documentation and yarn service UI code to remove references to unique\_component\_support | Major | yarn-native-services, yarn-ui-v2 | Suma Shivaprasad | Suma Shivaprasad | +| [HDFS-13586](https://issues.apache.org/jira/browse/HDFS-13586) | Fsync fails on directories on Windows | Critical | datanode, hdfs | Lukas Majercak | Lukas Majercak | +| [HDFS-13573](https://issues.apache.org/jira/browse/HDFS-13573) | Javadoc for BlockPlacementPolicyDefault is inaccurate | Trivial | . | Yiqun Lin | Zsolt Venczel | +| [YARN-8248](https://issues.apache.org/jira/browse/YARN-8248) | Job hangs when a job requests a resource that its queue does not have | Major | fairscheduler | Szilard Nemeth | Szilard Nemeth | +| [YARN-8179](https://issues.apache.org/jira/browse/YARN-8179) | Preemption does not happen due to natural\_termination\_factor when DRF is used | Major | . | kyungwan nam | kyungwan nam | +| [HADOOP-15474](https://issues.apache.org/jira/browse/HADOOP-15474) | Rename properties introduced for \ | Major | conf | Nanda kumar | Zsolt Venczel | +| [HADOOP-15450](https://issues.apache.org/jira/browse/HADOOP-15450) | Avoid fsync storm triggered by DiskChecker and handle disk full situation | Blocker | . | Kihwal Lee | Arpit Agarwal | +| [YARN-8290](https://issues.apache.org/jira/browse/YARN-8290) | SystemMetricsPublisher.appACLsUpdated should be invoked after application information is published to ATS to avoid "User is not set in the application report" Exception | Critical | . 
| Yesha Vora | Eric Yang | +| [YARN-8332](https://issues.apache.org/jira/browse/YARN-8332) | Incorrect min/max allocation property name in resource types doc | Critical | documentation | Weiwei Yang | Weiwei Yang | +| [YARN-8273](https://issues.apache.org/jira/browse/YARN-8273) | Log aggregation does not warn if HDFS quota in target directory is exceeded | Major | log-aggregation | Gergo Repas | Gergo Repas | +| [HDFS-13601](https://issues.apache.org/jira/browse/HDFS-13601) | Optimize ByteString conversions in PBHelper | Major | . | Andrew Wang | Andrew Wang | +| [HDFS-13540](https://issues.apache.org/jira/browse/HDFS-13540) | DFSStripedInputStream should only allocate new buffers when reading | Major | . | Xiao Chen | Xiao Chen | +| [YARN-8297](https://issues.apache.org/jira/browse/YARN-8297) | Incorrect ATS Url used for Wire encrypted cluster | Blocker | yarn-ui-v2 | Yesha Vora | Sunil Govindan | +| [HDFS-13588](https://issues.apache.org/jira/browse/HDFS-13588) | Fix TestFsDatasetImpl test failures on Windows | Major | . | Xiao Liang | Xiao Liang | +| [YARN-8310](https://issues.apache.org/jira/browse/YARN-8310) | Handle old NMTokenIdentifier, AMRMTokenIdentifier, and ContainerTokenIdentifier formats | Major | . | Robert Kanter | Robert Kanter | +| [YARN-8336](https://issues.apache.org/jira/browse/YARN-8336) | Fix potential connection leak in SchedConfCLI and YarnWebServiceUtils | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8344](https://issues.apache.org/jira/browse/YARN-8344) | Missing nm.stop() in TestNodeManagerResync to fix testKillContainersOnResync | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8348](https://issues.apache.org/jira/browse/YARN-8348) | Incorrect and missing AfterClass in HBase-tests to fix NPE failures | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8327](https://issues.apache.org/jira/browse/YARN-8327) | Fix TestAggregatedLogFormat#testReadAcontainerLogs1 on Windows | Major | log-aggregation | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8346](https://issues.apache.org/jira/browse/YARN-8346) | Upgrading to 3.1 kills running containers with error "Opportunistic container queue is full" | Blocker | . | Rohith Sharma K S | Jason Lowe | +| [HDFS-13611](https://issues.apache.org/jira/browse/HDFS-13611) | Unsafe use of Text as a ConcurrentHashMap key in PBHelperClient | Major | . | Andrew Wang | Andrew Wang | +| [YARN-8316](https://issues.apache.org/jira/browse/YARN-8316) | Diagnostic message should improve when yarn service fails to launch due to ATS unavailability | Major | yarn-native-services | Yesha Vora | Billie Rinaldi | +| [YARN-8357](https://issues.apache.org/jira/browse/YARN-8357) | Yarn Service: NPE when service is saved first and then started. | Critical | . 
| Chandni Singh | Chandni Singh | +| [HDFS-13618](https://issues.apache.org/jira/browse/HDFS-13618) | Fix TestDataNodeFaultInjector test failures on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HADOOP-15473](https://issues.apache.org/jira/browse/HADOOP-15473) | Configure serialFilter in KeyProvider to avoid UnrecoverableKeyException caused by JDK-8189997 | Critical | kms | Gabor Bota | Gabor Bota | +| [YARN-8292](https://issues.apache.org/jira/browse/YARN-8292) | Fix the dominant resource preemption cannot happen when some of the resource vector becomes negative | Critical | yarn | Sumana Sathish | Wangda Tan | +| [HADOOP-15455](https://issues.apache.org/jira/browse/HADOOP-15455) | Incorrect debug message in KMSACL#hasAccess | Trivial | . | Wei-Chiu Chuang | Yuen-Kuei Hsueh | +| [YARN-8338](https://issues.apache.org/jira/browse/YARN-8338) | TimelineService V1.5 doesn't come up after HADOOP-15406 | Critical | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [YARN-8339](https://issues.apache.org/jira/browse/YARN-8339) | Service AM should localize static/archive resource types to container working directory instead of 'resources' | Critical | yarn-native-services | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-8369](https://issues.apache.org/jira/browse/YARN-8369) | Javadoc build failed due to "bad use of '\>'" | Critical | build, docs | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8362](https://issues.apache.org/jira/browse/YARN-8362) | Number of remaining retries are updated twice after a container failure in NM | Critical | . | Chandni Singh | Chandni Singh | +| [HDFS-13626](https://issues.apache.org/jira/browse/HDFS-13626) | Fix incorrect username when deny the setOwner operation | Minor | namenode | luhuachao | Zsolt Venczel | +| [YARN-8377](https://issues.apache.org/jira/browse/YARN-8377) | Javadoc build failed in hadoop-yarn-server-nodemanager | Critical | build, docs | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8368](https://issues.apache.org/jira/browse/YARN-8368) | yarn app start cli should print applicationId | Critical | . | Yesha Vora | Rohith Sharma K S | +| [YARN-8350](https://issues.apache.org/jira/browse/YARN-8350) | NPE in service AM related to placement policy | Critical | yarn-native-services | Billie Rinaldi | Gour Saha | +| [YARN-8367](https://issues.apache.org/jira/browse/YARN-8367) | Fix NPE in SingleConstraintAppPlacementAllocator when placement constraint in SchedulingRequest is null | Major | scheduler | Gour Saha | Weiwei Yang | +| [HADOOP-15490](https://issues.apache.org/jira/browse/HADOOP-15490) | Multiple declaration of maven-enforcer-plugin found in pom.xml | Minor | . 
| Nanda kumar | Nanda kumar | +| [HDFS-13646](https://issues.apache.org/jira/browse/HDFS-13646) | DFSAdmin doesn't display specialized help for triggerBlockReport | Major | tools | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8197](https://issues.apache.org/jira/browse/YARN-8197) | Tracking URL in the app state does not get redirected to MR ApplicationMaster for Running applications | Critical | yarn | Sumana Sathish | Sunil Govindan | +| [YARN-8308](https://issues.apache.org/jira/browse/YARN-8308) | Yarn service app fails due to issues with Renew Token | Major | yarn-native-services | Yesha Vora | Gour Saha | +| [YARN-7340](https://issues.apache.org/jira/browse/YARN-7340) | Fix the missing time stamp in exception message in Class NoOverCommitPolicy | Minor | reservation system | Yufei Gu | Dinesh Chitlangia | +| [HDFS-13636](https://issues.apache.org/jira/browse/HDFS-13636) | Cross-Site Scripting vulnerability in HttpServer2 | Major | . | Haibo Yan | Haibo Yan | +| [YARN-7962](https://issues.apache.org/jira/browse/YARN-7962) | Race Condition When Stopping DelegationTokenRenewer causes RM crash during failover | Critical | resourcemanager | BELUGA BEHR | BELUGA BEHR | +| [YARN-8372](https://issues.apache.org/jira/browse/YARN-8372) | Distributed shell app master should not release containers when shutdown if keep-container is true | Critical | distributed-shell | Charan Hebri | Suma Shivaprasad | +| [YARN-8375](https://issues.apache.org/jira/browse/YARN-8375) | TestCGroupElasticMemoryController fails surefire build | Major | . | Jason Lowe | Miklos Szegedi | +| [YARN-8319](https://issues.apache.org/jira/browse/YARN-8319) | More YARN pages need to honor yarn.resourcemanager.display.per-user-apps | Major | webapp | Vinod Kumar Vavilapalli | Sunil Govindan | +| [MAPREDUCE-7097](https://issues.apache.org/jira/browse/MAPREDUCE-7097) | MapReduce JHS should honor yarn.webapp.filter-entity-list-by-user | Major | . | Vinod Kumar Vavilapalli | Sunil Govindan | +| [YARN-8276](https://issues.apache.org/jira/browse/YARN-8276) | [UI2] After version field became mandatory, form-based submission of new YARN service doesn't work | Critical | yarn-ui-v2 | Gergely Novák | Gergely Novák | +| [HDFS-13339](https://issues.apache.org/jira/browse/HDFS-13339) | Volume reference can't be released and may lead to deadlock when DataXceiver does a check volume | Critical | datanode | liaoyuxiangqin | Zsolt Venczel | +| [YARN-8390](https://issues.apache.org/jira/browse/YARN-8390) | Fix API incompatible changes in FairScheduler's AllocationFileLoaderService | Major | fairscheduler | Gergo Repas | Gergo Repas | +| [YARN-8382](https://issues.apache.org/jira/browse/YARN-8382) | cgroup file leak in NM | Major | nodemanager | Hu Ziqian | Hu Ziqian | +| [YARN-8365](https://issues.apache.org/jira/browse/YARN-8365) | Revisit the record type used by Registry DNS for upstream resolution | Major | yarn-native-services | Shane Kumpf | Shane Kumpf | +| [HDFS-13545](https://issues.apache.org/jira/browse/HDFS-13545) | "guarded" is misspelled as "gaurded" in FSPermissionChecker.java | Trivial | documentation | Jianchao Jia | Jianchao Jia | +| [YARN-8396](https://issues.apache.org/jira/browse/YARN-8396) | Click on an individual container continuously spins and doesn't load the page | Blocker | . | Charan Hebri | Sunil Govindan | +| [MAPREDUCE-7103](https://issues.apache.org/jira/browse/MAPREDUCE-7103) | Fix TestHistoryViewerPrinter on windows due to a mismatch line separator | Minor | . 
| Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15217](https://issues.apache.org/jira/browse/HADOOP-15217) | FsUrlConnection does not handle paths with spaces | Major | fs | Joseph Fourny | Zsolt Venczel | +| [HDFS-12950](https://issues.apache.org/jira/browse/HDFS-12950) | [oiv] ls will fail in secure cluster | Major | . | Brahma Reddy Battula | Wei-Chiu Chuang | +| [YARN-8386](https://issues.apache.org/jira/browse/YARN-8386) | App log can not be viewed from Logs tab in secure cluster | Critical | yarn-ui-v2 | Yesha Vora | Sunil Govindan | +| [HDFS-13615](https://issues.apache.org/jira/browse/HDFS-13615) | libhdfs++ SaslProtocol hanging while accessing invalid lock | Major | . | Mitchell Tracy | Mitchell Tracy | +| [YARN-8359](https://issues.apache.org/jira/browse/YARN-8359) | Exclude containermanager.linux test classes on Windows | Major | . | Giovanni Matteo Fumarola | Jason Lowe | +| [HDFS-13642](https://issues.apache.org/jira/browse/HDFS-13642) | Creating a file with block size smaller than EC policy's cell size should fail | Major | erasure-coding | Xiao Chen | Xiao Chen | +| [HDFS-13664](https://issues.apache.org/jira/browse/HDFS-13664) | Refactor ConfiguredFailoverProxyProvider to make inheritance easier | Minor | hdfs-client | Chao Sun | Chao Sun | +| [HDFS-12670](https://issues.apache.org/jira/browse/HDFS-12670) | can't renew HDFS tokens with only the hdfs client jar | Critical | . | Thomas Graves | Arpit Agarwal | +| [HDFS-13667](https://issues.apache.org/jira/browse/HDFS-13667) | Typo: Marking all "datandoes" as stale | Trivial | namenode | Wei-Chiu Chuang | Nanda kumar | +| [YARN-8323](https://issues.apache.org/jira/browse/YARN-8323) | FairScheduler.allocConf should be declared as volatile | Major | fairscheduler | Haibo Chen | Szilard Nemeth | +| [YARN-8413](https://issues.apache.org/jira/browse/YARN-8413) | Flow activity page is failing with "Timeline server failed with an error" | Major | yarn-ui-v2 | Yesha Vora | Sunil Govindan | +| [YARN-8405](https://issues.apache.org/jira/browse/YARN-8405) | RM zk-state-store.parent-path ACLs has been changed since HADOOP-14773 | Major | . | Rohith Sharma K S | Íñigo Goiri | +| [YARN-8419](https://issues.apache.org/jira/browse/YARN-8419) | [UI2] User cannot submit a new service as submit button is always disabled | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [HADOOP-15307](https://issues.apache.org/jira/browse/HADOOP-15307) | NFS: flavor AUTH\_SYS should use VerifierNone | Major | nfs | Wei-Chiu Chuang | Gabor Bota | +| [MAPREDUCE-7108](https://issues.apache.org/jira/browse/MAPREDUCE-7108) | TestFileOutputCommitter fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [MAPREDUCE-7101](https://issues.apache.org/jira/browse/MAPREDUCE-7101) | Add config parameter to allow JHS to alway scan user dir irrespective of modTime | Critical | . | Wangda Tan | Thomas Marquardt | +| [HADOOP-15527](https://issues.apache.org/jira/browse/HADOOP-15527) | loop until TIMEOUT before sending kill -9 | Major | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [YARN-8404](https://issues.apache.org/jira/browse/YARN-8404) | Timeline event publish need to be async to avoid Dispatcher thread leak in case ATS is down | Blocker | . 
| Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8410](https://issues.apache.org/jira/browse/YARN-8410) | Registry DNS lookup fails to return for CNAMEs | Major | yarn-native-services | Shane Kumpf | Shane Kumpf | +| [YARN-8426](https://issues.apache.org/jira/browse/YARN-8426) | Upgrade jquery-ui to 1.12.1 in YARN | Major | webapp | Sunil Govindan | Sunil Govindan | +| [HDFS-13675](https://issues.apache.org/jira/browse/HDFS-13675) | Speed up TestDFSAdminWithHA | Major | hdfs, namenode | Lukas Majercak | Lukas Majercak | +| [HDFS-13679](https://issues.apache.org/jira/browse/HDFS-13679) | Fix Typo in javadoc for ScanInfoPerBlockPool#addAll | Minor | . | Shashikant Banerjee | Shashikant Banerjee | +| [HDFS-13673](https://issues.apache.org/jira/browse/HDFS-13673) | TestNameNodeMetrics fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13676](https://issues.apache.org/jira/browse/HDFS-13676) | TestEditLogRace fails on Windows | Minor | test | Zuoming Zhang | Zuoming Zhang | +| [HDFS-13174](https://issues.apache.org/jira/browse/HDFS-13174) | hdfs mover -p /path times out after 20 min | Major | balancer & mover | Istvan Fajth | Istvan Fajth | +| [HADOOP-15504](https://issues.apache.org/jira/browse/HADOOP-15504) | Upgrade Maven and Maven Wagon versions | Major | build | Sean Mackrory | Sean Mackrory | +| [HADOOP-15523](https://issues.apache.org/jira/browse/HADOOP-15523) | Shell command timeout given is in seconds whereas it is taken as millisec while scheduling | Major | . | Bilwa S T | Bilwa S T | +| [YARN-8437](https://issues.apache.org/jira/browse/YARN-8437) | Build oom-listener fails on older versions | Major | . | Miklos Szegedi | Miklos Szegedi | +| [YARN-8391](https://issues.apache.org/jira/browse/YARN-8391) | Investigate AllocationFileLoaderService.reloadListener locking issue | Critical | fairscheduler | Haibo Chen | Szilard Nemeth | +| [HDFS-13682](https://issues.apache.org/jira/browse/HDFS-13682) | Cannot create encryption zone after KMS auth token expires | Critical | encryption, kms, namenode | Xiao Chen | Xiao Chen | +| [HADOOP-15549](https://issues.apache.org/jira/browse/HADOOP-15549) | Upgrade to commons-configuration 2.1 regresses task CPU consumption | Major | metrics | Todd Lipcon | Todd Lipcon | +| [YARN-8444](https://issues.apache.org/jira/browse/YARN-8444) | NodeResourceMonitor crashes on bad swapFree value | Major | . | Jim Brennan | Jim Brennan | +| [MAPREDUCE-7114](https://issues.apache.org/jira/browse/MAPREDUCE-7114) | Make FrameworkUploader symlink ignore improvement | Major | . 
| Gergo Repas | Gergo Repas | +| [YARN-8184](https://issues.apache.org/jira/browse/YARN-8184) | Too many metrics if containerLocalizer/ResourceLocalizationService uses ReadWriteDiskValidator | Major | nodemanager | Yufei Gu | Yufei Gu | +| [YARN-8326](https://issues.apache.org/jira/browse/YARN-8326) | Yarn 3.0 seems runs slower than Yarn 2.6 | Major | yarn | Hsin-Liang Huang | Shane Kumpf | +| [YARN-8443](https://issues.apache.org/jira/browse/YARN-8443) | Total #VCores in cluster metrics is wrong when CapacityScheduler reserved some containers | Major | webapp | Tao Yang | Tao Yang | +| [YARN-8457](https://issues.apache.org/jira/browse/YARN-8457) | Compilation is broken with -Pyarn-ui | Major | webapp | Sunil Govindan | Sunil Govindan | +| [HADOOP-15550](https://issues.apache.org/jira/browse/HADOOP-15550) | Avoid static initialization of ObjectMappers | Minor | performance | Todd Lipcon | Todd Lipcon | +| [YARN-8438](https://issues.apache.org/jira/browse/YARN-8438) | TestContainer.testKillOnNew flaky on trunk | Major | nodemanager | Szilard Nemeth | Szilard Nemeth | +| [YARN-8464](https://issues.apache.org/jira/browse/YARN-8464) | Async scheduling thread could be interrupted when there are no NodeManagers in cluster | Blocker | capacity scheduler | Charan Hebri | Sunil Govindan | +| [YARN-8423](https://issues.apache.org/jira/browse/YARN-8423) | GPU does not get released even though the application gets killed. | Critical | yarn | Sumana Sathish | Sunil Govindan | +| [YARN-8401](https://issues.apache.org/jira/browse/YARN-8401) | [UI2] new ui is not accessible with out internet connection | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [HDFS-13705](https://issues.apache.org/jira/browse/HDFS-13705) | The native ISA-L library loading failure should be made warning rather than an error message | Minor | erasure-coding | Nilotpal Nandi | Shashikant Banerjee | +| [YARN-8409](https://issues.apache.org/jira/browse/YARN-8409) | ActiveStandbyElectorBasedElectorService is failing with NPE | Major | . | Yesha Vora | Chandni Singh | +| [YARN-8379](https://issues.apache.org/jira/browse/YARN-8379) | Improve balancing resources in already satisfied queues by using Capacity Scheduler preemption | Major | . | Wangda Tan | Zian Chen | +| [YARN-8455](https://issues.apache.org/jira/browse/YARN-8455) | Add basic ACL check for all ATS v2 REST APIs | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8469](https://issues.apache.org/jira/browse/YARN-8469) | [UI2] URL needs to be trimmed to handle index.html redirection while accessing via knox | Major | yarn-ui-v2 | Sunil Govindan | Sunil Govindan | +| [YARN-8451](https://issues.apache.org/jira/browse/YARN-8451) | Multiple NM heartbeat thread created when a slow NM resync with RM | Major | nodemanager | Botong Huang | Botong Huang | +| [HADOOP-15548](https://issues.apache.org/jira/browse/HADOOP-15548) | Randomize local dirs | Minor | . | Jim Brennan | Jim Brennan | +| [HDFS-13707](https://issues.apache.org/jira/browse/HDFS-13707) | [PROVIDED Storage] Fix failing integration tests in ITestProvidedImplementation | Major | . | Virajith Jalaparti | Virajith Jalaparti | +| [HADOOP-15574](https://issues.apache.org/jira/browse/HADOOP-15574) | Suppress build error if there are no docs after excluding private annotations | Major | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13702](https://issues.apache.org/jira/browse/HDFS-13702) | Remove HTrace hooks from DFSClient to reduce CPU usage | Major | performance | Todd Lipcon | Todd Lipcon | +| [HDFS-13635](https://issues.apache.org/jira/browse/HDFS-13635) | Incorrect message when block is not found | Major | datanode | Wei-Chiu Chuang | Gabor Bota | +| [YARN-8415](https://issues.apache.org/jira/browse/YARN-8415) | TimelineWebServices.getEntity should throw ForbiddenException instead of 404 when ACL checks fail | Major | . | Sumana Sathish | Suma Shivaprasad | +| [HDFS-13715](https://issues.apache.org/jira/browse/HDFS-13715) | diskbalancer does not work if one of the blockpools are empty on a Federated cluster | Major | diskbalancer | Namit Maheshwari | Bharat Viswanadham | +| [YARN-8459](https://issues.apache.org/jira/browse/YARN-8459) | Improve Capacity Scheduler logs to debug invalid states | Major | capacity scheduler | Wangda Tan | Wangda Tan | +| [YARN-7451](https://issues.apache.org/jira/browse/YARN-7451) | Add missing tests to verify the presence of custom resources of RM apps and scheduler webservice endpoints | Major | resourcemanager, restapi | Grant Sohn | Szilard Nemeth | +| [YARN-8435](https://issues.apache.org/jira/browse/YARN-8435) | Fix NPE when the same client simultaneously contact for the first time Yarn Router | Critical | router | rangjiaheng | rangjiaheng | +| [HADOOP-15571](https://issues.apache.org/jira/browse/HADOOP-15571) | Multiple FileContexts created with the same configuration object should be allowed to have different umask | Critical | . | Vinod Kumar Vavilapalli | Vinod Kumar Vavilapalli | +| [YARN-8492](https://issues.apache.org/jira/browse/YARN-8492) | ATSv2 HBase tests are failing with ClassNotFoundException | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13121](https://issues.apache.org/jira/browse/HDFS-13121) | NPE when request file descriptors when SC read | Minor | hdfs-client | Gang Xie | Zsolt Venczel | +| [HDFS-13721](https://issues.apache.org/jira/browse/HDFS-13721) | NPE in DataNode due to uninitialized DiskBalancer | Major | datanode, diskbalancer | Xiao Chen | Xiao Chen | +| [YARN-6265](https://issues.apache.org/jira/browse/YARN-6265) | yarn.resourcemanager.fail-fast is used inconsistently | Major | resourcemanager | Daniel Templeton | Yuanbo Liu | +| [HDFS-13722](https://issues.apache.org/jira/browse/HDFS-13722) | HDFS Native Client Fails Compilation on Ubuntu 18.04 | Minor | . | Jack Bearden | Jack Bearden | +| [YARN-8473](https://issues.apache.org/jira/browse/YARN-8473) | Containers being launched as app tears down can leave containers in NEW state | Major | nodemanager | Jason Lowe | Jason Lowe | +| [YARN-8512](https://issues.apache.org/jira/browse/YARN-8512) | ATSv2 entities are not published to HBase from second attempt onwards | Major | . | Yesha Vora | Rohith Sharma K S | +| [YARN-8491](https://issues.apache.org/jira/browse/YARN-8491) | TestServiceCLI#testEnableFastLaunch fail when umask is 077 | Major | . | K G Bakthavachalam | K G Bakthavachalam | +| [HADOOP-15594](https://issues.apache.org/jira/browse/HADOOP-15594) | Exclude commons-lang3 from hadoop-client-minicluster | Major | . 
| Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13723](https://issues.apache.org/jira/browse/HDFS-13723) | Occasional "Should be different group" error in TestRefreshUserMappings#testGroupMappingRefresh | Major | security, test | Siyao Meng | Siyao Meng | +| [HDFS-12837](https://issues.apache.org/jira/browse/HDFS-12837) | Intermittent failure in TestReencryptionWithKMS | Major | encryption, test | Surendra Singh Lilhore | Xiao Chen | +| [HADOOP-15316](https://issues.apache.org/jira/browse/HADOOP-15316) | GenericTestUtils can exceed maxSleepTime | Trivial | . | Sean Mackrory | Adam Antal | +| [HDFS-13729](https://issues.apache.org/jira/browse/HDFS-13729) | Fix broken links to RBF documentation | Minor | documentation | jwhitter | Gabor Bota | +| [YARN-8518](https://issues.apache.org/jira/browse/YARN-8518) | test-container-executor test\_is\_empty() is broken | Major | . | Jim Brennan | Jim Brennan | +| [HDFS-13663](https://issues.apache.org/jira/browse/HDFS-13663) | Should throw exception when incorrect block size is set | Major | . | Yongjun Zhang | Shweta | +| [YARN-8515](https://issues.apache.org/jira/browse/YARN-8515) | container-executor can crash with SIGPIPE after nodemanager restart | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8421](https://issues.apache.org/jira/browse/YARN-8421) | when moving app, activeUsers is increased, even though app does not have outstanding request | Major | . | kyungwan nam | | +| [YARN-8511](https://issues.apache.org/jira/browse/YARN-8511) | When AM releases a container, RM removes allocation tags before it is released by NM | Major | capacity scheduler | Weiwei Yang | Weiwei Yang | +| [HDFS-13524](https://issues.apache.org/jira/browse/HDFS-13524) | Occasional "All datanodes are bad" error in TestLargeBlock#testLargeBlockSize | Major | . | Wei-Chiu Chuang | Siyao Meng | +| [YARN-8538](https://issues.apache.org/jira/browse/YARN-8538) | Fix valgrind leak check on container executor | Major | . | Billie Rinaldi | Billie Rinaldi | +| [HDFS-13485](https://issues.apache.org/jira/browse/HDFS-13485) | DataNode WebHDFS endpoint throws NPE | Minor | datanode, webhdfs | Wei-Chiu Chuang | Siyao Meng | +| [HADOOP-15610](https://issues.apache.org/jira/browse/HADOOP-15610) | Hadoop Docker Image Pip Install Fails | Critical | . | Jack Bearden | Jack Bearden | +| [HADOOP-15614](https://issues.apache.org/jira/browse/HADOOP-15614) | TestGroupsCaching.testExceptionOnBackgroundRefreshHandled reliably fails | Major | . 
| Kihwal Lee | Weiwei Yang | +| [YARN-8436](https://issues.apache.org/jira/browse/YARN-8436) | FSParentQueue: Comparison method violates its general contract | Minor | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [MAPREDUCE-7118](https://issues.apache.org/jira/browse/MAPREDUCE-7118) | Distributed cache conflicts breaks backwards compatability | Blocker | mrv2 | Jason Lowe | Jason Lowe | +| [YARN-8528](https://issues.apache.org/jira/browse/YARN-8528) | Final states in ContainerAllocation might be modified externally causing unexpected allocation results | Major | capacity scheduler | Xintong Song | Xintong Song | +| [YARN-6964](https://issues.apache.org/jira/browse/YARN-6964) | Fair scheduler misuses Resources operations | Major | fairscheduler | Daniel Templeton | Szilard Nemeth | +| [YARN-8360](https://issues.apache.org/jira/browse/YARN-8360) | Yarn service conflict between restart policy and NM configuration | Critical | yarn | Chandni Singh | Suma Shivaprasad | +| [YARN-8380](https://issues.apache.org/jira/browse/YARN-8380) | Support bind propagation options for mounts in docker runtime | Major | . | Billie Rinaldi | Billie Rinaldi | +| [YARN-8544](https://issues.apache.org/jira/browse/YARN-8544) | [DS] AM registration fails when hadoop authorization is enabled | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8548](https://issues.apache.org/jira/browse/YARN-8548) | AllocationRespose proto setNMToken initBuilder not done | Major | . | Bibin A Chundatt | Bilwa S T | +| [YARN-7748](https://issues.apache.org/jira/browse/YARN-7748) | TestContainerResizing.testIncreaseContainerUnreservedWhenApplicationCompleted fails due to multiple container fail events | Major | capacityscheduler | Haibo Chen | Weiwei Yang | +| [YARN-8541](https://issues.apache.org/jira/browse/YARN-8541) | RM startup failure on recovery after user deletion | Blocker | resourcemanager | yimeng | Bibin A Chundatt | +| [YARN-8577](https://issues.apache.org/jira/browse/YARN-8577) | Fix the broken anchor in SLS site-doc | Minor | documentation | Weiwei Yang | Weiwei Yang | +| [HADOOP-15395](https://issues.apache.org/jira/browse/HADOOP-15395) | DefaultImpersonationProvider fails to parse proxy user config if username has . in it | Major | . | Ajay Kumar | Ajay Kumar | +| [YARN-4606](https://issues.apache.org/jira/browse/YARN-4606) | CapacityScheduler: applications could get starved because computation of #activeUsers considers pending apps | Critical | capacity scheduler, capacityscheduler | Karam Singh | Manikandan R | +| [YARN-8330](https://issues.apache.org/jira/browse/YARN-8330) | Avoid publishing reserved container to ATS from RM | Critical | yarn-native-services | Yesha Vora | Suma Shivaprasad | +| [HDFS-13622](https://issues.apache.org/jira/browse/HDFS-13622) | mkdir should print the parent directory in the error message when parent directories do not exist | Major | . | Zoltan Haindrich | Shweta | +| [HADOOP-15593](https://issues.apache.org/jira/browse/HADOOP-15593) | UserGroupInformation TGT renewer throws NPE | Blocker | security | Wei-Chiu Chuang | Gabor Bota | +| [YARN-8429](https://issues.apache.org/jira/browse/YARN-8429) | Improve diagnostic message when artifact is not set properly | Major | . 
| Yesha Vora | Gour Saha | +| [HDFS-13765](https://issues.apache.org/jira/browse/HDFS-13765) | Fix javadoc for FSDirMkdirOp#createParentDirectories | Minor | documentation | Lokesh Jain | Lokesh Jain | +| [YARN-8571](https://issues.apache.org/jira/browse/YARN-8571) | Validate service principal format prior to launching yarn service | Major | security, yarn | Eric Yang | Eric Yang | +| [YARN-8596](https://issues.apache.org/jira/browse/YARN-8596) | Allow SQLFederationStateStore to submit the same app in the same subcluster | Major | federation | Íñigo Goiri | Giovanni Matteo Fumarola | +| [YARN-8508](https://issues.apache.org/jira/browse/YARN-8508) | On NodeManager container gets cleaned up before its pid file is created | Critical | . | Sumana Sathish | Chandni Singh | +| [YARN-8434](https://issues.apache.org/jira/browse/YARN-8434) | Update federation documentation of Nodemanager configurations | Minor | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8591](https://issues.apache.org/jira/browse/YARN-8591) | [ATSv2] NPE while checking for entity acl in non-secure cluster | Major | timelinereader, timelineserver | Akhil PB | Rohith Sharma K S | +| [YARN-8558](https://issues.apache.org/jira/browse/YARN-8558) | NM recovery level db not cleaned up properly on container finish | Critical | . | Bibin A Chundatt | Bibin A Chundatt | +| [HADOOP-15637](https://issues.apache.org/jira/browse/HADOOP-15637) | LocalFs#listLocatedStatus does not filter out hidden .crc files | Minor | fs | Erik Krogen | Erik Krogen | +| [YARN-8605](https://issues.apache.org/jira/browse/YARN-8605) | TestDominantResourceFairnessPolicy.testModWhileSorting is flaky | Minor | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg | +| [YARN-8418](https://issues.apache.org/jira/browse/YARN-8418) | App local logs could leaked if log aggregation fails to initialize for the app | Critical | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8579](https://issues.apache.org/jira/browse/YARN-8579) | New AM attempt could not retrieve previous attempt component data | Critical | . | Yesha Vora | Gour Saha | +| [HDFS-13322](https://issues.apache.org/jira/browse/HDFS-13322) | fuse dfs - uid persists when switching between ticket caches | Minor | fuse-dfs | Alex Volskiy | Istvan Fajth | +| [YARN-8397](https://issues.apache.org/jira/browse/YARN-8397) | Potential thread leak in ActivitiesManager | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-8522](https://issues.apache.org/jira/browse/YARN-8522) | Application fails with InvalidResourceRequestException | Critical | . | Yesha Vora | Zian Chen | +| [YARN-8606](https://issues.apache.org/jira/browse/YARN-8606) | Opportunistic scheduling does not work post RM failover | Blocker | . | Bibin A Chundatt | Bibin A Chundatt | +| [YARN-8595](https://issues.apache.org/jira/browse/YARN-8595) | [UI2] Container diagnostic information is missing from container page | Major | yarn-ui-v2 | Akhil PB | Akhil PB | +| [YARN-8403](https://issues.apache.org/jira/browse/YARN-8403) | Nodemanager logs failed to download file with INFO level | Major | yarn | Eric Yang | Eric Yang | +| [YARN-8600](https://issues.apache.org/jira/browse/YARN-8600) | RegistryDNS hang when remote lookup does not reply | Critical | yarn | Eric Yang | Eric Yang | +| [YARN-8610](https://issues.apache.org/jira/browse/YARN-8610) | Yarn Service Upgrade: Typo in Error message | Major | . 
| Chandni Singh | Chandni Singh | +| [YARN-8593](https://issues.apache.org/jira/browse/YARN-8593) | Add RM web service endpoint to get user information | Major | resourcemanager | Akhil PB | Akhil PB | +| [YARN-8594](https://issues.apache.org/jira/browse/YARN-8594) | [UI2] Display current logged in user | Major | . | Akhil PB | Akhil PB | +| [YARN-8592](https://issues.apache.org/jira/browse/YARN-8592) | [UI2] rmip:port/ui2 endpoint shows a blank page in windows OS and Chrome browser | Major | . | Akhil S Naik | Akhil PB | +| [YARN-8318](https://issues.apache.org/jira/browse/YARN-8318) | [UI2] IP address in component page shows N/A | Major | yarn-ui-v2 | Yesha Vora | Yesha Vora | +| [YARN-6966](https://issues.apache.org/jira/browse/YARN-6966) | NodeManager metrics may return wrong negative values when NM restart | Major | . | Yang Wang | Szilard Nemeth | +| [YARN-8603](https://issues.apache.org/jira/browse/YARN-8603) | [UI2] Latest run application should be listed first in the RM UI | Major | yarn-ui-v2 | Sumana Sathish | Akhil PB | +| [YARN-8608](https://issues.apache.org/jira/browse/YARN-8608) | [UI2] No information available per application appAttempt about 'Total Outstanding Resource Requests' | Major | yarn-ui-v2 | Sumana Sathish | Akhil PB | +| [YARN-8620](https://issues.apache.org/jira/browse/YARN-8620) | [UI2] YARN Services UI new submission failures are not debuggable | Major | yarn-ui-v2 | Akhil PB | Akhil PB | +| [YARN-8615](https://issues.apache.org/jira/browse/YARN-8615) | [UI2] Resource Usage tab shows only memory related info. No info available for vcores/gpu. | Major | yarn-ui-v2 | Sumana Sathish | Akhil PB | +| [HDFS-13792](https://issues.apache.org/jira/browse/HDFS-13792) | Fix FSN read/write lock metrics name | Trivial | documentation, metrics | Chao Sun | Chao Sun | +| [YARN-8629](https://issues.apache.org/jira/browse/YARN-8629) | Container cleanup fails while trying to delete Cgroups | Critical | . | Yesha Vora | Suma Shivaprasad | +| [YARN-8407](https://issues.apache.org/jira/browse/YARN-8407) | Container launch exception in AM log should be printed in ERROR level | Major | . | Yesha Vora | Yesha Vora | +| [YARN-8399](https://issues.apache.org/jira/browse/YARN-8399) | NodeManager is giving 403 GSS exception post upgrade to 3.1 in secure mode | Major | timelineservice | Sunil Govindan | Sunil Govindan | +| [HDFS-13799](https://issues.apache.org/jira/browse/HDFS-13799) | TestEditLogTailer#testTriggersLogRollsForAllStandbyNN fails due to missing synchronization between rollEditsRpcExecutor and tailerThread shutdown | Minor | ha | Hrishikesh Gadre | Hrishikesh Gadre | +| [HDFS-13786](https://issues.apache.org/jira/browse/HDFS-13786) | EC: Display erasure coding policy for sub-directories is not working | Major | erasure-coding | Souryakanta Dwivedy | Ayush Saxena | +| [HDFS-13785](https://issues.apache.org/jira/browse/HDFS-13785) | EC: "removePolicy" is not working for built-in/system Erasure Code policies | Minor | documentation, erasure-coding | Souryakanta Dwivedy | Ayush Saxena | +| [YARN-8633](https://issues.apache.org/jira/browse/YARN-8633) | Update DataTables version in yarn-common in line with JQuery 3 upgrade | Major | yarn | Akhil PB | Akhil PB | +| [YARN-8331](https://issues.apache.org/jira/browse/YARN-8331) | Race condition in NM container launched after done | Major | . 
| Yang Wang | Pradeep Ambati | +| [YARN-8521](https://issues.apache.org/jira/browse/YARN-8521) | NPE in AllocationTagsManager when a container is removed more than once | Major | resourcemanager | Weiwei Yang | Weiwei Yang | +| [YARN-8575](https://issues.apache.org/jira/browse/YARN-8575) | Avoid committing allocation proposal to unavailable nodes in async scheduling | Major | capacityscheduler | Tao Yang | Tao Yang | +| [HDFS-13795](https://issues.apache.org/jira/browse/HDFS-13795) | Fix potential NPE in InMemoryLevelDBAliasMapServer | Major | . | Virajith Jalaparti | Virajith Jalaparti | +| [HDFS-13668](https://issues.apache.org/jira/browse/HDFS-13668) | FSPermissionChecker may throws AIOOE when check inode permission | Major | namenode | He Xiaoqiao | He Xiaoqiao | +| [HADOOP-15638](https://issues.apache.org/jira/browse/HADOOP-15638) | KMS Accept Queue Size default changed from 500 to 128 in Hadoop 3.x | Major | kms | Wei-Chiu Chuang | Wei-Chiu Chuang | +| [HDFS-13823](https://issues.apache.org/jira/browse/HDFS-13823) | NameNode UI : "Utilities -\> Browse the file system -\> open a file -\> Head the file" is not working | Major | ui | Nanda kumar | Nanda kumar | +| [HDFS-13738](https://issues.apache.org/jira/browse/HDFS-13738) | fsck -list-corruptfileblocks has infinite loop if user is not privileged. | Major | tools | Wei-Chiu Chuang | Yuen-Kuei Hsueh | +| [HDFS-13758](https://issues.apache.org/jira/browse/HDFS-13758) | DatanodeManager should throw exception if it has BlockRecoveryCommand but the block is not under construction | Major | namenode | Wei-Chiu Chuang | chencan | +| [YARN-8614](https://issues.apache.org/jira/browse/YARN-8614) | Fix few annotation typos in YarnConfiguration | Trivial | . | Sen Zhao | Sen Zhao | +| [HDFS-13819](https://issues.apache.org/jira/browse/HDFS-13819) | TestDirectoryScanner#testDirectoryScannerInFederatedCluster is flaky | Minor | hdfs | Daniel Templeton | Daniel Templeton | +| [YARN-8656](https://issues.apache.org/jira/browse/YARN-8656) | container-executor should not write cgroup tasks files for docker containers | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8474](https://issues.apache.org/jira/browse/YARN-8474) | sleeper service fails to launch with "Authentication Required" | Critical | yarn | Sumana Sathish | Billie Rinaldi | +| [HDFS-13746](https://issues.apache.org/jira/browse/HDFS-13746) | Still occasional "Should be different group" failure in TestRefreshUserMappings#testGroupMappingRefresh | Major | . | Siyao Meng | Siyao Meng | +| [YARN-8667](https://issues.apache.org/jira/browse/YARN-8667) | Cleanup symlinks when container restarted by NM to solve issue "find: File system loop detected;" for tar ball artifacts. | Critical | . | Rohith Sharma K S | Chandni Singh | +| [HDFS-10240](https://issues.apache.org/jira/browse/HDFS-10240) | Race between close/recoverLease leads to missing block | Major | . 
| zhouyingchao | Jinglun | +| [HADOOP-15655](https://issues.apache.org/jira/browse/HADOOP-15655) | Enhance KMS client retry behavior | Critical | kms | Kitti Nanasi | Kitti Nanasi | +| [YARN-8612](https://issues.apache.org/jira/browse/YARN-8612) | Fix NM Collector Service Port issue in YarnConfiguration | Major | ATSv2 | Prabha Manepalli | Prabha Manepalli | +| [HDFS-13747](https://issues.apache.org/jira/browse/HDFS-13747) | Statistic for list\_located\_status is incremented incorrectly by listStatusIterator | Minor | hdfs-client | Todd Lipcon | Antal Mihalyi | +| [HADOOP-8807](https://issues.apache.org/jira/browse/HADOOP-8807) | Update README and website to reflect HADOOP-8662 | Trivial | documentation | Eli Collins | Andras Bokor | +| [HADOOP-15674](https://issues.apache.org/jira/browse/HADOOP-15674) | Test failure TestSSLHttpServer.testExcludedCiphers with TLS\_ECDHE\_RSA\_WITH\_AES\_128\_CBC\_SHA256 cipher suite | Major | common | Gabor Bota | Szilard Nemeth | +| [YARN-8640](https://issues.apache.org/jira/browse/YARN-8640) | Restore previous state in container-executor after failure | Major | . | Jim Brennan | Jim Brennan | +| [YARN-8679](https://issues.apache.org/jira/browse/YARN-8679) | [ATSv2] If HBase cluster is down for long time, high chances that NM ContainerManager dispatcher get blocked | Major | . | Rohith Sharma K S | Wangda Tan | +| [HDFS-13772](https://issues.apache.org/jira/browse/HDFS-13772) | Erasure coding: Unnecessary NameNode Logs displaying for Enabling/Disabling Erasure coding policies which are already enabled/disabled | Trivial | erasure-coding | Souryakanta Dwivedy | Ayush Saxena | +| [HADOOP-14314](https://issues.apache.org/jira/browse/HADOOP-14314) | The OpenSolaris taxonomy link is dead in InterfaceClassification.md | Major | documentation | Daniel Templeton | Rui Gao | +| [YARN-8649](https://issues.apache.org/jira/browse/YARN-8649) | NPE in localizer hearbeat processing if a container is killed while localizing | Major | . | lujie | lujie | +| [HDFS-13805](https://issues.apache.org/jira/browse/HDFS-13805) | Journal Nodes should allow to format non-empty directories with "-force" option | Major | journal-node | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [YARN-8632](https://issues.apache.org/jira/browse/YARN-8632) | Threads in SLS quit without logging exception | Major | scheduler-load-simulator | Xianghao Lu | Xianghao Lu | +| [MAPREDUCE-6861](https://issues.apache.org/jira/browse/MAPREDUCE-6861) | Add metrics tags for ShuffleClientMetrics | Major | . | Akira Ajisaka | Zoltan Siegl | +| [YARN-8719](https://issues.apache.org/jira/browse/YARN-8719) | Typo correction for yarn configuration in OpportunisticContainers(federation) docs | Major | documentation, federation | Y. SREENIVASULU REDDY | Y. SREENIVASULU REDDY | +| [YARN-8675](https://issues.apache.org/jira/browse/YARN-8675) | Setting hostname of docker container breaks with "host" networking mode for Apps which do not run as a YARN service | Major | . 
| Yesha Vora | Suma Shivaprasad |
+| [HADOOP-15633](https://issues.apache.org/jira/browse/HADOOP-15633) | fs.TrashPolicyDefault: Can't create trash directory | Major | common | Fei Hui | Fei Hui |
+| [HDFS-13858](https://issues.apache.org/jira/browse/HDFS-13858) | RBF: Add check to have single valid argument to safemode command | Major | federation | Soumyapn | Ayush Saxena |
+| [HDFS-13837](https://issues.apache.org/jira/browse/HDFS-13837) | Enable debug log for LeaseRenewer in TestDistributedFileSystem | Major | hdfs | Shweta | Shweta |
+| [HDFS-13731](https://issues.apache.org/jira/browse/HDFS-13731) | ReencryptionUpdater fails with ConcurrentModificationException during processCheckpoints | Major | encryption | Xiao Chen | Zsolt Venczel |
+| [YARN-8723](https://issues.apache.org/jira/browse/YARN-8723) | Fix a typo in CS init error message when resource calculator is not correctly set | Minor | . | Weiwei Yang | Abhishek Modi |
+| [HADOOP-15705](https://issues.apache.org/jira/browse/HADOOP-15705) | Typo in the definition of "stable" in the interface classification | Minor | . | Daniel Templeton | Daniel Templeton |
+| [HDFS-13863](https://issues.apache.org/jira/browse/HDFS-13863) | FsDatasetImpl should log DiskOutOfSpaceException | Major | hdfs | Fei Hui | Fei Hui |
+| [HADOOP-15698](https://issues.apache.org/jira/browse/HADOOP-15698) | KMS log4j is not initialized properly at startup | Major | kms | Kitti Nanasi | Kitti Nanasi |
+| [HADOOP-15706](https://issues.apache.org/jira/browse/HADOOP-15706) | Typo in compatibility doc: SHOUD -\> SHOULD | Trivial | . | Daniel Templeton | Laszlo Kollar |
+| [HDFS-13027](https://issues.apache.org/jira/browse/HDFS-13027) | Handle possible NPEs due to deleted blocks in race condition | Major | namenode | Vinayakumar B | Vinayakumar B |
+
+
+### TESTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [MAPREDUCE-7066](https://issues.apache.org/jira/browse/MAPREDUCE-7066) | TestQueue fails on Java9 | Major | test | Takanobu Asanuma | Takanobu Asanuma |
+| [HADOOP-15313](https://issues.apache.org/jira/browse/HADOOP-15313) | TestKMS should close providers | Major | kms, test | Xiao Chen | Xiao Chen |
+| [HDFS-13129](https://issues.apache.org/jira/browse/HDFS-13129) | Add a test for DfsAdmin refreshSuperUserGroupsConfiguration | Minor | namenode | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HADOOP-14188](https://issues.apache.org/jira/browse/HADOOP-14188) | Remove the usage of org.mockito.internal.util.reflection.Whitebox | Major | test | Akira Ajisaka | Ewan Higgs |
+| [HDFS-13503](https://issues.apache.org/jira/browse/HDFS-13503) | Fix TestFsck test failures on Windows | Major | hdfs | Xiao Liang | Xiao Liang |
+| [HDFS-13542](https://issues.apache.org/jira/browse/HDFS-13542) | TestBlockManager#testNeededReplicationWhileAppending fails due to improper cluster shutdown in TestBlockManager#testBlockManagerMachinesArray on Windows | Minor | . | Anbang Hu | Anbang Hu |
+| [HDFS-13551](https://issues.apache.org/jira/browse/HDFS-13551) | TestMiniDFSCluster#testClusterSetStorageCapacity does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu |
+| [HDFS-11700](https://issues.apache.org/jira/browse/HDFS-11700) | TestHDFSServerPorts#testBackupNodePorts doesn't pass on Windows | Minor | . | Anbang Hu | Anbang Hu |
+| [HDFS-13548](https://issues.apache.org/jira/browse/HDFS-13548) | TestResolveHdfsSymlink#testFcResolveAfs fails on Windows | Minor | . 
| Anbang Hu | Anbang Hu | +| [HDFS-13567](https://issues.apache.org/jira/browse/HDFS-13567) | TestNameNodeMetrics#testGenerateEDEKTime,TestNameNodeMetrics#testResourceCheck should use a different cluster basedir | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13557](https://issues.apache.org/jira/browse/HDFS-13557) | TestDFSAdmin#testListOpenFiles fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13550](https://issues.apache.org/jira/browse/HDFS-13550) | TestDebugAdmin#testComputeMetaCommand fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13559](https://issues.apache.org/jira/browse/HDFS-13559) | TestBlockScanner does not close TestContext properly | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13570](https://issues.apache.org/jira/browse/HDFS-13570) | TestQuotaByStorageType,TestQuota,TestDFSOutputStream fail on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13558](https://issues.apache.org/jira/browse/HDFS-13558) | TestDatanodeHttpXFrame does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13554](https://issues.apache.org/jira/browse/HDFS-13554) | TestDatanodeRegistration#testForcedRegistration does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13556](https://issues.apache.org/jira/browse/HDFS-13556) | TestNestedEncryptionZones does not shut down cluster | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13560](https://issues.apache.org/jira/browse/HDFS-13560) | Insufficient system resources exist to complete the requested service for some tests on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13592](https://issues.apache.org/jira/browse/HDFS-13592) | TestNameNodePrunesMissingStorages#testNameNodePrunesUnreportedStorages does not shut down cluster properly | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13593](https://issues.apache.org/jira/browse/HDFS-13593) | TestBlockReaderLocalLegacy#testBlockReaderLocalLegacyWithAppend fails on Windows | Minor | test | Anbang Hu | Anbang Hu | +| [HDFS-13587](https://issues.apache.org/jira/browse/HDFS-13587) | TestQuorumJournalManager fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13619](https://issues.apache.org/jira/browse/HDFS-13619) | TestAuditLoggerWithCommands fails on Windows | Minor | test | Anbang Hu | Anbang Hu | +| [HDFS-13620](https://issues.apache.org/jira/browse/HDFS-13620) | Randomize the test directory path for TestHDFSFileSystemContract | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13627](https://issues.apache.org/jira/browse/HDFS-13627) | TestErasureCodingExerciseAPIs fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13591](https://issues.apache.org/jira/browse/HDFS-13591) | TestDFSShell#testSetrepLow fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13632](https://issues.apache.org/jira/browse/HDFS-13632) | Randomize baseDir for MiniJournalCluster in MiniQJMHACluster for TestDFSAdminWithHA | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13629](https://issues.apache.org/jira/browse/HDFS-13629) | Some tests in TestDiskBalancerCommand fail on Windows due to MiniDFSCluster path conflict and improper path usage | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13631](https://issues.apache.org/jira/browse/HDFS-13631) | TestDFSAdmin#testCheckNumOfBlocksInReportCommand should use a separate MiniDFSCluster path | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13651](https://issues.apache.org/jira/browse/HDFS-13651) | TestReencryptionHandler fails on Windows | Minor | . 
| Anbang Hu | Anbang Hu | +| [HDFS-13648](https://issues.apache.org/jira/browse/HDFS-13648) | Fix TestGetConf#testGetJournalNodes on Windows due to a mismatch line separator | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [MAPREDUCE-7102](https://issues.apache.org/jira/browse/MAPREDUCE-7102) | Fix TestJavaSerialization for Windows due a mismatch line separator | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [MAPREDUCE-7105](https://issues.apache.org/jira/browse/MAPREDUCE-7105) | Fix TestNativeCollectorOnlyHandler.testOnCall on Windows because of the path format | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-13652](https://issues.apache.org/jira/browse/HDFS-13652) | Randomize baseDir for MiniDFSCluster in TestBlockScanner | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13649](https://issues.apache.org/jira/browse/HDFS-13649) | Randomize baseDir for MiniDFSCluster in TestReconstructStripedFile and TestReconstructStripedFileWithRandomECPolicy | Minor | . | Anbang Hu | Anbang Hu | +| [HDFS-13650](https://issues.apache.org/jira/browse/HDFS-13650) | Randomize baseDir for MiniDFSCluster in TestDFSStripedInputStream and TestDFSStripedInputStreamWithRandomECPolicy | Minor | . | Anbang Hu | Anbang Hu | +| [HADOOP-15520](https://issues.apache.org/jira/browse/HADOOP-15520) | Add tests for various org.apache.hadoop.util classes | Minor | test, util | Arash Nabili | Arash Nabili | +| [YARN-8370](https://issues.apache.org/jira/browse/YARN-8370) | Some Node Manager tests fail on Windows due to improper path/file separator | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8422](https://issues.apache.org/jira/browse/YARN-8422) | TestAMSimulator failing with NPE | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15532](https://issues.apache.org/jira/browse/HADOOP-15532) | TestBasicDiskValidator fails with NoSuchFileException | Minor | . | Íñigo Goiri | Giovanni Matteo Fumarola | +| [HDFS-13563](https://issues.apache.org/jira/browse/HDFS-13563) | TestDFSAdminWithHA times out on Windows | Minor | . 
| Anbang Hu | Lukas Majercak |
+| [HDFS-13681](https://issues.apache.org/jira/browse/HDFS-13681) | Fix TestStartup.testNNFailToStartOnReadOnlyNNDir test failure on Windows | Major | test | Xiao Liang | Xiao Liang |
+
+
+### SUB-TASKS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-10794](https://issues.apache.org/jira/browse/HDFS-10794) | [SPS]: Provide storage policy satisfy worker at DN for co-ordinating the block storage movement work | Major | datanode, namenode | Rakesh R | Rakesh R |
+| [HDFS-10800](https://issues.apache.org/jira/browse/HDFS-10800) | [SPS]: Daemon thread in Namenode to find blocks placed in other storage than what the policy specifies | Major | namenode | Uma Maheswara Rao G | Uma Maheswara Rao G |
+| [HDFS-10801](https://issues.apache.org/jira/browse/HDFS-10801) | [SPS]: Protocol buffer changes for sending storage movement commands from NN to DN | Major | datanode, namenode | Uma Maheswara Rao G | Rakesh R |
+| [HDFS-10884](https://issues.apache.org/jira/browse/HDFS-10884) | [SPS]: Add block movement tracker to track the completion of block movement future tasks at DN | Major | datanode | Rakesh R | Rakesh R |
+| [HDFS-10954](https://issues.apache.org/jira/browse/HDFS-10954) | [SPS]: Provide mechanism to send blocks movement result back to NN from coordinator DN | Major | datanode, namenode | Rakesh R | Rakesh R |
+| [HDFS-11029](https://issues.apache.org/jira/browse/HDFS-11029) | [SPS]:Provide retry mechanism for the blocks which were failed while moving its storage at DNs | Major | namenode | Uma Maheswara Rao G | Uma Maheswara Rao G |
+| [HDFS-11068](https://issues.apache.org/jira/browse/HDFS-11068) | [SPS]: Provide unique trackID to track the block movement sends to coordinator | Major | datanode, namenode | Rakesh R | Rakesh R |
+| [HDFS-10802](https://issues.apache.org/jira/browse/HDFS-10802) | [SPS]: Add satisfyStoragePolicy API in HdfsAdmin | Major | hdfs-client | Uma Maheswara Rao G | Yuanbo Liu |
+| [HDFS-11151](https://issues.apache.org/jira/browse/HDFS-11151) | [SPS]: StoragePolicySatisfier should gracefully handle when there is no target node with the required storage type | Major | datanode, namenode | Rakesh R | Rakesh R |
+| [HDFS-10885](https://issues.apache.org/jira/browse/HDFS-10885) | [SPS]: Mover tool should not be allowed to run when Storage Policy Satisfier is on | Major | datanode, namenode | Wei Zhou | Wei Zhou |
+| [HDFS-11123](https://issues.apache.org/jira/browse/HDFS-11123) | [SPS] Make storage policy satisfier daemon work on/off dynamically | Major | datanode, namenode | Uma Maheswara Rao G | Uma Maheswara Rao G |
+| [HDFS-11032](https://issues.apache.org/jira/browse/HDFS-11032) | [SPS]: Handling of block movement failure at the coordinator datanode | Major | datanode | Rakesh R | Rakesh R |
+| [HDFS-11248](https://issues.apache.org/jira/browse/HDFS-11248) | [SPS]: Handle partial block location movements | Major | datanode, namenode | Rakesh R | Rakesh R |
+| [HDFS-11193](https://issues.apache.org/jira/browse/HDFS-11193) | [SPS]: Erasure coded files should be considered for satisfying storage policy | Major | namenode | Rakesh R | Rakesh R |
+| [HDFS-11289](https://issues.apache.org/jira/browse/HDFS-11289) | [SPS]: Make SPS movement monitor timeouts configurable | Major | namenode | Uma Maheswara Rao G | Uma Maheswara Rao G |
+| [HDFS-11293](https://issues.apache.org/jira/browse/HDFS-11293) | [SPS]: Local DN should be given preference as source node, when 
target available in same node | Critical | namenode | Yuanbo Liu | Uma Maheswara Rao G | +| [HDFS-11150](https://issues.apache.org/jira/browse/HDFS-11150) | [SPS]: Provide persistence when satisfying storage policy. | Major | datanode, namenode | Yuanbo Liu | Yuanbo Liu | +| [HDFS-11186](https://issues.apache.org/jira/browse/HDFS-11186) | [SPS]: Daemon thread of SPS should start only in Active NN | Major | datanode, namenode | Wei Zhou | Wei Zhou | +| [HDFS-11309](https://issues.apache.org/jira/browse/HDFS-11309) | [SPS]: chooseTargetTypeInSameNode should pass accurate block size to chooseStorage4Block while choosing target | Major | namenode | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-11243](https://issues.apache.org/jira/browse/HDFS-11243) | [SPS]: Add a protocol command from NN to DN for dropping the SPS work and queues | Major | datanode, namenode | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-11239](https://issues.apache.org/jira/browse/HDFS-11239) | [SPS]: Check Mover file ID lease also to determine whether Mover is running | Major | datanode, namenode | Wei Zhou | Wei Zhou | +| [HDFS-11336](https://issues.apache.org/jira/browse/HDFS-11336) | [SPS]: Remove xAttrs when movements done or SPS disabled | Major | datanode, namenode | Yuanbo Liu | Yuanbo Liu | +| [HDFS-11338](https://issues.apache.org/jira/browse/HDFS-11338) | [SPS]: Fix timeout issue in unit tests caused by longger NN down time | Major | datanode, namenode | Wei Zhou | Rakesh R | +| [HDFS-11334](https://issues.apache.org/jira/browse/HDFS-11334) | [SPS]: NN switch and rescheduling movements can lead to have more than one coordinator for same file blocks | Major | datanode, namenode | Uma Maheswara Rao G | Rakesh R | +| [HDFS-11572](https://issues.apache.org/jira/browse/HDFS-11572) | [SPS]: SPS should clean Xattrs when no blocks required to satisfy for a file | Major | namenode | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-11695](https://issues.apache.org/jira/browse/HDFS-11695) | [SPS]: Namenode failed to start while loading SPS xAttrs from the edits log. | Blocker | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-11883](https://issues.apache.org/jira/browse/HDFS-11883) | [SPS] : Handle NPE in BlockStorageMovementTracker when dropSPSWork() called | Major | datanode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-11762](https://issues.apache.org/jira/browse/HDFS-11762) | [SPS] : Empty files should be ignored in StoragePolicySatisfier. | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-11726](https://issues.apache.org/jira/browse/HDFS-11726) | [SPS] : StoragePolicySatisfier should not select same storage type as source and destination in same datanode. 
| Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-11966](https://issues.apache.org/jira/browse/HDFS-11966) | [SPS] Correct the log in BlockStorageMovementAttemptedItems#blockStorageMovementResultCheck | Minor | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-11670](https://issues.apache.org/jira/browse/HDFS-11670) | [SPS]: Add CLI command for satisfy storage policy operations | Major | datanode, namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-11965](https://issues.apache.org/jira/browse/HDFS-11965) | [SPS]: Should give chance to satisfy the low redundant blocks before removing the xattr | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-11264](https://issues.apache.org/jira/browse/HDFS-11264) | [SPS]: Double checks to ensure that SPS/Mover are not running together | Major | datanode, namenode | Wei Zhou | Rakesh R | +| [HDFS-11874](https://issues.apache.org/jira/browse/HDFS-11874) | [SPS]: Document the SPS feature | Major | documentation | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-12146](https://issues.apache.org/jira/browse/HDFS-12146) | [SPS] : Fix TestStoragePolicySatisfierWithStripedFile#testSPSWhenFileHasLowRedundancyBlocks | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-12141](https://issues.apache.org/jira/browse/HDFS-12141) | [SPS]: Fix checkstyle warnings | Major | datanode, namenode | Rakesh R | Rakesh R | +| [HDFS-12152](https://issues.apache.org/jira/browse/HDFS-12152) | [SPS]: Re-arrange StoragePolicySatisfyWorker stopping sequence to improve thread cleanup time | Minor | datanode, namenode | Rakesh R | Rakesh R | +| [HDFS-12214](https://issues.apache.org/jira/browse/HDFS-12214) | [SPS]: Fix review comments of StoragePolicySatisfier feature | Major | datanode, namenode | Rakesh R | Rakesh R | +| [HDFS-12225](https://issues.apache.org/jira/browse/HDFS-12225) | [SPS]: Optimize extended attributes for tracking SPS movements | Major | datanode, namenode | Uma Maheswara Rao G | Surendra Singh Lilhore | +| [HDFS-12291](https://issues.apache.org/jira/browse/HDFS-12291) | [SPS]: Provide a mechanism to recursively iterate and satisfy storage policy of all the files under the given dir | Major | datanode, namenode | Rakesh R | Surendra Singh Lilhore | +| [HDFS-12570](https://issues.apache.org/jira/browse/HDFS-12570) | [SPS]: Refactor Co-ordinator datanode logic to track the block storage movements | Major | datanode, namenode | Rakesh R | Rakesh R | +| [HDFS-12556](https://issues.apache.org/jira/browse/HDFS-12556) | [SPS] : Block movement analysis should be done in read lock. 
| Major | datanode, namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-12310](https://issues.apache.org/jira/browse/HDFS-12310) | [SPS]: Provide an option to track the status of in progress requests | Major | datanode, namenode | Uma Maheswara Rao G | Surendra Singh Lilhore | +| [HDFS-12790](https://issues.apache.org/jira/browse/HDFS-12790) | [SPS]: Rebasing HDFS-10285 branch after HDFS-10467, HDFS-12599 and HDFS-11968 commits | Major | datanode, namenode | Rakesh R | Rakesh R | +| [HDFS-12106](https://issues.apache.org/jira/browse/HDFS-12106) | [SPS]: Improve storage policy satisfier configurations | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-12955](https://issues.apache.org/jira/browse/HDFS-12955) | [SPS]: Move SPS classes to a separate package | Trivial | nn | Uma Maheswara Rao G | Rakesh R | +| [HDFS-12982](https://issues.apache.org/jira/browse/HDFS-12982) | [SPS]: Reduce the locking and cleanup the Namesystem access | Major | datanode, namenode | Rakesh R | Rakesh R | +| [HDFS-12911](https://issues.apache.org/jira/browse/HDFS-12911) | [SPS]: Modularize the SPS code and expose necessary interfaces for external/internal implementations. | Major | datanode, namenode | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-13025](https://issues.apache.org/jira/browse/HDFS-13025) | [SPS]: Implement a mechanism to scan the files for external SPS | Major | namenode | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-13033](https://issues.apache.org/jira/browse/HDFS-13033) | [SPS]: Implement a mechanism to do file block movements for external SPS | Major | . | Rakesh R | Rakesh R | +| [HDFS-13057](https://issues.apache.org/jira/browse/HDFS-13057) | [SPS]: Revisit configurations to make SPS service modes internal/external/none | Blocker | . | Rakesh R | Rakesh R | +| [HDFS-13075](https://issues.apache.org/jira/browse/HDFS-13075) | [SPS]: Provide External Context implementation. | Major | . | Uma Maheswara Rao G | Uma Maheswara Rao G | +| [HDFS-13050](https://issues.apache.org/jira/browse/HDFS-13050) | [SPS] : Create start/stop script to start external SPS process. | Blocker | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-13077](https://issues.apache.org/jira/browse/HDFS-13077) | [SPS]: Fix review comments of external storage policy satisfier | Major | . | Rakesh R | Rakesh R | +| [HDFS-13097](https://issues.apache.org/jira/browse/HDFS-13097) | [SPS]: Fix the branch review comments(Part1) | Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore | +| [HDFS-13110](https://issues.apache.org/jira/browse/HDFS-13110) | [SPS]: Reduce the number of APIs in NamenodeProtocol used by external satisfier | Major | . | Rakesh R | Rakesh R | +| [HDFS-13166](https://issues.apache.org/jira/browse/HDFS-13166) | [SPS]: Implement caching mechanism to keep LIVE datanodes to minimize costly getLiveDatanodeStorageReport() calls | Major | . | Rakesh R | Rakesh R | +| [HADOOP-15262](https://issues.apache.org/jira/browse/HADOOP-15262) | AliyunOSS: move files under a directory in parallel when rename a directory | Major | fs/oss | wujinhu | wujinhu | +| [YARN-8002](https://issues.apache.org/jira/browse/YARN-8002) | Support NOT\_SELF and ALL namespace types for allocation tag | Major | resourcemanager | Weiwei Yang | Weiwei Yang | +| [HDFS-13307](https://issues.apache.org/jira/browse/HDFS-13307) | RBF: Improve the use of setQuota command | Major | . 
| liuhongtong | liuhongtong | +| [YARN-7497](https://issues.apache.org/jira/browse/YARN-7497) | Add file system based scheduler configuration store | Major | yarn | Jiandan Yang | Jiandan Yang | +| [HDFS-13289](https://issues.apache.org/jira/browse/HDFS-13289) | RBF: TestConnectionManager#testCleanup() test case need correction | Minor | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [HADOOP-14758](https://issues.apache.org/jira/browse/HADOOP-14758) | S3GuardTool.prune to handle UnsupportedOperationException | Trivial | fs/s3 | Steve Loughran | Gabor Bota | +| [HDFS-13364](https://issues.apache.org/jira/browse/HDFS-13364) | RBF: Support NamenodeProtocol in the Router | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-8013](https://issues.apache.org/jira/browse/YARN-8013) | Support application tags when defining application namespaces for placement constraints | Major | . | Weiwei Yang | Weiwei Yang | +| [YARN-7946](https://issues.apache.org/jira/browse/YARN-7946) | Update TimelineServerV2 doc as per YARN-7919 | Major | . | Rohith Sharma K S | Haibo Chen | +| [YARN-6936](https://issues.apache.org/jira/browse/YARN-6936) | [Atsv2] Retrospect storing entities into sub application table from client perspective | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13353](https://issues.apache.org/jira/browse/HDFS-13353) | RBF: TestRouterWebHDFSContractCreate failed | Major | test | Takanobu Asanuma | Takanobu Asanuma | +| [HADOOP-15367](https://issues.apache.org/jira/browse/HADOOP-15367) | Update the initialization code in the docker hadoop-runner baseimage | Major | . | Elek, Marton | Elek, Marton | +| [HADOOP-14759](https://issues.apache.org/jira/browse/HADOOP-14759) | S3GuardTool prune to prune specific bucket entries | Minor | fs/s3 | Steve Loughran | Gabor Bota | +| [YARN-8107](https://issues.apache.org/jira/browse/YARN-8107) | Give an informative message when incorrect format is used in ATSv2 filter attributes | Major | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8048](https://issues.apache.org/jira/browse/YARN-8048) | Support auto-spawning of admin configured services during bootstrap of rm/apiserver | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [HDFS-13402](https://issues.apache.org/jira/browse/HDFS-13402) | RBF: Fix java doc for StateStoreFileSystemImpl | Minor | hdfs | Yiran Wu | Yiran Wu | +| [YARN-7574](https://issues.apache.org/jira/browse/YARN-7574) | Add support for Node Labels on Auto Created Leaf Queue Template | Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad | +| [HDFS-13380](https://issues.apache.org/jira/browse/HDFS-13380) | RBF: mv/rm fail after the directory exceeded the quota limit | Major | . | Weiwei Wu | Yiqun Lin | +| [YARN-7667](https://issues.apache.org/jira/browse/YARN-7667) | Docker Stop grace period should be configurable | Major | yarn | Eric Badger | Eric Badger | +| [HDFS-13410](https://issues.apache.org/jira/browse/HDFS-13410) | RBF: Support federation with no subclusters | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13384](https://issues.apache.org/jira/browse/HDFS-13384) | RBF: Improve timeout RPC call mechanism | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-15376](https://issues.apache.org/jira/browse/HADOOP-15376) | Remove double semi colons on imports that make Clover fall over. | Minor | . | Ewan Higgs | Ewan Higgs | +| [YARN-7973](https://issues.apache.org/jira/browse/YARN-7973) | Support ContainerRelaunch for Docker containers | Major | . 
| Shane Kumpf | Shane Kumpf | +| [YARN-7941](https://issues.apache.org/jira/browse/YARN-7941) | Transitive dependencies for component are not resolved | Major | . | Rohith Sharma K S | Billie Rinaldi | +| [HDFS-13045](https://issues.apache.org/jira/browse/HDFS-13045) | RBF: Improve error message returned from subcluster | Minor | . | Wei Yan | Íñigo Goiri | +| [HDFS-13428](https://issues.apache.org/jira/browse/HDFS-13428) | RBF: Remove LinkedList From StateStoreFileImpl.java | Trivial | federation | BELUGA BEHR | BELUGA BEHR | +| [HDFS-13386](https://issues.apache.org/jira/browse/HDFS-13386) | RBF: Wrong date information in list file(-ls) result | Minor | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-7221](https://issues.apache.org/jira/browse/YARN-7221) | Add security check for privileged docker container | Major | security | Eric Yang | Eric Yang | +| [HADOOP-15350](https://issues.apache.org/jira/browse/HADOOP-15350) | [JDK10] Update maven plugin tools to fix compile error in hadoop-maven-plugins module | Major | build | Akira Ajisaka | Takanobu Asanuma | +| [YARN-7931](https://issues.apache.org/jira/browse/YARN-7931) | [atsv2 read acls] Include domain table creation as part of schema creator | Major | . | Vrushali C | Vrushali C | +| [YARN-7936](https://issues.apache.org/jira/browse/YARN-7936) | Add default service AM Xmx | Major | . | Jian He | Jian He | +| [YARN-8018](https://issues.apache.org/jira/browse/YARN-8018) | Yarn Service Upgrade: Add support for initiating service upgrade | Major | . | Chandni Singh | Chandni Singh | +| [HADOOP-14999](https://issues.apache.org/jira/browse/HADOOP-14999) | AliyunOSS: provide one asynchronous multi-part based uploading mechanism | Major | fs/oss | Genmao Yu | Genmao Yu | +| [YARN-7142](https://issues.apache.org/jira/browse/YARN-7142) | Support placement policy in yarn native services | Major | yarn-native-services | Billie Rinaldi | Gour Saha | +| [YARN-8138](https://issues.apache.org/jira/browse/YARN-8138) | Add unit test to validate queue priority preemption works under node partition. | Minor | . | Charan Hebri | Zian Chen | +| [YARN-8060](https://issues.apache.org/jira/browse/YARN-8060) | Create default readiness check for service components | Major | yarn-native-services | Billie Rinaldi | Billie Rinaldi | +| [HDFS-13435](https://issues.apache.org/jira/browse/HDFS-13435) | RBF: Improve the error loggings for printing the stack trace | Major | . | Yiqun Lin | Yiqun Lin | +| [YARN-8126](https://issues.apache.org/jira/browse/YARN-8126) | Support auto-spawning of admin configured services during bootstrap of RM | Major | . | Rohith Sharma K S | Rohith Sharma K S | +| [YARN-7996](https://issues.apache.org/jira/browse/YARN-7996) | Allow user supplied Docker client configurations with YARN native services | Major | . | Shane Kumpf | Shane Kumpf | +| [YARN-8134](https://issues.apache.org/jira/browse/YARN-8134) | Support specifying node resources in SLS | Major | . | Abhishek Modi | Abhishek Modi | +| [HDFS-13466](https://issues.apache.org/jira/browse/HDFS-13466) | RBF: Add more router-related information to the UI | Minor | . | Wei Yan | Wei Yan | +| [YARN-5888](https://issues.apache.org/jira/browse/YARN-5888) | [UI2] Improve unit tests for new YARN UI | Minor | yarn-ui-v2 | Akhil PB | Akhil PB | +| [HDFS-13453](https://issues.apache.org/jira/browse/HDFS-13453) | RBF: getMountPointDates should fetch latest subdir time/date when parent dir is not present but /parent/child dirs are present in mount table | Major | . 
| Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-8186](https://issues.apache.org/jira/browse/YARN-8186) | [Router] Federation: routing getAppState REST invocations transparently to multiple RMs | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8137](https://issues.apache.org/jira/browse/YARN-8137) | Parallelize node addition in SLS | Major | . | Abhishek Modi | Abhishek Modi | +| [YARN-8111](https://issues.apache.org/jira/browse/YARN-8111) | Simplify PlacementConstraints API by removing allocationTagToIntraApp | Minor | resourcemanager | Weiwei Yang | Weiwei Yang | +| [YARN-8064](https://issues.apache.org/jira/browse/YARN-8064) | Docker ".cmd" files should not be put in hadoop.tmp.dir | Critical | . | Eric Badger | Eric Badger | +| [HDFS-13478](https://issues.apache.org/jira/browse/HDFS-13478) | RBF: Disabled Nameservice store API | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-8177](https://issues.apache.org/jira/browse/YARN-8177) | Documentation changes for auto creation of Leaf Queues with node label | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [HADOOP-14756](https://issues.apache.org/jira/browse/HADOOP-14756) | S3Guard: expose capability query in MetadataStore and add tests of authoritative mode | Major | fs/s3 | Steve Loughran | Gabor Bota | +| [HDFS-13490](https://issues.apache.org/jira/browse/HDFS-13490) | RBF: Fix setSafeMode in the Router | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HDFS-13484](https://issues.apache.org/jira/browse/HDFS-13484) | RBF: Disable Nameservices from the federation | Major | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-15404](https://issues.apache.org/jira/browse/HADOOP-15404) | Remove multibyte characters in DataNodeUsageReportUtil | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-7939](https://issues.apache.org/jira/browse/YARN-7939) | Yarn Service Upgrade: add support to upgrade a component instance | Major | . | Chandni Singh | Chandni Singh | +| [HDFS-13326](https://issues.apache.org/jira/browse/HDFS-13326) | RBF: Improve the interfaces to modify and view mount tables | Minor | . | Wei Yan | Gang Li | +| [YARN-8122](https://issues.apache.org/jira/browse/YARN-8122) | Component health threshold monitor | Major | . | Gour Saha | Gour Saha | +| [HDFS-13499](https://issues.apache.org/jira/browse/HDFS-13499) | RBF: Show disabled name services in the UI | Minor | . | Íñigo Goiri | Íñigo Goiri | +| [HADOOP-13756](https://issues.apache.org/jira/browse/HADOOP-13756) | LocalMetadataStore#put(DirListingMetadata) should also put file metadata into fileHash. | Major | fs/s3, test | Lei (Eddy) Xu | Gabor Bota | +| [YARN-8215](https://issues.apache.org/jira/browse/YARN-8215) | ATS v2 returns invalid YARN\_CONTAINER\_ALLOCATED\_HOST\_HTTP\_ADDRESS from NM | Critical | ATSv2 | Yesha Vora | Rohith Sharma K S | +| [YARN-8152](https://issues.apache.org/jira/browse/YARN-8152) | Add chart in SLS to illustrate the throughput of the scheduler | Major | scheduler-load-simulator | Weiwei Yang | Tao Yang | +| [YARN-8204](https://issues.apache.org/jira/browse/YARN-8204) | Yarn Service Upgrade: Add a flag to disable upgrade | Major | . | Chandni Singh | Chandni Singh | +| [YARN-7781](https://issues.apache.org/jira/browse/YARN-7781) | Update YARN-Services-Examples.md to be in sync with the latest code | Major | . | Gour Saha | Gour Saha | +| [HDFS-13508](https://issues.apache.org/jira/browse/HDFS-13508) | RBF: Normalize paths (automatically) when adding, updating, removing or listing mount table entries | Minor | . 
| Ekanth Sethuramalingam | Ekanth Sethuramalingam | +| [HDFS-13434](https://issues.apache.org/jira/browse/HDFS-13434) | RBF: Fix dead links in RBF document | Major | documentation | Akira Ajisaka | Chetna Chaudhari | +| [HDFS-13165](https://issues.apache.org/jira/browse/HDFS-13165) | [SPS]: Collects successfully moved block details via IBR | Major | . | Rakesh R | Rakesh R | +| [YARN-8195](https://issues.apache.org/jira/browse/YARN-8195) | Fix constraint cardinality check in the presence of multiple target allocation tags | Critical | . | Weiwei Yang | Weiwei Yang | +| [HADOOP-15239](https://issues.apache.org/jira/browse/HADOOP-15239) | S3ABlockOutputStream.flush() be no-op when stream closed | Trivial | fs/s3 | Steve Loughran | Gabor Bota | +| [YARN-8228](https://issues.apache.org/jira/browse/YARN-8228) | Docker does not support hostnames greater than 64 characters | Critical | yarn-native-services | Yesha Vora | Shane Kumpf | +| [YARN-8212](https://issues.apache.org/jira/browse/YARN-8212) | Pending backlog for async allocation threads should be configurable | Major | . | Weiwei Yang | Tao Yang | +| [YARN-2674](https://issues.apache.org/jira/browse/YARN-2674) | Distributed shell AM may re-launch containers if RM work preserving restart happens | Major | applications, resourcemanager | Chun Chen | Shane Kumpf | +| [HDFS-13488](https://issues.apache.org/jira/browse/HDFS-13488) | RBF: Reject requests when a Router is overloaded | Major | . | Íñigo Goiri | Íñigo Goiri | +| [YARN-8113](https://issues.apache.org/jira/browse/YARN-8113) | Update placement constraints doc with application namespaces and inter-app constraints | Major | documentation | Weiwei Yang | Weiwei Yang | +| [YARN-8194](https://issues.apache.org/jira/browse/YARN-8194) | Exception when reinitializing a container using LinuxContainerExecutor | Blocker | . | Chandni Singh | Chandni Singh | +| [YARN-8151](https://issues.apache.org/jira/browse/YARN-8151) | Yarn RM Epoch should wrap around | Major | . | Young Chen | Young Chen | +| [YARN-7961](https://issues.apache.org/jira/browse/YARN-7961) | Improve status response when yarn application is destroyed | Major | yarn-native-services | Yesha Vora | Gour Saha | +| [HDFS-13525](https://issues.apache.org/jira/browse/HDFS-13525) | RBF: Add unit test TestStateStoreDisabledNameservice | Major | . | Yiqun Lin | Yiqun Lin | +| [HADOOP-15444](https://issues.apache.org/jira/browse/HADOOP-15444) | ITestS3GuardToolDynamo should only run with -Ddynamo | Major | . | Aaron Fabbri | Aaron Fabbri | +| [YARN-5151](https://issues.apache.org/jira/browse/YARN-5151) | [UI2] Support kill application from new YARN UI | Major | . | Wangda Tan | Gergely Novák | +| [YARN-8253](https://issues.apache.org/jira/browse/YARN-8253) | HTTPS Ats v2 api call fails with "bad HTTP parsed" | Critical | ATSv2 | Yesha Vora | Charan Hebri | +| [YARN-8207](https://issues.apache.org/jira/browse/YARN-8207) | Docker container launch use popen have risk of shell expansion | Blocker | yarn-native-services | Eric Yang | Eric Yang | +| [HADOOP-13649](https://issues.apache.org/jira/browse/HADOOP-13649) | s3guard: implement time-based (TTL) expiry for LocalMetadataStore | Minor | fs/s3 | Aaron Fabbri | Gabor Bota | +| [HADOOP-15420](https://issues.apache.org/jira/browse/HADOOP-15420) | s3guard ITestS3GuardToolLocal failures in diff tests | Minor | . | Aaron Fabbri | Gabor Bota | +| [YARN-8261](https://issues.apache.org/jira/browse/YARN-8261) | Docker container launch fails due to .cmd file creation failure | Blocker | . 
| Eric Badger | Jason Lowe | +| [HADOOP-15454](https://issues.apache.org/jira/browse/HADOOP-15454) | TestRollingFileSystemSinkWithLocal fails on Windows | Major | test | Xiao Liang | Xiao Liang | +| [HDFS-13346](https://issues.apache.org/jira/browse/HDFS-13346) | RBF: Fix synchronization of router quota and nameservice quota | Major | . | liuhongtong | Yiqun Lin | +| [YARN-8243](https://issues.apache.org/jira/browse/YARN-8243) | Flex down should remove instance with largest component instance ID first | Critical | yarn-native-services | Gour Saha | Gour Saha | +| [YARN-7654](https://issues.apache.org/jira/browse/YARN-7654) | Support ENTRY\_POINT for docker container | Blocker | yarn | Eric Yang | Eric Yang | +| [YARN-8247](https://issues.apache.org/jira/browse/YARN-8247) | Incorrect HTTP status code returned by ATSv2 for non-whitelisted users | Critical | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8130](https://issues.apache.org/jira/browse/YARN-8130) | Race condition when container events are published for KILLED applications | Major | ATSv2 | Charan Hebri | Rohith Sharma K S | +| [YARN-8081](https://issues.apache.org/jira/browse/YARN-8081) | Yarn Service Upgrade: Add support to upgrade a component | Major | . | Chandni Singh | Chandni Singh | +| [YARN-8284](https://issues.apache.org/jira/browse/YARN-8284) | get\_docker\_command refactoring | Minor | . | Jason Lowe | Eric Badger | +| [YARN-7933](https://issues.apache.org/jira/browse/YARN-7933) | [atsv2 read acls] Add TimelineWriter#writeDomain | Major | . | Vrushali C | Rohith Sharma K S | +| [YARN-7900](https://issues.apache.org/jira/browse/YARN-7900) | [AMRMProxy] AMRMClientRelayer for stateful FederationInterceptor | Major | . | Botong Huang | Botong Huang | +| [YARN-8206](https://issues.apache.org/jira/browse/YARN-8206) | Sending a kill does not immediately kill docker containers | Major | . | Eric Badger | Eric Badger | +| [YARN-7960](https://issues.apache.org/jira/browse/YARN-7960) | Add no-new-privileges flag to docker run | Major | . | Eric Badger | Eric Badger | +| [YARN-8285](https://issues.apache.org/jira/browse/YARN-8285) | Remove unused environment variables from the Docker runtime | Trivial | . | Shane Kumpf | Eric Badger | +| [YARN-4599](https://issues.apache.org/jira/browse/YARN-4599) | Set OOM control for memory cgroups | Major | nodemanager | Karthik Kambatla | Miklos Szegedi | +| [YARN-7530](https://issues.apache.org/jira/browse/YARN-7530) | hadoop-yarn-services-api should be part of hadoop-yarn-services | Blocker | yarn-native-services | Eric Yang | Chandni Singh | +| [YARN-6919](https://issues.apache.org/jira/browse/YARN-6919) | Add default volume mount list | Major | yarn | Eric Badger | Eric Badger | +| [HADOOP-15494](https://issues.apache.org/jira/browse/HADOOP-15494) | TestRawLocalFileSystemContract fails on Windows | Minor | test | Anbang Hu | Anbang Hu | +| [HADOOP-15498](https://issues.apache.org/jira/browse/HADOOP-15498) | TestHadoopArchiveLogs (#testGenerateScript, #testPrepareWorkingDir) fails on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [HADOOP-15497](https://issues.apache.org/jira/browse/HADOOP-15497) | TestTrash should use proper test path to avoid failing on Windows | Minor | . | Anbang Hu | Anbang Hu | +| [YARN-8329](https://issues.apache.org/jira/browse/YARN-8329) | Docker client configuration can still be set incorrectly | Major | . 
| Shane Kumpf | Shane Kumpf | +| [HADOOP-14946](https://issues.apache.org/jira/browse/HADOOP-14946) | S3Guard testPruneCommandCLI can fail | Major | fs/s3 | Steve Loughran | Gabor Bota | +| [HADOOP-15480](https://issues.apache.org/jira/browse/HADOOP-15480) | AbstractS3GuardToolTestBase.testDiffCommand fails when using dynamo | Major | fs/s3 | Gabor Bota | Gabor Bota | +| [HDFS-12978](https://issues.apache.org/jira/browse/HDFS-12978) | Fine-grained locking while consuming journal stream. | Major | namenode | Konstantin Shvachko | Konstantin Shvachko | +| [YARN-8384](https://issues.apache.org/jira/browse/YARN-8384) | stdout.txt, stderr.txt logs of a launched docker container is coming with primary group of submit user instead of hadoop | Critical | yarn-native-services | Sunil Govindan | Eric Yang | +| [YARN-8349](https://issues.apache.org/jira/browse/YARN-8349) | Remove YARN registry entries when a service is killed by the RM | Critical | yarn-native-services | Shane Kumpf | Billie Rinaldi | +| [HDFS-13637](https://issues.apache.org/jira/browse/HDFS-13637) | RBF: Router fails when threadIndex (in ConnectionPool) wraps around Integer.MIN\_VALUE | Critical | federation | CR Hota | CR Hota | +| [YARN-8342](https://issues.apache.org/jira/browse/YARN-8342) | Using docker image from a non-privileged registry, the launch\_command is not honored | Critical | . | Wangda Tan | Eric Yang | +| [HDFS-13281](https://issues.apache.org/jira/browse/HDFS-13281) | Namenode#createFile should be /.reserved/raw/ aware. | Critical | encryption | Rushabh S Shah | Rushabh S Shah | +| [YARN-4677](https://issues.apache.org/jira/browse/YARN-4677) | RMNodeResourceUpdateEvent update from scheduler can lead to race condition | Major | graceful, resourcemanager, scheduler | Brook Zhou | Wilfred Spiegelenburg | +| [HADOOP-15137](https://issues.apache.org/jira/browse/HADOOP-15137) | ClassNotFoundException: org.apache.hadoop.yarn.server.api.DistributedSchedulingAMProtocol when using hadoop-client-minicluster | Major | . | Jeff Zhang | Bharat Viswanadham | +| [HADOOP-15514](https://issues.apache.org/jira/browse/HADOOP-15514) | NoClassDefFoundError for TimelineCollectorManager when starting MiniYARNCluster | Major | . | Jeff Zhang | Rohith Sharma K S | +| [HADOOP-15513](https://issues.apache.org/jira/browse/HADOOP-15513) | Add additional test cases to cover some corner cases for FileUtil#symlink | Major | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15516](https://issues.apache.org/jira/browse/HADOOP-15516) | Add test cases to cover FileUtil#readLink | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks | Minor | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [YARN-6931](https://issues.apache.org/jira/browse/YARN-6931) | Make the aggregation interval in AppLevelTimelineCollector configurable | Minor | timelineserver | Haibo Chen | Abhishek Modi | +| [HADOOP-15529](https://issues.apache.org/jira/browse/HADOOP-15529) | ContainerLaunch#testInvalidEnvVariableSubstitutionType is not supported in Windows | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [YARN-8411](https://issues.apache.org/jira/browse/YARN-8411) | Enable stopped system services to be started during RM start | Critical | . 
| Billie Rinaldi | Billie Rinaldi | +| [YARN-8259](https://issues.apache.org/jira/browse/YARN-8259) | Revisit liveliness checks for Docker containers | Blocker | . | Shane Kumpf | Shane Kumpf | +| [HADOOP-15537](https://issues.apache.org/jira/browse/HADOOP-15537) | Clean up ContainerLaunch and ContainerExecutor pre-HADOOP-15528 | Minor | . | Giovanni Matteo Fumarola | Giovanni Matteo Fumarola | +| [HDFS-13186](https://issues.apache.org/jira/browse/HDFS-13186) | [PROVIDED Phase 2] Multipart Uploader API | Major | . | Ewan Higgs | Ewan Higgs | +| [HADOOP-15533](https://issues.apache.org/jira/browse/HADOOP-15533) | Make WASB listStatus messages consistent | Trivial | fs/azure | Esfandiar Manii | Esfandiar Manii | +| [HADOOP-14918](https://issues.apache.org/jira/browse/HADOOP-14918) | Remove the Local Dynamo DB test option | Major | fs/s3 | Steve Loughran | Gabor Bota | +| [HADOOP-14396](https://issues.apache.org/jira/browse/HADOOP-14396) | Add builder interface to FileContext | Major | fs | Lei (Eddy) Xu | Lei (Eddy) Xu | +| [HADOOP-15458](https://issues.apache.org/jira/browse/HADOOP-15458) | TestLocalFileSystem#testFSOutputStreamBuilder fails on Windows | Minor | test | Xiao Liang | Xiao Liang | +| [HADOOP-15416](https://issues.apache.org/jira/browse/HADOOP-15416) | s3guard diff assert failure if source path not found | Minor | fs/s3 | Steve Loughran | Gabor Bota | +| [HADOOP-15423](https://issues.apache.org/jira/browse/HADOOP-15423) | Merge fileCache and dirCache into one single cache in LocalMetadataStore | Minor | . | Gabor Bota | Gabor Bota | +| [YARN-8465](https://issues.apache.org/jira/browse/YARN-8465) | Dshell docker container gets marked as lost after NM restart | Major | yarn-native-services | Yesha Vora | Shane Kumpf | +| [YARN-8485](https://issues.apache.org/jira/browse/YARN-8485) | Priviledged container app launch is failing intermittently | Major | yarn-native-services | Yesha Vora | Eric Yang | +| [HDFS-13381](https://issues.apache.org/jira/browse/HDFS-13381) | [SPS]: Use DFSUtilClient#makePathFromFileId() to prepare satisfier file path | Major | . | Rakesh R | Rakesh R | +| [HADOOP-15215](https://issues.apache.org/jira/browse/HADOOP-15215) | s3guard set-capacity command to fail on read/write of 0 | Minor | fs/s3 | Steve Loughran | Gabor Bota | +| [HDFS-13528](https://issues.apache.org/jira/browse/HDFS-13528) | RBF: If a directory exceeds quota limit then quota usage is not refreshed for other mount entries | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-7556](https://issues.apache.org/jira/browse/YARN-7556) | Fair scheduler configuration should allow resource types in the minResources and maxResources properties | Critical | fairscheduler | Daniel Templeton | Szilard Nemeth | +| [HDFS-13710](https://issues.apache.org/jira/browse/HDFS-13710) | RBF: setQuota and getQuotaUsage should check the dfs.federation.router.quota.enable | Major | federation, hdfs | yanghuafeng | yanghuafeng | +| [YARN-7899](https://issues.apache.org/jira/browse/YARN-7899) | [AMRMProxy] Stateful FederationInterceptor for pending requests | Major | . | Botong Huang | Botong Huang | +| [HDFS-13726](https://issues.apache.org/jira/browse/HDFS-13726) | RBF: Fix RBF configuration links | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [HDFS-13475](https://issues.apache.org/jira/browse/HDFS-13475) | RBF: Admin cannot enforce Router enter SafeMode | Major | . 
| Wei Yan | Chao Sun | +| [YARN-8299](https://issues.apache.org/jira/browse/YARN-8299) | Yarn Service Upgrade: Add GET APIs that returns instances matching query params | Major | . | Chandni Singh | Chandni Singh | +| [HDFS-13733](https://issues.apache.org/jira/browse/HDFS-13733) | RBF: Add Web UI configurations and descriptions to RBF document | Minor | documentation | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-6995](https://issues.apache.org/jira/browse/YARN-6995) | Improve use of ResourceNotFoundException in resource types code | Minor | nodemanager, resourcemanager | Daniel Templeton | Szilard Nemeth | +| [HDFS-13743](https://issues.apache.org/jira/browse/HDFS-13743) | RBF: Router throws NullPointerException due to the invalid initialization of MountTableResolver | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8301](https://issues.apache.org/jira/browse/YARN-8301) | Yarn Service Upgrade: Add documentation | Critical | . | Chandni Singh | Chandni Singh | +| [HDFS-13076](https://issues.apache.org/jira/browse/HDFS-13076) | [SPS]: Cleanup work for HDFS-10285 | Major | . | Rakesh R | Rakesh R | +| [HDFS-13583](https://issues.apache.org/jira/browse/HDFS-13583) | RBF: Router admin clrQuota is not synchronized with nameservice | Major | . | Dibyendu Karmakar | Dibyendu Karmakar | +| [YARN-8546](https://issues.apache.org/jira/browse/YARN-8546) | Resource leak caused by a reserved container being released more than once under async scheduling | Major | capacity scheduler | Weiwei Yang | Tao Yang | +| [YARN-8175](https://issues.apache.org/jira/browse/YARN-8175) | Add support for Node Labels in SLS | Major | . | Abhishek Modi | Abhishek Modi | +| [HDFS-13084](https://issues.apache.org/jira/browse/HDFS-13084) | [SPS]: Fix the branch review comments | Major | . | Uma Maheswara Rao G | Rakesh R | +| [YARN-8263](https://issues.apache.org/jira/browse/YARN-8263) | DockerClient still touches hadoop.tmp.dir | Minor | . | Jason Lowe | Craig Condit | +| [YARN-7159](https://issues.apache.org/jira/browse/YARN-7159) | Normalize unit of resource objects in RM to avoid unit conversion in critical path | Critical | nodemanager, resourcemanager | Wangda Tan | Manikandan R | +| [YARN-8287](https://issues.apache.org/jira/browse/YARN-8287) | Update documentation and yarn-default related to the Docker runtime | Minor | . | Shane Kumpf | Craig Condit | +| [YARN-8624](https://issues.apache.org/jira/browse/YARN-8624) | Cleanup ENTRYPOINT documentation | Minor | . | Craig Condit | Craig Condit | +| [YARN-7089](https://issues.apache.org/jira/browse/YARN-7089) | Mark the log-aggregation-controller APIs as public | Major | . | Xuan Gong | Zian Chen | +| [HADOOP-15400](https://issues.apache.org/jira/browse/HADOOP-15400) | Improve S3Guard documentation on Authoritative Mode implementation | Minor | fs/s3 | Aaron Fabbri | Gabor Bota | +| [YARN-8136](https://issues.apache.org/jira/browse/YARN-8136) | Add version attribute to site doc examples and quickstart | Major | site | Gour Saha | Eric Yang | +| [YARN-8588](https://issues.apache.org/jira/browse/YARN-8588) | Logging improvements for better debuggability | Major | . | Suma Shivaprasad | Suma Shivaprasad | +| [YARN-8520](https://issues.apache.org/jira/browse/YARN-8520) | Document best practice for user management | Major | documentation, yarn | Eric Yang | Eric Yang | +| [HDFS-13808](https://issues.apache.org/jira/browse/HDFS-13808) | [SPS]: Remove unwanted FSNamesystem #isFileOpenedForWrite() and #getFileInfo() function | Minor | . 
| Rakesh R | Rakesh R | +| [HADOOP-15576](https://issues.apache.org/jira/browse/HADOOP-15576) | S3A Multipart Uploader to work with S3Guard and encryption | Blocker | fs/s3 | Steve Loughran | Ewan Higgs | +| [YARN-8561](https://issues.apache.org/jira/browse/YARN-8561) | [Submarine] Initial implementation: Training job submission and job history retrieval | Major | . | Wangda Tan | Wangda Tan | +| [HADOOP-15645](https://issues.apache.org/jira/browse/HADOOP-15645) | ITestS3GuardToolLocal.testDiffCommand fails if bucket has per-bucket binding to DDB | Blocker | fs/s3 | Steve Loughran | Steve Loughran | +| [YARN-7417](https://issues.apache.org/jira/browse/YARN-7417) | re-factory IndexedFileAggregatedLogsBlock and TFileAggregatedLogsBlock to remove duplicate codes | Major | . | Xuan Gong | Zian Chen | +| [YARN-8160](https://issues.apache.org/jira/browse/YARN-8160) | Yarn Service Upgrade: Support upgrade of service that use docker containers | Major | . | Chandni Singh | Chandni Singh | +| [HADOOP-15552](https://issues.apache.org/jira/browse/HADOOP-15552) | Move logging APIs over to slf4j in hadoop-tools - Part2 | Major | . | Giovanni Matteo Fumarola | Ian Pickering | +| [HADOOP-15642](https://issues.apache.org/jira/browse/HADOOP-15642) | Update aws-sdk version to 1.11.375 | Blocker | build, fs/s3 | Steve Loughran | Steve Loughran | +| [HADOOP-14154](https://issues.apache.org/jira/browse/HADOOP-14154) | Persist isAuthoritative bit in DynamoDBMetaStore (authoritative mode support) | Minor | fs/s3 | Rajesh Balamohan | Gabor Bota | +| [HADOOP-14624](https://issues.apache.org/jira/browse/HADOOP-14624) | Add GenericTestUtils.DelayAnswer that accept slf4j logger API | Major | . | Wenxin He | Wenxin He | +| [HDFS-13750](https://issues.apache.org/jira/browse/HDFS-13750) | RBF: Router ID in RouterRpcClient is always null | Major | . | Takanobu Asanuma | Takanobu Asanuma | +| [YARN-8129](https://issues.apache.org/jira/browse/YARN-8129) | Improve error message for invalid value in fields attribute | Minor | ATSv2 | Charan Hebri | Abhishek Modi | +| [YARN-7494](https://issues.apache.org/jira/browse/YARN-7494) | Add muti-node lookup mechanism and pluggable nodes sorting policies to optimize placement decision | Major | capacity scheduler | Sunil Govindan | Sunil Govindan | +| [YARN-8298](https://issues.apache.org/jira/browse/YARN-8298) | Yarn Service Upgrade: Support express upgrade of a service | Major | . | Chandni Singh | Chandni Singh | +| [YARN-8015](https://issues.apache.org/jira/browse/YARN-8015) | Support all types of placement constraint support for Capacity Scheduler | Critical | capacity scheduler | Weiwei Yang | Weiwei Yang | +| [HDFS-13848](https://issues.apache.org/jira/browse/HDFS-13848) | Refactor NameNode failover proxy providers | Major | ha, hdfs-client | Konstantin Shvachko | Konstantin Shvachko | +| [YARN-8705](https://issues.apache.org/jira/browse/YARN-8705) | Refactor the UAM heartbeat thread in preparation for YARN-8696 | Major | . | Botong Huang | Botong Huang | +| [HADOOP-15699](https://issues.apache.org/jira/browse/HADOOP-15699) | Fix some of testContainerManager failures in Windows | Major | . | Botong Huang | Botong Huang | +| [YARN-8697](https://issues.apache.org/jira/browse/YARN-8697) | LocalityMulticastAMRMProxyPolicy should fallback to random sub-cluster when cannot resolve resource | Major | . | Botong Huang | Botong Huang | +| [HDFS-13634](https://issues.apache.org/jira/browse/HDFS-13634) | RBF: Configurable value in xml for async connection request queue size. 
| Major | federation | CR Hota | CR Hota | +| [YARN-8642](https://issues.apache.org/jira/browse/YARN-8642) | Add support for tmpfs mounts with the Docker runtime | Major | . | Shane Kumpf | Craig Condit | +| [HADOOP-15667](https://issues.apache.org/jira/browse/HADOOP-15667) | FileSystemMultipartUploader should verify that UploadHandle has non-0 length | Major | fs/s3 | Ewan Higgs | Ewan Higgs | + + +### OTHER: + +| JIRA | Summary | Priority | Component | Reporter | Contributor | +|:---- |:---- | :--- |:---- |:---- |:---- | +| [YARN-8016](https://issues.apache.org/jira/browse/YARN-8016) | Refine PlacementRule interface and add a app-name queue mapping rule as an example | Major | . | Zian Chen | Zian Chen | +| [YARN-8091](https://issues.apache.org/jira/browse/YARN-8091) | Revisit checkUserAccessToQueue RM REST API | Critical | . | Wangda Tan | Wangda Tan | +| [YARN-8274](https://issues.apache.org/jira/browse/YARN-8274) | Docker command error during container relaunch | Critical | . | Billie Rinaldi | Jason Lowe | +| [YARN-8080](https://issues.apache.org/jira/browse/YARN-8080) | YARN native service should support component restart policy | Critical | . | Wangda Tan | Suma Shivaprasad | +| [HADOOP-15482](https://issues.apache.org/jira/browse/HADOOP-15482) | Upgrade jackson-databind to version 2.9.5 | Major | . | Lokesh Jain | Lokesh Jain | +| [YARN-7668](https://issues.apache.org/jira/browse/YARN-7668) | Remove unused variables from ContainerLocalizer | Trivial | . | Ray Chiang | Dedunu Dhananjaya | +| [YARN-8412](https://issues.apache.org/jira/browse/YARN-8412) | Move ResourceRequest.clone logic everywhere into a proper API | Minor | . | Botong Huang | Botong Huang | +| [HADOOP-15483](https://issues.apache.org/jira/browse/HADOOP-15483) | Upgrade jquery to version 3.3.1 | Major | . | Lokesh Jain | Lokesh Jain | +| [YARN-8506](https://issues.apache.org/jira/browse/YARN-8506) | Make GetApplicationsRequestPBImpl thread safe | Critical | . | Wangda Tan | Wangda Tan | +| [YARN-8545](https://issues.apache.org/jira/browse/YARN-8545) | YARN native service should return container if launch failed | Critical | . | Wangda Tan | Chandni Singh | +| [HDFS-11610](https://issues.apache.org/jira/browse/HDFS-11610) | sun.net.spi.nameservice.NameService has moved to a new location | Major | . | Akira Ajisaka | Akira Ajisaka | +| [HDFS-13788](https://issues.apache.org/jira/browse/HDFS-13788) | Update EC documentation about rack fault tolerance | Major | documentation, erasure-coding | Xiao Chen | Kitti Nanasi | +| [YARN-8488](https://issues.apache.org/jira/browse/YARN-8488) | YARN service/components/instances should have SUCCEEDED/FAILED states | Major | yarn-native-services | Wangda Tan | Suma Shivaprasad | diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.0/RELEASENOTES.3.2.0.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.0/RELEASENOTES.3.2.0.md new file mode 100644 index 00000000000..1f598a233c9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.0/RELEASENOTES.3.2.0.md @@ -0,0 +1,134 @@ + + +# Apache Hadoop 3.2.0 Release Notes + +These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements. 
+ + +--- + +* [HADOOP-14667](https://issues.apache.org/jira/browse/HADOOP-14667) | *Major* | **Flexible Visual Studio support** + + + +This change updates the Microsoft Windows build directions to be more flexible with regard to Visual Studio compiler versions: + +* Any version of Visual Studio 2010 Pro or higher may be used. +* MSBuild Solution files are converted to the version of VS at build time. +* An example command file is provided to set command paths prior to using Maven so that the conversion works. + +Additionally, Snappy and ISA-L installations that use bin as the location of the DLL are now recognized without having to set their respective lib paths, as long as the prefix is set. + +Note to contributors: + +It is very important that solutions for any patches remain at the VS 2010 level. + + +--- + +* [YARN-6257](https://issues.apache.org/jira/browse/YARN-6257) | *Minor* | **CapacityScheduler REST API produces incorrect JSON - JSON object operationsInfo contains deplicate key** + +**WARNING: No release note provided for this change.** + + +--- + +* [HADOOP-15146](https://issues.apache.org/jira/browse/HADOOP-15146) | *Minor* | **Remove DataOutputByteBuffer** + +**WARNING: No release note provided for this change.** + + +--- + +* [MAPREDUCE-7069](https://issues.apache.org/jira/browse/MAPREDUCE-7069) | *Major* | **Add ability to specify user environment variables individually** + +Environment variables for MapReduce tasks can now be specified as separate properties, e.g.: +mapreduce.map.env.VARNAME=value +mapreduce.reduce.env.VARNAME=value +yarn.app.mapreduce.am.env.VARNAME=value +yarn.app.mapreduce.am.admin.user.env.VARNAME=value +This form of specifying environment variables is useful when the value of an environment variable contains commas. + + +--- + +* [HADOOP-15446](https://issues.apache.org/jira/browse/HADOOP-15446) | *Major* | **WASB: PageBlobInputStream.skip breaks HBASE replication** + +WASB: Bug fix to support non-sequential page blob reads. Required for HBASE replication. + + +--- + +* [HDFS-13589](https://issues.apache.org/jira/browse/HDFS-13589) | *Major* | **Add dfsAdmin command to query if "upgrade" is finalized** + +A new command has been added to dfsadmin. +hdfs dfsadmin [-upgrade [query \| finalize]] +1. -upgrade query gives the upgrade status. +2. -upgrade finalize is equivalent to -finalizeUpgrade. + + +--- + +* [YARN-8191](https://issues.apache.org/jira/browse/YARN-8191) | *Major* | **Fair scheduler: queue deletion without RM restart** + +**WARNING: No release note provided for this change.** + + +--- + +* [HADOOP-15477](https://issues.apache.org/jira/browse/HADOOP-15477) | *Trivial* | **Make unjar in RunJar overrideable** + + +If the `HADOOP_CLIENT_SKIP_UNJAR` environment variable is set to true, Apache Hadoop RunJar skips unjarring the provided jar. + + +--- + +* [HADOOP-15506](https://issues.apache.org/jira/browse/HADOOP-15506) | *Minor* | **Upgrade Azure Storage Sdk version to 7.0.0 and update corresponding code blocks** + +WASB: Fix Spark process hang at shutdown due to use of non-daemon threads by updating the Azure Storage Java SDK to 7.0. + + +--- + +* [HDFS-13174](https://issues.apache.org/jira/browse/HDFS-13174) | *Major* | **hdfs mover -p /path times out after 20 min** + +The Mover could fail after 20+ minutes if a block move between two DataNodes had been enqueued for that long, due to an internal constant that was introduced for the Balancer but affected the Mover as well. +After the patch, the internal constant can be configured with the dfs.balancer.max-iteration-time parameter, and it affects only the Balancer.
Default is 20 minutes. + + +--- + +* [HADOOP-15495](https://issues.apache.org/jira/browse/HADOOP-15495) | *Major* | **Upgrade common-lang version to 3.7 in hadoop-common-project and hadoop-tools** + +commons-lang version 2.6 was removed from Apache Hadoop. If you are using commons-lang 2.6 as transitive dependency of Hadoop, you need to add the dependency directly. Note: this also means it is absent from share/hadoop/common/lib/ + + +--- + +* [HDFS-13322](https://issues.apache.org/jira/browse/HDFS-13322) | *Minor* | **fuse dfs - uid persists when switching between ticket caches** + +FUSE lib now recognize the change of the Kerberos ticket cache path if it was changed between two file system access in the same local user session via the KRB5CCNAME environment variable. + + +--- + +* [HADOOP-15638](https://issues.apache.org/jira/browse/HADOOP-15638) | *Major* | **KMS Accept Queue Size default changed from 500 to 128 in Hadoop 3.x** + +Restore the KMS accept queue size to 500 in Hadoop 3.x, making it the same as in Hadoop 2.x. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java index bd68dca22c7..058db921793 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java @@ -18,6 +18,7 @@ package org.apache.hadoop.crypto.key.kms; import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -26,11 +27,17 @@ import static org.mockito.Mockito.verify; import java.io.IOException; +import java.net.ConnectException; import java.net.NoRouteToHostException; +import java.net.SocketTimeoutException; import java.net.URI; import java.net.UnknownHostException; import java.security.GeneralSecurityException; import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.List; + +import javax.net.ssl.SSLHandshakeException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; @@ -44,13 +51,18 @@ import org.apache.hadoop.security.authorize.AuthorizationException; import org.junit.After; import org.junit.BeforeClass; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.Timeout; import org.mockito.Mockito; import com.google.common.collect.Sets; public class TestLoadBalancingKMSClientProvider { + @Rule + public Timeout testTimeout = new Timeout(30 * 1000); + @BeforeClass public static void setup() throws IOException { SecurityUtil.setTokenServiceUseIp(false); @@ -346,24 +358,27 @@ public void testWarmUpEncryptedKeysWhenOneProviderSucceeds() } /** - * Tests whether retryPolicy fails immediately, after trying each provider - * once, on encountering IOException which is not SocketException. + * Tests whether retryPolicy fails immediately on non-idempotent operations, + * after trying each provider once, + * on encountering IOException which is not SocketException. 
* @throws Exception */ @Test - public void testClientRetriesWithIOException() throws Exception { + public void testClientRetriesNonIdempotentOpWithIOExceptionFailsImmediately() + throws Exception { Configuration conf = new Configuration(); + final String keyName = "test"; // Setting total failover attempts to . conf.setInt( CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 10); KMSClientProvider p1 = mock(KMSClientProvider.class); - when(p1.getMetadata(Mockito.anyString())) + when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class))) .thenThrow(new IOException("p1")); KMSClientProvider p2 = mock(KMSClientProvider.class); - when(p2.getMetadata(Mockito.anyString())) + when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class))) .thenThrow(new IOException("p2")); KMSClientProvider p3 = mock(KMSClientProvider.class); - when(p3.getMetadata(Mockito.anyString())) + when(p3.createKey(Mockito.anyString(), Mockito.any(Options.class))) .thenThrow(new IOException("p3")); when(p1.getKMSUrl()).thenReturn("p1"); @@ -372,17 +387,61 @@ public void testClientRetriesWithIOException() throws Exception { LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider( new KMSClientProvider[] {p1, p2, p3}, 0, conf); try { - kp.getMetadata("test3"); + kp.createKey(keyName, new Options(conf)); fail("Should fail since all providers threw an IOException"); } catch (Exception e) { assertTrue(e instanceof IOException); } verify(kp.getProviders()[0], Mockito.times(1)) - .getMetadata(Mockito.eq("test3")); + .createKey(Mockito.eq(keyName), Mockito.any(Options.class)); + verify(kp.getProviders()[1], Mockito.times(1)) + .createKey(Mockito.eq(keyName), Mockito.any(Options.class)); + verify(kp.getProviders()[2], Mockito.times(1)) + .createKey(Mockito.eq(keyName), Mockito.any(Options.class)); + } + + /** + * Tests whether retryPolicy retries on idempotent operations + * when encountering IOException. + * @throws Exception + */ + @Test + public void testClientRetriesIdempotentOpWithIOExceptionSucceedsSecondTime() + throws Exception { + Configuration conf = new Configuration(); + final String keyName = "test"; + final KeyProvider.KeyVersion keyVersion + = new KMSClientProvider.KMSKeyVersion(keyName, "v1", + new byte[0]); + // Setting total failover attempts to . 
+ conf.setInt( + CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 10); + KMSClientProvider p1 = mock(KMSClientProvider.class); + when(p1.getCurrentKey(Mockito.anyString())) + .thenThrow(new IOException("p1")) + .thenReturn(keyVersion); + KMSClientProvider p2 = mock(KMSClientProvider.class); + when(p2.getCurrentKey(Mockito.anyString())) + .thenThrow(new IOException("p2")); + KMSClientProvider p3 = mock(KMSClientProvider.class); + when(p3.getCurrentKey(Mockito.anyString())) + .thenThrow(new IOException("p3")); + + when(p1.getKMSUrl()).thenReturn("p1"); + when(p2.getKMSUrl()).thenReturn("p2"); + when(p3.getKMSUrl()).thenReturn("p3"); + LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider( + new KMSClientProvider[] {p1, p2, p3}, 0, conf); + + KeyProvider.KeyVersion result = kp.getCurrentKey(keyName); + + assertEquals(keyVersion, result); + verify(kp.getProviders()[0], Mockito.times(2)) + .getCurrentKey(Mockito.eq(keyName)); verify(kp.getProviders()[1], Mockito.times(1)) - .getMetadata(Mockito.eq("test3")); + .getCurrentKey(Mockito.eq(keyName)); verify(kp.getProviders()[2], Mockito.times(1)) - .getMetadata(Mockito.eq("test3")); + .getCurrentKey(Mockito.eq(keyName)); } /** @@ -638,4 +697,185 @@ public void testClientRetriesWithAuthenticationExceptionWrappedinIOException() verify(p2, Mockito.times(1)).createKey(Mockito.eq("test3"), Mockito.any(Options.class)); } + + /** + * Tests the operation succeeds second time after SSLHandshakeException. + * @throws Exception + */ + @Test + public void testClientRetriesWithSSLHandshakeExceptionSucceedsSecondTime() + throws Exception { + Configuration conf = new Configuration(); + conf.setInt( + CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3); + final String keyName = "test"; + KMSClientProvider p1 = mock(KMSClientProvider.class); + when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class))) + .thenThrow(new SSLHandshakeException("p1")) + .thenReturn(new KMSClientProvider.KMSKeyVersion(keyName, "v1", + new byte[0])); + KMSClientProvider p2 = mock(KMSClientProvider.class); + when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class))) + .thenThrow(new ConnectException("p2")); + + when(p1.getKMSUrl()).thenReturn("p1"); + when(p2.getKMSUrl()).thenReturn("p2"); + + LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider( + new KMSClientProvider[] {p1, p2}, 0, conf); + + kp.createKey(keyName, new Options(conf)); + verify(p1, Mockito.times(2)).createKey(Mockito.eq(keyName), + Mockito.any(Options.class)); + verify(p2, Mockito.times(1)).createKey(Mockito.eq(keyName), + Mockito.any(Options.class)); + } + + /** + * Tests the operation fails at every attempt after SSLHandshakeException. 
+ * @throws Exception + */ + @Test + public void testClientRetriesWithSSLHandshakeExceptionFailsAtEveryAttempt() + throws Exception { + Configuration conf = new Configuration(); + conf.setInt( + CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 2); + final String keyName = "test"; + final String exceptionMessage = "p1 exception message"; + KMSClientProvider p1 = mock(KMSClientProvider.class); + Exception originalSslEx = new SSLHandshakeException(exceptionMessage); + when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class))) + .thenThrow(originalSslEx); + KMSClientProvider p2 = mock(KMSClientProvider.class); + when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class))) + .thenThrow(new ConnectException("p2 exception message")); + + when(p1.getKMSUrl()).thenReturn("p1"); + when(p2.getKMSUrl()).thenReturn("p2"); + + LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider( + new KMSClientProvider[] {p1, p2}, 0, conf); + + Exception interceptedEx = intercept(ConnectException.class, + "SSLHandshakeException: " + exceptionMessage, + ()-> kp.createKey(keyName, new Options(conf))); + assertEquals(originalSslEx, interceptedEx.getCause()); + + verify(p1, Mockito.times(2)).createKey(Mockito.eq(keyName), + Mockito.any(Options.class)); + verify(p2, Mockito.times(1)).createKey(Mockito.eq(keyName), + Mockito.any(Options.class)); + } + + /** + * Tests that if an idempotent operation succeeds second time after + * SocketTimeoutException, then the operation is successful. + * @throws Exception + */ + @Test + public void testClientRetriesIdempotentOpWithSocketTimeoutExceptionSucceeds() + throws Exception { + Configuration conf = new Configuration(); + conf.setInt( + CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3); + final List keys = Arrays.asList("testKey"); + KMSClientProvider p1 = mock(KMSClientProvider.class); + when(p1.getKeys()) + .thenThrow(new SocketTimeoutException("p1")) + .thenReturn(keys); + KMSClientProvider p2 = mock(KMSClientProvider.class); + when(p2.getKeys()).thenThrow(new SocketTimeoutException("p2")); + + when(p1.getKMSUrl()).thenReturn("p1"); + when(p2.getKMSUrl()).thenReturn("p2"); + + LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider( + new KMSClientProvider[] {p1, p2}, 0, conf); + + List result = kp.getKeys(); + assertEquals(keys, result); + verify(p1, Mockito.times(2)).getKeys(); + verify(p2, Mockito.times(1)).getKeys(); + } + + /** + * Tests that if a non idempotent operation fails at every attempt + * after SocketTimeoutException, then SocketTimeoutException is thrown. 
+ * @throws Exception + */ + @Test + public void testClientRetriesIdempotentOpWithSocketTimeoutExceptionFails() + throws Exception { + Configuration conf = new Configuration(); + conf.setInt( + CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 2); + final String keyName = "test"; + final String exceptionMessage = "p1 exception message"; + KMSClientProvider p1 = mock(KMSClientProvider.class); + Exception originalEx = new SocketTimeoutException(exceptionMessage); + when(p1.getKeyVersions(Mockito.anyString())) + .thenThrow(originalEx); + KMSClientProvider p2 = mock(KMSClientProvider.class); + when(p2.getKeyVersions(Mockito.anyString())) + .thenThrow(new SocketTimeoutException("p2 exception message")); + + when(p1.getKMSUrl()).thenReturn("p1"); + when(p2.getKMSUrl()).thenReturn("p2"); + + LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider( + new KMSClientProvider[] {p1, p2}, 0, conf); + + Exception interceptedEx = intercept(SocketTimeoutException.class, + "SocketTimeoutException: " + exceptionMessage, + ()-> kp.getKeyVersions(keyName)); + assertEquals(originalEx, interceptedEx); + + verify(p1, Mockito.times(2)) + .getKeyVersions(Mockito.eq(keyName)); + verify(p2, Mockito.times(1)) + .getKeyVersions(Mockito.eq(keyName)); + } + + /** + * Tests whether retryPolicy fails immediately on non-idempotent operations, + * after trying each provider once, on encountering SocketTimeoutException. + * @throws Exception + */ + @Test + public void testClientRetriesNonIdempotentOpWithSocketTimeoutExceptionFails() + throws Exception { + Configuration conf = new Configuration(); + final String keyName = "test"; + // Setting total failover attempts to . + conf.setInt( + CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 10); + KMSClientProvider p1 = mock(KMSClientProvider.class); + when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class))) + .thenThrow(new SocketTimeoutException("p1")); + KMSClientProvider p2 = mock(KMSClientProvider.class); + when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class))) + .thenThrow(new SocketTimeoutException("p2")); + KMSClientProvider p3 = mock(KMSClientProvider.class); + when(p3.createKey(Mockito.anyString(), Mockito.any(Options.class))) + .thenThrow(new SocketTimeoutException("p3")); + + when(p1.getKMSUrl()).thenReturn("p1"); + when(p2.getKMSUrl()).thenReturn("p2"); + when(p3.getKMSUrl()).thenReturn("p3"); + LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider( + new KMSClientProvider[] {p1, p2, p3}, 0, conf); + try { + kp.createKey(keyName, new Options(conf)); + fail("Should fail since all providers threw a SocketTimeoutException"); + } catch (Exception e) { + assertTrue(e instanceof SocketTimeoutException); + } + verify(kp.getProviders()[0], Mockito.times(1)) + .createKey(Mockito.eq(keyName), Mockito.any(Options.class)); + verify(kp.getProviders()[1], Mockito.times(1)) + .createKey(Mockito.eq(keyName), Mockito.any(Options.class)); + verify(kp.getProviders()[2], Mockito.times(1)) + .createKey(Mockito.eq(keyName), Mockito.any(Options.class)); + } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/AbstractSystemMultipartUploaderTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/AbstractSystemMultipartUploaderTest.java deleted file mode 100644 index f132089a9e2..00000000000 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/AbstractSystemMultipartUploaderTest.java +++ 
/dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.tuple.Pair; - -import org.junit.Test; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - -public abstract class AbstractSystemMultipartUploaderTest { - - abstract FileSystem getFS() throws IOException; - - abstract Path getBaseTestPath(); - - @Test - public void testMultipartUpload() throws Exception { - FileSystem fs = getFS(); - Path file = new Path(getBaseTestPath(), "some-file"); - MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); - UploadHandle uploadHandle = mpu.initialize(file); - List> partHandles = new ArrayList<>(); - StringBuilder sb = new StringBuilder(); - for (int i = 1; i <= 100; ++i) { - String contents = "ThisIsPart" + i + "\n"; - sb.append(contents); - int len = contents.getBytes().length; - InputStream is = IOUtils.toInputStream(contents, "UTF-8"); - PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, len); - partHandles.add(Pair.of(i, partHandle)); - } - PathHandle fd = mpu.complete(file, partHandles, uploadHandle); - byte[] fdData = IOUtils.toByteArray(fs.open(fd)); - byte[] fileData = IOUtils.toByteArray(fs.open(file)); - String readString = new String(fdData); - assertEquals(sb.toString(), readString); - assertArrayEquals(fdData, fileData); - } - - @Test - public void testMultipartUploadReverseOrder() throws Exception { - FileSystem fs = getFS(); - Path file = new Path(getBaseTestPath(), "some-file"); - MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); - UploadHandle uploadHandle = mpu.initialize(file); - List> partHandles = new ArrayList<>(); - StringBuilder sb = new StringBuilder(); - for (int i = 1; i <= 100; ++i) { - String contents = "ThisIsPart" + i + "\n"; - sb.append(contents); - } - for (int i = 100; i > 0; --i) { - String contents = "ThisIsPart" + i + "\n"; - int len = contents.getBytes().length; - InputStream is = IOUtils.toInputStream(contents, "UTF-8"); - PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, len); - partHandles.add(Pair.of(i, partHandle)); - } - PathHandle fd = mpu.complete(file, partHandles, uploadHandle); - byte[] fdData = IOUtils.toByteArray(fs.open(fd)); - byte[] fileData = IOUtils.toByteArray(fs.open(file)); - String readString = new String(fdData); - assertEquals(sb.toString(), readString); - assertArrayEquals(fdData, fileData); - } - - @Test - public void testMultipartUploadReverseOrderNoNContiguousPartNumbers() - throws Exception { - FileSystem fs = 
getFS(); - Path file = new Path(getBaseTestPath(), "some-file"); - MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); - UploadHandle uploadHandle = mpu.initialize(file); - List> partHandles = new ArrayList<>(); - StringBuilder sb = new StringBuilder(); - for (int i = 2; i <= 200; i += 2) { - String contents = "ThisIsPart" + i + "\n"; - sb.append(contents); - } - for (int i = 200; i > 0; i -= 2) { - String contents = "ThisIsPart" + i + "\n"; - int len = contents.getBytes().length; - InputStream is = IOUtils.toInputStream(contents, "UTF-8"); - PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, len); - partHandles.add(Pair.of(i, partHandle)); - } - PathHandle fd = mpu.complete(file, partHandles, uploadHandle); - byte[] fdData = IOUtils.toByteArray(fs.open(fd)); - byte[] fileData = IOUtils.toByteArray(fs.open(file)); - String readString = new String(fdData); - assertEquals(sb.toString(), readString); - assertArrayEquals(fdData, fileData); - } - - @Test - public void testMultipartUploadAbort() throws Exception { - FileSystem fs = getFS(); - Path file = new Path(getBaseTestPath(), "some-file"); - MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); - UploadHandle uploadHandle = mpu.initialize(file); - for (int i = 100; i >= 50; --i) { - String contents = "ThisIsPart" + i + "\n"; - int len = contents.getBytes().length; - InputStream is = IOUtils.toInputStream(contents, "UTF-8"); - PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, len); - } - mpu.abort(file, uploadHandle); - - String contents = "ThisIsPart49\n"; - int len = contents.getBytes().length; - InputStream is = IOUtils.toInputStream(contents, "UTF-8"); - - try { - mpu.putPart(file, is, 49, uploadHandle, len); - fail("putPart should have thrown an exception"); - } catch (IOException ok) { - // ignore - } - } -} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java index 62ecd9f13a5..c07a6ffa344 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java @@ -369,6 +369,44 @@ public void testListStatus() throws Exception { pathsIterator = fc.listStatus(getTestRootPath(fc, "test/hadoop/a")); Assert.assertFalse(pathsIterator.hasNext()); } + + @Test + public void testListFiles() throws Exception { + Path[] testDirs = { + getTestRootPath(fc, "test/dir1"), + getTestRootPath(fc, "test/dir1/dir1"), + getTestRootPath(fc, "test/dir2") + }; + Path[] testFiles = { + new Path(testDirs[0], "file1"), + new Path(testDirs[0], "file2"), + new Path(testDirs[1], "file2"), + new Path(testDirs[2], "file1") + }; + + for (Path path : testDirs) { + fc.mkdir(path, FsPermission.getDefault(), true); + } + for (Path p : testFiles) { + FSDataOutputStream out = fc.create(p).build(); + out.writeByte(0); + out.close(); + } + + RemoteIterator filesIterator = + fc.util().listFiles(getTestRootPath(fc, "test"), true); + LocatedFileStatus[] fileStats = + new LocatedFileStatus[testFiles.length]; + for (int i = 0; i < fileStats.length; i++) { + assertTrue(filesIterator.hasNext()); + fileStats[i] = filesIterator.next(); + } + assertFalse(filesIterator.hasNext()); + + for (Path p : testFiles) { + assertTrue(containsPath(p, fileStats)); + } + } @Test public void 
testListStatusFilterWithNoMatches() throws Exception { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java index 5fe4e39ade8..2e7cb5d6342 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellTouch.java @@ -21,7 +21,11 @@ import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.assertThat; +import java.text.ParseException; +import java.util.Date; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.shell.TouchCommands.Touch; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.StringUtils; import org.junit.Before; @@ -85,4 +89,103 @@ public void testTouchz() throws Exception { assertThat("Expected failed touchz in a non-existent directory", shellRun("-touchz", noDirName + "/foo"), is(not(0))); } + + @Test + public void testTouch() throws Exception { + // Ensure newFile2 does not exist + final String newFileName = "newFile2"; + final Path newFile = new Path(newFileName); + lfs.delete(newFile, true); + assertThat(lfs.exists(newFile), is(false)); + + { + assertThat( + "Expected successful touch on a non-existent file with -c option", + shellRun("-touch", "-c", newFileName), is(not(0))); + assertThat(lfs.exists(newFile), is(false)); + } + + { + String strTime = formatTimestamp(System.currentTimeMillis()); + Date dateObj = parseTimestamp(strTime); + + assertThat( + "Expected successful touch on a new file with a specified timestamp", + shellRun("-touch", "-t", strTime, newFileName), is(0)); + FileStatus new_status = lfs.getFileStatus(newFile); + assertThat(new_status.getAccessTime(), is(dateObj.getTime())); + assertThat(new_status.getModificationTime(), is(dateObj.getTime())); + } + + FileStatus fstatus = lfs.getFileStatus(newFile); + + { + String strTime = formatTimestamp(System.currentTimeMillis()); + Date dateObj = parseTimestamp(strTime); + + assertThat("Expected successful touch with a specified access time", + shellRun("-touch", "-a", "-t", strTime, newFileName), is(0)); + FileStatus new_status = lfs.getFileStatus(newFile); + // Verify if access time is recorded correctly (and modification time + // remains unchanged). + assertThat(new_status.getAccessTime(), is(dateObj.getTime())); + assertThat(new_status.getModificationTime(), + is(fstatus.getModificationTime())); + } + + fstatus = lfs.getFileStatus(newFile); + + { + String strTime = formatTimestamp(System.currentTimeMillis()); + Date dateObj = parseTimestamp(strTime); + + assertThat( + "Expected successful touch with a specified modificatiom time", + shellRun("-touch", "-m", "-t", strTime, newFileName), is(0)); + // Verify if modification time is recorded correctly (and access time + // remains unchanged). 
+ FileStatus new_status = lfs.getFileStatus(newFile); + assertThat(new_status.getAccessTime(), is(fstatus.getAccessTime())); + assertThat(new_status.getModificationTime(), is(dateObj.getTime())); + } + + { + String strTime = formatTimestamp(System.currentTimeMillis()); + Date dateObj = parseTimestamp(strTime); + + assertThat("Expected successful touch with a specified timestamp", + shellRun("-touch", "-t", strTime, newFileName), is(0)); + + // Verify if both modification and access times are recorded correctly + FileStatus new_status = lfs.getFileStatus(newFile); + assertThat(new_status.getAccessTime(), is(dateObj.getTime())); + assertThat(new_status.getModificationTime(), is(dateObj.getTime())); + } + + { + String strTime = formatTimestamp(System.currentTimeMillis()); + Date dateObj = parseTimestamp(strTime); + + assertThat("Expected successful touch with a specified timestamp", + shellRun("-touch", "-a", "-m", "-t", strTime, newFileName), is(0)); + + // Verify if both modification and access times are recorded correctly + FileStatus new_status = lfs.getFileStatus(newFile); + assertThat(new_status.getAccessTime(), is(dateObj.getTime())); + assertThat(new_status.getModificationTime(), is(dateObj.getTime())); + } + + { + assertThat("Expected failed touch with a missing timestamp", + shellRun("-touch", "-t", newFileName), is(not(0))); + } + } + + private String formatTimestamp(long timeInMillis) { + return (new Touch()).getDateFormat().format(new Date(timeInMillis)); + } + + private Date parseTimestamp(String tstamp) throws ParseException { + return (new Touch()).getDateFormat().parse(tstamp); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java index fa2d21fb423..568821be280 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java @@ -517,6 +517,60 @@ public void testTrash() throws IOException { trashShell(FileSystem.getLocal(conf), TEST_DIR); } + @Test + public void testExistingFileTrash() throws IOException { + Configuration conf = new Configuration(); + conf.setClass("fs.file.impl", TestLFS.class, FileSystem.class); + FileSystem fs = FileSystem.getLocal(conf); + conf.set("fs.defaultFS", fs.getUri().toString()); + conf.setLong(FS_TRASH_INTERVAL_KEY, 0); // disabled + assertFalse(new Trash(conf).isEnabled()); + + conf.setLong(FS_TRASH_INTERVAL_KEY, 10); // 10 minute + assertTrue(new Trash(conf).isEnabled()); + + FsShell shell = new FsShell(); + shell.setConf(conf); + + // First create a new directory with mkdirs + Path myPath = new Path(TEST_DIR, "test/mkdirs"); + mkdir(fs, myPath); + + // Second, create a file in that directory. 
+ Path myFile = new Path(TEST_DIR, "test/mkdirs/myExistingFile"); + writeFile(fs, myFile, 10); + // First rm a file + mkdir(fs, myPath); + writeFile(fs, myFile, 10); + + String[] args1 = new String[2]; + args1[0] = "-rm"; + args1[1] = myFile.toString(); + int val1 = -1; + try { + val1 = shell.run(args1); + } catch (Exception e) { + System.err.println("Exception raised from Trash.run " + + e.getLocalizedMessage()); + } + assertTrue(val1 == 0); + + // Second rm a file which parent path is the same as above + mkdir(fs, myFile); + writeFile(fs, new Path(myFile, "mySubFile"), 10); + String[] args2 = new String[2]; + args2[0] = "-rm"; + args2[1] = new Path(myFile, "mySubFile").toString(); + int val2 = -1; + try { + val2 = shell.run(args2); + } catch (Exception e) { + System.err.println("Exception raised from Trash.run " + + e.getLocalizedMessage()); + } + assertTrue(val2 == 0); + } + @Test public void testNonDefaultFS() throws IOException { Configuration conf = new Configuration(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMultipartUploaderTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMultipartUploaderTest.java new file mode 100644 index 00000000000..85a68616371 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMultipartUploaderTest.java @@ -0,0 +1,343 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.contract; + +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Charsets; +import org.junit.Test; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.BBUploadHandle; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.MultipartUploader; +import org.apache.hadoop.fs.MultipartUploaderFactory; +import org.apache.hadoop.fs.PartHandle; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathHandle; +import org.apache.hadoop.fs.UploadHandle; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.verifyPathExists; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public abstract class AbstractContractMultipartUploaderTest extends + AbstractFSContractTestBase { + + /** + * The payload is the part number repeated for the length of the part. 
+ * This makes checking the correctness of the upload straightforward. + * @param partNumber part number + * @return the bytes to upload. + */ + private byte[] generatePayload(int partNumber) { + int sizeInBytes = partSizeInBytes(); + ByteBuffer buffer = ByteBuffer.allocate(sizeInBytes); + for (int i=0 ; i < sizeInBytes/(Integer.SIZE/Byte.SIZE); ++i) { + buffer.putInt(partNumber); + } + return buffer.array(); + } + + /** + * Load a path, make an MD5 digest. + * @param path path to load + * @return the digest array + * @throws IOException failure to read or digest the file. + */ + protected byte[] digest(Path path) throws IOException { + FileSystem fs = getFileSystem(); + try (InputStream in = fs.open(path)) { + byte[] fdData = IOUtils.toByteArray(in); + MessageDigest newDigest = DigestUtils.getMd5Digest(); + return newDigest.digest(fdData); + } + } + + /** + * Get the partition size in bytes to use for each upload. + * @return a number > 0 + */ + protected abstract int partSizeInBytes(); + + /** + * Get the number of test payloads to upload. + * @return a number > 1 + */ + protected int getTestPayloadCount() { + return 10; + } + + /** + * Assert that a multipart upload is successful. + * @throws Exception failure + */ + @Test + public void testSingleUpload() throws Exception { + FileSystem fs = getFileSystem(); + Path file = path("testSingleUpload"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + UploadHandle uploadHandle = mpu.initialize(file); + List> partHandles = new ArrayList<>(); + MessageDigest origDigest = DigestUtils.getMd5Digest(); + byte[] payload = generatePayload(1); + origDigest.update(payload); + InputStream is = new ByteArrayInputStream(payload); + PartHandle partHandle = mpu.putPart(file, is, 1, uploadHandle, + payload.length); + partHandles.add(Pair.of(1, partHandle)); + PathHandle fd = completeUpload(file, mpu, uploadHandle, partHandles, + origDigest, + payload.length); + + // Complete is idempotent + PathHandle fd2 = mpu.complete(file, partHandles, uploadHandle); + assertArrayEquals("Path handles differ", fd.toByteArray(), + fd2.toByteArray()); + } + + private PathHandle completeUpload(final Path file, + final MultipartUploader mpu, + final UploadHandle uploadHandle, + final List> partHandles, + final MessageDigest origDigest, + final int expectedLength) throws IOException { + PathHandle fd = mpu.complete(file, partHandles, uploadHandle); + + FileStatus status = verifyPathExists(getFileSystem(), + "Completed file", file); + assertEquals("length of " + status, + expectedLength, status.getLen()); + + assertArrayEquals("digest of source and " + file + + " differ", + origDigest.digest(), digest(file)); + return fd; + } + + /** + * Assert that a multipart upload is successful. 
+ * @throws Exception failure + */ + @Test + public void testMultipartUpload() throws Exception { + FileSystem fs = getFileSystem(); + Path file = path("testMultipartUpload"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + UploadHandle uploadHandle = mpu.initialize(file); + List> partHandles = new ArrayList<>(); + MessageDigest origDigest = DigestUtils.getMd5Digest(); + final int payloadCount = getTestPayloadCount(); + for (int i = 1; i <= payloadCount; ++i) { + byte[] payload = generatePayload(i); + origDigest.update(payload); + InputStream is = new ByteArrayInputStream(payload); + PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, + payload.length); + partHandles.add(Pair.of(i, partHandle)); + } + completeUpload(file, mpu, uploadHandle, partHandles, origDigest, + payloadCount * partSizeInBytes()); + } + + /** + * Assert that a multipart upload is successful even when the parts are + * given in the reverse order. + */ + @Test + public void testMultipartUploadReverseOrder() throws Exception { + FileSystem fs = getFileSystem(); + Path file = path("testMultipartUploadReverseOrder"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + UploadHandle uploadHandle = mpu.initialize(file); + List> partHandles = new ArrayList<>(); + MessageDigest origDigest = DigestUtils.getMd5Digest(); + final int payloadCount = getTestPayloadCount(); + for (int i = 1; i <= payloadCount; ++i) { + byte[] payload = generatePayload(i); + origDigest.update(payload); + } + for (int i = payloadCount; i > 0; --i) { + byte[] payload = generatePayload(i); + InputStream is = new ByteArrayInputStream(payload); + PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, + payload.length); + partHandles.add(Pair.of(i, partHandle)); + } + completeUpload(file, mpu, uploadHandle, partHandles, origDigest, + payloadCount * partSizeInBytes()); + } + + /** + * Assert that a multipart upload is successful even when the parts are + * given in reverse order and the part numbers are not contiguous. + */ + @Test + public void testMultipartUploadReverseOrderNonContiguousPartNumbers() + throws Exception { + describe("Upload in reverse order and the part numbers are not contiguous"); + FileSystem fs = getFileSystem(); + Path file = path("testMultipartUploadReverseOrderNonContiguousPartNumbers"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + UploadHandle uploadHandle = mpu.initialize(file); + List> partHandles = new ArrayList<>(); + MessageDigest origDigest = DigestUtils.getMd5Digest(); + int payloadCount = 2 * getTestPayloadCount(); + for (int i = 2; i <= payloadCount; i += 2) { + byte[] payload = generatePayload(i); + origDigest.update(payload); + } + for (int i = payloadCount; i > 0; i -= 2) { + byte[] payload = generatePayload(i); + InputStream is = new ByteArrayInputStream(payload); + PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, + payload.length); + partHandles.add(Pair.of(i, partHandle)); + } + completeUpload(file, mpu, uploadHandle, partHandles, origDigest, + getTestPayloadCount() * partSizeInBytes()); + } + + /** + * Assert that when we abort a multipart upload, the resulting file does + * not show up. 
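The contract tests above pin down the MultipartUploader call sequence: obtain an uploader from MultipartUploaderFactory, initialize an upload, put numbered parts, then either complete or abort. A minimal client-side sketch of that flow follows; the class name and the single 1 KB part are illustrative assumptions, not part of the patch.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.MultipartUploader;
import org.apache.hadoop.fs.MultipartUploaderFactory;
import org.apache.hadoop.fs.PartHandle;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathHandle;
import org.apache.hadoop.fs.UploadHandle;

/** Sketch of the flow exercised by AbstractContractMultipartUploaderTest. */
public final class MultipartUploadSketch {
  private MultipartUploadSketch() {
  }

  /** Upload one 1 KB part to dest and commit it. */
  public static PathHandle uploadOnePart(FileSystem fs, Path dest)
      throws IOException {
    MultipartUploader mpu = MultipartUploaderFactory.get(fs, null);
    UploadHandle upload = mpu.initialize(dest);
    List<Pair<Integer, PartHandle>> parts = new ArrayList<>();
    byte[] part1 = new byte[1024];                       // illustrative payload
    PartHandle handle = mpu.putPart(dest, new ByteArrayInputStream(part1),
        1, upload, part1.length);
    parts.add(Pair.of(1, handle));
    // complete() commits the parts; mpu.abort(dest, upload) would discard them.
    return mpu.complete(dest, parts, upload);
  }
}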
+ */ + @Test + public void testMultipartUploadAbort() throws Exception { + describe("Upload and then abort it before completing"); + FileSystem fs = getFileSystem(); + Path file = path("testMultipartUploadAbort"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + UploadHandle uploadHandle = mpu.initialize(file); + List> partHandles = new ArrayList<>(); + for (int i = 20; i >= 10; --i) { + byte[] payload = generatePayload(i); + InputStream is = new ByteArrayInputStream(payload); + PartHandle partHandle = mpu.putPart(file, is, i, uploadHandle, + payload.length); + partHandles.add(Pair.of(i, partHandle)); + } + mpu.abort(file, uploadHandle); + + String contents = "ThisIsPart49\n"; + int len = contents.getBytes(Charsets.UTF_8).length; + InputStream is = IOUtils.toInputStream(contents, "UTF-8"); + + intercept(IOException.class, + () -> mpu.putPart(file, is, 49, uploadHandle, len)); + intercept(IOException.class, + () -> mpu.complete(file, partHandles, uploadHandle)); + + assertPathDoesNotExist("Uploaded file should not exist", file); + } + + /** + * Trying to abort from an invalid handle must fail. + */ + @Test + public void testAbortUnknownUpload() throws Exception { + FileSystem fs = getFileSystem(); + Path file = path("testAbortUnknownUpload"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + ByteBuffer byteBuffer = ByteBuffer.wrap( + "invalid-handle".getBytes(Charsets.UTF_8)); + UploadHandle uploadHandle = BBUploadHandle.from(byteBuffer); + intercept(FileNotFoundException.class, () -> mpu.abort(file, uploadHandle)); + } + + /** + * Trying to abort with a handle of size 0 must fail. + */ + @Test + public void testAbortEmptyUploadHandle() throws Exception { + FileSystem fs = getFileSystem(); + Path file = path("testAbortEmptyUpload"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[0]); + UploadHandle uploadHandle = BBUploadHandle.from(byteBuffer); + intercept(IllegalArgumentException.class, + () -> mpu.abort(file, uploadHandle)); + } + + /** + * When we complete with no parts provided, it must fail. + */ + @Test + public void testCompleteEmptyUpload() throws Exception { + describe("Expect an empty MPU to fail, but still be abortable"); + FileSystem fs = getFileSystem(); + Path dest = path("testCompleteEmptyUpload"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + UploadHandle handle = mpu.initialize(dest); + intercept(IOException.class, + () -> mpu.complete(dest, new ArrayList<>(), handle)); + mpu.abort(dest, handle); + } + + /** + * When we pass empty uploadID, putPart throws IllegalArgumentException. + * @throws Exception + */ + @Test + public void testPutPartEmptyUploadID() throws Exception { + describe("Expect IllegalArgumentException when putPart uploadID is empty"); + FileSystem fs = getFileSystem(); + Path dest = path("testCompleteEmptyUpload"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + mpu.initialize(dest); + UploadHandle emptyHandle = + BBUploadHandle.from(ByteBuffer.wrap(new byte[0])); + byte[] payload = generatePayload(1); + InputStream is = new ByteArrayInputStream(payload); + intercept(IllegalArgumentException.class, + () -> mpu.putPart(dest, is, 1, emptyHandle, payload.length)); + } + + /** + * When we pass empty uploadID, complete throws IllegalArgumentException. 
+ * @throws Exception + */ + @Test + public void testCompleteEmptyUploadID() throws Exception { + describe("Expect IllegalArgumentException when complete uploadID is empty"); + FileSystem fs = getFileSystem(); + Path dest = path("testCompleteEmptyUpload"); + MultipartUploader mpu = MultipartUploaderFactory.get(fs, null); + UploadHandle realHandle = mpu.initialize(dest); + UploadHandle emptyHandle = + BBUploadHandle.from(ByteBuffer.wrap(new byte[0])); + List> partHandles = new ArrayList<>(); + byte[] payload = generatePayload(1); + InputStream is = new ByteArrayInputStream(payload); + PartHandle partHandle = mpu.putPart(dest, is, 1, realHandle, + payload.length); + partHandles.add(Pair.of(1, partHandle)); + + intercept(IllegalArgumentException.class, + () -> mpu.complete(dest, partHandles, emptyHandle)); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java index b6d0a4919be..5b76a753de1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java @@ -69,7 +69,7 @@ public void testRenameNonexistentFile() throws Throwable { } else { // at least one FS only returns false here, if that is the case // warn but continue - getLog().warn("Rename returned {} renaming a nonexistent file", renamed); + getLogger().warn("Rename returned {} renaming a nonexistent file", renamed); assertFalse("Renaming a missing file returned true", renamed); } } catch (FileNotFoundException e) { @@ -118,7 +118,7 @@ public void testRenameFileOverExistingFile() throws Throwable { if (renamed && !renameReturnsFalseOnRenameDestExists) { //expected an exception String destDirLS = generateAndLogErrorListing(srcFile, destFile); - getLog().error("dest dir {}", destDirLS); + getLogger().error("dest dir {}", destDirLS); fail("expected rename(" + srcFile + ", " + destFile + " ) to fail," + " but got success and destination of " + destDirLS); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java index 7af3cb0a525..3c1377a5a49 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java @@ -132,7 +132,7 @@ public void testBlockReadZeroByteFile() throws Throwable { @Test public void testSeekReadClosedFile() throws Throwable { instream = getFileSystem().open(smallSeekFile); - getLog().debug( + getLogger().debug( "Stream is of type " + instream.getClass().getCanonicalName()); instream.close(); try { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java index b2e68f5316e..1cd2164fad3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java @@ -110,7 
+110,7 @@ public FileSystem getFileSystem() { * Get the log of the base class. * @return a logger */ - public static Logger getLog() { + public static Logger getLogger() { return LOG; } @@ -281,7 +281,7 @@ protected void handleRelaxedException(String action, * @param e exception raised. */ protected void handleExpectedException(Exception e) { - getLog().debug("expected :{}" ,e, e); + getLogger().debug("expected :{}" ,e, e); } /** @@ -366,7 +366,7 @@ protected boolean rename(Path src, Path dst) throws IOException { protected String generateAndLogErrorListing(Path src, Path dst) throws IOException { FileSystem fs = getFileSystem(); - getLog().error( + getLogger().error( "src dir " + ContractTestUtils.ls(fs, src.getParent())); String destDirLS = ContractTestUtils.ls(fs, dst.getParent()); if (fs.isDirectory(dst)) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystemMultipartUploader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractMultipartUploader.java similarity index 53% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystemMultipartUploader.java rename to hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractMultipartUploader.java index 21d01b6cdbd..a50d2e41b14 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystemMultipartUploader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractMultipartUploader.java @@ -15,51 +15,29 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.fs; +package org.apache.hadoop.fs.contract.localfs; import org.apache.hadoop.conf.Configuration; -import static org.apache.hadoop.test.GenericTestUtils.getRandomizedTestDir; - -import org.junit.After; -import org.junit.Before; -import org.junit.BeforeClass; - -import java.io.File; -import java.io.IOException; +import org.apache.hadoop.fs.contract.AbstractContractMultipartUploaderTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; /** * Test the FileSystemMultipartUploader on local file system. */ -public class TestLocalFileSystemMultipartUploader - extends AbstractSystemMultipartUploaderTest { - - private static FileSystem fs; - private File tmp; - - @BeforeClass - public static void init() throws IOException { - fs = LocalFileSystem.getLocal(new Configuration()); - } - - @Before - public void setup() throws IOException { - tmp = getRandomizedTestDir(); - tmp.mkdirs(); - } - - @After - public void tearDown() throws IOException { - tmp.delete(); - } +public class TestLocalFSContractMultipartUploader + extends AbstractContractMultipartUploaderTest { @Override - public FileSystem getFS() { - return fs; + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); } + /** + * There is no real need to upload any particular size. 
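The relocated TestLocalFSContractMultipartUploader shows the pattern other filesystems can follow to pick up the new contract test: bind a contract and choose a part size. A hedged variant is sketched here; the class name and the 64 KB / 3-part numbers are arbitrary choices, and getTestPayloadCount() is the hook declared earlier in AbstractContractMultipartUploaderTest.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractMultipartUploaderTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.localfs.LocalFSContract;

/** Sketch: a second local-FS binding with different upload sizing. */
public class TestLocalFSContractMultipartUploaderLargeParts
    extends AbstractContractMultipartUploaderTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new LocalFSContract(conf);
  }

  @Override
  protected int partSizeInBytes() {
    return 64 * 1024;    // 64 KB parts instead of the 1 KB used by the local-FS test
  }

  @Override
  protected int getTestPayloadCount() {
    return 3;            // fewer parts keeps the run short
  }
}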
+ * @return 1 kilobyte + */ @Override - public Path getBaseTestPath() { - return new Path(tmp.getAbsolutePath()); + protected int partSizeInBytes() { + return 1024; } - -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java index 26b1137e49b..e0c87e93a9a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java @@ -55,6 +55,7 @@ import javax.ws.rs.core.MediaType; import java.io.IOException; import java.io.PrintWriter; +import java.lang.reflect.Field; import java.net.HttpURLConnection; import java.net.URI; import java.net.URL; @@ -702,6 +703,19 @@ public void testBacklogSize() throws Exception assertEquals(backlogSize, listener.getAcceptQueueSize()); } + @Test + public void testIdleTimeout() throws Exception { + final int idleTimeout = 1000; + Configuration conf = new Configuration(); + conf.setInt(HttpServer2.HTTP_IDLE_TIMEOUT_MS_KEY, idleTimeout); + HttpServer2 srv = createServer("test", conf); + Field f = HttpServer2.class.getDeclaredField("listeners"); + f.setAccessible(true); + List listeners = (List) f.get(srv); + ServerConnector listener = (ServerConnector)listeners.get(0); + assertEquals(idleTimeout, listener.getIdleTimeout()); + } + @Test public void testHttpResponseDefaultHeaders() throws Exception { Configuration conf = new Configuration(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestIsActiveServlet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestIsActiveServlet.java new file mode 100644 index 00000000000..5f5d51e1d7d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestIsActiveServlet.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
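testIdleTimeout above verifies that HttpServer2 now applies HTTP_IDLE_TIMEOUT_MS_KEY to its connectors. A minimal sketch of setting that key when building a server follows; the server name, bind address and 60-second value are assumptions for illustration only.

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.http.HttpServer2;

/** Sketch: build an HttpServer2 with an explicit connector idle timeout. */
public final class IdleTimeoutSketch {
  public static HttpServer2 build() throws IOException {
    Configuration conf = new Configuration();
    conf.setInt(HttpServer2.HTTP_IDLE_TIMEOUT_MS_KEY, 60_000); // 60 seconds
    HttpServer2 server = new HttpServer2.Builder()
        .setName("example")                        // hypothetical name
        .addEndpoint(URI.create("http://localhost:0"))
        .setConf(conf)
        .build();
    server.start();
    return server;
  }
}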
+ */ +package org.apache.hadoop.http; + + +import org.junit.Before; +import org.junit.Test; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintWriter; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + + +/** + * Test if the {@link IsActiveServlet} returns the right answer if the + * underlying service is active. + */ +public class TestIsActiveServlet { + + private IsActiveServlet servlet; + private HttpServletRequest req; + private HttpServletResponse resp; + private ByteArrayOutputStream respOut; + + @Before + public void setUp() throws Exception { + req = mock(HttpServletRequest.class); + resp = mock(HttpServletResponse.class); + respOut = new ByteArrayOutputStream(); + PrintWriter writer = new PrintWriter(respOut); + when(resp.getWriter()).thenReturn(writer); + } + + @Test + public void testSucceedsOnActive() throws IOException { + servlet = new IsActiveServlet() { + @Override + protected boolean isActive() { + return true; + } + }; + + String response = doGet(); + verify(resp, never()).sendError(anyInt(), anyString()); + assertEquals(IsActiveServlet.RESPONSE_ACTIVE, response); + } + + @Test + public void testFailsOnInactive() throws IOException { + servlet = new IsActiveServlet() { + @Override + protected boolean isActive() { + return false; + } + }; + + doGet(); + verify(resp, atLeastOnce()).sendError( + eq(HttpServletResponse.SC_METHOD_NOT_ALLOWED), + eq(IsActiveServlet.RESPONSE_NOT_ACTIVE)); + } + + private String doGet() throws IOException { + servlet.doGet(req, resp); + return new String(respOut.toByteArray(), "UTF-8"); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java index 5af6d6fb56a..21664648080 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java @@ -48,7 +48,7 @@ /** * This testcase issues SSL certificates configures the HttpServer to serve - * HTTPS using the created certficates and calls an echo servlet using the + * HTTPS using the created certificates and calls an echo servlet using the * corresponding HTTPS URL. 
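TestIsActiveServlet above exercises the single extension point of IsActiveServlet, the protected isActive() probe. A minimal concrete subclass is sketched below; the boolean field standing in for a real active/standby check is an assumption.

import org.apache.hadoop.http.IsActiveServlet;

/** Sketch: report a service's active state via IsActiveServlet. */
public class ExampleIsActiveServlet extends IsActiveServlet {
  // Placeholder for a real query, e.g. "is this the active instance".
  private volatile boolean active = true;

  @Override
  protected boolean isActive() {
    return active;
  }
}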
*/ public class TestSSLHttpServer extends HttpServerFunctionalTest { @@ -58,11 +58,15 @@ private static final Logger LOG = LoggerFactory.getLogger(TestSSLHttpServer.class); + private static final String HTTPS_CIPHER_SUITES_KEY = "https.cipherSuites"; + private static final String JAVAX_NET_DEBUG_KEY = "javax.net.debug"; private static Configuration conf; private static HttpServer2 server; private static String keystoresDir; private static String sslConfDir; private static SSLFactory clientSslFactory; + private static String cipherSuitesPropertyValue; + private static String sslDebugPropertyValue; private static final String excludeCiphers = "TLS_ECDHE_RSA_WITH_RC4_128_SHA," + "SSL_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA, \n" + "SSL_RSA_WITH_DES_CBC_SHA," @@ -83,6 +87,9 @@ @BeforeClass public static void setup() throws Exception { + turnOnSSLDebugLogging(); + storeHttpsCipherSuites(); + conf = new Configuration(); conf.setInt(HttpServer2.HTTP_MAX_THREADS_KEY, 10); @@ -127,6 +134,51 @@ public static void cleanup() throws Exception { FileUtil.fullyDelete(new File(BASEDIR)); KeyStoreTestUtil.cleanupSSLConfig(keystoresDir, sslConfDir); clientSslFactory.destroy(); + restoreHttpsCipherSuites(); + restoreSSLDebugLogging(); + } + + /** + * Stores the JVM property value of https.cipherSuites and sets its + * value to an empty string. + * This ensures that the value https.cipherSuites does + * not affect the result of tests. + */ + private static void storeHttpsCipherSuites() { + String cipherSuites = System.getProperty(HTTPS_CIPHER_SUITES_KEY); + if (cipherSuites != null) { + LOG.info( + "Found value for property {}: {}", HTTPS_CIPHER_SUITES_KEY, + cipherSuites); + cipherSuitesPropertyValue = cipherSuites; + } + System.clearProperty(HTTPS_CIPHER_SUITES_KEY); + } + + private static void restoreHttpsCipherSuites() { + if (cipherSuitesPropertyValue != null) { + LOG.info("Restoring property {} to value: {}", HTTPS_CIPHER_SUITES_KEY, + cipherSuitesPropertyValue); + System.setProperty(HTTPS_CIPHER_SUITES_KEY, cipherSuitesPropertyValue); + cipherSuitesPropertyValue = null; + } + } + + private static void turnOnSSLDebugLogging() { + String sslDebug = System.getProperty(JAVAX_NET_DEBUG_KEY); + if (sslDebug != null) { + sslDebugPropertyValue = sslDebug; + } + System.setProperty(JAVAX_NET_DEBUG_KEY, "all"); + } + + private static void restoreSSLDebugLogging() { + if (sslDebugPropertyValue != null) { + System.setProperty(JAVAX_NET_DEBUG_KEY, sslDebugPropertyValue); + sslDebugPropertyValue = null; + } else { + System.clearProperty(JAVAX_NET_DEBUG_KEY); + } } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/file/tfile/TestCompression.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/file/tfile/TestCompression.java index ff6c72a8bbd..b1bf0774974 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/file/tfile/TestCompression.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/file/tfile/TestCompression.java @@ -17,14 +17,28 @@ */ package org.apache.hadoop.io.file.tfile; -import org.junit.Test; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.*; import java.io.IOException; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; public class TestCompression { + @BeforeClass + public static void resetConfigBeforeAll() { + 
Compression.Algorithm.LZO.conf.setBoolean("test.reload.lzo.codec", true); + } + + @AfterClass + public static void resetConfigAfterAll() { + Compression.Algorithm.LZO.conf.setBoolean("test.reload.lzo.codec", false); + } + /** * Regression test for HADOOP-11418. * Verify we can set a LZO codec different from default LZO codec. @@ -38,4 +52,22 @@ public void testConfigureLZOCodec() throws IOException { assertEquals(defaultCodec, Compression.Algorithm.LZO.getCodec().getClass().getName()); } + + + @Test + public void testMisconfiguredLZOCodec() throws Exception { + // Dummy codec + String defaultCodec = "org.apache.hadoop.io.compress.InvalidLzoCodec"; + Compression.Algorithm.conf.set( + Compression.Algorithm.CONF_LZO_CLASS, defaultCodec); + IOException ioEx = LambdaTestUtils.intercept( + IOException.class, + defaultCodec, + () -> Compression.Algorithm.LZO.getCodec()); + + if (!(ioEx.getCause() instanceof ClassNotFoundException)) { + throw ioEx; + } + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java index 84b82e21336..19314c1d501 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Matchers.anyInt; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -1398,6 +1399,50 @@ public void testClientGetTimeout() throws IOException { assertEquals(Client.getTimeout(config), -1); } + @Test(timeout=60000) + public void testSetupConnectionShouldNotBlockShutdown() throws Exception { + // Start server + SocketFactory mockFactory = Mockito.mock(SocketFactory.class); + Server server = new TestServer(1, true); + final InetSocketAddress addr = NetUtils.getConnectAddress(server); + + // Track how many times we retried to set up the connection + final AtomicInteger createSocketCalled = new AtomicInteger(); + + doAnswer(new Answer() { + @Override + public Object answer(InvocationOnMock invocationOnMock) throws Throwable { + createSocketCalled.addAndGet(1); + Thread.sleep(MIN_SLEEP_TIME * 5); + throw new ConnectTimeoutException("fake"); + } + }).when(mockFactory).createSocket(); + final Client client = new Client(LongWritable.class, conf, mockFactory); + + final AtomicBoolean callStarted = new AtomicBoolean(false); + + // Call a random function asynchronously so that we can call stop() + new Thread(new Runnable() { + public void run() { + try { + callStarted.set(true); + call(client, RANDOM.nextLong(), addr, conf); + } catch (IOException ignored) {} + } + }).start(); + + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return callStarted.get() && createSocketCalled.get() == 1; + } + }, 50, 60000); + + // stop() should stop the client immediately without any more retries + client.stop(); + assertEquals(1, createSocketCalled.get()); + } + private void assertRetriesOnSocketTimeouts(Configuration conf, int maxTimeoutRetries) throws IOException { SocketFactory mockFactory = Mockito.mock(SocketFactory.class); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java index 46e36b31722..bba81522a41 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestGroupsCaching.java @@ -561,23 +561,28 @@ public void testExceptionOnBackgroundRefreshHandled() throws Exception { // Then expire that entry timer.advance(4 * 1000); + // Pause the getGroups operation and this will delay the cache refresh + FakeGroupMapping.pause(); // Now get the cache entry - it should return immediately // with the old value and the cache will not have completed // a request to getGroups yet. assertEquals(groups.getGroups("me").size(), 2); assertEquals(startingRequestCount, FakeGroupMapping.getRequestCount()); + // Resume the getGroups operation and the cache can get refreshed + FakeGroupMapping.resume(); - // Now sleep for a short time and re-check the request count. It should have - // increased, but the exception means the cache will not have updated - Thread.sleep(50); + // Now wait for the refresh done, because of the exception, we expect + // a onFailure callback gets called and the counter for failure is 1 + waitForGroupCounters(groups, 0, 0, 0, 1); FakeGroupMapping.setThrowException(false); assertEquals(startingRequestCount + 1, FakeGroupMapping.getRequestCount()); assertEquals(groups.getGroups("me").size(), 2); - // Now sleep another short time - the 3rd call to getGroups above - // will have kicked off another refresh that updates the cache - Thread.sleep(50); + // Now the 3rd call to getGroups above will have kicked off + // another refresh that updates the cache, since it no longer gives + // exception, we now expect the counter for success is 1. 
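The reworked TestGroupsCaching assertions replace fixed Thread.sleep() calls with condition polling (waitForGroupCounters), the usual way to deflake such tests. A generic sketch of that polling pattern with GenericTestUtils.waitFor follows; the 50 ms interval and 10 s timeout are illustrative values.

import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.test.GenericTestUtils;

/** Sketch: wait for an asynchronous condition instead of sleeping. */
public final class WaitForSketch {
  public static void awaitFlag(AtomicBoolean done) throws Exception {
    GenericTestUtils.waitFor(() -> done.get(),
        50,       // re-check every 50 ms
        10_000);  // give up after 10 s
  }
}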
+ waitForGroupCounters(groups, 0, 0, 1, 1); assertEquals(startingRequestCount + 2, FakeGroupMapping.getRequestCount()); assertEquals(groups.getGroups("me").size(), 3); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java index 94779901fdb..011e930e50c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java @@ -47,6 +47,7 @@ import javax.security.auth.Subject; import javax.security.auth.kerberos.KerberosPrincipal; +import javax.security.auth.kerberos.KerberosTicket; import javax.security.auth.kerberos.KeyTab; import javax.security.auth.login.AppConfigurationEntry; import javax.security.auth.login.LoginContext; @@ -61,6 +62,7 @@ import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.Date; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.Set; import java.util.concurrent.Callable; @@ -88,7 +90,10 @@ import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; public class TestUserGroupInformation { @@ -1211,4 +1216,37 @@ public UserGroupInformation run() throws Exception { barrier.await(); assertSame(testUgi1.getSubject(), blockingLookup.get().getSubject()); } + + @Test + public void testKerberosTicketIsDestroyedChecked() throws Exception { + // Create UserGroupInformation + GenericTestUtils.setLogLevel(UserGroupInformation.LOG, Level.DEBUG); + Set users = new HashSet<>(); + users.add(new User("Foo")); + Subject subject = + new Subject(true, users, new HashSet<>(), new HashSet<>()); + UserGroupInformation ugi = spy(new UserGroupInformation(subject)); + + // throw IOException in the middle of the autoRenewalForUserCreds + doThrow(new IOException()).when(ugi).reloginFromTicketCache(); + + // Create and destroy the KerberosTicket, so endTime will be null + Date d = new Date(); + KerberosPrincipal kp = new KerberosPrincipal("Foo"); + KerberosTicket tgt = spy(new KerberosTicket(new byte[]{}, kp, kp, new + byte[]{}, 0, null, d, d, d, d, null)); + tgt.destroy(); + + // run AutoRenewalForUserCredsRunnable with this + UserGroupInformation.AutoRenewalForUserCredsRunnable userCredsRunnable = + ugi.new AutoRenewalForUserCredsRunnable(tgt, + Boolean.toString(Boolean.TRUE), 100); + + // Set the runnable to not to run in a loop + userCredsRunnable.setRunRenewalLoop(false); + // there should be no exception when calling this + userCredsRunnable.run(); + // isDestroyed should be called at least once + Mockito.verify(tgt, atLeastOnce()).isDestroyed(); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestDefaultImpersonationProvider.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestDefaultImpersonationProvider.java new file mode 100644 index 00000000000..ef86697ab14 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestDefaultImpersonationProvider.java @@ -0,0 +1,100 @@ 
+/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import static org.mockito.Mockito.when; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; +import org.mockito.Mockito; + +/** + * Test class for @DefaultImpersonationProvider + */ +public class TestDefaultImpersonationProvider { + + private String proxyUser; + private String user; + private DefaultImpersonationProvider provider; + private UserGroupInformation userGroupInformation = Mockito + .mock(UserGroupInformation.class); + private UserGroupInformation realUserUGI = Mockito + .mock(UserGroupInformation.class); + private Configuration conf; + @Rule + public Timeout globalTimeout = new Timeout(10000); + + @Before + public void setup() { + conf = new Configuration(); + provider = new DefaultImpersonationProvider(); + + // Setup 3 proxy users + conf.set("hadoop.proxyuser.fakeuser.groups", "*"); + conf.set("hadoop.proxyuser.fakeuser.hosts", "*"); + conf.set("hadoop.proxyuser.test.user.groups", "*"); + conf.set("hadoop.proxyuser.test.user.hosts", "*"); + conf.set("hadoop.proxyuser.test user2.groups", "*"); + conf.set("hadoop.proxyuser.test user2.hosts", "*"); + provider.setConf(conf); + provider.init(ProxyUsers.CONF_HADOOP_PROXYUSER); + } + + @Test + public void testAuthorizationSuccess() throws AuthorizationException { + proxyUser = "fakeuser"; + user = "dummyUser"; + when(realUserUGI.getShortUserName()).thenReturn(proxyUser); + when(userGroupInformation.getRealUser()).thenReturn(realUserUGI); + provider.authorize(userGroupInformation, "2.2.2.2"); + + user = "somerandomuser"; + proxyUser = "test.user"; + when(realUserUGI.getShortUserName()).thenReturn(proxyUser); + when(userGroupInformation.getRealUser()).thenReturn(realUserUGI); + provider.authorize(userGroupInformation, "2.2.2.2"); + } + + @Test + public void testAuthorizationFailure() throws Exception { + user = "dummyUser"; + proxyUser = "test user2"; + when(realUserUGI.getShortUserName()).thenReturn(proxyUser); + when(realUserUGI.getUserName()).thenReturn(proxyUser); + when(userGroupInformation.getUserName()).thenReturn(user); + when(userGroupInformation.getRealUser()).thenReturn(realUserUGI); + LambdaTestUtils.intercept(AuthorizationException.class, "User: " + + proxyUser + " is not allowed to impersonate " + user, () -> + provider.authorize(userGroupInformation, "2.2.2.2")); + } + + @After + public void clear() { + provider = null; + conf = null; + userGroupInformation = null; + realUserUGI = null; + } + +} diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/TestToken.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/TestToken.java index f6e513389b5..3a3567ce2a4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/TestToken.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/TestToken.java @@ -21,6 +21,7 @@ import java.io.*; import java.util.Arrays; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.io.*; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; import org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier; @@ -100,6 +101,23 @@ public void testEncodeWritable() throws Exception { } } + /* + * Test decodeWritable() with null newValue string argument, + * should throw HadoopIllegalArgumentException. + */ + @Test + public void testDecodeWritableArgSanityCheck() throws Exception { + Token token = + new Token(); + try { + token.decodeFromUrlString(null); + fail("Should have thrown HadoopIllegalArgumentException"); + } + catch (HadoopIllegalArgumentException e) { + Token.LOG.info("Test decodeWritable() sanity check success."); + } + } + @Test public void testDecodeIdentifier() throws IOException { TestDelegationTokenSecretManager secretManager = diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 3e9da1b45fd..d68f4e210d1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -501,7 +501,7 @@ public void clearOutput() { * method is called, then waits on another before continuing. 
*/ public static class DelayAnswer implements Answer { - private final Log LOG; + private final org.slf4j.Logger LOG; private final CountDownLatch fireLatch = new CountDownLatch(1); private final CountDownLatch waitLatch = new CountDownLatch(1); @@ -514,7 +514,7 @@ public void clearOutput() { private volatile Throwable thrown; private volatile Object returnValue; - public DelayAnswer(Log log) { + public DelayAnswer(org.slf4j.Logger log) { this.LOG = log; } @@ -611,13 +611,13 @@ public int getResultCount() { */ public static class DelegateAnswer implements Answer { private final Object delegate; - private final Log log; + private final org.slf4j.Logger log; public DelegateAnswer(Object delegate) { this(null, delegate); } - public DelegateAnswer(Log log, Object delegate) { + public DelegateAnswer(org.slf4j.Logger log, Object delegate) { this.log = log; this.delegate = delegate; } @@ -661,7 +661,7 @@ public SleepAnswer(int minSleepTime, int maxSleepTime) { public Object answer(InvocationOnMock invocation) throws Throwable { boolean interrupted = false; try { - Thread.sleep(r.nextInt(maxSleepTime) + minSleepTime); + Thread.sleep(r.nextInt(maxSleepTime - minSleepTime) + minSleepTime); } catch (InterruptedException ie) { interrupted = true; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShutdownHookManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShutdownHookManager.java index d53982363d3..03fa903170f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShutdownHookManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShutdownHookManager.java @@ -17,97 +17,285 @@ */ package org.apache.hadoop.util; -import org.apache.commons.lang3.exception.ExceptionUtils; -import org.slf4j.LoggerFactory; -import org.junit.Assert; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.junit.After; import org.junit.Test; import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configuration; import static java.lang.Thread.sleep; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.SERVICE_SHUTDOWN_TIMEOUT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.SERVICE_SHUTDOWN_TIMEOUT_DEFAULT; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; public class TestShutdownHookManager { + static final Logger LOG = LoggerFactory.getLogger(TestShutdownHookManager.class.getName()); + /** + * remove all the shutdown hooks so that they never get invoked later + * on in this test process. + */ + @After + public void clearShutdownHooks() { + ShutdownHookManager.get().clearShutdownHooks(); + } + + /** + * Verify hook registration, then execute the hook callback stage + * of shutdown to verify invocation, execution order and timeout + * processing. 
+ */ @Test public void shutdownHookManager() { ShutdownHookManager mgr = ShutdownHookManager.get(); - Assert.assertNotNull(mgr); - Assert.assertEquals(0, mgr.getShutdownHooksInOrder().size()); - Runnable hook1 = new Runnable() { - @Override - public void run() { - LOG.info("Shutdown hook1 complete."); - } - }; - Runnable hook2 = new Runnable() { - @Override - public void run() { - LOG.info("Shutdown hook2 complete."); - } - }; - - Runnable hook3 = new Runnable() { - @Override - public void run() { - try { - sleep(3000); - LOG.info("Shutdown hook3 complete."); - } catch (InterruptedException ex) { - LOG.info("Shutdown hook3 interrupted exception:", - ExceptionUtils.getStackTrace(ex)); - Assert.fail("Hook 3 should not timeout."); - } - } - }; - - Runnable hook4 = new Runnable() { - @Override - public void run() { - try { - sleep(3500); - LOG.info("Shutdown hook4 complete."); - Assert.fail("Hook 4 should timeout"); - } catch (InterruptedException ex) { - LOG.info("Shutdown hook4 interrupted exception:", - ExceptionUtils.getStackTrace(ex)); - } - } - }; + assertNotNull("No ShutdownHookManager", mgr); + assertEquals(0, mgr.getShutdownHooksInOrder().size()); + Hook hook1 = new Hook("hook1", 0, false); + Hook hook2 = new Hook("hook2", 0, false); + Hook hook3 = new Hook("hook3", 1000, false); + Hook hook4 = new Hook("hook4", 25000, true); + Hook hook5 = new Hook("hook5", + (SERVICE_SHUTDOWN_TIMEOUT_DEFAULT + 1) * 1000, true); mgr.addShutdownHook(hook1, 0); - Assert.assertTrue(mgr.hasShutdownHook(hook1)); - Assert.assertEquals(1, mgr.getShutdownHooksInOrder().size()); - Assert.assertEquals(hook1, mgr.getShutdownHooksInOrder().get(0).getHook()); - mgr.removeShutdownHook(hook1); - Assert.assertFalse(mgr.hasShutdownHook(hook1)); + assertTrue(mgr.hasShutdownHook(hook1)); + assertEquals(1, mgr.getShutdownHooksInOrder().size()); + assertEquals(hook1, mgr.getShutdownHooksInOrder().get(0).getHook()); + assertTrue(mgr.removeShutdownHook(hook1)); + assertFalse(mgr.hasShutdownHook(hook1)); + assertFalse(mgr.removeShutdownHook(hook1)); mgr.addShutdownHook(hook1, 0); - Assert.assertTrue(mgr.hasShutdownHook(hook1)); - Assert.assertEquals(1, mgr.getShutdownHooksInOrder().size()); - Assert.assertTrue(mgr.hasShutdownHook(hook1)); - Assert.assertEquals(1, mgr.getShutdownHooksInOrder().size()); + assertTrue(mgr.hasShutdownHook(hook1)); + assertEquals(1, mgr.getShutdownHooksInOrder().size()); + assertEquals(SERVICE_SHUTDOWN_TIMEOUT_DEFAULT, + mgr.getShutdownHooksInOrder().get(0).getTimeout()); mgr.addShutdownHook(hook2, 1); - Assert.assertTrue(mgr.hasShutdownHook(hook1)); - Assert.assertTrue(mgr.hasShutdownHook(hook2)); - Assert.assertEquals(2, mgr.getShutdownHooksInOrder().size()); - Assert.assertEquals(hook2, mgr.getShutdownHooksInOrder().get(0).getHook()); - Assert.assertEquals(hook1, mgr.getShutdownHooksInOrder().get(1).getHook()); + assertTrue(mgr.hasShutdownHook(hook1)); + assertTrue(mgr.hasShutdownHook(hook2)); + assertEquals(2, mgr.getShutdownHooksInOrder().size()); + assertEquals(hook2, mgr.getShutdownHooksInOrder().get(0).getHook()); + assertEquals(hook1, mgr.getShutdownHooksInOrder().get(1).getHook()); // Test hook finish without timeout mgr.addShutdownHook(hook3, 2, 4, TimeUnit.SECONDS); - Assert.assertTrue(mgr.hasShutdownHook(hook3)); - Assert.assertEquals(hook3, mgr.getShutdownHooksInOrder().get(0).getHook()); - Assert.assertEquals(4, mgr.getShutdownHooksInOrder().get(0).getTimeout()); - - // Test hook finish with timeout - mgr.addShutdownHook(hook4, 3, 2, TimeUnit.SECONDS); - 
Assert.assertTrue(mgr.hasShutdownHook(hook4)); - Assert.assertEquals(hook4, mgr.getShutdownHooksInOrder().get(0).getHook()); - Assert.assertEquals(2, mgr.getShutdownHooksInOrder().get(0).getTimeout()); - LOG.info("Shutdown starts here"); + assertTrue(mgr.hasShutdownHook(hook3)); + assertEquals(hook3, mgr.getShutdownHooksInOrder().get(0).getHook()); + assertEquals(4, mgr.getShutdownHooksInOrder().get(0).getTimeout()); + + // Test hook finish with timeout; highest priority + int hook4timeout = 2; + mgr.addShutdownHook(hook4, 3, hook4timeout, TimeUnit.SECONDS); + assertTrue(mgr.hasShutdownHook(hook4)); + assertEquals(hook4, mgr.getShutdownHooksInOrder().get(0).getHook()); + assertEquals(2, mgr.getShutdownHooksInOrder().get(0).getTimeout()); + + // a default timeout hook and verify it gets the default timeout + mgr.addShutdownHook(hook5, 5); + ShutdownHookManager.HookEntry hookEntry5 = mgr.getShutdownHooksInOrder() + .get(0); + assertEquals(hook5, hookEntry5.getHook()); + assertEquals("default timeout not used", + ShutdownHookManager.getShutdownTimeout(new Configuration()), + hookEntry5.getTimeout()); + assertEquals("hook priority", 5, hookEntry5.getPriority()); + // remove this to avoid a longer sleep in the test run + assertTrue("failed to remove " + hook5, + mgr.removeShutdownHook(hook5)); + + + // now execute the hook shutdown sequence + INVOCATION_COUNT.set(0); + LOG.info("invoking executeShutdown()"); + int timeouts = ShutdownHookManager.executeShutdown(); + LOG.info("Shutdown completed"); + assertEquals("Number of timed out hooks", 1, timeouts); + + List hooks + = mgr.getShutdownHooksInOrder(); + + // analyze the hooks + for (ShutdownHookManager.HookEntry entry : hooks) { + Hook hook = (Hook) entry.getHook(); + assertTrue("Was not invoked " + hook, hook.invoked); + // did any hook raise an exception? + hook.maybeThrowAssertion(); + } + + // check the state of some of the invoked hooks + // hook4 was invoked first, but it timed out. + assertEquals("Expected to be invoked first " + hook4, + 1, hook4.invokedOrder); + assertFalse("Expected to time out " + hook4, hook4.completed); + + + // hook1 completed, but in order after the others, so its start time + // is the longest. + assertTrue("Expected to complete " + hook1, hook1.completed); + long invocationInterval = hook1.startTime - hook4.startTime; + assertTrue("invocation difference too short " + invocationInterval, + invocationInterval >= hook4timeout * 1000); + assertTrue("sleeping hook4 blocked other threads for " + invocationInterval, + invocationInterval < hook4.sleepTime); + + // finally, clear the hooks + mgr.clearShutdownHooks(); + // and verify that the hooks are empty + assertFalse(mgr.hasShutdownHook(hook1)); + assertEquals("shutdown hook list is not empty", + 0, + mgr.getShutdownHooksInOrder().size()); + } + + @Test + public void testShutdownTimeoutConfiguration() throws Throwable { + // set the shutdown timeout and verify it can be read back. + Configuration conf = new Configuration(); + long shutdownTimeout = 5; + conf.setTimeDuration(SERVICE_SHUTDOWN_TIMEOUT, + shutdownTimeout, TimeUnit.SECONDS); + assertEquals(SERVICE_SHUTDOWN_TIMEOUT, + shutdownTimeout, + ShutdownHookManager.getShutdownTimeout(conf)); + } + + /** + * Verify that low timeouts simply fall back to + * {@link ShutdownHookManager#TIMEOUT_MINIMUM}. + */ + @Test + public void testShutdownTimeoutBadConfiguration() throws Throwable { + // set the shutdown timeout and verify it can be read back. 
+ Configuration conf = new Configuration(); + long shutdownTimeout = 50; + conf.setTimeDuration(SERVICE_SHUTDOWN_TIMEOUT, + shutdownTimeout, TimeUnit.NANOSECONDS); + assertEquals(SERVICE_SHUTDOWN_TIMEOUT, + ShutdownHookManager.TIMEOUT_MINIMUM, + ShutdownHookManager.getShutdownTimeout(conf)); + } + + /** + * Verifies that a hook cannot be re-registered: an attempt to do so + * will simply be ignored. + */ + @Test + public void testDuplicateRegistration() throws Throwable { + ShutdownHookManager mgr = ShutdownHookManager.get(); + Hook hook = new Hook("hook1", 0, false); + + // add the hook + mgr.addShutdownHook(hook, 2, 1, TimeUnit.SECONDS); + + // add it at a higher priority. This will be ignored. + mgr.addShutdownHook(hook, 5); + List hookList + = mgr.getShutdownHooksInOrder(); + assertEquals("Hook added twice", 1, hookList.size()); + ShutdownHookManager.HookEntry entry = hookList.get(0); + assertEquals("priority of hook", 2, entry.getPriority()); + assertEquals("timeout of hook", 1, entry.getTimeout()); + + // remove the hook + assertTrue("failed to remove hook " + hook, mgr.removeShutdownHook(hook)); + // which will fail a second time + assertFalse("expected hook removal to fail", mgr.removeShutdownHook(hook)); + + // now register it + mgr.addShutdownHook(hook, 5); + hookList = mgr.getShutdownHooksInOrder(); + entry = hookList.get(0); + assertEquals("priority of hook", 5, entry.getPriority()); + assertNotEquals("timeout of hook", 1, entry.getTimeout()); + + } + + private static final AtomicInteger INVOCATION_COUNT = new AtomicInteger(); + + /** + * Hooks for testing; save state for ease of asserting on + * invocation. + */ + private class Hook implements Runnable { + + private final String name; + private final long sleepTime; + private final boolean expectFailure; + private AssertionError assertion; + private boolean invoked; + private int invokedOrder; + private boolean completed; + private boolean interrupted; + private long startTime; + + Hook(final String name, + final long sleepTime, + final boolean expectFailure) { + this.name = name; + this.sleepTime = sleepTime; + this.expectFailure = expectFailure; + } + + @Override + public void run() { + try { + invoked = true; + invokedOrder = INVOCATION_COUNT.incrementAndGet(); + startTime = System.currentTimeMillis(); + LOG.info("Starting shutdown of {} with sleep time of {}", + name, sleepTime); + if (sleepTime > 0) { + sleep(sleepTime); + } + LOG.info("Completed shutdown of {}", name); + completed = true; + if (expectFailure) { + assertion = new AssertionError("Expected a failure of " + name); + } + } catch (InterruptedException ex) { + LOG.info("Shutdown {} interrupted exception", name, ex); + interrupted = true; + if (!expectFailure) { + assertion = new AssertionError("Timeout of " + name, ex); + } + } + maybeThrowAssertion(); + } + + /** + * Raise any exception generated during the shutdown process. + * @throws AssertionError any assertion from the shutdown. 
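The new shutdown-hook tests cover per-hook timeouts and the SERVICE_SHUTDOWN_TIMEOUT fallback. A small sketch of registering a hook with an explicit timeout, the API those tests exercise, is given below; the priority, timeout value and hook body are assumptions.

import java.util.concurrent.TimeUnit;

import org.apache.hadoop.util.ShutdownHookManager;

/** Sketch: register a cleanup hook with its own timeout. */
public final class ShutdownHookSketch {
  public static void register(Runnable cleanup) {
    ShutdownHookManager mgr = ShutdownHookManager.get();
    // Priority 10; this hook gets at most 30 seconds before being interrupted.
    mgr.addShutdownHook(cleanup, 10, 30, TimeUnit.SECONDS);
    // The two-argument addShutdownHook(cleanup, 10) would instead fall back
    // to the SERVICE_SHUTDOWN_TIMEOUT default verified by the tests above.
  }
}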
+ */ + void maybeThrowAssertion() throws AssertionError { + if (assertion != null) { + throw assertion; + } + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Hook{"); + sb.append("name='").append(name).append('\''); + sb.append(", sleepTime=").append(sleepTime); + sb.append(", expectFailure=").append(expectFailure); + sb.append(", invoked=").append(invoked); + sb.append(", invokedOrder=").append(invokedOrder); + sb.append(", completed=").append(completed); + sb.append(", interrupted=").append(interrupted); + sb.append('}'); + return sb.toString(); + } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestStringUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestStringUtils.java index 3fdc1bb8f8b..f05b5895676 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestStringUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestStringUtils.java @@ -503,6 +503,15 @@ public void testEscapeHTML() { escapedStr, StringUtils.escapeHTML(htmlStr)); } + @Test + public void testCreateStartupShutdownMessage() { + //pass null args and method must still return a string beginning with + // "STARTUP_MSG" + String msg = StringUtils.createStartupShutdownMessage( + this.getClass().getName(), "test.host", null); + assertTrue(msg.startsWith("STARTUP_MSG:")); + } + // Benchmark for StringUtils split public static void main(String []args) { final String TO_SPLIT = "foo,bar,baz,blah,blah"; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java index 486e89a6265..a2156ee6d93 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.util.Arrays; @@ -30,6 +31,7 @@ import org.apache.hadoop.util.ZKUtil; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Stat; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -97,6 +99,27 @@ public void testChildren() throws Exception { assertEquals(2, children.size()); } + @Test + public void testGetStringData() throws Exception { + String node1 = "/node1"; + String node2 = "/node2"; + assertFalse(curator.exists(node1)); + curator.create(node1); + assertNull(curator.getStringData(node1)); + + byte[] setData = "setData".getBytes("UTF-8"); + curator.setData(node1, setData, -1); + assertEquals("setData", curator.getStringData(node1)); + + Stat stat = new Stat(); + assertFalse(curator.exists(node2)); + curator.create(node2); + assertNull(curator.getStringData(node2, stat)); + + curator.setData(node2, setData, -1); + assertEquals("setData", curator.getStringData(node2, stat)); + + } @Test public void testTransaction() throws Exception { List zkAcl = ZKUtil.parseACLs(CommonConfigurationKeys.ZK_ACL_DEFAULT); diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml index 
6a3d53ad2de..1798563e224 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml @@ -839,6 +839,57 @@ + + help: help for touch + + -help touch + + + + + + RegexpComparator + ^-touch \[-a\] \[-m\] \[-t TIMESTAMP \] \[-c\] <path> \.\.\. :( )* + + + RegexpComparator + ^\s*Updates the access and modification times of the file specified by the <path> to( )* + + + RegexpComparator + ^\s*the current time. If the file does not exist, then a zero length file is created( )* + + + RegexpComparator + ^\s*at <path> with current time as the timestamp of that <path>.( )* + + + RegexpComparator + ^\s*-a\s+Change only the access time( )* + + + RegexpComparator + ^\s*-a\s+Change only the access time( )* + + + RegexpComparator + ^\s*-m\s+Change only the modification time( )* + + + RegexpComparator + ^\s*-t\s+TIMESTAMP\s+Use specified timestamp \(in format yyyyMMddHHmmss\) instead of + + + RegexpComparator + ^\s*current time( )* + + + RegexpComparator + ^\s*-c\s+Do not create any files( )* + + + + help: help for touchz diff --git a/hadoop-common-project/hadoop-kms/src/main/conf/kms-log4j.properties b/hadoop-common-project/hadoop-kms/src/main/conf/kms-log4j.properties index 04a3cf3566d..e2afd41be08 100644 --- a/hadoop-common-project/hadoop-kms/src/main/conf/kms-log4j.properties +++ b/hadoop-common-project/hadoop-kms/src/main/conf/kms-log4j.properties @@ -37,4 +37,6 @@ log4j.logger.org.apache.hadoop=INFO log4j.logger.com.sun.jersey.server.wadl.generators.WadlGeneratorJAXBGrammarGenerator=OFF # make zookeeper log level an explicit config, and not changing with rootLogger. log4j.logger.org.apache.zookeeper=INFO -log4j.logger.org.apache.curator=INFO \ No newline at end of file +log4j.logger.org.apache.curator=INFO +# make jetty log level an explicit config, and not changing with rootLogger. 
+log4j.logger.org.eclipse.jetty=INFO \ No newline at end of file diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java index 18eec19a72d..35ffb429816 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java @@ -20,6 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.log4j.PropertyConfigurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -103,6 +104,8 @@ public static final boolean KEY_AUTHORIZATION_ENABLE_DEFAULT = true; + private static final String LOG4J_PROPERTIES = "kms-log4j.properties"; + static { Configuration.addDefaultResource(KMS_DEFAULT_XML); Configuration.addDefaultResource(KMS_SITE_XML); @@ -159,4 +162,32 @@ public static boolean isACLsFileNewer(long time) { } return newer; } + + public static void initLogging() { + String confDir = System.getProperty(KMS_CONFIG_DIR); + if (confDir == null) { + throw new RuntimeException("System property '" + + KMSConfiguration.KMS_CONFIG_DIR + "' not defined"); + } + if (System.getProperty("log4j.configuration") == null) { + System.setProperty("log4j.defaultInitOverride", "true"); + boolean fromClasspath = true; + File log4jConf = new File(confDir, LOG4J_PROPERTIES).getAbsoluteFile(); + if (log4jConf.exists()) { + PropertyConfigurator.configureAndWatch(log4jConf.getPath(), 1000); + fromClasspath = false; + } else { + ClassLoader cl = Thread.currentThread().getContextClassLoader(); + URL log4jUrl = cl.getResource(LOG4J_PROPERTIES); + if (log4jUrl != null) { + PropertyConfigurator.configure(log4jUrl); + } + } + LOG.debug("KMS log starting"); + if (fromClasspath) { + LOG.warn("Log4j configuration file '{}' not found", LOG4J_PROPERTIES); + LOG.warn("Logging with INFO level to standard output"); + } + } + } } diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java index cb4bf7e15af..0640e25b76c 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java @@ -17,10 +17,8 @@ */ package org.apache.hadoop.crypto.key.kms.server; -import java.io.File; import java.io.IOException; import java.net.URI; -import java.net.URL; import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener; @@ -37,14 +35,13 @@ import org.apache.hadoop.crypto.key.KeyProviderFactory; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.VersionInfo; -import org.apache.log4j.PropertyConfigurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @InterfaceAudience.Private public class KMSWebApp implements ServletContextListener { - private static final String LOG4J_PROPERTIES = "kms-log4j.properties"; + private static final Logger LOG = LoggerFactory.getLogger(KMSWebApp.class); private static final String METRICS_PREFIX = "hadoop.kms."; private static final String ADMIN_CALLS_METER = METRICS_PREFIX + @@ -66,7 +63,6 @@ 
private static final String REENCRYPT_EEK_BATCH_METER = METRICS_PREFIX + "reencrypt_eek_batch.calls.meter"; - private static Logger LOG; private static MetricRegistry metricRegistry; private JmxReporter jmxReporter; @@ -84,42 +80,10 @@ private static KMSAudit kmsAudit; private static KeyProviderCryptoExtension keyProviderCryptoExtension; - private void initLogging(String confDir) { - if (System.getProperty("log4j.configuration") == null) { - System.setProperty("log4j.defaultInitOverride", "true"); - boolean fromClasspath = true; - File log4jConf = new File(confDir, LOG4J_PROPERTIES).getAbsoluteFile(); - if (log4jConf.exists()) { - PropertyConfigurator.configureAndWatch(log4jConf.getPath(), 1000); - fromClasspath = false; - } else { - ClassLoader cl = Thread.currentThread().getContextClassLoader(); - URL log4jUrl = cl.getResource(LOG4J_PROPERTIES); - if (log4jUrl != null) { - PropertyConfigurator.configure(log4jUrl); - } - } - LOG = LoggerFactory.getLogger(KMSWebApp.class); - LOG.debug("KMS log starting"); - if (fromClasspath) { - LOG.warn("Log4j configuration file '{}' not found", LOG4J_PROPERTIES); - LOG.warn("Logging with INFO level to standard output"); - } - } else { - LOG = LoggerFactory.getLogger(KMSWebApp.class); - } - } - @Override public void contextInitialized(ServletContextEvent sce) { try { - String confDir = System.getProperty(KMSConfiguration.KMS_CONFIG_DIR); - if (confDir == null) { - throw new RuntimeException("System property '" + - KMSConfiguration.KMS_CONFIG_DIR + "' not defined"); - } kmsConf = KMSConfiguration.getKMSConf(); - initLogging(confDir); UserGroupInformation.setConfiguration(kmsConf); LOG.info("-------------------------------------------------------------"); LOG.info(" Java runtime version : {}", System.getProperty( diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java index 19f72271271..036231de70d 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebServer.java @@ -166,6 +166,7 @@ public URL getKMSUrl() { } public static void main(String[] args) throws Exception { + KMSConfiguration.initLogging(); StringUtils.startupShutdownMessage(KMSWebServer.class, args, LOG); Configuration conf = new ConfigurationWithLogging( KMSConfiguration.getKMSConf()); diff --git a/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml b/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml index 9f4171bf793..783f4e6c03b 100644 --- a/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml +++ b/hadoop-common-project/hadoop-kms/src/main/resources/kms-default.xml @@ -93,6 +93,21 @@ + + hadoop.http.socket.backlog.size + 500 + + KMS Server accept queue size. + + + + + hadoop.http.idle_timeout.ms + 1000 + + KMS Server connection timeout in milliseconds. 
+ + diff --git a/hadoop-common-project/hadoop-kms/src/test/resources/log4j.properties b/hadoop-common-project/hadoop-kms/src/test/resources/log4j.properties index e319af66569..b8e6353b393 100644 --- a/hadoop-common-project/hadoop-kms/src/test/resources/log4j.properties +++ b/hadoop-common-project/hadoop-kms/src/test/resources/log4j.properties @@ -31,4 +31,6 @@ log4j.logger.org.apache.directory.server.core=OFF log4j.logger.org.apache.hadoop.util.NativeCodeLoader=OFF # make zookeeper log level an explicit config, and not changing with rootLogger. log4j.logger.org.apache.zookeeper=INFO -log4j.logger.org.apache.curator=INFO \ No newline at end of file +log4j.logger.org.apache.curator=INFO +# make jetty log level an explicit config, and not changing with rootLogger. +log4j.logger.org.eclipse.jetty=INFO \ No newline at end of file diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index 5de6759ce9b..da05015e437 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -276,23 +276,6 @@ maven-resources-plugin - - copy-docker-compose - - copy-resources - - prepare-package - - ${project.build.directory}/compose - - - - src/main/compose - true - - - - copy-dockerfile diff --git a/hadoop-dist/src/main/compose/README.md b/hadoop-dist/src/main/compose/README.md new file mode 100644 index 00000000000..8189d2c169a --- /dev/null +++ b/hadoop-dist/src/main/compose/README.md @@ -0,0 +1,51 @@ + + +# Docker cluster definitions + +This directory contains multiple docker-compose cluster definitions to start a local pseudo-cluster with different configurations. + +They make it easy to start a local (multi-node-like) pseudo-cluster with docker and docker-compose; they are not intended for production use. + +You can find more information in the specific subdirectories, but in general you can use the following commands: + +## Usage + +To start a cluster, go to a subdirectory and start it: + +``` +docker-compose up -d +``` + +You can check the logs of all the components with: + +``` +docker-compose logs +``` + +In case of a problem you can destroy the cluster and delete all the local state with: + +``` +docker-compose down +``` + +(Note: a simple `docker-compose stop` may not delete all the local data.) + +You can scale the components up and down: + +``` +docker-compose scale datanode=5 +``` + +The key web UI ports are usually published on the docker host. diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/.env b/hadoop-dist/src/main/compose/ozone-hdfs/.env similarity index 93% rename from hadoop-ozone/acceptance-test/src/test/acceptance/basic/.env rename to hadoop-dist/src/main/compose/ozone-hdfs/.env index 98234cb1122..c437513bbd8 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/.env +++ b/hadoop-dist/src/main/compose/ozone-hdfs/.env @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License.
-OZONEDIR=../../../../../../hadoop-dist/target/ozone +HADOOP_VERSION=3.1.0 \ No newline at end of file diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/docker-compose.yaml b/hadoop-dist/src/main/compose/ozone-hdfs/docker-compose.yaml similarity index 68% rename from hadoop-ozone/acceptance-test/src/test/acceptance/basic/docker-compose.yaml rename to hadoop-dist/src/main/compose/ozone-hdfs/docker-compose.yaml index 99f28310f15..b89052dd0cc 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/docker-compose.yaml +++ b/hadoop-dist/src/main/compose/ozone-hdfs/docker-compose.yaml @@ -16,35 +16,45 @@ version: "3" services: + namenode: + image: flokkr/hadoop:${HADOOP_VERSION} + ports: + - 9870:9870 + env_file: + - ./docker-config + environment: + ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name" + command: ["hdfs", "namenode"] datanode: - image: apache/hadoop-runner - volumes: - - ${OZONEDIR}:/opt/hadoop + image: flokkr/hadoop:${HADOOP_VERSION} ports: - 9864 - command: ["/opt/hadoop/bin/ozone","datanode"] + volumes: + - ../..:/opt/ozone + command: ["hdfs","datanode"] + environment: + HADOOP_CLASSPATH: /opt/ozone/share/hadoop/ozoneplugin/*.jar env_file: - ./docker-config ozoneManager: image: apache/hadoop-runner - hostname: ozoneManager volumes: - - ${OZONEDIR}:/opt/hadoop + - ../..:/opt/hadoop ports: - - 9874 + - 9874:9874 environment: ENSURE_OM_INITIALIZED: /data/metadata/ozoneManager/current/VERSION env_file: - ./docker-config - command: ["/opt/hadoop/bin/ozone","om"] + command: ["ozone","om"] scm: image: apache/hadoop-runner volumes: - - ${OZONEDIR}:/opt/hadoop + - ../..:/opt/hadoop ports: - - 9876 + - 9876:9876 env_file: - ./docker-config environment: ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION - command: ["/opt/hadoop/bin/ozone","scm"] + command: ["ozone","scm"] diff --git a/hadoop-dist/src/main/compose/ozone-hdfs/docker-config b/hadoop-dist/src/main/compose/ozone-hdfs/docker-config new file mode 100644 index 00000000000..3b2819fbf77 --- /dev/null +++ b/hadoop-dist/src/main/compose/ozone-hdfs/docker-config @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +OZONE-SITE.XML_ozone.om.address=ozoneManager +OZONE-SITE.XML_ozone.om.http-address=ozoneManager:9874 +OZONE-SITE.XML_ozone.scm.names=scm +OZONE-SITE.XML_ozone.enabled=true +OZONE-SITE.XML_ozone.scm.datanode.id=/data/datanode.id +OZONE-SITE.XML_ozone.scm.block.client.address=scm +OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata +OZONE-SITE.XML_ozone.scm.client.address=scm +OZONE-SITE.XML_ozone.replication=1 +OZONE-SITE.XML_hdds.datanode.plugins=org.apache.hadoop.ozone.web.OzoneHddsDatanodeService + +HDFS-SITE.XML_dfs.datanode.plugins=org.apache.hadoop.ozone.HddsDatanodeService +HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:9000 +HDFS-SITE.XML_rpc.metrics.quantile.enable=true +HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 + +LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout +LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender +LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n + +#Enable this variable to print out all hadoop rpc traffic to the stdout. See http://byteman.jboss.org/ to define your own instrumentation. +#BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm + +#LOG4J2.PROPERTIES_* are for Ozone Audit Logging +LOG4J2.PROPERTIES_monitorInterval=30 +LOG4J2.PROPERTIES_filter=read,write +LOG4J2.PROPERTIES_filter.read.type=MarkerFilter +LOG4J2.PROPERTIES_filter.read.marker=READ +LOG4J2.PROPERTIES_filter.read.onMatch=DENY +LOG4J2.PROPERTIES_filter.read.onMismatch=NEUTRAL +LOG4J2.PROPERTIES_filter.write.type=MarkerFilter +LOG4J2.PROPERTIES_filter.write.marker=WRITE +LOG4J2.PROPERTIES_filter.write.onMatch=NEUTRAL +LOG4J2.PROPERTIES_filter.write.onMismatch=NEUTRAL +LOG4J2.PROPERTIES_appenders=console, rolling +LOG4J2.PROPERTIES_appender.console.type=Console +LOG4J2.PROPERTIES_appender.console.name=STDOUT +LOG4J2.PROPERTIES_appender.console.layout.type=PatternLayout +LOG4J2.PROPERTIES_appender.console.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n +LOG4J2.PROPERTIES_appender.rolling.type=RollingFile +LOG4J2.PROPERTIES_appender.rolling.name=RollingFile +LOG4J2.PROPERTIES_appender.rolling.fileName =${sys:hadoop.log.dir}/om-audit-${hostName}.log +LOG4J2.PROPERTIES_appender.rolling.filePattern=${sys:hadoop.log.dir}/om-audit-${hostName}-%d{yyyy-MM-dd-HH-mm-ss}-%i.log.gz +LOG4J2.PROPERTIES_appender.rolling.layout.type=PatternLayout +LOG4J2.PROPERTIES_appender.rolling.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n +LOG4J2.PROPERTIES_appender.rolling.policies.type=Policies +LOG4J2.PROPERTIES_appender.rolling.policies.time.type=TimeBasedTriggeringPolicy +LOG4J2.PROPERTIES_appender.rolling.policies.time.interval=86400 +LOG4J2.PROPERTIES_appender.rolling.policies.size.type=SizeBasedTriggeringPolicy +LOG4J2.PROPERTIES_appender.rolling.policies.size.size=64MB +LOG4J2.PROPERTIES_loggers=audit +LOG4J2.PROPERTIES_logger.audit.type=AsyncLogger +LOG4J2.PROPERTIES_logger.audit.name=OMAudit +LOG4J2.PROPERTIES_logger.audit.level=INFO +LOG4J2.PROPERTIES_logger.audit.appenderRefs=rolling +LOG4J2.PROPERTIES_logger.audit.appenderRef.file.ref=RollingFile +LOG4J2.PROPERTIES_rootLogger.level=INFO +LOG4J2.PROPERTIES_rootLogger.appenderRefs=stdout +LOG4J2.PROPERTIES_rootLogger.appenderRef.stdout.ref=STDOUT diff --git a/hadoop-dist/src/main/compose/ozone/docker-compose.yaml b/hadoop-dist/src/main/compose/ozone/docker-compose.yaml index 
bb5e8dd5354..0a6a9d80280 100644 --- a/hadoop-dist/src/main/compose/ozone/docker-compose.yaml +++ b/hadoop-dist/src/main/compose/ozone/docker-compose.yaml @@ -19,7 +19,7 @@ services: datanode: image: apache/hadoop-runner volumes: - - ../../ozone:/opt/hadoop + - ../..:/opt/hadoop ports: - 9864 command: ["/opt/hadoop/bin/ozone","datanode"] @@ -28,7 +28,7 @@ services: ozoneManager: image: apache/hadoop-runner volumes: - - ../../ozone:/opt/hadoop + - ../..:/opt/hadoop ports: - 9874:9874 environment: @@ -39,7 +39,7 @@ services: scm: image: apache/hadoop-runner volumes: - - ../../ozone:/opt/hadoop + - ../..:/opt/hadoop ports: - 9876:9876 env_file: diff --git a/hadoop-dist/src/main/compose/ozone/docker-config b/hadoop-dist/src/main/compose/ozone/docker-config index 50abb18e1ab..f2c8db1f3bb 100644 --- a/hadoop-dist/src/main/compose/ozone/docker-config +++ b/hadoop-dist/src/main/compose/ozone/docker-config @@ -15,6 +15,7 @@ # limitations under the License. OZONE-SITE.XML_ozone.om.address=ozoneManager +OZONE-SITE.XML_ozone.om.http-address=ozoneManager:9874 OZONE-SITE.XML_ozone.scm.names=scm OZONE-SITE.XML_ozone.enabled=True OZONE-SITE.XML_ozone.scm.datanode.id=/data/datanode.id @@ -22,9 +23,53 @@ OZONE-SITE.XML_ozone.scm.block.client.address=scm OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.handler.type=distributed OZONE-SITE.XML_ozone.scm.client.address=scm +OZONE-SITE.XML_ozone.replication=1 HDFS-SITE.XML_rpc.metrics.quantile.enable=true HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n +LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR +LOG4J.PROPERTIES_log4j.logger.org.apache.ratis.conf.ConfUtils=WARN +LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.security.ShellBasedUnixGroupsMapping=ERROR + +#Enable this variable to print out all hadoop rpc traffic to the stdout. See http://byteman.jboss.org/ to define your own instrumentation. 
+#BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm + +#LOG4J2.PROPERTIES_* are for Ozone Audit Logging +LOG4J2.PROPERTIES_monitorInterval=30 +LOG4J2.PROPERTIES_filter=read,write +LOG4J2.PROPERTIES_filter.read.type=MarkerFilter +LOG4J2.PROPERTIES_filter.read.marker=READ +LOG4J2.PROPERTIES_filter.read.onMatch=DENY +LOG4J2.PROPERTIES_filter.read.onMismatch=NEUTRAL +LOG4J2.PROPERTIES_filter.write.type=MarkerFilter +LOG4J2.PROPERTIES_filter.write.marker=WRITE +LOG4J2.PROPERTIES_filter.write.onMatch=NEUTRAL +LOG4J2.PROPERTIES_filter.write.onMismatch=NEUTRAL +LOG4J2.PROPERTIES_appenders=console, rolling +LOG4J2.PROPERTIES_appender.console.type=Console +LOG4J2.PROPERTIES_appender.console.name=STDOUT +LOG4J2.PROPERTIES_appender.console.layout.type=PatternLayout +LOG4J2.PROPERTIES_appender.console.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n +LOG4J2.PROPERTIES_appender.rolling.type=RollingFile +LOG4J2.PROPERTIES_appender.rolling.name=RollingFile +LOG4J2.PROPERTIES_appender.rolling.fileName =${sys:hadoop.log.dir}/om-audit-${hostName}.log +LOG4J2.PROPERTIES_appender.rolling.filePattern=${sys:hadoop.log.dir}/om-audit-${hostName}-%d{yyyy-MM-dd-HH-mm-ss}-%i.log.gz +LOG4J2.PROPERTIES_appender.rolling.layout.type=PatternLayout +LOG4J2.PROPERTIES_appender.rolling.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n +LOG4J2.PROPERTIES_appender.rolling.policies.type=Policies +LOG4J2.PROPERTIES_appender.rolling.policies.time.type=TimeBasedTriggeringPolicy +LOG4J2.PROPERTIES_appender.rolling.policies.time.interval=86400 +LOG4J2.PROPERTIES_appender.rolling.policies.size.type=SizeBasedTriggeringPolicy +LOG4J2.PROPERTIES_appender.rolling.policies.size.size=64MB +LOG4J2.PROPERTIES_loggers=audit +LOG4J2.PROPERTIES_logger.audit.type=AsyncLogger +LOG4J2.PROPERTIES_logger.audit.name=OMAudit +LOG4J2.PROPERTIES_logger.audit.level=INFO +LOG4J2.PROPERTIES_logger.audit.appenderRefs=rolling +LOG4J2.PROPERTIES_logger.audit.appenderRef.file.ref=RollingFile +LOG4J2.PROPERTIES_rootLogger.level=INFO +LOG4J2.PROPERTIES_rootLogger.appenderRefs=stdout +LOG4J2.PROPERTIES_rootLogger.appenderRef.stdout.ref=STDOUT diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/docker-compose.yaml b/hadoop-dist/src/main/compose/ozonefs/docker-compose.yaml similarity index 88% rename from hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/docker-compose.yaml rename to hadoop-dist/src/main/compose/ozonefs/docker-compose.yaml index 6b7b7bd946d..a1e874849ee 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/docker-compose.yaml +++ b/hadoop-dist/src/main/compose/ozonefs/docker-compose.yaml @@ -19,7 +19,7 @@ services: datanode: image: apache/hadoop-runner volumes: - - ${OZONEDIR}:/opt/hadoop + - ../..:/opt/hadoop ports: - 9864 command: ["/opt/hadoop/bin/ozone","datanode"] @@ -29,7 +29,7 @@ services: image: apache/hadoop-runner hostname: ozoneManager volumes: - - ${OZONEDIR}:/opt/hadoop + - ../..:/opt/hadoop ports: - 9874 environment: @@ -40,7 +40,7 @@ services: scm: image: apache/hadoop-runner volumes: - - ${OZONEDIR}:/opt/hadoop + - ../..:/opt/hadoop ports: - 9876 env_file: @@ -51,9 +51,9 @@ services: hadooplast: image: flokkr/hadoop:3.1.0 volumes: - - ${OZONEDIR}:/opt/ozone + - ../..:/opt/ozone env_file: - ./docker-config environment: - HADOOP_CLASSPATH: /opt/ozone/share/hadoop/ozonefs/hadoop-ozone-filesystem.jar + HADOOP_CLASSPATH: /opt/ozone/share/hadoop/ozonefs/*.jar command: ["watch","-n","100000","ls"] 
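The `docker-config` files above rely on the configuration-injection convention of the hadoop-runner/flokkr base images: an entry such as `OZONE-SITE.XML_ozone.replication=1` is expected to be written into the matching configuration file (here `ozone-site.xml`) when the container starts. A minimal sketch of how one might verify this, assuming the compose directory is used from inside the extracted ozone distribution and that the image keeps its generated configuration under `/opt/hadoop/etc/hadoop` (both are assumptions about the base image, not something defined by this patch):

```
# start the pure-ozone cluster from the distribution directory
cd compose/ozone
docker-compose up -d

# the docker-config entry OZONE-SITE.XML_ozone.replication=1 should now be
# visible as a <property> in the generated ozone-site.xml inside a container
docker-compose exec datanode \
    grep -A1 'ozone.replication' /opt/hadoop/etc/hadoop/ozone-site.xml
```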
diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/docker-config b/hadoop-dist/src/main/compose/ozonefs/docker-config similarity index 92% rename from hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/docker-config rename to hadoop-dist/src/main/compose/ozonefs/docker-config index b0129bce0ff..3171f089e1b 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/docker-config +++ b/hadoop-dist/src/main/compose/ozonefs/docker-config @@ -31,4 +31,5 @@ LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n -LOG4J.PROPERTIES_log4j.category.org.apache.hadoop.util.NativeCodeLoader=ERROR +LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR +LOG4J.PROPERTIES_log4j.logger.org.apache.ratis.conf.ConfUtils=WARN diff --git a/hadoop-dist/src/main/compose/ozoneperf/docker-compose.yaml b/hadoop-dist/src/main/compose/ozoneperf/docker-compose.yaml index 6d1d9cadb34..3b5cc7b3b4e 100644 --- a/hadoop-dist/src/main/compose/ozoneperf/docker-compose.yaml +++ b/hadoop-dist/src/main/compose/ozoneperf/docker-compose.yaml @@ -19,7 +19,7 @@ services: datanode: image: apache/hadoop-runner volumes: - - ../../ozone:/opt/hadoop + - ../..:/opt/hadoop - ./jmxpromo.jar:/opt/jmxpromo.jar ports: - 9864 @@ -29,7 +29,7 @@ services: ozoneManager: image: apache/hadoop-runner volumes: - - ../../ozone:/opt/hadoop + - ../..:/opt/hadoop - ./jmxpromo.jar:/opt/jmxpromo.jar ports: - 9874:9874 @@ -41,7 +41,7 @@ services: scm: image: apache/hadoop-runner volumes: - - ../../ozone:/opt/hadoop + - ../..:/opt/hadoop - ./jmxpromo.jar:/opt/jmxpromo.jar ports: - 9876:9876 diff --git a/hadoop-dist/src/main/compose/ozoneperf/docker-config b/hadoop-dist/src/main/compose/ozoneperf/docker-config index 253995014c7..454601e715d 100644 --- a/hadoop-dist/src/main/compose/ozoneperf/docker-config +++ b/hadoop-dist/src/main/compose/ozoneperf/docker-config @@ -15,6 +15,7 @@ # limitations under the License. OZONE-SITE.XML_ozone.om.address=ozoneManager +OZONE-SITE.XML_ozone.om.http-address=ozoneManager:9874 OZONE-SITE.XML_ozone.scm.names=scm OZONE-SITE.XML_ozone.enabled=True OZONE-SITE.XML_ozone.scm.datanode.id=/data/datanode.id @@ -29,4 +30,7 @@ LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n HADOOP_OPTS=-javaagent:/opt/jmxpromo.jar=port=0:consulHost=consul:consulMode=node +LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR +LOG4J.PROPERTIES_log4j.logger.org.apache.ratis.conf.ConfUtils=WARN +LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.security.ShellBasedUnixGroupsMapping=ERROR diff --git a/hadoop-dist/src/main/compose/ozonescripts/.ssh/authorized_keys b/hadoop-dist/src/main/compose/ozonescripts/.ssh/authorized_keys new file mode 100644 index 00000000000..ae390529c7e --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/.ssh/authorized_keys @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDgEmLpYm4BrWtq1KG9hhZXCZgGrETntu0eNTo21U3VKc9nH9/ot7M6lAawsFcT9uXu4b58PTlnfvwH/TATlCFjC8n0Z7SOx+FU6L3Sn8URh9HaX4L0tF8u87oCAD4dBrUGhhB36eiuH9dBBWly6RKffYJvrjatbc7GxBO/e5OSUMtqk/DSVKksmBhZxutrKivCNjDish9ViGIf8b5yS/MlEGmaVKApik1fJ5iOlloM/GgpB60YV/hbqfCecbWgeiM1gK92gdOcA/Wx1C7fj8BSI5iDSE6eZeF80gM3421lvyPDWyVhFaGbka4rXBX/fb9QSRBA9RTqhRKAEmAIf49H hadoop@cdae967fa87a diff --git a/hadoop-ozone/acceptance-test/dev-support/docker/docker-compose.yaml b/hadoop-dist/src/main/compose/ozonescripts/.ssh/config similarity index 86% rename from hadoop-ozone/acceptance-test/dev-support/docker/docker-compose.yaml rename to hadoop-dist/src/main/compose/ozonescripts/.ssh/config index 6f16b0ac3b8..6506916ded0 100644 --- a/hadoop-ozone/acceptance-test/dev-support/docker/docker-compose.yaml +++ b/hadoop-dist/src/main/compose/ozonescripts/.ssh/config @@ -13,11 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -version: "3" -services: - robotenv: - build: . - privileged: true - volumes: - - ../../../..:/opt/hadoop +Host * + UserKnownHostsFile /dev/null + StrictHostKeyChecking no diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/.env b/hadoop-dist/src/main/compose/ozonescripts/.ssh/environment similarity index 93% rename from hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/.env rename to hadoop-dist/src/main/compose/ozonescripts/.ssh/environment index 98234cb1122..5685453be12 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/.env +++ b/hadoop-dist/src/main/compose/ozonescripts/.ssh/environment @@ -13,5 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -OZONEDIR=../../../../../../hadoop-dist/target/ozone +JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64/ diff --git a/hadoop-dist/src/main/compose/ozonescripts/.ssh/id_rsa b/hadoop-dist/src/main/compose/ozonescripts/.ssh/id_rsa new file mode 100644 index 00000000000..6632ce51c54 --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/.ssh/id_rsa @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEA4BJi6WJuAa1ratShvYYWVwmYBqxE57btHjU6NtVN1SnPZx/f +6LezOpQGsLBXE/bl7uG+fD05Z378B/0wE5QhYwvJ9Ge0jsfhVOi90p/FEYfR2l+C +9LRfLvO6AgA+HQa1BoYQd+norh/XQQVpcukSn32Cb642rW3OxsQTv3uTklDLapPw +0lSpLJgYWcbrayorwjYw4rIfVYhiH/G+ckvzJRBpmlSgKYpNXyeYjpZaDPxoKQet +GFf4W6nwnnG1oHojNYCvdoHTnAP1sdQu34/AUiOYg0hOnmXhfNIDN+NtZb8jw1sl +YRWhm5GuK1wV/32/UEkQQPUU6oUSgBJgCH+PRwIDAQABAoIBAQDI1TH6ZNKchkck +9XgSWsBjOqIcOQN5fCeDT8nho8WjLVpL3/Hcr+ngsxRcAXHK3xyvw33r9SQic1qJ +/pC8u6RBFivo95qJ7vU0GXcp9TG4yLd6tui1U4WMm784U+dYNM7EDh1snSaECt3v +1V3yNJ0QfnoOh2NShn0zAkOA+M4H8Nx2SudMCsjcbK9+fYxzW3hX+sJpMKdjG1HW +DUz+I7cW7t0EGaVrgVSV+eR58LiXu+14YDNMrySiejB4nD2sKrx93XgiCBECCsBN +GLQGJCztaXoAY+5Kf/aJ9EEf2wBF3GecRk+XIAd87PeDmeahLQAVkAJ/rD1vsKFs +8kWi6CrhAoGBAP7leG/dMBhlfvROpBddIfWm2i8B+oZiAlSjdYGz+/ZhUaByXk18 +pupMGiiMQR1ziPnEg0gNgR2ZkH54qrXPn5WcQa4rlSEtUsZEp5v5WblhfX2QwKzY +G/uhA+mB7wXpQkSmXo0LclfPF2teROQrG1OyfWkWbxFH4i3+em7sL95jAoGBAOEK +v+wscqkMLW7Q8ONbWMCCBlmMHr6baB3VDCYZx25lr+GIF5zmJJFTmF2rq2VSAlts +qx1AGmaUSo78kC5FuJvSNTL6a1Us5ucdthQZM3N8pAz+OAE+QEU+BsdA27yAh3tO +yKDsMFNHKtXcgy5LeB5gzENLlNyw2jgkRv2Ef77NAoGAVH8DHqoHEH9Mx3XuRWR1 +JnaqKx0PzE5fEWmiQV3Fr5XxNivTgQJKXq7dmQVtbHLpPErdbhwz6fkHAjXD+UMb +VsAWscL2y6m3n8wQd87/5EkiDWbXyDRXimGE53pQHviFJDa2bzEVNXCMBeaZFb4I +cAViN1zdcrAOlUqfkXewIpsCgYB8wsXl/DpRB+RENGfn0+OfTjaQ/IKq72NIbq1+ +jfondQ6N/TICFQEe5HZrL9okoNOXteYjoD9CsWGoZdLVJGgVUvOVYImSvgMBDFK+ +T75bfzU/0sxfvBBLkviVDJsFpUf3D5VgybB86s6Po+HCD6r3RHjZshRESXOhflMx +B3z+3QKBgE2Lwo0DuwUGkm6k8psyn3x8EiXNsiNw12cojicFTyKUYLHxMBeVbCLW +3j3pxSggJgRuBLLzixUHbHp91r2ARTy28naK7R/la8yKVqK6ojcikN2mQsCHYtwB +nuFwXr42ytn6G+9Wn4xT64tGjRCqyZn0/v0XsPjVCyrZ6G7EtNHP +-----END RSA PRIVATE KEY----- diff --git a/hadoop-dist/src/main/compose/ozonescripts/.ssh/id_rsa.pub b/hadoop-dist/src/main/compose/ozonescripts/.ssh/id_rsa.pub new file mode 100644 index 00000000000..ae390529c7e --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/.ssh/id_rsa.pub @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDgEmLpYm4BrWtq1KG9hhZXCZgGrETntu0eNTo21U3VKc9nH9/ot7M6lAawsFcT9uXu4b58PTlnfvwH/TATlCFjC8n0Z7SOx+FU6L3Sn8URh9HaX4L0tF8u87oCAD4dBrUGhhB36eiuH9dBBWly6RKffYJvrjatbc7GxBO/e5OSUMtqk/DSVKksmBhZxutrKivCNjDish9ViGIf8b5yS/MlEGmaVKApik1fJ5iOlloM/GgpB60YV/hbqfCecbWgeiM1gK92gdOcA/Wx1C7fj8BSI5iDSE6eZeF80gM3421lvyPDWyVhFaGbka4rXBX/fb9QSRBA9RTqhRKAEmAIf49H hadoop@cdae967fa87a diff --git a/hadoop-dist/src/main/compose/ozonescripts/Dockerfile b/hadoop-dist/src/main/compose/ozonescripts/Dockerfile new file mode 100644 index 00000000000..3bdcb0cc982 --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/Dockerfile @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +FROM apache/hadoop-runner +RUN sudo apt-get update && sudo apt-get install -y openssh-server + +RUN sudo mkdir -p /run/sshd +RUN sudo sed -i "s/.*UsePrivilegeSeparation.*/UsePrivilegeSeparation no/g" /etc/ssh/sshd_config +RUN sudo sed -i "s/.*PermitUserEnvironment.*/PermitUserEnvironment yes/g" /etc/ssh/sshd_config +RUN sudo sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd + +#/opt/hadoop is mounted, we can't use it as a home +RUN sudo usermod -d /opt hadoop +ADD .ssh /opt/.ssh +RUN sudo chown -R hadoop /opt/.ssh +RUN sudo chown hadoop /opt +RUN sudo chmod 600 /opt/.ssh/* +RUN sudo chmod 700 /opt/.ssh + +RUN sudo sh -c 'echo "export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64/" >> /etc/profile' +CMD ["sudo","/usr/sbin/sshd","-D"] diff --git a/hadoop-dist/src/main/compose/ozonescripts/README.md b/hadoop-dist/src/main/compose/ozonescripts/README.md new file mode 100644 index 00000000000..2531fa43660 --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/README.md @@ -0,0 +1,38 @@ + + +# start-ozone environment + +This is an example environment to use/test the `./sbin/start-ozone.sh` and `./sbin/stop-ozone.sh` scripts. + +There are ssh connections between the containers, so the start/stop scripts can handle the start/stop process +just as on a real cluster. + +To use it, first start the cluster: + +``` +docker-compose up -d +``` + +After a successful startup (which starts only the ssh daemons) you can start ozone: + +``` +./start.sh +``` + +Check if the java processes are started: + +``` +./ps.sh +``` \ No newline at end of file diff --git a/hadoop-dist/src/main/compose/ozonescripts/docker-compose.yaml b/hadoop-dist/src/main/compose/ozonescripts/docker-compose.yaml new file mode 100644 index 00000000000..62f116368f4 --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/docker-compose.yaml @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3" +services: + datanode: + build: . + volumes: + - ../..:/opt/hadoop + ports: + - 9864 + env_file: + - ./docker-config + om: + build: . + volumes: + - ../..:/opt/hadoop + ports: + - 9874:9874 + env_file: + - ./docker-config + scm: + build: . + volumes: + - ../..:/opt/hadoop + ports: + - 9876:9876 + env_file: + - ./docker-config diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/docker-config b/hadoop-dist/src/main/compose/ozonescripts/docker-config similarity index 68% rename from hadoop-ozone/acceptance-test/src/test/acceptance/basic/docker-config rename to hadoop-dist/src/main/compose/ozonescripts/docker-config index b72085b22fd..1afec73e0ed 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/docker-config +++ b/hadoop-dist/src/main/compose/ozonescripts/docker-config @@ -14,20 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -OZONE-SITE.XML_ozone.om.address=ozoneManager -OZONE-SITE.XML_ozone.om.http-address=ozoneManager:9874 +JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64/ +CORE-SITE.XML_fs.defaultFS=hdfs://namenode:9000 +OZONE-SITE.XML_ozone.ksm.address=ksm OZONE-SITE.XML_ozone.scm.names=scm -OZONE-SITE.XML_ozone.enabled=True +OZONE-SITE.XML_ozone.enabled=true OZONE-SITE.XML_ozone.scm.datanode.id=/data/datanode.id +OZONE-SITE.XML_ozone.om.address=om +OZONE-SITE.XML_ozone.om.http-address=om:9874 OZONE-SITE.XML_ozone.scm.block.client.address=scm OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata -OZONE-SITE.XML_ozone.handler.type=distributed OZONE-SITE.XML_ozone.scm.client.address=scm -OZONE-SITE.XML_ozone.scm.heartbeat.interval=3s +OZONE-SITE.XML_hdds.datanode.plugins=org.apache.hadoop.ozone.web.OzoneHddsDatanodeService +HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:9000 +HDFS-SITE.XML_dfs.namenode.name.dir=/data/namenode HDFS-SITE.XML_rpc.metrics.quantile.enable=true HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 +HDFS-SITE.XML_dfs.datanode.plugins=org.apache.hadoop.ozone.HddsDatanodeService LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout -LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +LOG4J.PROPERTIES_log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR, stdout LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender -LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n -LOG4J.PROPERTIES_log4j.category.org.apache.hadoop.util.NativeCodeLoader=ERROR +LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n \ No newline at end of file diff --git a/hadoop-ozone/acceptance-test/dev-support/docker/Dockerfile b/hadoop-dist/src/main/compose/ozonescripts/ps.sh old mode 
100644 new mode 100755 similarity index 84% rename from hadoop-ozone/acceptance-test/dev-support/docker/Dockerfile rename to hadoop-dist/src/main/compose/ozonescripts/ps.sh index 06feda6221b..d5e2c386752 --- a/hadoop-ozone/acceptance-test/dev-support/docker/Dockerfile +++ b/hadoop-dist/src/main/compose/ozonescripts/ps.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -13,9 +14,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -FROM docker:18-dind -RUN apk add --update python3 bash curl jq sudo -RUN pip3 install robotframework docker-compose -WORKDIR /root -USER root +docker-compose ps -q | xargs -n1 -I CONTAINER docker exec CONTAINER ps xa \ No newline at end of file diff --git a/hadoop-dist/src/main/compose/ozonescripts/start.sh b/hadoop-dist/src/main/compose/ozonescripts/start.sh new file mode 100755 index 00000000000..3358b07c4e9 --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/start.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +docker-compose ps | grep datanode | awk '{print $1}' | xargs -n1 docker inspect --format '{{ .Config.Hostname }}' > ../../etc/hadoop/workers +docker-compose exec scm /opt/hadoop/bin/ozone scm -init +docker-compose exec scm /opt/hadoop/sbin/start-ozone.sh +#We need a running SCM for om objectstore creation +#TODO create a utility to wait for the startup +sleep 10 +docker-compose exec om /opt/hadoop/bin/ozone om -createObjectStore +docker-compose exec scm /opt/hadoop/sbin/start-ozone.sh diff --git a/hadoop-dist/src/main/compose/ozonescripts/stop.sh b/hadoop-dist/src/main/compose/ozonescripts/stop.sh new file mode 100755 index 00000000000..a3ce08af573 --- /dev/null +++ b/hadoop-dist/src/main/compose/ozonescripts/stop.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +docker-compose exec scm /opt/hadoop/sbin/stop-ozone.sh diff --git a/hadoop-dist/src/main/ozone/README.txt b/hadoop-dist/src/main/ozone/README.txt new file mode 100644 index 00000000000..6bbd83ffd04 --- /dev/null +++ b/hadoop-dist/src/main/ozone/README.txt @@ -0,0 +1,51 @@ + + +This is the distribution of Apache Hadoop Ozone. + +Ozone is a submodule of Hadoop with a separate release cycle. For more information, check + + http://ozone.hadoop.apache.org + + and + + https://cwiki.apache.org/confluence/display/HADOOP/Ozone+Contributor+Guide + +For more information about Hadoop, check: + + http://hadoop.apache.org + +This distribution includes cryptographic software. The country in +which you currently reside may have restrictions on the import, +possession, use, and/or re-export to another country, of +encryption software. BEFORE using any encryption software, please +check your country's laws, regulations and policies concerning the +import, possession, or use, and re-export of encryption software, to +see if this is permitted. See <http://www.wassenaar.org/> for more +information. + +The U.S. Government Department of Commerce, Bureau of Industry and +Security (BIS), has classified this software as Export Commodity +Control Number (ECCN) 5D002.C.1, which includes information security +software using or performing cryptographic functions with asymmetric +algorithms. The form and manner of this Apache Software Foundation +distribution makes it eligible for export under the License Exception +ENC Technology Software Unrestricted (TSU) exception (see the BIS +Export Administration Regulations, Section 740.13) for both object +code and source code. + +The following provides more details on the included cryptographic +software: + Hadoop Core uses the SSL libraries from the Jetty project written +by mortbay.org. diff --git a/hadoop-dist/src/main/smoketest/README.md b/hadoop-dist/src/main/smoketest/README.md new file mode 100644 index 00000000000..c521a54beb2 --- /dev/null +++ b/hadoop-dist/src/main/smoketest/README.md @@ -0,0 +1,30 @@ + + +## Ozone Acceptance Tests + +This directory contains a [robotframework](http://robotframework.org/)-based test suite for Ozone that makes it easier to check the current state of the package. + +You can run it in any environment after [installing](https://github.com/robotframework/robotframework/blob/master/INSTALL.rst) robotframework: + +``` +cd $DIRECTORY_OF_OZONE +robot smoketest/basic +``` + +The argument of the `robot` command can be any robot file or directory. + +The current configuration in the robot files (hostnames, ports) is adjusted for the docker-based setup, but you can easily modify it for any environment. + +The `./test.sh` script in this directory can start multiple types of clusters (standalone ozone, or ozone + hdfs) and execute the test framework against each of them.
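For a quick manual run against an already-running compose cluster, the same steps that the `test.sh` script (added later in this patch) automates can also be issued by hand. This is a minimal sketch, assuming the `ozone` compose cluster is up and the distribution (including the `smoketest` directory) is mounted at `/opt/hadoop` inside the containers:

```
# install robotframework inside the datanode container (one-off)
COMPOSE_FILE=../compose/ozone/docker-compose.yaml
docker-compose -f "$COMPOSE_FILE" exec datanode sudo apt-get update
docker-compose -f "$COMPOSE_FILE" exec datanode sudo apt-get install -y python-pip
docker-compose -f "$COMPOSE_FILE" exec datanode sudo pip install robotframework

# run a single suite, relative to the distribution root inside the container
docker-compose -f "$COMPOSE_FILE" exec datanode python -m robot smoketest/basic/basic.robot
```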
diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/basic.robot b/hadoop-dist/src/main/smoketest/basic/basic.robot similarity index 52% rename from hadoop-ozone/acceptance-test/src/test/acceptance/basic/basic.robot rename to hadoop-dist/src/main/smoketest/basic/basic.robot index 6d6fea02733..a69450dbba8 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/basic.robot +++ b/hadoop-dist/src/main/smoketest/basic/basic.robot @@ -16,35 +16,32 @@ *** Settings *** Documentation Smoketest ozone cluster startup Library OperatingSystem -Suite Setup Startup Ozone cluster with size 5 -Suite Teardown Teardown Ozone cluster Resource ../commonlib.robot *** Variables *** ${COMMON_REST_HEADER} -H "x-ozone-user: bilbo" -H "x-ozone-version: v1" -H "Date: Mon, 26 Jun 2017 04:23:30 GMT" -H "Authorization:OZONE root" -${COMPOSEFILE} ${CURDIR}/docker-compose.yaml -${PROJECTDIR} ${CURDIR}/../../../../../.. +${DATANODE_HOST} localhost *** Test Cases *** Test rest interface - ${result} = Execute on datanode curl -i -X POST ${COMMON_RESTHEADER} "http://localhost:9880/volume1" + ${result} = Execute curl -i -X POST ${COMMON_RESTHEADER} "http://${DATANODE_HOST}:9880/volume1" Should contain ${result} 201 Created - ${result} = Execute on datanode curl -i -X POST ${COMMON_RESTHEADER} "http://localhost:9880/volume1/bucket1" + ${result} = Execute curl -i -X POST ${COMMON_RESTHEADER} "http://${DATANODE_HOST}:9880/volume1/bucket1" Should contain ${result} 201 Created - ${result} = Execute on datanode curl -i -X DELETE ${COMMON_RESTHEADER} "http://localhost:9880/volume1/bucket1" + ${result} = Execute curl -i -X DELETE ${COMMON_RESTHEADER} "http://${DATANODE_HOST}:9880/volume1/bucket1" Should contain ${result} 200 OK - ${result} = Execute on datanode curl -i -X DELETE ${COMMON_RESTHEADER} "http://localhost:9880/volume1" + ${result} = Execute curl -i -X DELETE ${COMMON_RESTHEADER} "http://${DATANODE_HOST}:9880/volume1" Should contain ${result} 200 OK Check webui static resources - ${result} = Execute on scm curl -s -I http://localhost:9876/static/bootstrap-3.3.7/js/bootstrap.min.js - Should contain ${result} 200 - ${result} = Execute on ozoneManager curl -s -I http://localhost:9874/static/bootstrap-3.3.7/js/bootstrap.min.js - Should contain ${result} 200 + ${result} = Execute curl -s -I http://scm:9876/static/bootstrap-3.3.7/js/bootstrap.min.js + Should contain ${result} 200 + ${result} = Execute curl -s -I http://ozoneManager:9874/static/bootstrap-3.3.7/js/bootstrap.min.js + Should contain ${result} 200 Start freon testing - ${result} = Execute on ozoneManager ozone freon -numOfVolumes 5 -numOfBuckets 5 -numOfKeys 5 -numOfThreads 10 - Wait Until Keyword Succeeds 3min 10sec Should contain ${result} Number of Keys added: 125 - Should Not Contain ${result} ERROR + ${result} = Execute ozone freon randomkeys --numOfVolumes 5 --numOfBuckets 5 --numOfKeys 5 --numOfThreads 10 + Wait Until Keyword Succeeds 3min 10sec Should contain ${result} Number of Keys added: 125 + Should Not Contain ${result} ERROR diff --git a/hadoop-dist/src/main/smoketest/basic/ozone-shell.robot b/hadoop-dist/src/main/smoketest/basic/ozone-shell.robot new file mode 100644 index 00000000000..14a576170d7 --- /dev/null +++ b/hadoop-dist/src/main/smoketest/basic/ozone-shell.robot @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +*** Settings *** +Documentation Test ozone shell CLI usage +Library OperatingSystem +Resource ../commonlib.robot +Test Timeout 2 minute + +*** Variables *** + +*** Test Cases *** +RestClient without http port + Test ozone shell http:// ozoneManager restwoport + +RestClient with http port + Test ozone shell http:// ozoneManager:9874 restwport + +RestClient without host name + Test ozone shell http:// ${EMPTY} restwohost + +RpcClient with port + Test ozone shell o3:// ozoneManager:9862 rpcwoport + +RpcClient without host + Test ozone shell o3:// ${EMPTY} rpcwport + +RpcClient without scheme + Test ozone shell ${EMPTY} ${EMPTY} rpcwoscheme + + +*** Keywords *** +Test ozone shell + [arguments] ${protocol} ${server} ${volume} + ${result} = Execute ozone sh volume create ${protocol}${server}/${volume} --user bilbo --quota 100TB --root + Should not contain ${result} Failed + Should contain ${result} Creating Volume: ${volume} + ${result} = Execute ozone sh volume list ${protocol}${server}/ --user bilbo | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '.[] | select(.volumeName=="${volume}")' + Should contain ${result} createdOn + ${result} = Execute ozone sh volume list --user bilbo | grep -Ev 'Removed|DEBUG|ERROR|INFO|TRACE|WARN' | jq -r '.[] | select(.volumeName=="${volume}")' + Should contain ${result} createdOn + Execute ozone sh volume update ${protocol}${server}/${volume} --user bill --quota 10TB + ${result} = Execute ozone sh volume info ${protocol}${server}/${volume} | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.volumeName=="${volume}") | .owner | .name' + Should Be Equal ${result} bill + ${result} = Execute ozone sh volume info ${protocol}${server}/${volume} | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.volumeName=="${volume}") | .quota | .size' + Should Be Equal ${result} 10 + Execute ozone sh bucket create ${protocol}${server}/${volume}/bb1 + ${result} = Execute ozone sh bucket info ${protocol}${server}/${volume}/bb1 | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.bucketName=="bb1") | .storageType' + Should Be Equal ${result} DISK + ${result} = Execute ozone sh bucket update ${protocol}${server}/${volume}/bb1 --addAcl user:frodo:rw,group:samwise:r | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.bucketName=="bb1") | .acls | .[] | select(.name=="samwise") | .type' + Should Be Equal ${result} GROUP + ${result} = Execute ozone sh bucket update ${protocol}${server}/${volume}/bb1 --removeAcl group:samwise:r | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. 
| select(.bucketName=="bb1") | .acls | .[] | select(.name=="frodo") | .type' + Should Be Equal ${result} USER + ${result} = Execute ozone sh bucket list ${protocol}${server}/${volume}/ | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '.[] | select(.bucketName=="bb1") | .volumeName' + Should Be Equal ${result} ${volume} + Run Keyword Test key handling ${protocol} ${server} ${volume} + Execute ozone sh bucket delete ${protocol}${server}/${volume}/bb1 + Execute ozone sh volume delete ${protocol}${server}/${volume} --user bilbo + +Test key handling + [arguments] ${protocol} ${server} ${volume} + Execute ozone sh key put ${protocol}${server}/${volume}/bb1/key1 /opt/hadoop/NOTICE.txt + Execute rm -f NOTICE.txt.1 + Execute ozone sh key get ${protocol}${server}/${volume}/bb1/key1 NOTICE.txt.1 + Execute ls -l NOTICE.txt.1 + ${result} = Execute ozone sh key info ${protocol}${server}/${volume}/bb1/key1 | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.keyName=="key1")' + Should contain ${result} createdOn + ${result} = Execute ozone sh key list ${protocol}${server}/${volume}/bb1 | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '.[] | select(.keyName=="key1") | .keyName' + Should Be Equal ${result} key1 + Execute ozone sh key delete ${protocol}${server}/${volume}/bb1/key1 diff --git a/hadoop-ozone/docs/config.toml b/hadoop-dist/src/main/smoketest/commonlib.robot similarity index 69% rename from hadoop-ozone/docs/config.toml rename to hadoop-dist/src/main/smoketest/commonlib.robot index eed74a96bc0..e2620fa4340 100644 --- a/hadoop-ozone/docs/config.toml +++ b/hadoop-dist/src/main/smoketest/commonlib.robot @@ -13,11 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -baseURL = "/" +*** Keywords *** -languageCode = "en-us" -DefaultContentLanguage = "en" -title = "Ozone" -theme = "ozonedoc" -pygmentsCodeFences = true -pygmentsStyle = "monokailight" + +Execute + [arguments] ${command} + ${rc} ${output} = Run And Return Rc And Output ${command} + Log ${output} + Should Be Equal As Integers ${rc} 0 + [return] ${output} diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/ozonefs.robot b/hadoop-dist/src/main/smoketest/ozonefs/ozonefs.robot similarity index 53% rename from hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/ozonefs.robot rename to hadoop-dist/src/main/smoketest/ozonefs/ozonefs.robot index ea473c0de71..fb7b98cec63 100644 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/ozonefs/ozonefs.robot +++ b/hadoop-dist/src/main/smoketest/ozonefs/ozonefs.robot @@ -16,24 +16,20 @@ *** Settings *** Documentation Ozonefs test Library OperatingSystem -Suite Setup Startup Ozone cluster with size 5 -Suite Teardown Teardown Ozone cluster Resource ../commonlib.robot *** Variables *** -${COMPOSEFILE} ${CURDIR}/docker-compose.yaml -${PROJECTDIR} ${CURDIR}/../../../../../.. 
*** Test Cases *** Create volume and bucket - Execute on datanode ozone oz -createVolume http://ozoneManager/fstest -user bilbo -quota 100TB -root - Execute on datanode ozone oz -createBucket http://ozoneManager/fstest/bucket1 + Execute ozone sh volume create http://ozoneManager/fstest --user bilbo --quota 100TB --root + Execute ozone sh bucket create http://ozoneManager/fstest/bucket1 Check volume from ozonefs - ${result} = Execute on hadooplast hdfs dfs -ls o3://bucket1.fstest/ + ${result} = Execute ozone fs -ls o3://bucket1.fstest/ Create directory from ozonefs - Execute on hadooplast hdfs dfs -mkdir -p o3://bucket1.fstest/testdir/deep - ${result} = Execute on ozoneManager ozone oz -listKey o3://ozoneManager/fstest/bucket1 | grep -v WARN | jq -r '.[].keyName' - Should contain ${result} testdir/deep + Execute ozone fs -mkdir -p o3://bucket1.fstest/testdir/deep + ${result} = Execute ozone sh key list o3://ozoneManager/fstest/bucket1 | grep -v WARN | jq -r '.[].keyName' + Should contain ${result} testdir/deep diff --git a/hadoop-dist/src/main/smoketest/test.sh b/hadoop-dist/src/main/smoketest/test.sh new file mode 100755 index 00000000000..534bbb701f7 --- /dev/null +++ b/hadoop-dist/src/main/smoketest/test.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +execute_tests(){ + COMPOSE_FILE=$DIR/../compose/$1/docker-compose.yaml + TESTS=$2 + echo "Executing test ${TESTS[*]} with $COMPOSE_FILE" + docker-compose -f "$COMPOSE_FILE" down + docker-compose -f "$COMPOSE_FILE" up -d + docker-compose -f "$COMPOSE_FILE" exec datanode sudo apt-get update + docker-compose -f "$COMPOSE_FILE" exec datanode sudo apt-get install -y python-pip + docker-compose -f "$COMPOSE_FILE" exec datanode sudo pip install robotframework + for TEST in "${TESTS[@]}"; do + set +e + docker-compose -f "$COMPOSE_FILE" exec datanode python -m robot "smoketest/$TEST" + set -e + done + if [ "$KEEP_RUNNING" = false ]; then + docker-compose -f "$COMPOSE_FILE" down + fi +} +RUN_ALL=true +KEEP_RUNNING=false +POSITIONAL=() +while [[ $# -gt 0 ]] +do +key="$1" + +case $key in + --env) + DOCKERENV="$2" + RUN_ALL=false + shift # past argument + shift # past value + ;; + --keep) + KEEP_RUNNING=true + shift # past argument + ;; + --help|-h|-help) + cat << EOF + + Acceptance test executor for ozone. + + This is a lightweight test executor for ozone. + + You can run it with + + ./test.sh + + Which executes all the tests in all the available environments. 
+ + Or you can run manually one test with + + ./test.sh --keep --env ozone-hdfs basic + + --keep means that docker cluster won't be stopped after the test (optional) + --env defines the subdirectory under the compose dir + The remaining parameters define the test suites under smoketest dir. + Could be any directory or robot file relative to the smoketest dir. +EOF + exit 0 + ;; + *) + POSITIONAL+=("$1") # save it in an array for later + shift # past argument + ;; +esac +done + +if [ "$RUN_ALL" = true ]; then +# +# This is the definition of the ozone acceptance test suite +# +# We select the test suites and execute them on multiple type of clusters +# + DEFAULT_TESTS=("basic") + execute_tests ozone "${DEFAULT_TESTS[@]}" + TESTS=("ozonefs") + execute_tests ozonefs "${TESTS[@]}" + +else + execute_tests "$DOCKERENV" "${POSITIONAL[@]}" +fi diff --git a/hadoop-hdds/client/pom.xml b/hadoop-hdds/client/pom.xml index d6db9c63f04..6a4cc9deebf 100644 --- a/hadoop-hdds/client/pom.xml +++ b/hadoop-hdds/client/pom.xml @@ -20,11 +20,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-hdds-client - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Distributed Data Store Client Library Apache Hadoop HDDS Client jar @@ -38,7 +38,6 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds-common - provided diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClient.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClient.java deleted file mode 100644 index 709f0dc6cb7..00000000000 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClient.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hdds.scm; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import org.apache.ratis.shaded.io.netty.bootstrap.Bootstrap; -import org.apache.ratis.shaded.io.netty.channel.Channel; -import org.apache.ratis.shaded.io.netty.channel.EventLoopGroup; -import org.apache.ratis.shaded.io.netty.channel.nio.NioEventLoopGroup; -import org.apache.ratis.shaded.io.netty.channel.socket.nio.NioSocketChannel; -import org.apache.ratis.shaded.io.netty.handler.logging.LogLevel; -import org.apache.ratis.shaded.io.netty.handler.logging.LoggingHandler; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.scm.client.HddsClientUtils; -import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Semaphore; - -/** - * A Client for the storageContainer protocol. - */ -public class XceiverClient extends XceiverClientSpi { - static final Logger LOG = LoggerFactory.getLogger(XceiverClient.class); - private final Pipeline pipeline; - private final Configuration config; - private Channel channel; - private Bootstrap b; - private EventLoopGroup group; - private final Semaphore semaphore; - private boolean closed = false; - - /** - * Constructs a client that can communicate with the Container framework on - * data nodes. - * - * @param pipeline - Pipeline that defines the machines. - * @param config -- Ozone Config - */ - public XceiverClient(Pipeline pipeline, Configuration config) { - super(); - Preconditions.checkNotNull(pipeline); - Preconditions.checkNotNull(config); - this.pipeline = pipeline; - this.config = config; - this.semaphore = - new Semaphore(HddsClientUtils.getMaxOutstandingRequests(config)); - } - - @Override - public void connect() throws Exception { - if (closed) { - throw new IOException("This channel is not connected."); - } - - if (channel != null && channel.isActive()) { - throw new IOException("This client is already connected to a host."); - } - - group = new NioEventLoopGroup(); - b = new Bootstrap(); - b.group(group) - .channel(NioSocketChannel.class) - .handler(new LoggingHandler(LogLevel.INFO)) - .handler(new XceiverClientInitializer(this.pipeline, semaphore)); - DatanodeDetails leader = this.pipeline.getLeader(); - - // read port from the data node, on failure use default configured - // port. - int port = leader.getPort(DatanodeDetails.Port.Name.STANDALONE).getValue(); - if (port == 0) { - port = config.getInt(OzoneConfigKeys.DFS_CONTAINER_IPC_PORT, - OzoneConfigKeys.DFS_CONTAINER_IPC_PORT_DEFAULT); - } - LOG.debug("Connecting to server Port : " + port); - channel = b.connect(leader.getHostName(), port).sync().channel(); - } - - public void reconnect() throws IOException { - try { - connect(); - if (channel == null || !channel.isActive()) { - throw new IOException("This channel is not connected."); - } - } catch (Exception e) { - LOG.error("Error while connecting: ", e); - throw new IOException(e); - } - } - - /** - * Returns if the exceiver client connects to a server. 
- * - * @return True if the connection is alive, false otherwise. - */ - @VisibleForTesting - public boolean isConnected() { - return channel.isActive(); - } - - @Override - public void close() { - closed = true; - if (group != null) { - group.shutdownGracefully().awaitUninterruptibly(); - } - } - - @Override - public Pipeline getPipeline() { - return pipeline; - } - - @Override - public ContainerProtos.ContainerCommandResponseProto sendCommand( - ContainerProtos.ContainerCommandRequestProto request) throws IOException { - try { - if ((channel == null) || (!channel.isActive())) { - reconnect(); - } - XceiverClientHandler handler = - channel.pipeline().get(XceiverClientHandler.class); - - return handler.sendCommand(request); - } catch (ExecutionException | InterruptedException e) { - /** - * In case the netty channel handler throws an exception, - * the exception thrown will be wrapped within {@link ExecutionException}. - * Unwarpping here so that original exception gets passed - * to to the client. - */ - if (e instanceof ExecutionException) { - Throwable cause = e.getCause(); - if (cause instanceof IOException) { - throw (IOException) cause; - } - } - throw new IOException( - "Unexpected exception during execution:" + e.getMessage()); - } - } - - /** - * Sends a given command to server gets a waitable future back. - * - * @param request Request - * @return Response to the command - * @throws IOException - */ - @Override - public CompletableFuture - sendCommandAsync(ContainerProtos.ContainerCommandRequestProto request) - throws IOException, ExecutionException, InterruptedException { - if ((channel == null) || (!channel.isActive())) { - reconnect(); - } - XceiverClientHandler handler = - channel.pipeline().get(XceiverClientHandler.class); - return handler.sendCommandAsync(request); - } - - /** - * Create a pipeline. - * - * @param pipelineID - Name of the pipeline. - * @param datanodes - Datanodes - */ - @Override - public void createPipeline(String pipelineID, List datanodes) - throws IOException { - // For stand alone pipeline, there is no notion called setup pipeline. - return; - } - - /** - * Returns pipeline Type. - * - * @return - Stand Alone as the type. - */ - @Override - public HddsProtos.ReplicationType getPipelineType() { - return HddsProtos.ReplicationType.STAND_ALONE; - } -} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java index 92df46e492f..3cdbc7cc998 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java @@ -38,7 +38,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.Semaphore; @@ -88,7 +87,7 @@ public void connect() throws Exception { } LOG.debug("Connecting to server Port : " + leader.getIpAddress()); channel = NettyChannelBuilder.forAddress(leader.getIpAddress(), port) - .usePlaintext(true) + .usePlaintext() .maxInboundMessageSize(OzoneConfigKeys.DFS_CONTAINER_CHUNK_MAX_SIZE) .build(); asyncStub = XceiverClientProtocolServiceGrpc.newStub(channel); @@ -217,15 +216,14 @@ private void reconnect() throws IOException { /** * Create a pipeline. - * - * @param pipelineID - Name of the pipeline. 
- * @param datanodes - Datanodes */ @Override - public void createPipeline(String pipelineID, List datanodes) - throws IOException { + public void createPipeline() { // For stand alone pipeline, there is no notion called setup pipeline. - return; + } + + public void destroyPipeline() { + // For stand alone pipeline, there is no notion called destroy pipeline. } /** diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientHandler.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientHandler.java deleted file mode 100644 index 7c568f6b879..00000000000 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientHandler.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.scm; - -import com.google.common.base.Preconditions; -import org.apache.ratis.shaded.io.netty.channel.Channel; -import org.apache.ratis.shaded.io.netty.channel.ChannelHandlerContext; -import org.apache.ratis.shaded.io.netty.channel.SimpleChannelInboundHandler; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerCommandResponseProto; -import org.apache.hadoop.util.Time; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Iterator; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.Semaphore; - -/** - * Netty client handler. - */ -public class XceiverClientHandler extends - SimpleChannelInboundHandler { - - static final Logger LOG = LoggerFactory.getLogger(XceiverClientHandler.class); - private final ConcurrentMap responses = - new ConcurrentHashMap<>(); - - private final Pipeline pipeline; - private volatile Channel channel; - private XceiverClientMetrics metrics; - private final Semaphore semaphore; - - /** - * Constructs a client that can communicate to a container server. - */ - public XceiverClientHandler(Pipeline pipeline, Semaphore semaphore) { - super(false); - Preconditions.checkNotNull(pipeline); - this.pipeline = pipeline; - this.metrics = XceiverClientManager.getXceiverClientMetrics(); - this.semaphore = semaphore; - } - - /** - * Please keep in mind that this method will be renamed to {@code - * messageReceived(ChannelHandlerContext, I)} in 5.0. - *

- * Is called for each message of type {@link ContainerProtos - * .ContainerCommandResponseProto}. - * - * @param ctx the {@link ChannelHandlerContext} which this {@link - * SimpleChannelInboundHandler} belongs to - * @param msg the message to handle - * @throws Exception is thrown if an error occurred - */ - @Override - public void channelRead0(ChannelHandlerContext ctx, - ContainerProtos.ContainerCommandResponseProto msg) - throws Exception { - Preconditions.checkNotNull(msg); - metrics.decrPendingContainerOpsMetrics(msg.getCmdType()); - - String key = msg.getTraceID(); - ResponseFuture response = responses.remove(key); - semaphore.release(); - - if (response != null) { - response.getFuture().complete(msg); - - long requestTime = response.getRequestTime(); - metrics.addContainerOpsLatency(msg.getCmdType(), - Time.monotonicNowNanos() - requestTime); - } else { - LOG.error("A reply received for message that was not queued. trace " + - "ID: {}", msg.getTraceID()); - } - } - - @Override - public void channelRegistered(ChannelHandlerContext ctx) { - LOG.debug("channelRegistered: Connected to ctx"); - channel = ctx.channel(); - } - - @Override - public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { - LOG.info("Exception in client " + cause.toString()); - Iterator keyIterator = responses.keySet().iterator(); - while (keyIterator.hasNext()) { - ResponseFuture response = responses.remove(keyIterator.next()); - response.getFuture().completeExceptionally(cause); - semaphore.release(); - } - ctx.close(); - } - - /** - * Since netty is async, we send a work request and then wait until a response - * appears in the reply queue. This is simple sync interface for clients. we - * should consider building async interfaces for client if this turns out to - * be a performance bottleneck. - * - * @param request - request. - * @return -- response - */ - - public ContainerCommandResponseProto sendCommand( - ContainerProtos.ContainerCommandRequestProto request) - throws ExecutionException, InterruptedException { - Future future = sendCommandAsync(request); - return future.get(); - } - - /** - * SendCommandAsyc queues a command to the Netty Subsystem and returns a - * CompletableFuture. This Future is marked compeleted in the channelRead0 - * when the call comes back. - * @param request - Request to execute - * @return CompletableFuture - */ - public CompletableFuture sendCommandAsync( - ContainerProtos.ContainerCommandRequestProto request) - throws InterruptedException { - - // Throw an exception of request doesn't have traceId - if (StringUtils.isEmpty(request.getTraceID())) { - throw new IllegalArgumentException("Invalid trace ID"); - } - - // Setting the datanode ID in the commands, so that we can distinguish - // commands when the cluster simulator is running. - if(!request.hasDatanodeUuid()) { - throw new IllegalArgumentException("Invalid Datanode ID"); - } - - metrics.incrPendingContainerOpsMetrics(request.getCmdType()); - - CompletableFuture future - = new CompletableFuture<>(); - ResponseFuture response = new ResponseFuture(future, - Time.monotonicNowNanos()); - semaphore.acquire(); - ResponseFuture previous = responses.putIfAbsent( - request.getTraceID(), response); - if (previous != null) { - LOG.error("Command with Trace already exists. Ignoring this command. " + - "{}. Previous Command: {}", request.getTraceID(), - previous.toString()); - throw new IllegalStateException("Duplicate trace ID. Command with this " + - "trace ID is already executing. 
Please ensure that " + - "trace IDs are not reused. ID: " + request.getTraceID()); - } - - channel.writeAndFlush(request); - return response.getFuture(); - } - - /** - * Class wraps response future info. - */ - static class ResponseFuture { - private final long requestTime; - private final CompletableFuture future; - - ResponseFuture(CompletableFuture future, - long requestTime) { - this.future = future; - this.requestTime = requestTime; - } - - public long getRequestTime() { - return requestTime; - } - - public CompletableFuture getFuture() { - return future; - } - } -} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientInitializer.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientInitializer.java deleted file mode 100644 index 90e2f5ad91a..00000000000 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientInitializer.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.scm; - -import org.apache.ratis.shaded.io.netty.channel.ChannelInitializer; -import org.apache.ratis.shaded.io.netty.channel.ChannelPipeline; -import org.apache.ratis.shaded.io.netty.channel.socket.SocketChannel; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf.ProtobufDecoder; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf.ProtobufEncoder; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf - .ProtobufVarint32FrameDecoder; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf - .ProtobufVarint32LengthFieldPrepender; -import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; - -import java.util.concurrent.Semaphore; - -/** - * Setup the netty pipeline. - */ -public class XceiverClientInitializer extends - ChannelInitializer { - private final Pipeline pipeline; - private final Semaphore semaphore; - - /** - * Constructs an Initializer for the client pipeline. - * @param pipeline - Pipeline. - */ - public XceiverClientInitializer(Pipeline pipeline, Semaphore semaphore) { - this.pipeline = pipeline; - this.semaphore = semaphore; - } - - /** - * This method will be called once when the Channel is registered. After - * the method returns this instance will be removed from the - * ChannelPipeline of the Channel. - * - * @param ch Channel which was registered. - * @throws Exception is thrown if an error occurs. In that case the - * Channel will be closed. 
- */ - @Override - protected void initChannel(SocketChannel ch) throws Exception { - ChannelPipeline p = ch.pipeline(); - - p.addLast(new ProtobufVarint32FrameDecoder()); - p.addLast(new ProtobufDecoder(ContainerProtos - .ContainerCommandResponseProto.getDefaultInstance())); - - p.addLast(new ProtobufVarint32LengthFieldPrepender()); - p.addLast(new ProtobufEncoder()); - - p.addLast(new XceiverClientHandler(this.pipeline, this.semaphore)); - - } -} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java index 89197971eed..97624061c31 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientManager.java @@ -60,7 +60,6 @@ private final Configuration conf; private final Cache clientCache; private final boolean useRatis; - private final boolean useGrpc; private static XceiverClientMetrics metrics; /** @@ -78,8 +77,6 @@ public XceiverClientManager(Configuration conf) { this.useRatis = conf.getBoolean( ScmConfigKeys.DFS_CONTAINER_RATIS_ENABLED_KEY, ScmConfigKeys.DFS_CONTAINER_RATIS_ENABLED_DEFAULT); - this.useGrpc = conf.getBoolean(ScmConfigKeys.DFS_CONTAINER_GRPC_ENABLED_KEY, - ScmConfigKeys.DFS_CONTAINER_GRPC_ENABLED_DEFAULT); this.conf = conf; this.clientCache = CacheBuilder.newBuilder() .expireAfterAccess(staleThresholdMs, TimeUnit.MILLISECONDS) @@ -153,12 +150,11 @@ public XceiverClientSpi call() throws Exception { client = XceiverClientRatis.newXceiverClientRatis(pipeline, conf); break; case STAND_ALONE: - client = useGrpc ? new XceiverClientGrpc(pipeline, conf) : - new XceiverClient(pipeline, conf); + client = new XceiverClientGrpc(pipeline, conf); break; case CHAINED: default: - throw new IOException ("not implemented" + pipeline.getType()); + throw new IOException("not implemented" + pipeline.getType()); } client.connect(); return client; diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java index 0effa8fff33..946abfbba7e 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java @@ -19,6 +19,9 @@ package org.apache.hadoop.hdds.scm; import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.io.MultipleIOException; +import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.shaded.com.google.protobuf .InvalidProtocolBufferException; import org.apache.hadoop.conf.Configuration; @@ -38,10 +41,13 @@ import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; import org.apache.ratis.shaded.com.google.protobuf.ByteString; +import org.apache.ratis.util.CheckedBiConsumer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.concurrent.CompletableFuture; @@ -63,35 +69,48 @@ public static XceiverClientRatis newXceiverClientRatis( ScmConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_DEFAULT); final int maxOutstandingRequests = HddsClientUtils.getMaxOutstandingRequests(ozoneConf); + final RetryPolicy retryPolicy = RatisHelper.createRetryPolicy(ozoneConf); return new 
XceiverClientRatis(pipeline, - SupportedRpcType.valueOfIgnoreCase(rpcType), maxOutstandingRequests); + SupportedRpcType.valueOfIgnoreCase(rpcType), maxOutstandingRequests, + retryPolicy); } private final Pipeline pipeline; private final RpcType rpcType; private final AtomicReference client = new AtomicReference<>(); private final int maxOutstandingRequests; + private final RetryPolicy retryPolicy; /** * Constructs a client. */ private XceiverClientRatis(Pipeline pipeline, RpcType rpcType, - int maxOutStandingChunks) { + int maxOutStandingChunks, RetryPolicy retryPolicy) { super(); this.pipeline = pipeline; this.rpcType = rpcType; this.maxOutstandingRequests = maxOutStandingChunks; + this.retryPolicy = retryPolicy; } /** * {@inheritDoc} */ - public void createPipeline(String clusterId, List datanodes) - throws IOException { - RaftGroup group = RatisHelper.newRaftGroup(datanodes); - LOG.debug("initializing pipeline:{} with nodes:{}", clusterId, - group.getPeers()); - reinitialize(datanodes, group); + public void createPipeline() throws IOException { + final RaftGroup group = RatisHelper.newRaftGroup(pipeline); + LOG.debug("creating pipeline:{} with {}", pipeline.getId(), group); + callRatisRpc(pipeline.getMachines(), + (raftClient, peer) -> raftClient.groupAdd(group, peer.getId())); + } + + /** + * {@inheritDoc} + */ + public void destroyPipeline() throws IOException { + final RaftGroup group = RatisHelper.newRaftGroup(pipeline); + LOG.debug("destroying pipeline:{} with {}", pipeline.getId(), group); + callRatisRpc(pipeline.getMachines(), (raftClient, peer) -> raftClient + .groupRemove(group.getGroupId(), peer.getId())); } /** @@ -104,47 +123,28 @@ public void createPipeline(String clusterId, List datanodes) return HddsProtos.ReplicationType.RATIS; } - private void reinitialize(List datanodes, RaftGroup group) + private void callRatisRpc(List datanodes, + CheckedBiConsumer rpc) throws IOException { if (datanodes.isEmpty()) { return; } - IOException exception = null; - for (DatanodeDetails d : datanodes) { - try { - reinitialize(d, group); + final List exceptions = + Collections.synchronizedList(new ArrayList<>()); + datanodes.parallelStream().forEach(d -> { + final RaftPeer p = RatisHelper.toRaftPeer(d); + try (RaftClient client = RatisHelper + .newRaftClient(rpcType, p, retryPolicy)) { + rpc.accept(client, p); } catch (IOException ioe) { - if (exception == null) { - exception = new IOException( - "Failed to reinitialize some of the RaftPeer(s)", ioe); - } else { - exception.addSuppressed(ioe); - } + exceptions.add( + new IOException("Failed invoke Ratis rpc " + rpc + " for " + d, + ioe)); } - } - if (exception != null) { - throw exception; - } - } - - /** - * Adds a new peers to the Ratis Ring. - * - * @param datanode - new datanode - * @param group - Raft group - * @throws IOException - on Failure. 
- */ - private void reinitialize(DatanodeDetails datanode, RaftGroup group) - throws IOException { - final RaftPeer p = RatisHelper.toRaftPeer(datanode); - try (RaftClient client = RatisHelper.newRaftClient(rpcType, p)) { - client.reinitialize(group, p.getId()); - } catch (IOException ioe) { - LOG.error("Failed to reinitialize RaftPeer:{} datanode: {} ", - p, datanode, ioe); - throw new IOException("Failed to reinitialize RaftPeer " + p - + "(datanode=" + datanode + ")", ioe); + }); + if (!exceptions.isEmpty()) { + throw MultipleIOException.createIOException(exceptions); } } @@ -156,13 +156,13 @@ public Pipeline getPipeline() { @Override public void connect() throws Exception { LOG.debug("Connecting to pipeline:{} leader:{}", - getPipeline().getPipelineName(), + getPipeline().getId(), RatisHelper.toRaftPeerId(pipeline.getLeader())); // TODO : XceiverClient ratis should pass the config value of // maxOutstandingRequests so as to set the upper bound on max no of async // requests to be handled by raft client if (!client.compareAndSet(null, - RatisHelper.newRaftClient(rpcType, getPipeline()))) { + RatisHelper.newRaftClient(rpcType, getPipeline(), retryPolicy))) { throw new IllegalStateException("Client is already connected."); } } @@ -183,34 +183,9 @@ private RaftClient getClient() { return Objects.requireNonNull(client.get(), "client is null"); } - private boolean isReadOnly(ContainerCommandRequestProto proto) { - switch (proto.getCmdType()) { - case ReadContainer: - case ReadChunk: - case ListKey: - case GetKey: - case GetSmallFile: - case ListContainer: - case ListChunk: - return true; - case CloseContainer: - case WriteChunk: - case UpdateContainer: - case CompactChunk: - case CreateContainer: - case DeleteChunk: - case DeleteContainer: - case DeleteKey: - case PutKey: - case PutSmallFile: - default: - return false; - } - } - private RaftClientReply sendRequest(ContainerCommandRequestProto request) throws IOException { - boolean isReadOnlyRequest = isReadOnly(request); + boolean isReadOnlyRequest = HddsUtils.isReadOnly(request); ByteString byteString = request.toByteString(); LOG.debug("sendCommand {} {}", isReadOnlyRequest, request); final RaftClientReply reply = isReadOnlyRequest ? @@ -222,7 +197,7 @@ private RaftClientReply sendRequest(ContainerCommandRequestProto request) private CompletableFuture sendRequestAsync( ContainerCommandRequestProto request) throws IOException { - boolean isReadOnlyRequest = isReadOnly(request); + boolean isReadOnlyRequest = HddsUtils.isReadOnly(request); ByteString byteString = request.toByteString(); LOG.debug("sendCommandAsync {} {}", isReadOnlyRequest, request); return isReadOnlyRequest ? 
getClient().sendReadOnlyAsync(() -> byteString) : @@ -233,6 +208,10 @@ private RaftClientReply sendRequest(ContainerCommandRequestProto request) public ContainerCommandResponseProto sendCommand( ContainerCommandRequestProto request) throws IOException { final RaftClientReply reply = sendRequest(request); + if (reply == null) { + throw new IOException( + String.format("Could not execute the request %s", request)); + } Preconditions.checkState(reply.isSuccess()); return ContainerCommandResponseProto.parseFrom( reply.getMessage().getContent()); diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java index e7bdaf0f682..fed589c81de 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java @@ -158,7 +158,7 @@ public void createContainer(XceiverClientSpi client, private void createPipeline(XceiverClientSpi client, Pipeline pipeline) throws IOException { - Preconditions.checkNotNull(pipeline.getPipelineName(), "Pipeline " + + Preconditions.checkNotNull(pipeline.getId(), "Pipeline " + "name cannot be null when client create flag is set."); // Pipeline creation is a three step process. @@ -180,8 +180,7 @@ private void createPipeline(XceiverClientSpi client, Pipeline pipeline) // ObjectStageChangeRequestProto.Op.create, // ObjectStageChangeRequestProto.Stage.begin); - client.createPipeline(pipeline.getPipelineName(), - pipeline.getMachines()); + client.createPipeline(); //storageContainerLocationClient.notifyObjectStageChange( // ObjectStageChangeRequestProto.Type.pipeline, @@ -258,6 +257,15 @@ public Pipeline createReplicationPipeline(HddsProtos.ReplicationType type, factor, nodePool); } + @Override + public void close() { + try { + xceiverClientManager.close(); + } catch (Exception ex) { + LOG.error("Can't close " + this.getClass().getSimpleName(), ex); + } + } + /** * Deletes an existing container. 
* diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java index a6813ebf605..9c590385a4e 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/HddsClientUtils.java @@ -74,7 +74,7 @@ private HddsClientUtils() { */ public static String formatDateTime(long millis) { ZonedDateTime dateTime = ZonedDateTime.ofInstant( - Instant.ofEpochSecond(millis), DATE_FORMAT.get().getZone()); + Instant.ofEpochMilli(millis), DATE_FORMAT.get().getZone()); return DATE_FORMAT.get().format(dateTime); } @@ -85,7 +85,7 @@ public static String formatDateTime(long millis) { public static long formatDateTime(String date) throws ParseException { Preconditions.checkNotNull(date, "Date string should not be null."); return ZonedDateTime.parse(date, DATE_FORMAT.get()) - .toInstant().getEpochSecond(); + .toInstant().toEpochMilli(); } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java index 020c6847bbd..a483197b0ea 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java @@ -65,7 +65,8 @@ * @param chunks list of chunks to read * @param traceID container protocol call traceID */ - public ChunkInputStream(BlockID blockID, XceiverClientManager xceiverClientManager, + public ChunkInputStream( + BlockID blockID, XceiverClientManager xceiverClientManager, XceiverClientSpi xceiverClient, List chunks, String traceID) { this.blockID = blockID; this.traceID = traceID; @@ -120,12 +121,17 @@ public synchronized int read(byte[] b, int off, int len) throws IOException { return 0; } checkOpen(); - int available = prepareRead(len); - if (available == EOF) { - return EOF; + int total = 0; + while (len > 0) { + int available = prepareRead(len); + if (available == EOF) { + return total != 0 ? total : EOF; + } + buffers.get(bufferIndex).get(b, off + total, available); + len -= available; + total += available; } - buffers.get(bufferIndex).get(b, off, available); - return available; + return total; } @Override @@ -195,13 +201,20 @@ private synchronized void readChunkFromContainer() throws IOException { // next chunk chunkIndex += 1; final ReadChunkResponseProto readChunkResponse; + final ChunkInfo chunkInfo = chunks.get(chunkIndex); try { - readChunkResponse = ContainerProtocolCalls.readChunk(xceiverClient, - chunks.get(chunkIndex), blockID, traceID); + readChunkResponse = ContainerProtocolCalls + .readChunk(xceiverClient, chunkInfo, blockID, traceID); } catch (IOException e) { throw new IOException("Unexpected OzoneException: " + e.toString(), e); } ByteString byteString = readChunkResponse.getData(); + if (byteString.size() != chunkInfo.getLen()) { + // Bytes read from chunk should be equal to chunk size. 
+ throw new IOException(String + .format("Inconsistent read for chunk=%s len=%d bytesRead=%d", + chunkInfo.getChunkName(), chunkInfo.getLen(), byteString.size())); + } buffers = byteString.asReadOnlyByteBufferList(); bufferIndex = 0; } @@ -211,8 +224,8 @@ public synchronized void seek(long pos) throws IOException { if (pos < 0 || (chunks.size() == 0 && pos > 0) || pos >= chunkOffset[chunks.size() - 1] + chunks.get(chunks.size() - 1) .getLen()) { - throw new EOFException( - "EOF encountered pos: " + pos + " container key: " + blockID.getLocalID()); + throw new EOFException("EOF encountered pos: " + pos + " container key: " + + blockID.getLocalID()); } if (chunkIndex == -1) { chunkIndex = Arrays.binarySearch(chunkOffset, pos); @@ -259,4 +272,8 @@ public synchronized long getPos() throws IOException { public boolean seekToNewSource(long targetPos) throws IOException { return false; } + + public BlockID getBlockID() { + return blockID; + } } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkOutputStream.java index 779e63696a6..8d311d0e1b7 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkOutputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkOutputStream.java @@ -76,8 +76,8 @@ * @param chunkSize chunk size */ public ChunkOutputStream(BlockID blockID, String key, - XceiverClientManager xceiverClientManager, XceiverClientSpi xceiverClient, - String traceID, int chunkSize) { + XceiverClientManager xceiverClientManager, + XceiverClientSpi xceiverClient, String traceID, int chunkSize) { this.blockID = blockID; this.key = key; this.traceID = traceID; @@ -94,8 +94,12 @@ public ChunkOutputStream(BlockID blockID, String key, this.chunkIndex = 0; } + public ByteBuffer getBuffer() { + return buffer; + } + @Override - public synchronized void write(int b) throws IOException { + public void write(int b) throws IOException { checkOpen(); int rollbackPosition = buffer.position(); int rollbackLimit = buffer.limit(); @@ -106,7 +110,8 @@ public synchronized void write(int b) throws IOException { } @Override - public void write(byte[] b, int off, int len) throws IOException { + public void write(byte[] b, int off, int len) + throws IOException { if (b == null) { throw new NullPointerException(); } @@ -132,7 +137,7 @@ public void write(byte[] b, int off, int len) throws IOException { } @Override - public synchronized void flush() throws IOException { + public void flush() throws IOException { checkOpen(); if (buffer.position() > 0) { int rollbackPosition = buffer.position(); @@ -142,25 +147,28 @@ public synchronized void flush() throws IOException { } @Override - public synchronized void close() throws IOException { - if (xceiverClientManager != null && xceiverClient != null && - buffer != null) { + public void close() throws IOException { + if (xceiverClientManager != null && xceiverClient != null + && buffer != null) { + if (buffer.position() > 0) { + writeChunkToContainer(); + } try { - if (buffer.position() > 0) { - writeChunkToContainer(); - } putKey(xceiverClient, containerKeyData.build(), traceID); } catch (IOException e) { throw new IOException( "Unexpected Storage Container Exception: " + e.toString(), e); } finally { - xceiverClientManager.releaseClient(xceiverClient); - xceiverClientManager = null; - xceiverClient = null; - buffer = null; + cleanup(); } } + } + public void cleanup() { + 
xceiverClientManager.releaseClient(xceiverClient); + xceiverClientManager = null; + xceiverClient = null; + buffer = null; } /** @@ -168,7 +176,7 @@ public synchronized void close() throws IOException { * * @throws IOException if stream is closed */ - private synchronized void checkOpen() throws IOException { + private void checkOpen() throws IOException { if (xceiverClient == null) { throw new IOException("ChunkOutputStream has been closed."); } @@ -183,7 +191,7 @@ private synchronized void checkOpen() throws IOException { * @param rollbackLimit limit to restore in buffer if write fails * @throws IOException if there is an I/O error while performing the call */ - private synchronized void flushBufferToChunk(int rollbackPosition, + private void flushBufferToChunk(int rollbackPosition, int rollbackLimit) throws IOException { boolean success = false; try { @@ -205,7 +213,7 @@ private synchronized void flushBufferToChunk(int rollbackPosition, * * @throws IOException if there is an I/O error while performing the call */ - private synchronized void writeChunkToContainer() throws IOException { + private void writeChunkToContainer() throws IOException { buffer.flip(); ByteString data = ByteString.copyFrom(buffer); ChunkInfo chunk = ChunkInfo diff --git a/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml b/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml index daf6fec79a9..c7db6794cc0 100644 --- a/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdds/common/dev-support/findbugsExcludeFile.xml @@ -21,4 +21,8 @@ + + + + diff --git a/hadoop-hdds/common/pom.xml b/hadoop-hdds/common/pom.xml index 406852280b5..d08a5a9e6af 100644 --- a/hadoop-hdds/common/pom.xml +++ b/hadoop-hdds/common/pom.xml @@ -20,19 +20,21 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-hdds-common - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Distributed Data Store Common Apache Hadoop HDDS Common jar + 0.3.0-SNAPSHOT hdds true 2.11.0 3.4.2 + ${hdds.version} @@ -73,7 +75,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.rocksdb rocksdbjni - 5.8.0 + 5.14.2 @@ -98,10 +100,31 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> disruptor ${disruptor.version} + + org.apache.commons + commons-pool2 + 2.6.0 + + + + ${basedir}/src/main/resources + + hdds-version-info.properties + + false + + + ${basedir}/src/main/resources + + hdds-version-info.properties + + true + + kr.motd.maven @@ -169,6 +192,22 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-maven-plugins + + version-info + generate-resources + + version-info + + + + ${basedir}/../ + + */src/main/java/**/*.java + */src/main/proto/*.proto + + + + compile-protoc diff --git a/hadoop-hdds/common/src/main/conf/log4j.properties b/hadoop-hdds/common/src/main/conf/log4j.properties new file mode 100644 index 00000000000..663e2548ce5 --- /dev/null +++ b/hadoop-hdds/common/src/main/conf/log4j.properties @@ -0,0 +1,157 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Define some default values that can be overridden by system properties +hadoop.root.logger=INFO,console +hadoop.log.dir=. +hadoop.log.file=hadoop.log + +# Define the root logger to the system property "hadoop.root.logger". +log4j.rootLogger=${hadoop.root.logger}, EventCounter + +# Logging Threshold +log4j.threshold=ALL + +# Null Appender +log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender + +# +# Rolling File Appender - cap space usage at 5gb. +# +hadoop.log.maxfilesize=256MB +hadoop.log.maxbackupindex=20 +log4j.appender.RFA=org.apache.log4j.RollingFileAppender +log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} + +log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} +log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} + +log4j.appender.RFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# Daily Rolling File Appender +# + +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Rollover at midnight +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd + +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# console +# Add "console" to rootlogger above if you want to use this +# + +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n + +# +# TaskLog Appender +# +log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender + +log4j.appender.TLA.layout=org.apache.log4j.PatternLayout +log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n + +# +# HDFS block state change log from block manager +# +# Uncomment the following to log normal block state change +# messages from BlockManager in NameNode. 
+#log4j.logger.BlockStateChange=DEBUG + +# +#Security appender +# +hadoop.security.logger=INFO,NullAppender +hadoop.security.log.maxfilesize=256MB +hadoop.security.log.maxbackupindex=20 +log4j.category.SecurityLogger=${hadoop.security.logger} +hadoop.security.log.file=SecurityAuth-${user.name}.audit +log4j.appender.RFAS=org.apache.log4j.RollingFileAppender +log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} +log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} +log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} + +# +# Daily Rolling Security appender +# +log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} +log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd + + +# Custom Logging levels +# AWS SDK & S3A FileSystem +#log4j.logger.com.amazonaws=ERROR +log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR +#log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN + +# +# Event Counter Appender +# Sends counts of logging messages at different severity levels to Hadoop Metrics. +# +log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter + + +log4j.logger.org.apache.hadoop.ozone=DEBUG,OZONE,FILE + +# Do not log into datanode logs. Remove this line to have single log. +log4j.additivity.org.apache.hadoop.ozone=false + +# For development purposes, log both to console and log file. +log4j.appender.OZONE=org.apache.log4j.ConsoleAppender +log4j.appender.OZONE.Threshold=info +log4j.appender.OZONE.layout=org.apache.log4j.PatternLayout +log4j.appender.OZONE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ + %X{component} %X{function} %X{resource} %X{user} %X{request} - %m%n + +# Real ozone logger that writes to ozone.log +log4j.appender.FILE=org.apache.log4j.DailyRollingFileAppender +log4j.appender.FILE.File=${hadoop.log.dir}/ozone.log +log4j.appender.FILE.Threshold=debug +log4j.appender.FILE.layout=org.apache.log4j.PatternLayout +log4j.appender.FILE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ +(%F:%L) %X{function} %X{resource} %X{user} %X{request} - \ +%m%n + +# Log levels of third-party libraries +log4j.logger.org.apache.commons.beanutils=WARN + +log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR +log4j.logger.org.apache.ratis.conf.ConfUtils=WARN +log4j.logger.org.apache.hadoop.security.ShellBasedUnixGroupsMapping=ERROR diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index dec2c1c44a3..492be823594 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -17,7 +17,76 @@ */ package org.apache.hadoop.hdds; +import org.apache.hadoop.utils.db.DBProfile; + +/** + * This class contains constants for configuration keys and default values + * used in hdds. + */ public final class HddsConfigKeys { + + /** + * Do not instantiate. 
+ */ private HddsConfigKeys() { } + + public static final String HDDS_HEARTBEAT_INTERVAL = + "hdds.heartbeat.interval"; + public static final String HDDS_HEARTBEAT_INTERVAL_DEFAULT = + "30s"; + + public static final String HDDS_NODE_REPORT_INTERVAL = + "hdds.node.report.interval"; + public static final String HDDS_NODE_REPORT_INTERVAL_DEFAULT = + "60s"; + + public static final String HDDS_CONTAINER_REPORT_INTERVAL = + "hdds.container.report.interval"; + public static final String HDDS_CONTAINER_REPORT_INTERVAL_DEFAULT = + "60s"; + + public static final String HDDS_COMMAND_STATUS_REPORT_INTERVAL = + "hdds.command.status.report.interval"; + public static final String HDDS_COMMAND_STATUS_REPORT_INTERVAL_DEFAULT = + "60s"; + + public static final String HDDS_CONTAINER_ACTION_MAX_LIMIT = + "hdds.container.action.max.limit"; + public static final int HDDS_CONTAINER_ACTION_MAX_LIMIT_DEFAULT = + 20; + + public static final String HDDS_PIPELINE_ACTION_MAX_LIMIT = + "hdds.pipeline.action.max.limit"; + public static final int HDDS_PIPELINE_ACTION_MAX_LIMIT_DEFAULT = + 20; + + // Configuration to allow volume choosing policy. + public static final String HDDS_DATANODE_VOLUME_CHOOSING_POLICY = + "hdds.datanode.volume.choosing.policy"; + + // DB Profiles used by ROCKDB instances. + public static final String HDDS_DB_PROFILE = "hdds.db.profile"; + public static final DBProfile HDDS_DEFAULT_DB_PROFILE = DBProfile.SSD; + + // Once a container usage crosses this threshold, it is eligible for + // closing. + public static final String HDDS_CONTAINER_CLOSE_THRESHOLD = + "hdds.container.close.threshold"; + public static final float HDDS_CONTAINER_CLOSE_THRESHOLD_DEFAULT = 0.9f; + + public static final String HDDS_SCM_CHILLMODE_ENABLED = + "hdds.scm.chillmode.enabled"; + public static final boolean HDDS_SCM_CHILLMODE_ENABLED_DEFAULT = true; + + // % of containers which should have at least one reported replica + // before SCM comes out of chill mode. + public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT = + "hdds.scm.chillmode.threshold.pct"; + public static final double HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT = 0.99; + + public static final String HDDS_LOCK_MAX_CONCURRENCY = + "hdds.lock.max.concurrency"; + public static final int HDDS_LOCK_MAX_CONCURRENCY_DEFAULT = 100; + } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsIdFactory.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsIdFactory.java new file mode 100644 index 00000000000..b244b8cf75d --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsIdFactory.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds; + +import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; + +/** + * HDDS Id generator. + */ +public final class HddsIdFactory { + private HddsIdFactory() { + } + + private static final AtomicLong LONG_COUNTER = new AtomicLong( + System.currentTimeMillis()); + + /** + * Returns an incrementing long. This class doesn't + * persist initial value for long Id's, so incremental id's after restart + * may collide with previously generated Id's. + * + * @return long + */ + public static long getLongId() { + return LONG_COUNTER.incrementAndGet(); + } + + /** + * Returns a uuid. + * + * @return UUID. + */ + public static UUID getUUId() { + return UUID.randomUUID(); + } + +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java index 48c6dce4240..33bf90c04d3 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java @@ -24,6 +24,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.net.DNS; import org.apache.hadoop.net.NetUtils; @@ -315,4 +316,36 @@ public static String getHostName(Configuration conf) return name; } + /** + * Checks if the container command is read only or not. + * @param proto ContainerCommand Request proto + * @return True if its readOnly , false otherwise. + */ + public static boolean isReadOnly( + ContainerProtos.ContainerCommandRequestProto proto) { + switch (proto.getCmdType()) { + case ReadContainer: + case ReadChunk: + case ListKey: + case GetKey: + case GetSmallFile: + case ListContainer: + case ListChunk: + case GetCommittedBlockLength: + return true; + case CloseContainer: + case WriteChunk: + case UpdateContainer: + case CompactChunk: + case CreateContainer: + case DeleteChunk: + case DeleteContainer: + case DeleteKey: + case PutKey: + case PutSmallFile: + default: + return false; + } + } + } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java new file mode 100644 index 00000000000..e56810cdca4 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.cli; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; + +import com.google.common.annotations.VisibleForTesting; +import picocli.CommandLine; +import picocli.CommandLine.ExecutionException; +import picocli.CommandLine.Option; +import picocli.CommandLine.RunLast; + +/** + * This is a generic parent class for all the ozone related cli tools. + */ +public class GenericCli implements Callable, GenericParentCommand { + + @Option(names = {"--verbose"}, + description = "More verbose output. Show the stack trace of the errors.") + private boolean verbose; + + @Option(names = {"-D", "--set"}) + private Map configurationOverrides = new HashMap<>(); + + private final CommandLine cmd; + + public GenericCli() { + cmd = new CommandLine(this); + } + + public void run(String[] argv) { + try { + execute(argv); + } catch (ExecutionException ex) { + printError(ex.getCause()); + System.exit(-1); + } + } + + @VisibleForTesting + public void execute(String[] argv) { + cmd.parseWithHandler(new RunLast(), argv); + } + + private void printError(Throwable error) { + if (verbose) { + error.printStackTrace(System.err); + } else { + System.err.println(error.getMessage().split("\n")[0]); + } + if(error instanceof MissingSubcommandException){ + System.err.println(((MissingSubcommandException) error).getUsage()); + } + } + + @Override + public Void call() throws Exception { + throw new MissingSubcommandException(cmd.getUsageMessage()); + } + + public OzoneConfiguration createOzoneConfiguration() { + OzoneConfiguration ozoneConf = new OzoneConfiguration(); + if (configurationOverrides != null) { + for (Entry entry : configurationOverrides.entrySet()) { + ozoneConf + .set(entry.getKey(), configurationOverrides.get(entry.getValue())); + } + } + return ozoneConf; + } + + @VisibleForTesting + public picocli.CommandLine getCmd() { + return cmd; + } + + @Override + public boolean isVerbose() { + return verbose; + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericParentCommand.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericParentCommand.java new file mode 100644 index 00000000000..a1d21715e77 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericParentCommand.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.cli; + +/** + * Interface to access the higher level parameters. + */ +public interface GenericParentCommand { + + boolean isVerbose(); +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/HddsVersionProvider.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/HddsVersionProvider.java new file mode 100644 index 00000000000..7110839546f --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/HddsVersionProvider.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.cli; + +import org.apache.hadoop.utils.HddsVersionInfo; + +import picocli.CommandLine.IVersionProvider; + +/** + * Version provider for the CLI interface. + */ +public class HddsVersionProvider implements IVersionProvider { + @Override + public String[] getVersion() throws Exception { + String[] result = new String[] { + HddsVersionInfo.getBuildVersion() + }; + return result; + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/MissingSubcommandException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/MissingSubcommandException.java new file mode 100644 index 00000000000..9f0c4943246 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/MissingSubcommandException.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.cli; + +/** + * Exception to throw if subcommand is not selected but required. + */ +public class MissingSubcommandException extends RuntimeException { + + private String usage; + + public MissingSubcommandException(String usage) { + super("Incomplete command"); + this.usage = usage; + } + + public String getUsage() { + return usage; + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/package-info.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/package-info.java new file mode 100644 index 00000000000..8dcc1d1a3c9 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Generic helper class to make instantiate picocli based cli tools. + */ +package org.apache.hadoop.hdds.cli; \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java index 62b12e3e041..81497406d6b 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/BlockID.java @@ -20,8 +20,10 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import java.util.Objects; + /** - * BlockID of ozone (containerID + localID) + * BlockID of ozone (containerID localID). 
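Illustrative sketch (not part of the patch): a hypothetical tool wired onto the new picocli-based scaffolding above (GenericCli, GenericParentCommand, HddsVersionProvider, MissingSubcommandException). The tool and subcommand names are made up; only the GenericCli, HddsVersionProvider and OzoneConfiguration calls come from the code shown here.

import java.util.concurrent.Callable;

import org.apache.hadoop.hdds.cli.GenericCli;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;

import picocli.CommandLine.Command;
import picocli.CommandLine.ParentCommand;

@Command(name = "example-tool",
    description = "Hypothetical CLI built on GenericCli.",
    versionProvider = HddsVersionProvider.class,
    subcommands = ExampleTool.Ping.class)
public class ExampleTool extends GenericCli {

  public static void main(String[] args) {
    // With no subcommand, call() throws MissingSubcommandException and
    // run() prints the short error message followed by the usage text.
    new ExampleTool().run(args);
  }

  /** Example subcommand; the RunLast handler invokes its call() method. */
  @Command(name = "ping", description = "Prints a single configuration value.")
  static class Ping implements Callable<Void> {

    @ParentCommand
    private ExampleTool parent;

    @Override
    public Void call() throws Exception {
      // createOzoneConfiguration() is where the parent's -D/--set
      // overrides are folded into the configuration.
      OzoneConfiguration conf = parent.createOzoneConfiguration();
      System.out.println("ozone.enabled = " + conf.get("ozone.enabled"));
      return null;
    }
  }
}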
*/ public class BlockID { private long containerID; @@ -63,9 +65,26 @@ public static BlockID getFromProtobuf(HddsProtos.BlockID blockID) { setContainerID(containerID).setLocalID(localID).build(); } - public static BlockID getFromProtobuf(ContainerProtos.DatanodeBlockID blockID) { + public static BlockID getFromProtobuf( + ContainerProtos.DatanodeBlockID blockID) { return new BlockID(blockID.getContainerID(), blockID.getLocalID()); } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + BlockID blockID = (BlockID) o; + return containerID == blockID.containerID && localID == blockID.localID; + } + + @Override + public int hashCode() { + return Objects.hash(containerID, localID); + } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 4f1b1c8e2c8..63f59168288 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -19,6 +19,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.ratis.shaded.proto.RaftProtos.ReplicationLevel; import org.apache.ratis.util.TimeDuration; import java.util.concurrent.TimeUnit; @@ -49,10 +50,6 @@ = "dfs.container.ratis.enabled"; public static final boolean DFS_CONTAINER_RATIS_ENABLED_DEFAULT = false; - public static final String DFS_CONTAINER_GRPC_ENABLED_KEY - = "dfs.container.grpc.enabled"; - public static final boolean DFS_CONTAINER_GRPC_ENABLED_DEFAULT - = false; public static final String DFS_CONTAINER_RATIS_RPC_TYPE_KEY = "dfs.container.ratis.rpc.type"; public static final String DFS_CONTAINER_RATIS_RPC_TYPE_DEFAULT @@ -61,6 +58,10 @@ = "dfs.container.ratis.num.write.chunk.threads"; public static final int DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_DEFAULT = 60; + public static final String DFS_CONTAINER_RATIS_REPLICATION_LEVEL_KEY + = "dfs.container.ratis.replication.level"; + public static final ReplicationLevel + DFS_CONTAINER_RATIS_REPLICATION_LEVEL_DEFAULT = ReplicationLevel.MAJORITY; public static final String DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY = "dfs.container.ratis.segment.size"; public static final int DFS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT = @@ -74,11 +75,36 @@ public static final TimeDuration DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT = TimeDuration.valueOf(3000, TimeUnit.MILLISECONDS); + public static final String DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY = + "dfs.ratis.client.request.max.retries"; + public static final int DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT = 180; + public static final String DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY = + "dfs.ratis.client.request.retry.interval"; + public static final TimeDuration + DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT = + TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); + public static final String DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY = + "dfs.ratis.server.retry-cache.timeout.duration"; + public static final TimeDuration + DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT = + TimeDuration.valueOf(600000, TimeUnit.MILLISECONDS); public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY = "dfs.ratis.server.request.timeout.duration"; public static final TimeDuration DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT 
= TimeDuration.valueOf(3000, TimeUnit.MILLISECONDS); + public static final String + DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY = + "dfs.ratis.leader.election.minimum.timeout.duration"; + public static final TimeDuration + DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT = + TimeDuration.valueOf(1, TimeUnit.SECONDS); + + public static final String DFS_RATIS_SERVER_FAILURE_DURATION_KEY = + "dfs.ratis.server.failure.duration"; + public static final TimeDuration + DFS_RATIS_SERVER_FAILURE_DURATION_DEFAULT = + TimeDuration.valueOf(120, TimeUnit.SECONDS); // TODO : this is copied from OzoneConsts, may need to move to a better place public static final String OZONE_SCM_CHUNK_SIZE_KEY = "ozone.scm.chunk.size"; @@ -141,6 +167,7 @@ public static final String HDDS_REST_HTTP_ADDRESS_KEY = "hdds.rest.http-address"; public static final String HDDS_REST_HTTP_ADDRESS_DEFAULT = "0.0.0.0:9880"; + public static final String HDDS_DATANODE_DIR_KEY = "hdds.datanode.dir"; public static final String HDDS_REST_CSRF_ENABLED_KEY = "hdds.rest.rest-csrf.enabled"; public static final boolean HDDS_REST_CSRF_ENABLED_DEFAULT = false; @@ -155,11 +182,6 @@ "ozone.scm.handler.count.key"; public static final int OZONE_SCM_HANDLER_COUNT_DEFAULT = 10; - public static final String OZONE_SCM_HEARTBEAT_INTERVAL = - "ozone.scm.heartbeat.interval"; - public static final String OZONE_SCM_HEARBEAT_INTERVAL_DEFAULT = - "30s"; - public static final String OZONE_SCM_DEADNODE_INTERVAL = "ozone.scm.dead.node.interval"; public static final String OZONE_SCM_DEADNODE_INTERVAL_DEFAULT = @@ -219,9 +241,9 @@ "ozone.scm.db.cache.size.mb"; public static final int OZONE_SCM_DB_CACHE_SIZE_DEFAULT = 128; - public static final String OZONE_SCM_CONTAINER_SIZE_GB = - "ozone.scm.container.size.gb"; - public static final int OZONE_SCM_CONTAINER_SIZE_DEFAULT = 5; + public static final String OZONE_SCM_CONTAINER_SIZE = + "ozone.scm.container.size"; + public static final String OZONE_SCM_CONTAINER_SIZE_DEFAULT = "5GB"; public static final String OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY = "ozone.scm.container.placement.impl"; @@ -230,8 +252,9 @@ "ozone.scm.container.provision_batch_size"; public static final int OZONE_SCM_CONTAINER_PROVISION_BATCH_SIZE_DEFAULT = 20; - public static final String OZONE_SCM_CONTAINER_DELETION_CHOOSING_POLICY = - "ozone.scm.container.deletion-choosing.policy"; + public static final String + OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY = + "ozone.scm.keyvalue.container.deletion-choosing.policy"; public static final String OZONE_SCM_CONTAINER_CREATION_LEASE_TIMEOUT = "ozone.scm.container.creation.lease.timeout"; @@ -239,16 +262,22 @@ public static final String OZONE_SCM_CONTAINER_CREATION_LEASE_TIMEOUT_DEFAULT = "60s"; + public static final String OZONE_SCM_PIPELINE_CREATION_LEASE_TIMEOUT = + "ozone.scm.pipeline.creation.lease.timeout"; + + public static final String + OZONE_SCM_PIPELINE_CREATION_LEASE_TIMEOUT_DEFAULT = "60s"; public static final String OZONE_SCM_BLOCK_DELETION_MAX_RETRY = "ozone.scm.block.deletion.max.retry"; public static final int OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT = 4096; - // Once a container usage crosses this threshold, it is eligible for - // closing. 
- public static final String OZONE_SCM_CONTAINER_CLOSE_THRESHOLD = - "ozone.scm.container.close.threshold"; - public static final float OZONE_SCM_CONTAINER_CLOSE_THRESHOLD_DEFAULT = 0.9f; + public static final String HDDS_SCM_WATCHER_TIMEOUT = + "hdds.scm.watcher.timeout"; + + public static final String HDDS_SCM_WATCHER_TIMEOUT_DEFAULT = + "10m"; + /** * Never constructed. */ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java index 56cc74125b1..e8ef5c572c2 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientSpi.java @@ -20,7 +20,6 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos @@ -29,7 +28,6 @@ import java.io.Closeable; import java.io.IOException; -import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicInteger; @@ -113,12 +111,14 @@ public abstract ContainerCommandResponseProto sendCommand( /** * Create a pipeline. - * - * @param pipelineID - Name of the pipeline. - * @param datanodes - Datanodes */ - public abstract void createPipeline(String pipelineID, - List datanodes) throws IOException; + public abstract void createPipeline() throws IOException; + + /** + * Destroy a pipeline. + * @throws IOException + */ + public abstract void destroyPipeline() throws IOException; /** * Returns pipeline Type. diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 7955179a17f..184c5478fdf 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -25,6 +25,7 @@ .ContainerData; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import java.io.Closeable; import java.io.IOException; import java.util.List; @@ -39,7 +40,7 @@ * this interface will likely be removed. */ @InterfaceStability.Unstable -public interface ScmClient { +public interface ScmClient extends Closeable { /** * Creates a Container on SCM and returns the pipeline. * @return ContainerInfo @@ -61,7 +62,8 @@ * @return ContainerWithPipeline * @throws IOException */ - ContainerWithPipeline getContainerWithPipeline(long containerId) throws IOException; + ContainerWithPipeline getContainerWithPipeline(long containerId) + throws IOException; /** * Close a container. @@ -87,7 +89,8 @@ * @param force - true to forcibly delete the container. * @throws IOException */ - void deleteContainer(long containerId, Pipeline pipeline, boolean force) throws IOException; + void deleteContainer(long containerId, Pipeline pipeline, boolean force) + throws IOException; /** * Deletes an existing container. 
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java index 9845c048380..49af297130a 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerID.java @@ -19,7 +19,9 @@ package org.apache.hadoop.hdds.scm.container; import com.google.common.base.Preconditions; -import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.lang3.builder.CompareToBuilder; +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; /** * Container ID is an integer that is a value between 1..MAX_CONTAINER ID. @@ -48,7 +50,6 @@ public ContainerID(long id) { * @return ContainerID. */ public static ContainerID valueof(long containerID) { - Preconditions.checkState(containerID > 0); return new ContainerID(containerID); } @@ -66,28 +67,37 @@ public boolean equals(Object o) { if (this == o) { return true; } + if (o == null || getClass() != o.getClass()) { return false; } ContainerID that = (ContainerID) o; - return id == that.id; + return new EqualsBuilder() + .append(getId(), that.getId()) + .isEquals(); } @Override public int hashCode() { - return MathUtils.hash(id); + return new HashCodeBuilder(61, 71) + .append(getId()) + .toHashCode(); } @Override public int compareTo(Object o) { Preconditions.checkNotNull(o); - if (o instanceof ContainerID) { - return Long.compare(((ContainerID) o).getId(), this.getId()); + if(getClass() != o.getClass()) { + throw new ClassCastException("ContainerID class expected. found:" + + o.getClass().toString()); } - throw new IllegalArgumentException("Object O, should be an instance " + - "of ContainerID"); + + ContainerID that = (ContainerID) o; + return new CompareToBuilder() + .append(this.getId(), that.getId()) + .build(); } @Override diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/AllocatedBlock.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/AllocatedBlock.java index 9b8946978cb..63781a87dd5 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/AllocatedBlock.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/AllocatedBlock.java @@ -43,8 +43,8 @@ public Builder setPipeline(Pipeline p) { return this; } - public Builder setBlockID(BlockID blockID) { - this.blockID = blockID; + public Builder setBlockID(BlockID blockId) { + this.blockID = blockId; return this; } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneContainerTranslation.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/BlockNotCommittedException.java similarity index 55% rename from hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneContainerTranslation.java rename to hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/BlockNotCommittedException.java index e74fffd7db2..86f5a66cf4c 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneContainerTranslation.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/BlockNotCommittedException.java @@ -15,36 +15,22 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ +package org.apache.hadoop.hdds.scm.container.common.helpers; -package org.apache.hadoop.ozone.client.io; - - -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.KeyData; -import org.apache.hadoop.hdds.client.BlockID; - +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; /** - * This class contains methods that define the translation between the Ozone - * domain model and the storage container domain model. + * Exceptions thrown when a block is yet to be committed on the datanode. */ -final class OzoneContainerTranslation { +public class BlockNotCommittedException extends StorageContainerException { /** - * Creates key data intended for reading a container key. + * Constructs an {@code IOException} with the specified detail message. * - * @param blockID - ID of the block. - * @return KeyData intended for reading the container key - */ - public static KeyData containerKeyDataForRead(BlockID blockID) { - return KeyData - .newBuilder() - .setBlockID(blockID.getDatanodeBlockIDProtobuf()) - .build(); - } - - /** - * There is no need to instantiate this class. + * @param message The detail message (which is saved for later retrieval by + * the {@link #getMessage()} method) */ - private OzoneContainerTranslation() { + public BlockNotCommittedException(String message) { + super(message, ContainerProtos.Result.BLOCK_NOT_COMMITTED); } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerInfo.java index 9593717ced1..5abcd14c521 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerInfo.java @@ -58,7 +58,8 @@ } private HddsProtos.LifeCycleState state; - private String pipelineName; + @JsonIgnore + private PipelineID pipelineID; private ReplicationFactor replicationFactor; private ReplicationType replicationType; // Bytes allocated by SCM for clients. @@ -82,7 +83,7 @@ ContainerInfo( long containerID, HddsProtos.LifeCycleState state, - String pipelineName, + PipelineID pipelineID, long allocatedBytes, long usedBytes, long numberOfKeys, @@ -92,7 +93,7 @@ ReplicationFactor replicationFactor, ReplicationType repType) { this.containerID = containerID; - this.pipelineName = pipelineName; + this.pipelineID = pipelineID; this.allocatedBytes = allocatedBytes; this.usedBytes = usedBytes; this.numberOfKeys = numberOfKeys; @@ -105,6 +106,13 @@ this.replicationType = repType; } + public ContainerInfo(ContainerInfo info) { + this(info.getContainerID(), info.getState(), info.getPipelineID(), + info.getAllocatedBytes(), info.getUsedBytes(), info.getNumberOfKeys(), + info.getStateEnterTime(), info.getOwner(), + info.getDeleteTransactionId(), info.getReplicationFactor(), + info.getReplicationType()); + } /** * Needed for serialization findbugs. 
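Illustrative sketch (not part of the patch) of what the reworked ContainerID equals/hashCode/compareTo is for: ids can now back hashed and sorted collections directly, with ordering by the numeric id.

import java.util.NavigableSet;
import java.util.TreeSet;

import org.apache.hadoop.hdds.scm.container.ContainerID;

public final class ContainerIdOrderingExample {
  public static void main(String[] args) {
    NavigableSet<ContainerID> ids = new TreeSet<>();
    ids.add(ContainerID.valueof(42));
    ids.add(ContainerID.valueof(7));
    ids.add(ContainerID.valueof(42));   // equal id, the set keeps one copy

    // compareTo orders ascending by id, so this prints "7 .. 42".
    System.out.println(ids.first().getId() + " .. " + ids.last().getId());
  }
}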
*/ @@ -113,7 +121,8 @@ public ContainerInfo() { public static ContainerInfo fromProtobuf(HddsProtos.SCMContainerInfo info) { ContainerInfo.Builder builder = new ContainerInfo.Builder(); - return builder.setPipelineName(info.getPipelineName()) + return builder.setPipelineID( + PipelineID.getFromProtobuf(info.getPipelineID())) .setAllocatedBytes(info.getAllocatedBytes()) .setUsedBytes(info.getUsedBytes()) .setNumberOfKeys(info.getNumberOfKeys()) @@ -147,8 +156,8 @@ public ReplicationFactor getReplicationFactor() { return replicationFactor; } - public String getPipelineName() { - return pipelineName; + public PipelineID getPipelineID() { + return pipelineID; } public long getAllocatedBytes() { @@ -211,13 +220,14 @@ public void allocate(long size) { public HddsProtos.SCMContainerInfo getProtobuf() { HddsProtos.SCMContainerInfo.Builder builder = HddsProtos.SCMContainerInfo.newBuilder(); + Preconditions.checkState(containerID > 0); return builder.setAllocatedBytes(getAllocatedBytes()) .setContainerID(getContainerID()) .setUsedBytes(getUsedBytes()) .setNumberOfKeys(getNumberOfKeys()).setState(getState()) .setStateEnterTime(getStateEnterTime()).setContainerID(getContainerID()) .setDeleteTransactionId(getDeleteTransactionId()) - .setPipelineName(getPipelineName()) + .setPipelineID(getPipelineID().getProtobuf()) .setReplicationFactor(getReplicationFactor()) .setReplicationType(getReplicationType()) .setOwner(getOwner()) @@ -235,8 +245,9 @@ public void setOwner(String owner) { @Override public String toString() { return "ContainerInfo{" - + "state=" + state - + ", pipelineName=" + pipelineName + + "id=" + containerID + + ", state=" + state + + ", pipelineID=" + pipelineID + ", stateEnterTime=" + stateEnterTime + ", owner=" + owner + '}'; @@ -389,18 +400,18 @@ public void readExternal(ObjectInput in) private String owner; private long containerID; private long deleteTransactionId; - private String pipelineName; + private PipelineID pipelineID; private ReplicationFactor replicationFactor; private ReplicationType replicationType; public Builder setReplicationType( - ReplicationType replicationType) { - this.replicationType = replicationType; + ReplicationType repType) { + this.replicationType = repType; return this; } - public Builder setPipelineName(String pipelineName) { - this.pipelineName = pipelineName; + public Builder setPipelineID(PipelineID pipelineId) { + this.pipelineID = pipelineId; return this; } @@ -445,15 +456,27 @@ public Builder setOwner(String containerOwner) { return this; } - public Builder setDeleteTransactionId(long deleteTransactionId) { - this.deleteTransactionId = deleteTransactionId; + public Builder setDeleteTransactionId(long deleteTransactionID) { + this.deleteTransactionId = deleteTransactionID; return this; } public ContainerInfo build() { - return new ContainerInfo(containerID, state, pipelineName, allocated, + return new ContainerInfo(containerID, state, pipelineID, allocated, used, keys, stateEnterTime, owner, deleteTransactionId, replicationFactor, replicationType); } } + + /** + * Check if a container is in open state, this will check if the + * container is either open, allocated, creating or creating. + * Any containers in these states is managed as an open container by SCM. 
+ */ + public boolean isContainerOpen() { + return state == HddsProtos.LifeCycleState.ALLOCATED || + state == HddsProtos.LifeCycleState.CREATING || + state == HddsProtos.LifeCycleState.OPEN || + state == HddsProtos.LifeCycleState.CLOSING; + } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerWithPipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerWithPipeline.java index e71d429b988..64f42b30767 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerWithPipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/ContainerWithPipeline.java @@ -26,8 +26,8 @@ /** * Class wraps ozone container info. */ -public class ContainerWithPipeline - implements Comparator, Comparable { +public class ContainerWithPipeline implements Comparator, + Comparable { private final ContainerInfo containerInfo; private final Pipeline pipeline; @@ -45,7 +45,8 @@ public Pipeline getPipeline() { return pipeline; } - public static ContainerWithPipeline fromProtobuf(HddsProtos.ContainerWithPipeline allocatedContainer) { + public static ContainerWithPipeline fromProtobuf( + HddsProtos.ContainerWithPipeline allocatedContainer) { return new ContainerWithPipeline( ContainerInfo.fromProtobuf(allocatedContainer.getContainerInfo()), Pipeline.getFromProtoBuf(allocatedContainer.getPipeline())); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/Pipeline.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/Pipeline.java index c5794f4c036..ef148e55503 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/Pipeline.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/Pipeline.java @@ -64,9 +64,7 @@ private HddsProtos.LifeCycleState lifeCycleState; private HddsProtos.ReplicationType type; private HddsProtos.ReplicationFactor factor; - private String name; - // TODO: change to long based id - //private long id; + private PipelineID id; /** * Constructs a new pipeline data structure. @@ -75,19 +73,43 @@ * @param lifeCycleState - Pipeline State * @param replicationType - Replication protocol * @param replicationFactor - replication count on datanodes - * @param name - pipelineName + * @param id - pipeline ID */ public Pipeline(String leaderID, HddsProtos.LifeCycleState lifeCycleState, HddsProtos.ReplicationType replicationType, - HddsProtos.ReplicationFactor replicationFactor, String name) { + HddsProtos.ReplicationFactor replicationFactor, PipelineID id) { this.leaderID = leaderID; this.lifeCycleState = lifeCycleState; this.type = replicationType; this.factor = replicationFactor; - this.name = name; + this.id = id; datanodes = new TreeMap<>(); } + @Override + public int hashCode() { + return id.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Pipeline that = (Pipeline) o; + + return id.equals(that.id) + && factor.equals(that.factor) + && type.equals(that.type) + && lifeCycleState.equals(that.lifeCycleState) + && leaderID.equals(that.leaderID); + + } + /** * Gets pipeline object from protobuf. 
* @@ -102,7 +124,7 @@ public static Pipeline getFromProtoBuf( pipelineProto.getState(), pipelineProto.getType(), pipelineProto.getFactor(), - pipelineProto.getName()); + PipelineID.getFromProtobuf(pipelineProto.getId())); for (HddsProtos.DatanodeDetailsProto dataID : pipelineProto.getMembersList()) { @@ -171,8 +193,8 @@ public String getLeaderID() { */ public List getDatanodeHosts() { List dataHosts = new ArrayList<>(); - for (DatanodeDetails id :getDatanodes().values()) { - dataHosts.add(id.getHostName()); + for (DatanodeDetails datanode : getDatanodes().values()) { + dataHosts.add(datanode.getHostName()); } return dataHosts; } @@ -191,15 +213,19 @@ public String getLeaderID() { } builder.setLeaderID(leaderID); - if (this.getLifeCycleState() != null) { - builder.setState(this.getLifeCycleState()); + if (lifeCycleState != null) { + builder.setState(lifeCycleState); } - if (this.getType() != null) { - builder.setType(this.getType()); + if (type != null) { + builder.setType(type); + } + + if (factor != null) { + builder.setFactor(factor); } - if (this.getFactor() != null) { - builder.setFactor(this.getFactor()); + if (id != null) { + builder.setId(id.getProtobuf()); } return builder.build(); } @@ -214,12 +240,19 @@ public String getLeaderID() { } /** - * Gets the pipeline Name. + * Update the State of the pipeline. + */ + public void setLifeCycleState(HddsProtos.LifeCycleState nextState) { + lifeCycleState = nextState; + } + + /** + * Gets the pipeline id. * - * @return - Name of the pipeline + * @return - Id of the pipeline */ - public String getPipelineName() { - return name; + public PipelineID getId() { + return id; } /** @@ -235,10 +268,9 @@ public String getPipelineName() { public String toString() { final StringBuilder b = new StringBuilder(getClass().getSimpleName()) .append("["); - getDatanodes().keySet().stream() - .forEach(id -> b. - append(id.endsWith(getLeaderID()) ? "*" + id : id)); - b.append(" name:").append(getPipelineName()); + getDatanodes().keySet().forEach( + node -> b.append(node.endsWith(getLeaderID()) ? "*" + id : id)); + b.append(" id:").append(id); if (getType() != null) { b.append(" type:").append(getType().toString()); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/PipelineID.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/PipelineID.java new file mode 100644 index 00000000000..473ebc5a999 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/common/helpers/PipelineID.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.container.common.helpers; + +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.ratis.protocol.RaftGroupId; + +import java.util.UUID; + +/** + * ID for the pipeline, the ID is based on UUID so that it can be used + * in Ratis as RaftGroupId, GroupID is used by the datanodes to initialize + * the ratis group they are part of. + */ +public class PipelineID { + + private UUID id; + private RaftGroupId groupId; + + private PipelineID(UUID id) { + this.id = id; + this.groupId = RaftGroupId.valueOf(id); + } + + public static PipelineID randomId() { + return new PipelineID(UUID.randomUUID()); + } + + public static PipelineID valueOf(RaftGroupId groupId) { + return new PipelineID(groupId.getUuid()); + } + + public RaftGroupId getRaftGroupID() { + return groupId; + } + + public UUID getId() { + return id; + } + + public HddsProtos.PipelineID getProtobuf() { + return HddsProtos.PipelineID.newBuilder().setId(id.toString()).build(); + } + + public static PipelineID getFromProtobuf(HddsProtos.PipelineID protos) { + return new PipelineID(UUID.fromString(protos.getId())); + } + + @Override + public String toString() { + return "pipelineId=" + id; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + PipelineID that = (PipelineID) o; + + return id.equals(that.id); + } + + @Override + public int hashCode() { + return id.hashCode(); + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 581fbd0f06a..c55062b8fba 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -38,7 +38,8 @@ * set of datanodes that should be used creating this container. * */ - ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType replicationType, + ContainerWithPipeline allocateContainer( + HddsProtos.ReplicationType replicationType, HddsProtos.ReplicationFactor factor, String owner) throws IOException; @@ -61,7 +62,8 @@ ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType replicationTy * @return ContainerWithPipeline - the container info with the pipeline. 
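The following sketch (illustrative, not part of the patch) shows the round trips the new PipelineID is designed for: the same UUID backs the Ratis RaftGroupId used on the datanodes and the protobuf message exchanged with SCM.

import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID;
import org.apache.ratis.protocol.RaftGroupId;

public final class PipelineIdExample {
  public static void main(String[] args) {
    PipelineID original = PipelineID.randomId();

    // Datanodes derive the Ratis group from the pipeline id and back again.
    RaftGroupId group = original.getRaftGroupID();
    PipelineID fromRatis = PipelineID.valueOf(group);

    // SCM ships the same id over the wire as a HddsProtos.PipelineID message.
    HddsProtos.PipelineID wire = original.getProtobuf();
    PipelineID fromWire = PipelineID.getFromProtobuf(wire);

    // equals()/hashCode() are UUID based, so all three are interchangeable.
    System.out.println(original.equals(fromRatis) && original.equals(fromWire));
  }
}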
* @throws IOException */ - ContainerWithPipeline getContainerWithPipeline(long containerID) throws IOException; + ContainerWithPipeline getContainerWithPipeline(long containerID) + throws IOException; /** * Ask SCM a list of containers with a range of container names diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index ac12ea29685..0441469e780 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -97,8 +97,9 @@ public StorageContainerLocationProtocolClientSideTranslatorPB( * @throws IOException */ @Override - public ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType type, - HddsProtos.ReplicationFactor factor, String owner) throws IOException { + public ContainerWithPipeline allocateContainer( + HddsProtos.ReplicationType type, HddsProtos.ReplicationFactor factor, + String owner) throws IOException { ContainerRequestProto request = ContainerRequestProto.newBuilder() .setReplicationFactor(factor) @@ -116,7 +117,8 @@ public ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType type, throw new IOException(response.hasErrorMessage() ? response.getErrorMessage() : "Allocate container failed."); } - return ContainerWithPipeline.fromProtobuf(response.getContainerWithPipeline()); + return ContainerWithPipeline.fromProtobuf( + response.getContainerWithPipeline()); } public ContainerInfo getContainer(long containerID) throws IOException { @@ -138,17 +140,18 @@ public ContainerInfo getContainer(long containerID) throws IOException { /** * {@inheritDoc} */ - public ContainerWithPipeline getContainerWithPipeline(long containerID) throws IOException { + public ContainerWithPipeline getContainerWithPipeline(long containerID) + throws IOException { Preconditions.checkState(containerID >= 0, "Container ID cannot be negative"); - GetContainerWithPipelineRequestProto request = GetContainerWithPipelineRequestProto - .newBuilder() - .setContainerID(containerID) - .build(); + GetContainerWithPipelineRequestProto request = + GetContainerWithPipelineRequestProto.newBuilder() + .setContainerID(containerID).build(); try { GetContainerWithPipelineResponseProto response = rpcProxy.getContainerWithPipeline(NULL_RPC_CONTROLLER, request); - return ContainerWithPipeline.fromProtobuf(response.getContainerWithPipeline()); + return ContainerWithPipeline.fromProtobuf( + response.getContainerWithPipeline()); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java index d3af0836dea..1d6a89d73aa 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/storage/ContainerProtocolCalls.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hdds.scm.storage; +import org.apache.hadoop.hdds.scm.container.common.helpers + .BlockNotCommittedException; import org.apache.ratis.shaded.com.google.protobuf.ByteString; import 
org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers @@ -28,6 +30,10 @@ .ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .CloseContainerRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .DatanodeBlockID; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .GetKeyRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos @@ -73,29 +79,62 @@ private ContainerProtocolCalls() { * Calls the container protocol to get a container key. * * @param xceiverClient client to perform call - * @param containerKeyData key data to identify container + * @param datanodeBlockID blockID to identify container * @param traceID container protocol call args * @return container protocol get key response * @throws IOException if there is an I/O error while performing the call */ public static GetKeyResponseProto getKey(XceiverClientSpi xceiverClient, - KeyData containerKeyData, String traceID) throws IOException { + DatanodeBlockID datanodeBlockID, String traceID) throws IOException { GetKeyRequestProto.Builder readKeyRequest = GetKeyRequestProto .newBuilder() - .setKeyData(containerKeyData); + .setBlockID(datanodeBlockID); String id = xceiverClient.getPipeline().getLeader().getUuidString(); + ContainerCommandRequestProto request = ContainerCommandRequestProto .newBuilder() .setCmdType(Type.GetKey) + .setContainerID(datanodeBlockID.getContainerID()) .setTraceID(traceID) .setDatanodeUuid(id) .setGetKey(readKeyRequest) .build(); ContainerCommandResponseProto response = xceiverClient.sendCommand(request); validateContainerResponse(response); + return response.getGetKey(); } + /** + * Calls the container protocol to get the length of a committed block. + * + * @param xceiverClient client to perform call + * @param blockID blockId for the Block + * @param traceID container protocol call args + * @return container protocol getLastCommittedBlockLength response + * @throws IOException if there is an I/O error while performing the call + */ + public static ContainerProtos.GetCommittedBlockLengthResponseProto + getCommittedBlockLength( + XceiverClientSpi xceiverClient, BlockID blockID, String traceID) + throws IOException { + ContainerProtos.GetCommittedBlockLengthRequestProto.Builder + getBlockLengthRequestBuilder = + ContainerProtos.GetCommittedBlockLengthRequestProto.newBuilder(). + setBlockID(blockID.getDatanodeBlockIDProtobuf()); + String id = xceiverClient.getPipeline().getLeader().getUuidString(); + ContainerCommandRequestProto request = + ContainerCommandRequestProto.newBuilder() + .setCmdType(Type.GetCommittedBlockLength) + .setContainerID(blockID.getContainerID()) + .setTraceID(traceID) + .setDatanodeUuid(id) + .setGetCommittedBlockLength(getBlockLengthRequestBuilder).build(); + ContainerCommandResponseProto response = xceiverClient.sendCommand(request); + validateContainerResponse(response); + return response.getGetCommittedBlockLength(); + } + /** * Calls the container protocol to put a container key. 
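A hedged sketch (not from the patch) of how a caller might combine the new getCommittedBlockLength call with BlockNotCommittedException. The client, block id, trace id and retry loop are placeholders, and the getBlockLength() accessor is assumed to follow the usual protobuf getter naming for the response's block length field.

import java.io.IOException;

import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.scm.XceiverClientSpi;
import org.apache.hadoop.hdds.scm.container.common.helpers.BlockNotCommittedException;
import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;

public final class CommittedLengthExample {

  /** Polls the datanode until the block is committed (simplified sketch). */
  static long waitForCommittedLength(XceiverClientSpi client, BlockID blockID,
      String traceID) throws IOException, InterruptedException {
    while (true) {
      try {
        ContainerProtos.GetCommittedBlockLengthResponseProto response =
            ContainerProtocolCalls.getCommittedBlockLength(
                client, blockID, traceID);
        return response.getBlockLength();
      } catch (BlockNotCommittedException e) {
        // BLOCK_NOT_COMMITTED is surfaced as a dedicated exception type,
        // so callers can retry instead of failing as on other errors.
        Thread.sleep(100);
      }
    }
  }
}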
* @@ -113,6 +152,7 @@ public static void putKey(XceiverClientSpi xceiverClient, ContainerCommandRequestProto request = ContainerCommandRequestProto .newBuilder() .setCmdType(Type.PutKey) + .setContainerID(containerKeyData.getBlockID().getContainerID()) .setTraceID(traceID) .setDatanodeUuid(id) .setPutKey(createKeyRequest) @@ -141,6 +181,7 @@ public static ReadChunkResponseProto readChunk(XceiverClientSpi xceiverClient, ContainerCommandRequestProto request = ContainerCommandRequestProto .newBuilder() .setCmdType(Type.ReadChunk) + .setContainerID(blockID.getContainerID()) .setTraceID(traceID) .setDatanodeUuid(id) .setReadChunk(readChunkRequest) @@ -172,6 +213,7 @@ public static void writeChunk(XceiverClientSpi xceiverClient, ChunkInfo chunk, ContainerCommandRequestProto request = ContainerCommandRequestProto .newBuilder() .setCmdType(Type.WriteChunk) + .setContainerID(blockID.getContainerID()) .setTraceID(traceID) .setDatanodeUuid(id) .setWriteChunk(writeChunkRequest) @@ -220,6 +262,7 @@ public static void writeSmallFile(XceiverClientSpi client, ContainerCommandRequestProto request = ContainerCommandRequestProto.newBuilder() .setCmdType(Type.PutSmallFile) + .setContainerID(blockID.getContainerID()) .setTraceID(traceID) .setDatanodeUuid(id) .setPutSmallFile(putSmallFileRequest) @@ -240,18 +283,15 @@ public static void createContainer(XceiverClientSpi client, long containerID, ContainerProtos.CreateContainerRequestProto.Builder createRequest = ContainerProtos.CreateContainerRequestProto .newBuilder(); - ContainerProtos.ContainerData.Builder containerData = ContainerProtos - .ContainerData.newBuilder(); - containerData.setContainerID(containerID); - containerData.setContainerType(ContainerProtos.ContainerType + createRequest.setContainerType(ContainerProtos.ContainerType .KeyValueContainer); - createRequest.setContainerData(containerData.build()); String id = client.getPipeline().getLeader().getUuidString(); ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto.newBuilder(); request.setCmdType(ContainerProtos.Type.CreateContainer); - request.setCreateContainer(createRequest); + request.setContainerID(containerID); + request.setCreateContainer(createRequest.build()); request.setDatanodeUuid(id); request.setTraceID(traceID); ContainerCommandResponseProto response = client.sendCommand( @@ -271,12 +311,13 @@ public static void deleteContainer(XceiverClientSpi client, long containerID, boolean force, String traceID) throws IOException { ContainerProtos.DeleteContainerRequestProto.Builder deleteRequest = ContainerProtos.DeleteContainerRequestProto.newBuilder(); - deleteRequest.setContainerID(containerID); deleteRequest.setForceDelete(force); String id = client.getPipeline().getLeader().getUuidString(); + ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto.newBuilder(); request.setCmdType(ContainerProtos.Type.DeleteContainer); + request.setContainerID(containerID); request.setDeleteContainer(deleteRequest); request.setTraceID(traceID); request.setDatanodeUuid(id); @@ -295,15 +336,13 @@ public static void deleteContainer(XceiverClientSpi client, long containerID, */ public static void closeContainer(XceiverClientSpi client, long containerID, String traceID) throws IOException { - ContainerProtos.CloseContainerRequestProto.Builder closeRequest = - ContainerProtos.CloseContainerRequestProto.newBuilder(); - closeRequest.setContainerID(containerID); - String id = client.getPipeline().getLeader().getUuidString(); + ContainerCommandRequestProto.Builder request = 
ContainerCommandRequestProto.newBuilder(); request.setCmdType(Type.CloseContainer); - request.setCloseContainer(closeRequest); + request.setContainerID(containerID); + request.setCloseContainer(CloseContainerRequestProto.getDefaultInstance()); request.setTraceID(traceID); request.setDatanodeUuid(id); ContainerCommandResponseProto response = @@ -321,24 +360,24 @@ public static void closeContainer(XceiverClientSpi client, public static ReadContainerResponseProto readContainer( XceiverClientSpi client, long containerID, String traceID) throws IOException { - ReadContainerRequestProto.Builder readRequest = - ReadContainerRequestProto.newBuilder(); - readRequest.setContainerID(containerID); String id = client.getPipeline().getLeader().getUuidString(); + ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto.newBuilder(); request.setCmdType(Type.ReadContainer); - request.setReadContainer(readRequest); + request.setContainerID(containerID); + request.setReadContainer(ReadContainerRequestProto.getDefaultInstance()); request.setDatanodeUuid(id); request.setTraceID(traceID); ContainerCommandResponseProto response = client.sendCommand(request.build()); validateContainerResponse(response); + return response.getReadContainer(); } /** - * Reads the data given the blockID + * Reads the data given the blockID. * * @param client * @param blockID - ID of the block @@ -348,28 +387,26 @@ public static ReadContainerResponseProto readContainer( */ public static GetSmallFileResponseProto readSmallFile(XceiverClientSpi client, BlockID blockID, String traceID) throws IOException { - KeyData containerKeyData = KeyData - .newBuilder() - .setBlockID(blockID.getDatanodeBlockIDProtobuf()) - .build(); - GetKeyRequestProto.Builder getKey = GetKeyRequestProto .newBuilder() - .setKeyData(containerKeyData); + .setBlockID(blockID.getDatanodeBlockIDProtobuf()); ContainerProtos.GetSmallFileRequestProto getSmallFileRequest = GetSmallFileRequestProto .newBuilder().setKey(getKey) .build(); String id = client.getPipeline().getLeader().getUuidString(); + ContainerCommandRequestProto request = ContainerCommandRequestProto .newBuilder() .setCmdType(Type.GetSmallFile) + .setContainerID(blockID.getContainerID()) .setTraceID(traceID) .setDatanodeUuid(id) .setGetSmallFile(getSmallFileRequest) .build(); ContainerCommandResponseProto response = client.sendCommand(request); validateContainerResponse(response); + return response.getGetSmallFile(); } @@ -385,6 +422,9 @@ private static void validateContainerResponse( ) throws StorageContainerException { if (response.getResult() == ContainerProtos.Result.SUCCESS) { return; + } else if (response.getResult() + == ContainerProtos.Result.BLOCK_NOT_COMMITTED) { + throw new BlockNotCommittedException(response.getMessage()); } throw new StorageContainerException( response.getMessage(), response.getResult()); diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneAcl.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneAcl.java index ff0ac4e2d48..1827b23bf15 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneAcl.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneAcl.java @@ -25,9 +25,11 @@ * OzoneACL classes define bucket ACLs used in OZONE. * * ACLs in Ozone follow this pattern. - * • user:name:rw - * • group:name:rw - * • world::rw + *
<ul>
+ * <li>user:name:rw
+ * <li>group:name:rw
+ * <li>world::rw
+ * </ul>
*/ public class OzoneAcl { private OzoneACLType type; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 856d088c792..599b4e80bf2 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.scm.ScmConfigKeys; + +import org.apache.ratis.shaded.proto.RaftProtos.ReplicationLevel; import org.apache.ratis.util.TimeDuration; /** @@ -65,16 +67,9 @@ "dfs.container.ratis.ipc.random.port"; public static final boolean DFS_CONTAINER_RATIS_IPC_RANDOM_PORT_DEFAULT = false; - - public static final String OZONE_LOCALSTORAGE_ROOT = - "ozone.localstorage.root"; - public static final String OZONE_LOCALSTORAGE_ROOT_DEFAULT = "/tmp/ozone"; public static final String OZONE_ENABLED = "ozone.enabled"; public static final boolean OZONE_ENABLED_DEFAULT = false; - public static final String OZONE_HANDLER_TYPE_KEY = - "ozone.handler.type"; - public static final String OZONE_HANDLER_TYPE_DEFAULT = "distributed"; public static final String OZONE_TRACE_ENABLED_KEY = "ozone.trace.enabled"; public static final boolean OZONE_TRACE_ENABLED_DEFAULT = false; @@ -200,10 +195,13 @@ public static final int OZONE_BLOCK_DELETING_CONTAINER_LIMIT_PER_INTERVAL_DEFAULT = 10; - public static final String OZONE_CONTAINER_REPORT_INTERVAL = - "ozone.container.report.interval"; - public static final String OZONE_CONTAINER_REPORT_INTERVAL_DEFAULT = - "60s"; + public static final String OZONE_CLIENT_MAX_RETRIES = + "ozone.client.max.retries"; + public static final int OZONE_CLIENT_MAX_RETRIES_DEFAULT = 50; + + public static final String OZONE_CLIENT_RETRY_INTERVAL = + "ozone.client.retry.interval"; + public static final String OZONE_CLIENT_RETRY_INTERVAL_DEFAULT = "200ms"; public static final String DFS_CONTAINER_RATIS_ENABLED_KEY = ScmConfigKeys.DFS_CONTAINER_RATIS_ENABLED_KEY; @@ -217,6 +215,11 @@ = ScmConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_KEY; public static final int DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_DEFAULT = ScmConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_DEFAULT; + public static final String DFS_CONTAINER_RATIS_REPLICATION_LEVEL_KEY + = ScmConfigKeys.DFS_CONTAINER_RATIS_REPLICATION_LEVEL_KEY; + public static final ReplicationLevel + DFS_CONTAINER_RATIS_REPLICATION_LEVEL_DEFAULT + = ScmConfigKeys.DFS_CONTAINER_RATIS_REPLICATION_LEVEL_DEFAULT; public static final String DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY; public static final int DFS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT @@ -234,11 +237,37 @@ public static final TimeDuration DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT = ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT; + public static final String DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY = + ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY; + public static final int DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT = + ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT; + public static final String DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY = + ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY; + public static final TimeDuration + DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT = + 
ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT; + public static final String DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY = + ScmConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY; + public static final TimeDuration + DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT = + ScmConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT; public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY = ScmConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY; public static final TimeDuration DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT = ScmConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT; + public static final String + DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY = + ScmConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY; + public static final TimeDuration + DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT = + ScmConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT; + + public static final String DFS_RATIS_SERVER_FAILURE_DURATION_KEY = + ScmConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_KEY; + public static final TimeDuration + DFS_RATIS_SERVER_FAILURE_DURATION_DEFAULT = + ScmConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_DEFAULT; public static final String OZONE_SCM_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL = "ozone.web.authentication.kerberos.principal"; @@ -256,6 +285,21 @@ "hdds.datanode.storage.utilization.critical.threshold"; public static final double HDDS_DATANODE_STORAGE_UTILIZATION_CRITICAL_THRESHOLD_DEFAULT = 0.75; + + public static final String + HDDS_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY = + "hdds.write.lock.reporting.threshold.ms"; + public static final long + HDDS_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT = 5000L; + public static final String + HDDS_LOCK_SUPPRESS_WARNING_INTERVAL_MS_KEY = + "hdds.lock.suppress.warning.interval.ms"; + public static final long + HDDS_LOCK_SUPPRESS_WARNING_INTERVAL_MS_DEAFULT = 10000L; + + public static final String OZONE_CONTAINER_COPY_WORKDIR = + "hdds.datanode.replication.work.dir"; + /** * There is no need to instantiate this class. */ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java index 4fad5d83a8c..0a15ec8b6ec 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java @@ -33,6 +33,11 @@ public static final String OZONE_SIMPLE_ROOT_USER = "root"; public static final String OZONE_SIMPLE_HDFS_USER = "hdfs"; + public static final String STORAGE_ID = "storageID"; + public static final String DATANODE_UUID = "datanodeUuid"; + public static final String CLUSTER_ID = "clusterID"; + public static final String LAYOUTVERSION = "layOutVersion"; + public static final String CTIME = "ctime"; /* * BucketName length is used for both buckets and volume lengths */ @@ -65,12 +70,9 @@ public static final String CONTAINER_EXTENSION = ".container"; public static final String CONTAINER_META = ".meta"; - // container storage is in the following format. - // Data Volume basePath/containers//metadata and - // Data Volume basePath/containers//data/... + // Refer to {@link ContainerReader} for container storage layout on disk. 
public static final String CONTAINER_PREFIX = "containers"; public static final String CONTAINER_META_PATH = "metadata"; - public static final String CONTAINER_DATA_PATH = "data"; public static final String CONTAINER_TEMPORARY_CHUNK_PREFIX = "tmp"; public static final String CONTAINER_CHUNK_NAME_DELIMITER = "."; public static final String CONTAINER_ROOT_PREFIX = "repository"; @@ -88,13 +90,15 @@ * level DB names used by SCM and data nodes. */ public static final String CONTAINER_DB_SUFFIX = "container.db"; + public static final String PIPELINE_DB_SUFFIX = "pipeline.db"; public static final String SCM_CONTAINER_DB = "scm-" + CONTAINER_DB_SUFFIX; + public static final String SCM_PIPELINE_DB = "scm-" + PIPELINE_DB_SUFFIX; public static final String DN_CONTAINER_DB = "-dn-"+ CONTAINER_DB_SUFFIX; - public static final String BLOCK_DB = "block.db"; - public static final String OPEN_CONTAINERS_DB = "openContainers.db"; public static final String DELETED_BLOCK_DB = "deletedBlock.db"; public static final String OM_DB_NAME = "om.db"; + public static final String STORAGE_DIR_CHUNKS = "chunks"; + /** * Supports Bucket Versioning. */ @@ -106,17 +110,9 @@ public static Versioning getVersioning(boolean versioning) { } } - /** - * Ozone handler types. - */ - public static final String OZONE_HANDLER_DISTRIBUTED = "distributed"; - public static final String OZONE_HANDLER_LOCAL = "local"; - public static final String DELETING_KEY_PREFIX = "#deleting#"; public static final String DELETED_KEY_PREFIX = "#deleted#"; public static final String DELETE_TRANSACTION_KEY_PREFIX = "#delTX#"; - public static final String OPEN_KEY_PREFIX = "#open#"; - public static final String OPEN_KEY_ID_DELIMINATOR = "#"; /** * OM LevelDB prefixes. @@ -140,8 +136,7 @@ public static Versioning getVersioning(boolean versioning) { * | #deleting#/volumeName/bucketName/keyName | KeyInfo | * ---------------------------------------------------------- */ - public static final String OM_VOLUME_PREFIX = "/#"; - public static final String OM_BUCKET_PREFIX = "/#"; + public static final String OM_KEY_PREFIX = "/"; public static final String OM_USER_PREFIX = "$"; @@ -175,4 +170,47 @@ public static Versioning getVersioning(boolean versioning) { private OzoneConsts() { // Never Constructed } + + // YAML fields for .container files + public static final String CONTAINER_ID = "containerID"; + public static final String CONTAINER_TYPE = "containerType"; + public static final String STATE = "state"; + public static final String METADATA = "metadata"; + public static final String MAX_SIZE = "maxSize"; + public static final String METADATA_PATH = "metadataPath"; + public static final String CHUNKS_PATH = "chunksPath"; + public static final String CONTAINER_DB_TYPE = "containerDBType"; + public static final String CHECKSUM = "checksum"; + + // For OM Audit usage + public static final String VOLUME = "volume"; + public static final String BUCKET = "bucket"; + public static final String KEY = "key"; + public static final String QUOTA = "quota"; + public static final String QUOTA_IN_BYTES = "quotaInBytes"; + public static final String CLIENT_ID = "clientID"; + public static final String OWNER = "owner"; + public static final String ADMIN = "admin"; + public static final String USERNAME = "username"; + public static final String PREV_KEY = "prevKey"; + public static final String START_KEY = "startKey"; + public static final String MAX_KEYS = "maxKeys"; + public static final String PREFIX = "prefix"; + public static final String KEY_PREFIX = "keyPrefix"; + 
public static final String ACLS = "acls"; + public static final String USER_ACL = "userAcl"; + public static final String ADD_ACLS = "addAcls"; + public static final String REMOVE_ACLS = "removeAcls"; + public static final String MAX_NUM_OF_BUCKETS = "maxNumOfBuckets"; + public static final String TO_KEY_NAME = "toKeyName"; + public static final String STORAGE_TYPE = "storageType"; + public static final String IS_VERSION_ENABLED = "isVersionEnabled"; + public static final String CREATION_TIME = "creationTime"; + public static final String DATA_SIZE = "dataSize"; + public static final String REPLICATION_TYPE = "replicationType"; + public static final String REPLICATION_FACTOR = "replicationFactor"; + public static final String KEY_LOCATION_INFO = "keyLocationInfo"; + + + } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java index 46ffaab9ef5..ee20c66fd32 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/AuditLogger.java @@ -21,10 +21,8 @@ import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Marker; -import org.apache.logging.log4j.message.StructuredDataMessage; import org.apache.logging.log4j.spi.ExtendedLogger; -import java.util.Map; /** * Class to define Audit Logger for Ozone. @@ -32,16 +30,13 @@ public class AuditLogger { private ExtendedLogger logger; - - private static final String SUCCESS = AuditEventStatus.SUCCESS.getStatus(); - private static final String FAILURE = AuditEventStatus.FAILURE.getStatus(); private static final String FQCN = AuditLogger.class.getName(); private static final Marker WRITE_MARKER = AuditMarker.WRITE.getMarker(); private static final Marker READ_MARKER = AuditMarker.READ.getMarker(); /** * Parametrized Constructor to initialize logger. 
- * @param type + * @param type Audit Logger Type */ public AuditLogger(AuditLoggerType type){ initializeLogger(type); @@ -60,68 +55,53 @@ public ExtendedLogger getLogger() { return logger; } - public void logWriteSuccess(AuditAction type, Map data) { - logWriteSuccess(type, data, Level.INFO); + public void logWriteSuccess(AuditMessage msg) { + logWriteSuccess(Level.INFO, msg); } - public void logWriteSuccess(AuditAction type, Map data, Level - level) { - StructuredDataMessage msg = new StructuredDataMessage("", SUCCESS, - type.getAction(), data); + public void logWriteSuccess(Level level, AuditMessage msg) { this.logger.logIfEnabled(FQCN, level, WRITE_MARKER, msg, null); } - - public void logWriteFailure(AuditAction type, Map data) { - logWriteFailure(type, data, Level.INFO, null); + public void logWriteFailure(AuditMessage msg) { + logWriteFailure(Level.ERROR, msg); } - public void logWriteFailure(AuditAction type, Map data, Level - level) { - logWriteFailure(type, data, level, null); + public void logWriteFailure(Level level, AuditMessage msg) { + logWriteFailure(level, msg, null); } - public void logWriteFailure(AuditAction type, Map data, - Throwable exception) { - logWriteFailure(type, data, Level.INFO, exception); + public void logWriteFailure(AuditMessage msg, Throwable exception) { + logWriteFailure(Level.ERROR, msg, exception); } - public void logWriteFailure(AuditAction type, Map data, Level - level, Throwable exception) { - StructuredDataMessage msg = new StructuredDataMessage("", FAILURE, - type.getAction(), data); + public void logWriteFailure(Level level, AuditMessage msg, + Throwable exception) { this.logger.logIfEnabled(FQCN, level, WRITE_MARKER, msg, exception); } - public void logReadSuccess(AuditAction type, Map data) { - logReadSuccess(type, data, Level.INFO); + public void logReadSuccess(AuditMessage msg) { + logReadSuccess(Level.INFO, msg); } - public void logReadSuccess(AuditAction type, Map data, Level - level) { - StructuredDataMessage msg = new StructuredDataMessage("", SUCCESS, - type.getAction(), data); + public void logReadSuccess(Level level, AuditMessage msg) { this.logger.logIfEnabled(FQCN, level, READ_MARKER, msg, null); } - public void logReadFailure(AuditAction type, Map data) { - logReadFailure(type, data, Level.INFO, null); + public void logReadFailure(AuditMessage msg) { + logReadFailure(Level.ERROR, msg); } - public void logReadFailure(AuditAction type, Map data, Level - level) { - logReadFailure(type, data, level, null); + public void logReadFailure(Level level, AuditMessage msg) { + logReadFailure(level, msg, null); } - public void logReadFailure(AuditAction type, Map data, - Throwable exception) { - logReadFailure(type, data, Level.INFO, exception); + public void logReadFailure(AuditMessage msg, Throwable exception) { + logReadFailure(Level.ERROR, msg, exception); } - public void logReadFailure(AuditAction type, Map data, Level - level, Throwable exception) { - StructuredDataMessage msg = new StructuredDataMessage("", FAILURE, - type.getAction(), data); + public void logReadFailure(Level level, AuditMessage msg, + Throwable exception) { this.logger.logIfEnabled(FQCN, level, READ_MARKER, msg, exception); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/AuditMessage.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/AuditMessage.java new file mode 100644 index 00000000000..858695aef14 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/AuditMessage.java @@ -0,0 +1,64 @@ 
+/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.audit; + +import org.apache.logging.log4j.message.Message; + +import java.util.Map; + +/** + * Defines audit message structure. + */ +public class AuditMessage implements Message { + + private String message; + + public AuditMessage(String user, String ip, String op, + Map params, String ret){ + + this.message = String.format("user=%s ip=%s op=%s %s ret=%s", + user, ip, op, params, ret); + } + + @Override + public String getFormattedMessage() { + return message; + } + + @Override + public String getFormat() { + return null; + } + + @Override + public Object[] getParameters() { + return new Object[0]; + } + + @Override + public Throwable getThrowable() { + return null; + } + + /** + * Use when there are custom string to be added to default msg. + * @param customMessage custom string + */ + private void appendMessage(String customMessage) { + this.message += customMessage; + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/package-info.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/package-info.java index 3743fddd4f2..9c00ef7cee5 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/package-info.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/audit/package-info.java @@ -46,12 +46,14 @@ * **** Auditable *** * This is an interface to mark an entity as auditable. * This interface must be implemented by entities requiring audit logging. - * For example - KSMVolumeArgs, KSMBucketArgs. + * For example - OMVolumeArgs, OMBucketArgs. * The implementing class must override toAuditMap() to return an * instance of Map where both Key and Value are String. * - * Key: must not contain any spaces. If the key is multi word then use - * camel case. + * Key: must contain printable US ASCII characters + * May not contain a space, =, ], or " + * If the key is multi word then use camel case. + * * Value: if it is a collection/array, then it must be converted to a comma * delimited string * @@ -81,6 +83,11 @@ * *** AuditMarker *** * Enum to define various Audit Markers used in AuditLogging. * + * *** AuditMessage *** + * Entity to define an audit message to be logged + * It will generate a message formatted as: + * user=xxx ip=xxx op=XXXX_XXXX {key=val, key1=val1..} ret=XXXXXX + * * **************************************************************************** * Usage * **************************************************************************** @@ -88,14 +95,16 @@ * 1. Get a logger by specifying the appropriate logger type * Example: ExtendedLogger AUDIT = new AuditLogger(AuditLoggerType.OMLogger) * - * 2. Log Read/Write and Success/Failure event as needed. + * 2. Construct an instance of AuditMessage + * + * 3. Log Read/Write and Success/Failure event as needed. 
* Example - * AUDIT.logWriteSuccess(AuditAction type, Map data, Level - * level) + * AUDIT.logWriteSuccess(Level level, AuditMessage msg) * * If logging is done without specifying Level, then Level implicitly - * defaults to INFO - * AUDIT.logWriteSuccess(AuditAction type, Map data) + * defaults to INFO for xxxxSuccess() and ERROR for xxxxFailure() + * AUDIT.logWriteSuccess(AuditMessage msg) + * AUDIT.logWriteFailure(AuditMessage msg) * * See sample invocations in src/test in the following class: * org.apache.hadoop.ozone.audit.TestOzoneAuditLogger diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java index a32d5590236..1826a58b023 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/common/Storage.java @@ -45,8 +45,9 @@ public abstract class Storage { private static final Logger LOG = LoggerFactory.getLogger(Storage.class); - protected static final String STORAGE_DIR_CURRENT = "current"; + public static final String STORAGE_DIR_CURRENT = "current"; protected static final String STORAGE_FILE_VERSION = "VERSION"; + public static final String CONTAINER_DIR = "containerDir"; private final NodeType nodeType; private final File root; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyData.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyData.java index 129e4a8fedb..ee27021393e 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyData.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyData.java @@ -19,12 +19,14 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.client.BlockID; +import com.google.common.base.Preconditions; import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.ArrayList; /** * Helper class to convert Protobuf to Java classes. @@ -34,11 +36,23 @@ private final Map metadata; /** + * Represent a list of chunks. + * In order to reduce memory usage, chunkList is declared as an + * {@link Object}. + * When #elements == 0, chunkList is null. + * When #elements == 1, chunkList refers to the only element. + * When #elements > 1, chunkList refers to the list. + * * Please note : when we are working with keys, we don't care what they point * to. So we We don't read chunkinfo nor validate them. It is responsibility * of higher layer like ozone. We just read and write data from network. */ - private List chunks; + private Object chunkList; + + /** + * total size of the key. + */ + private long size; /** * Constructs a KeyData Object. 
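A hedged usage sketch of the refactored audit API documented above. The AuditLogger constructor, the AuditMessage constructor, and logWriteSuccess(AuditMessage) are taken from the hunks in this patch; the parameter map is assumed to be Map<String, String> (the flattened hunk drops generics), and the operation name and parameters are illustrative only.

    import org.apache.hadoop.ozone.audit.AuditLogger;
    import org.apache.hadoop.ozone.audit.AuditLoggerType;
    import org.apache.hadoop.ozone.audit.AuditMessage;

    import java.util.Collections;
    import java.util.Map;

    public final class AuditUsageExample {
      private final AuditLogger audit;

      public AuditUsageExample(AuditLoggerType type) {
        // e.g. the OM logger type, as in the package-info example above
        this.audit = new AuditLogger(type);
      }

      public void auditVolumeCreate(String user, String ip) {
        // Illustrative parameters; keys must be printable US ASCII without
        // spaces, '=', ']' or '"', per the package-info rules above.
        Map<String, String> params = Collections.singletonMap("volume", "vol1");
        AuditMessage msg =
            new AuditMessage(user, ip, "CREATE_VOLUME", params, "SUCCESS");
        audit.logWriteSuccess(msg);      // implicitly logged at INFO
        // audit.logWriteFailure(msg);   // would implicitly log at ERROR
      }
    }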
@@ -48,6 +62,7 @@ public KeyData(BlockID blockID) { this.blockID = blockID; this.metadata = new TreeMap<>(); + this.size = 0; } /** @@ -65,6 +80,9 @@ public static KeyData getFromProtoBuf(ContainerProtos.KeyData data) throws data.getMetadata(x).getValue()); } keyData.setChunks(data.getChunksList()); + if (data.hasSize()) { + Preconditions.checkArgument(data.getSize() == keyData.getSize()); + } return keyData; } @@ -76,13 +94,14 @@ public static KeyData getFromProtoBuf(ContainerProtos.KeyData data) throws ContainerProtos.KeyData.Builder builder = ContainerProtos.KeyData.newBuilder(); builder.setBlockID(this.blockID.getDatanodeBlockIDProtobuf()); - builder.addAllChunks(this.chunks); for (Map.Entry entry : metadata.entrySet()) { ContainerProtos.KeyValue.Builder keyValBuilder = ContainerProtos.KeyValue.newBuilder(); builder.addMetadata(keyValBuilder.setKey(entry.getKey()) .setValue(entry.getValue()).build()); } + builder.addAllChunks(getChunks()); + builder.setSize(size); return builder.build(); } @@ -121,17 +140,70 @@ public synchronized void deleteKey(String key) { metadata.remove(key); } + @SuppressWarnings("unchecked") + private List castChunkList() { + return (List)chunkList; + } + /** * Returns chunks list. * * @return list of chunkinfo. */ public List getChunks() { - return chunks; + return chunkList == null? Collections.emptyList() + : chunkList instanceof ContainerProtos.ChunkInfo? + Collections.singletonList((ContainerProtos.ChunkInfo)chunkList) + : Collections.unmodifiableList(castChunkList()); + } + + /** + * Adds chinkInfo to the list. + */ + public void addChunk(ContainerProtos.ChunkInfo chunkInfo) { + if (chunkList == null) { + chunkList = chunkInfo; + } else { + final List list; + if (chunkList instanceof ContainerProtos.ChunkInfo) { + list = new ArrayList<>(2); + list.add((ContainerProtos.ChunkInfo)chunkList); + chunkList = list; + } else { + list = castChunkList(); + } + list.add(chunkInfo); + } + size += chunkInfo.getLen(); + } + + /** + * removes the chunk. + */ + public boolean removeChunk(ContainerProtos.ChunkInfo chunkInfo) { + final boolean removed; + if (chunkList instanceof List) { + final List list = castChunkList(); + removed = list.remove(chunkInfo); + if (list.size() == 1) { + chunkList = list.get(0); + } + } else if (chunkInfo.equals(chunkList)) { + chunkList = null; + removed = true; + } else { + removed = false; + } + + if (removed) { + size -= chunkInfo.getLen(); + } + return removed; } /** * Returns container ID. + * * @return long. */ public long getContainerID() { @@ -160,7 +232,15 @@ public BlockID getBlockID() { * @param chunks - List of chunks. */ public void setChunks(List chunks) { - this.chunks = chunks; + if (chunks == null) { + chunkList = null; + size = 0L; + } else { + final int n = chunks.size(); + chunkList = n == 0? null: n == 1? chunks.get(0): chunks; + size = chunks.parallelStream().mapToLong( + ContainerProtos.ChunkInfo::getLen).sum(); + } } /** @@ -168,7 +248,6 @@ public void setChunks(List chunks) { * @return total size of the key. 
*/ public long getSize() { - return chunks.parallelStream().mapToLong(e->e.getLen()).sum(); + return size; } - } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java index b8390ddd3d4..756a41af089 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java @@ -42,6 +42,7 @@ private static final Logger LOG = LoggerFactory.getLogger(LeaseManager.class); + private final String name; private final long defaultTimeout; private Map> activeLeases; private LeaseMonitor leaseMonitor; @@ -51,10 +52,13 @@ /** * Creates an instance of lease manager. * + * @param name + * Name for the LeaseManager instance. * @param defaultTimeout * Default timeout in milliseconds to be used for lease creation. */ - public LeaseManager(long defaultTimeout) { + public LeaseManager(String name, long defaultTimeout) { + this.name = name; this.defaultTimeout = defaultTimeout; } @@ -62,11 +66,11 @@ public LeaseManager(long defaultTimeout) { * Starts the lease manager service. */ public void start() { - LOG.debug("Starting LeaseManager service"); + LOG.debug("Starting {} LeaseManager service", name); activeLeases = new ConcurrentHashMap<>(); leaseMonitor = new LeaseMonitor(); leaseMonitorThread = new Thread(leaseMonitor); - leaseMonitorThread.setName("LeaseManager#LeaseMonitor"); + leaseMonitorThread.setName(name + "-LeaseManager#LeaseMonitor"); leaseMonitorThread.setDaemon(true); leaseMonitorThread.setUncaughtExceptionHandler((thread, throwable) -> { // Let us just restart this thread after logging an error. @@ -75,7 +79,7 @@ public void start() { thread.toString(), throwable); leaseMonitorThread.start(); }); - LOG.debug("Starting LeaseManager#LeaseMonitor Thread"); + LOG.debug("Starting {}-LeaseManager#LeaseMonitor Thread", name); leaseMonitorThread.start(); isRunning = true; } @@ -203,7 +207,7 @@ private LeaseMonitor() { @Override public void run() { while(monitor) { - LOG.debug("LeaseMonitor: checking for lease expiry"); + LOG.debug("{}-LeaseMonitor: checking for lease expiry", name); long sleepTime = Long.MAX_VALUE; for (T resource : activeLeases.keySet()) { diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/ActiveLock.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/ActiveLock.java new file mode 100644 index 00000000000..c3020844927 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/ActiveLock.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
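The KeyData change above stores its chunks in a single Object field that is null for zero chunks, the lone element for one chunk, and a List only when there are several, so the common one-chunk case avoids allocating a list. The generic sketch below is purely illustrative (not part of the patch) and isolates that representation; it assumes the element type is not itself a List.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    // Hypothetical illustration of the 0/1/many representation used by KeyData.
    public final class CompactList<T> {
      private Object elements;   // null, a single T, or a List<T>

      @SuppressWarnings("unchecked")
      public List<T> asList() {
        if (elements == null) {
          return Collections.emptyList();
        }
        if (elements instanceof List) {
          return Collections.unmodifiableList((List<T>) elements);
        }
        return Collections.singletonList((T) elements);
      }

      @SuppressWarnings("unchecked")
      public void add(T element) {
        if (elements == null) {
          elements = element;                 // first element: store it directly
        } else if (elements instanceof List) {
          ((List<T>) elements).add(element);  // already a list: append
        } else {
          List<T> list = new ArrayList<>(2);  // second element: promote to a list
          list.add((T) elements);
          list.add(element);
          elements = list;
        }
      }
    }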

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.lock; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Lock implementation which also maintains counter. + */ +public final class ActiveLock { + + private Lock lock; + private AtomicInteger count; + + /** + * Use ActiveLock#newInstance to create instance. + */ + private ActiveLock() { + this.lock = new ReentrantLock(); + this.count = new AtomicInteger(0); + } + + /** + * Creates a new instance of ActiveLock. + * + * @return new ActiveLock + */ + public static ActiveLock newInstance() { + return new ActiveLock(); + } + + /** + * Acquires the lock. + * + *

If the lock is not available then the current thread becomes + * disabled for thread scheduling purposes and lies dormant until the + * lock has been acquired. + */ + public void lock() { + lock.lock(); + } + + /** + * Releases the lock. + */ + public void unlock() { + lock.unlock(); + } + + /** + * Increment the active count of the lock. + */ + void incrementActiveCount() { + count.incrementAndGet(); + } + + /** + * Decrement the active count of the lock. + */ + void decrementActiveCount() { + count.decrementAndGet(); + } + + /** + * Returns the active count on the lock. + * + * @return Number of active leases on the lock. + */ + int getActiveLockCount() { + return count.get(); + } + + /** + * Resets the active count on the lock. + */ + void resetCounter() { + count.set(0); + } + + @Override + public String toString() { + return lock.toString(); + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/LockManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/LockManager.java new file mode 100644 index 00000000000..49cf544626b --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/LockManager.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.lock; + +import org.apache.commons.pool2.impl.GenericObjectPool; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Manages the locks on a given resource. A new lock is created for each + * and every unique resource. Uniqueness of resource depends on the + * {@code equals} implementation of it. + */ +public class LockManager { + + private static final Logger LOG = LoggerFactory.getLogger(LockManager.class); + + private final Map activeLocks = new ConcurrentHashMap<>(); + private final GenericObjectPool lockPool = + new GenericObjectPool<>(new PooledLockFactory()); + + /** + * Creates new LockManager instance. + * + * @param conf Configuration object + */ + public LockManager(Configuration conf) { + int maxPoolSize = conf.getInt(HddsConfigKeys.HDDS_LOCK_MAX_CONCURRENCY, + HddsConfigKeys.HDDS_LOCK_MAX_CONCURRENCY_DEFAULT); + lockPool.setMaxTotal(maxPoolSize); + } + + + /** + * Acquires the lock on given resource. + * + *

If the lock is not available then the current thread becomes + * disabled for thread scheduling purposes and lies dormant until the + * lock has been acquired. + */ + public void lock(T resource) { + activeLocks.compute(resource, (k, v) -> { + ActiveLock lock; + try { + if (v == null) { + lock = lockPool.borrowObject(); + } else { + lock = v; + } + lock.incrementActiveCount(); + } catch (Exception ex) { + LOG.error("Unable to obtain lock.", ex); + throw new RuntimeException(ex); + } + return lock; + }).lock(); + } + + /** + * Releases the lock on given resource. + */ + public void unlock(T resource) { + ActiveLock lock = activeLocks.get(resource); + if (lock == null) { + // Someone is releasing a lock which was never acquired. Log and return. + LOG.warn("Trying to release the lock on {}, which was never acquired.", + resource); + return; + } + lock.unlock(); + activeLocks.computeIfPresent(resource, (k, v) -> { + v.decrementActiveCount(); + if (v.getActiveLockCount() != 0) { + return v; + } + lockPool.returnObject(v); + return null; + }); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/PooledLockFactory.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/PooledLockFactory.java new file mode 100644 index 00000000000..4c24ef74b28 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/PooledLockFactory.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
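A brief usage sketch for the LockManager added above. The constructor and the lock/unlock signatures come from this patch; the class is assumed to be generic over the resource type (the flattened hunk drops the type parameter, but lock(T resource) implies it), and the resource name here is illustrative.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.ozone.lock.LockManager;

    public final class LockManagerExample {
      public static void main(String[] args) {
        // Pool size is bounded by HddsConfigKeys.HDDS_LOCK_MAX_CONCURRENCY.
        LockManager<String> lockManager = new LockManager<>(new Configuration());
        lockManager.lock("container-1");
        try {
          // critical section for the "container-1" resource
        } finally {
          lockManager.unlock("container-1");
        }
      }
    }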

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.lock; + +import org.apache.commons.pool2.BasePooledObjectFactory; +import org.apache.commons.pool2.PooledObject; +import org.apache.commons.pool2.impl.DefaultPooledObject; + +/** + * Pool factory to create {@code ActiveLock} instances. + */ +public class PooledLockFactory extends BasePooledObjectFactory { + + @Override + public ActiveLock create() throws Exception { + return ActiveLock.newInstance(); + } + + @Override + public PooledObject wrap(ActiveLock activeLock) { + return new DefaultPooledObject<>(activeLock); + } + + @Override + public void activateObject(PooledObject pooledObject) { + pooledObject.getObject().resetCounter(); + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/package-info.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/package-info.java new file mode 100644 index 00000000000..5c677ced745 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lock/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.lock; +/* + This package contains the lock related classes. + */ \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/HddsVersionInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/HddsVersionInfo.java new file mode 100644 index 00000000000..e7f697a0c50 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/HddsVersionInfo.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.utils; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.ClassUtil; +import org.apache.hadoop.util.ThreadUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + +/** + * This class returns build information about Hadoop components. + */ +@InterfaceAudience.Public +@InterfaceStability.Stable +public class HddsVersionInfo { + private static final Logger LOG = LoggerFactory.getLogger( + HddsVersionInfo.class); + + private Properties info; + + protected HddsVersionInfo(String component) { + info = new Properties(); + String versionInfoFile = component + "-version-info.properties"; + InputStream is = null; + try { + is = ThreadUtil.getResourceAsStream(HddsVersionInfo.class.getClassLoader(), + versionInfoFile); + info.load(is); + } catch (IOException ex) { + LoggerFactory.getLogger(getClass()).warn("Could not read '" + + versionInfoFile + "', " + ex.toString(), ex); + } finally { + IOUtils.closeStream(is); + } + } + + protected String _getVersion() { + return info.getProperty("version", "Unknown"); + } + + protected String _getRevision() { + return info.getProperty("revision", "Unknown"); + } + + protected String _getBranch() { + return info.getProperty("branch", "Unknown"); + } + + protected String _getDate() { + return info.getProperty("date", "Unknown"); + } + + protected String _getUser() { + return info.getProperty("user", "Unknown"); + } + + protected String _getUrl() { + return info.getProperty("url", "Unknown"); + } + + protected String _getSrcChecksum() { + return info.getProperty("srcChecksum", "Unknown"); + } + + protected String _getBuildVersion(){ + return _getVersion() + + " from " + _getRevision() + + " by " + _getUser() + + " source checksum " + _getSrcChecksum(); + } + + protected String _getProtocVersion() { + return info.getProperty("protocVersion", "Unknown"); + } + + private static final HddsVersionInfo HDDS_VERSION_INFO = + new HddsVersionInfo("hdds"); + /** + * Get the HDDS version. + * @return the Hdds version string, eg. "0.6.3-dev" + */ + public static String getVersion() { + return HDDS_VERSION_INFO._getVersion(); + } + + /** + * Get the Git commit hash of the repository when compiled. + * @return the commit hash, eg. "18f64065d5db6208daf50b02c1b5ed4ee3ce547a" + */ + public static String getRevision() { + return HDDS_VERSION_INFO._getRevision(); + } + + /** + * Get the branch on which this originated. + * @return The branch name, e.g. "trunk" or "branches/branch-0.20" + */ + public static String getBranch() { + return HDDS_VERSION_INFO._getBranch(); + } + + /** + * The date that HDDS was compiled. + * @return the compilation date in unix date format + */ + public static String getDate() { + return HDDS_VERSION_INFO._getDate(); + } + + /** + * The user that compiled HDDS. + * @return the username of the user + */ + public static String getUser() { + return HDDS_VERSION_INFO._getUser(); + } + + /** + * Get the URL for the HDDS repository. + * @return the URL of the Hdds repository + */ + public static String getUrl() { + return HDDS_VERSION_INFO._getUrl(); + } + + /** + * Get the checksum of the source files from which HDDS was built. 
+ * @return the checksum of the source files + */ + public static String getSrcChecksum() { + return HDDS_VERSION_INFO._getSrcChecksum(); + } + + /** + * Returns the buildVersion which includes version, + * revision, user and date. + * @return the buildVersion + */ + public static String getBuildVersion(){ + return HDDS_VERSION_INFO._getBuildVersion(); + } + + /** + * Returns the protoc version used for the build. + * @return the protoc version + */ + public static String getProtocVersion(){ + return HDDS_VERSION_INFO._getProtocVersion(); + } + + public static void main(String[] args) { + System.out.println("Using HDDS " + getVersion()); + System.out.println("Source code repository " + getUrl() + " -r " + + getRevision()); + System.out.println("Compiled by " + getUser() + " on " + getDate()); + System.out.println("Compiled with protoc " + getProtocVersion()); + System.out.println("From source with checksum " + getSrcChecksum()); + LOG.debug("This command was run using " + + ClassUtil.findContainingJar(HddsVersionInfo.class)); + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/LevelDBStore.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/LevelDBStore.java index 13b918015e0..ed116a381c3 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/LevelDBStore.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/LevelDBStore.java @@ -379,4 +379,9 @@ public void writeBatch(BatchOperation operation) throws IOException { } return result; } + + @Override + public MetaStoreIterator iterator() { + return new LevelDBStoreIterator(db.iterator()); + } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/LevelDBStoreIterator.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/LevelDBStoreIterator.java new file mode 100644 index 00000000000..7b62f7ad43e --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/LevelDBStoreIterator.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.utils; + +import org.iq80.leveldb.DBIterator; +import java.util.Map; +import java.util.NoSuchElementException; + +import org.apache.hadoop.utils.MetadataStore.KeyValue; + + +/** + * LevelDB store iterator. 
+ */ +public class LevelDBStoreIterator implements MetaStoreIterator { + + + private DBIterator levelDBIterator; + + public LevelDBStoreIterator(DBIterator iterator) { + this.levelDBIterator = iterator; + levelDBIterator.seekToFirst(); + } + + @Override + public boolean hasNext() { + return levelDBIterator.hasNext(); + } + + @Override + public KeyValue next() { + if(levelDBIterator.hasNext()) { + Map.Entry entry = levelDBIterator.next(); + return KeyValue.create(entry.getKey(), entry.getValue()); + } + throw new NoSuchElementException("LevelDB Store has no more elements"); + } + + @Override + public void seekToFirst() { + levelDBIterator.seekToFirst(); + } + + @Override + public void seekToLast() { + levelDBIterator.seekToLast(); + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetaStoreIterator.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetaStoreIterator.java new file mode 100644 index 00000000000..52d0a3efd3b --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetaStoreIterator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.utils; + +import java.util.Iterator; + +/** + * Iterator for MetaDataStore DB. + * @param + */ +public interface MetaStoreIterator extends Iterator { + + /** + * seek to first entry. + */ + void seekToFirst(); + + /** + * seek to last entry. + */ + void seekToLast(); + +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStore.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStore.java index b90b08f6585..7d3bc6ba9a1 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStore.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStore.java @@ -169,4 +169,59 @@ */ void iterate(byte[] from, EntryConsumer consumer) throws IOException; + + /** + * Returns the iterator for this metadata store. + * @return MetaStoreIterator + */ + MetaStoreIterator iterator(); + + /** + * Class used to represent the key and value pair of a db entry. + */ + class KeyValue { + + private final byte[] key; + private final byte[] value; + + /** + * KeyValue Constructor, used to represent a key and value of a db entry. + * @param key + * @param value + */ + private KeyValue(byte[] key, byte[] value) { + this.key = key; + this.value = value; + } + + /** + * Return key. + * @return byte[] + */ + public byte[] getKey() { + byte[] result = new byte[key.length]; + System.arraycopy(key, 0, result, 0, key.length); + return result; + } + + /** + * Return value. 
+ * @return byte[] + */ + public byte[] getValue() { + byte[] result = new byte[value.length]; + System.arraycopy(value, 0, result, 0, value.length); + return result; + } + + /** + * Create a KeyValue pair. + * @param key + * @param value + * @return KeyValue object. + */ + public static KeyValue create(byte[] key, byte[] value) { + return new KeyValue(key, value); + } + } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStoreBuilder.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStoreBuilder.java index 9e9c32ae561..85cebed6503 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStoreBuilder.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/MetadataStoreBuilder.java @@ -18,12 +18,15 @@ package org.apache.hadoop.utils; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.iq80.leveldb.Options; import org.rocksdb.BlockBasedTableConfig; import org.rocksdb.Statistics; import org.rocksdb.StatsLevel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; @@ -44,10 +47,14 @@ */ public class MetadataStoreBuilder { + @VisibleForTesting + static final Logger LOG = + LoggerFactory.getLogger(MetadataStoreBuilder.class); private File dbFile; private long cacheSize; private boolean createIfMissing = true; private Configuration conf; + private String dbType; public static MetadataStoreBuilder newBuilder() { return new MetadataStoreBuilder(); @@ -73,6 +80,17 @@ public MetadataStoreBuilder setConf(Configuration configuration) { return this; } + /** + * Set the container DB Type. + * @param type + * @return MetadataStoreBuilder + */ + public MetadataStoreBuilder setDBType(String type) { + this.dbType = type; + return this; + } + + public MetadataStore build() throws IOException { if (dbFile == null) { throw new IllegalArgumentException("Failed to build metadata store, " @@ -81,18 +99,26 @@ public MetadataStore build() throws IOException { // Build db store based on configuration MetadataStore store = null; - String impl = conf == null ? - OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_DEFAULT : - conf.getTrimmed(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, - OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_DEFAULT); - if (OZONE_METADATA_STORE_IMPL_LEVELDB.equals(impl)) { + + if(dbType == null) { + LOG.debug("dbType is null, using "); + dbType = conf == null ? + OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_DEFAULT : + conf.getTrimmed(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, + OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_DEFAULT); + LOG.debug("dbType is null, using dbType {} from ozone configuration", + dbType); + } else { + LOG.debug("Using dbType {} for metastore", dbType); + } + if (OZONE_METADATA_STORE_IMPL_LEVELDB.equals(dbType)) { Options options = new Options(); options.createIfMissing(createIfMissing); if (cacheSize > 0) { options.cacheSize(cacheSize); } store = new LevelDBStore(dbFile, options); - } else if (OZONE_METADATA_STORE_IMPL_ROCKSDB.equals(impl)) { + } else if (OZONE_METADATA_STORE_IMPL_ROCKSDB.equals(dbType)) { org.rocksdb.Options opts = new org.rocksdb.Options(); opts.setCreateIfMissing(createIfMissing); @@ -119,7 +145,7 @@ public MetadataStore build() throws IOException { + OzoneConfigKeys.OZONE_METADATA_STORE_IMPL + ". 
Expecting " + OZONE_METADATA_STORE_IMPL_LEVELDB + " or " + OZONE_METADATA_STORE_IMPL_ROCKSDB - + ", but met " + impl); + + ", but met " + dbType); } return store; } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/RocksDBStore.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/RocksDBStore.java index 0dfca20a8fb..379d9e9d1db 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/RocksDBStore.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/RocksDBStore.java @@ -94,7 +94,7 @@ public RocksDBStore(File dbFile, Options options) } } - private IOException toIOException(String msg, RocksDBException e) { + public static IOException toIOException(String msg, RocksDBException e) { String statusCode = e.getStatus() == null ? "N/A" : e.getStatus().getCodeString(); String errMessage = e.getMessage() == null ? "Unknown error" : @@ -247,7 +247,7 @@ public void writeBatch(BatchOperation operation) for (BatchOperation.SingleOperation opt : operations) { switch (opt.getOpt()) { case DELETE: - writeBatch.remove(opt.getKey()); + writeBatch.delete(opt.getKey()); break; case PUT: writeBatch.put(opt.getKey(), opt.getValue()); @@ -380,4 +380,9 @@ protected ObjectName getStatMBeanName() { return statMBeanName; } + @Override + public MetaStoreIterator iterator() { + return new RocksDBStoreIterator(db.newIterator()); + } + } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/RocksDBStoreIterator.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/RocksDBStoreIterator.java new file mode 100644 index 00000000000..6e9b6958da1 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/RocksDBStoreIterator.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.hadoop.utils; + +import org.rocksdb.RocksIterator; + +import java.util.NoSuchElementException; + +import org.apache.hadoop.utils.MetadataStore.KeyValue; + +/** + * RocksDB store iterator. 
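The new iterator() methods and the KeyValue holder above allow a full scan over a MetadataStore. The sketch below is hedged: setDbFile() and close() are assumed from the existing MetadataStoreBuilder/MetadataStore API (they are not visible in these hunks), KeyValue is assumed as the iterator's element type (the flattened hunks drop generics), the db path is hypothetical, and "RocksDB" is assumed to be the value behind OZONE_METADATA_STORE_IMPL_ROCKSDB.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.utils.MetaStoreIterator;
    import org.apache.hadoop.utils.MetadataStore;
    import org.apache.hadoop.utils.MetadataStore.KeyValue;
    import org.apache.hadoop.utils.MetadataStoreBuilder;

    import java.io.File;
    import java.io.IOException;

    public final class MetaStoreIterationExample {
      public static void main(String[] args) throws IOException {
        MetadataStore store = MetadataStoreBuilder.newBuilder()
            .setConf(new Configuration())
            .setDbFile(new File("/tmp/example-container.db"))  // hypothetical path
            .setDBType("RocksDB")            // setDBType() is added by this patch
            .build();
        try {
          MetaStoreIterator<KeyValue> it = store.iterator();
          while (it.hasNext()) {
            KeyValue kv = it.next();
            System.out.println(kv.getKey().length + " key bytes, "
                + kv.getValue().length + " value bytes");
          }
        } finally {
          store.close();
        }
      }
    }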
+ */ +public class RocksDBStoreIterator implements MetaStoreIterator { + + private RocksIterator rocksDBIterator; + + public RocksDBStoreIterator(RocksIterator iterator) { + this.rocksDBIterator = iterator; + rocksDBIterator.seekToFirst(); + } + + @Override + public boolean hasNext() { + return rocksDBIterator.isValid(); + } + + @Override + public KeyValue next() { + if (rocksDBIterator.isValid()) { + KeyValue value = KeyValue.create(rocksDBIterator.key(), rocksDBIterator + .value()); + rocksDBIterator.next(); + return value; + } + throw new NoSuchElementException("RocksDB Store has no more elements"); + } + + @Override + public void seekToFirst() { + rocksDBIterator.seekToFirst(); + } + + @Override + public void seekToLast() { + rocksDBIterator.seekToLast(); + } + +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBConfigFromFile.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBConfigFromFile.java new file mode 100644 index 00000000000..94370b1cd5b --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBConfigFromFile.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import com.google.common.base.Preconditions; +import org.eclipse.jetty.util.StringUtil; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.DBOptions; +import org.rocksdb.Env; +import org.rocksdb.OptionsUtil; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; + +/** + * A Class that controls the standard config options of RocksDB. + *

+ * Important : Some of the functions in this file are magic functions designed + * for the use of OZONE developers only. Due to that this information is + * documented in this files only and is *not* intended for end user consumption. + * Please do not use this information to tune your production environments. + * Please remember the SpiderMan principal; with great power comes great + * responsibility. + */ +public final class DBConfigFromFile { + private static final Logger LOG = + LoggerFactory.getLogger(DBConfigFromFile.class); + + public static final String CONFIG_DIR = "HADOOP_CONF_DIR"; + + private DBConfigFromFile() { + } + + public static File getConfigLocation() throws IOException { + String path = System.getenv(CONFIG_DIR); + + // Make testing easy. + // If there is No Env. defined, let us try to read the JVM property + if (StringUtil.isBlank(path)) { + path = System.getProperty(CONFIG_DIR); + } + + if (StringUtil.isBlank(path)) { + LOG.debug("Unable to find the configuration directory. " + + "Please make sure that HADOOP_CONF_DIR is setup correctly."); + } + if(StringUtil.isBlank(path)){ + return null; + } + return new File(path); + + } + + /** + * This class establishes a magic pattern where we look for DBFile.ini as the + * options for RocksDB. + * + * @param dbFileName - The DBFile Name. For example, OzoneManager.db + * @return Name of the DB File options + */ + public static String getOptionsFileNameFromDB(String dbFileName) { + Preconditions.checkNotNull(dbFileName); + return dbFileName + ".ini"; + } + + /** + * One of the Magic functions designed for the use of Ozone Developers *ONLY*. + * This function takes the name of DB file and looks up the a .ini file that + * follows the ROCKSDB config format and uses that file for DBOptions and + * Column family Options. The Format for this file is specified by RockDB. + *

+ * Here is a sample config from the RocksDB examples repo. + *

+ * https://github.com/facebook/rocksdb/blob/master/examples + * /rocksdb_option_file_example.ini + *

+ * We look for a specific pattern, say OzoneManager.db will have its configs + * specified in OzoneManager.db.ini. This option is used only by the + * performance testing group to allow tuning of all parameters freely. + *

+ * For the end users we offer a set of predefined options that are easy to use + * and the user does not need to become an expert in RocksDB config. + *

+ * This code assumes the .ini file is placed in the same directory as normal + * config files. That is in $HADOOP_DIR/etc/hadoop. For example, if we want to + * control OzoneManager.db configs from a file, we need to create a file + * called OzoneManager.db.ini and place that file in $HADOOP_DIR/etc/hadoop. + * + * @param dbFileName - The DB File Name, for example, OzoneManager.db. + * @param cfDescs - ColumnFamily Handles. + * @return DBOptions, Options to be used for opening/creating the DB. + * @throws IOException + */ + public static DBOptions readFromFile(String dbFileName, + List cfDescs) throws IOException { + Preconditions.checkNotNull(dbFileName); + Preconditions.checkNotNull(cfDescs); + Preconditions.checkArgument(cfDescs.size() > 0); + + //TODO: Add Documentation on how to support RocksDB Mem Env. + Env env = Env.getDefault(); + DBOptions options = null; + File configLocation = getConfigLocation(); + if(configLocation != null && + StringUtil.isNotBlank(configLocation.toString())){ + Path optionsFile = Paths.get(configLocation.toString(), + getOptionsFileNameFromDB(dbFileName)); + + if (optionsFile.toFile().exists()) { + options = new DBOptions(); + try { + OptionsUtil.loadOptionsFromFile(optionsFile.toString(), + env, options, cfDescs, true); + + } catch (RocksDBException rdEx) { + RDBTable.toIOException("Unable to find/open Options file.", rdEx); + } + } + } + return options; + } + +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBProfile.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBProfile.java new file mode 100644 index 00000000000..4d3d6bc7700 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBProfile.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.hadoop.conf.StorageUnit; +import org.rocksdb.BlockBasedTableConfig; +import org.rocksdb.BloomFilter; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.CompactionStyle; +import org.rocksdb.DBOptions; + +import java.math.BigDecimal; + +/** + * User visible configs based RocksDB tuning page. Documentation for Options. + *

+ * https://github.com/facebook/rocksdb/blob/master/include/rocksdb/options.h + *

+ * Most tuning parameters are based on this URL. + *

+ * https://github.com/facebook/rocksdb/wiki/Setup-Options-and-Basic-Tuning + */ +public enum DBProfile { + //TODO : Add more profiles like TEST etc. + SSD { + @Override + public String toString() { + return "DBProfile.SSD"; + } + + @Override + public ColumnFamilyOptions getColumnFamilyOptions() { + + // Set BlockCacheSize to 256 MB. This should not be an issue for HADOOP. + final long blockCacheSize = toLong(StorageUnit.MB.toBytes(256.00)); + + // Set the Default block size to 16KB + final long blockSize = toLong(StorageUnit.KB.toBytes(16)); + + // Write Buffer Size -- set to 128 MB + final long writeBufferSize = toLong(StorageUnit.MB.toBytes(128)); + + return new ColumnFamilyOptions() + .setLevelCompactionDynamicLevelBytes(true) + .setWriteBufferSize(writeBufferSize) + .setTableFormatConfig( + new BlockBasedTableConfig() + .setBlockCacheSize(blockCacheSize) + .setBlockSize(blockSize) + .setCacheIndexAndFilterBlocks(true) + .setPinL0FilterAndIndexBlocksInCache(true) + .setFilter(new BloomFilter())); + } + + @Override + public DBOptions getDBOptions() { + final int maxBackgroundCompactions = 4; + final int maxBackgroundFlushes = 2; + final long bytesPerSync = toLong(StorageUnit.MB.toBytes(1.00)); + final boolean createIfMissing = true; + final boolean createMissingColumnFamilies = true; + return new DBOptions() + .setIncreaseParallelism(Runtime.getRuntime().availableProcessors()) + .setMaxBackgroundCompactions(maxBackgroundCompactions) + .setMaxBackgroundFlushes(maxBackgroundFlushes) + .setBytesPerSync(bytesPerSync) + .setCreateIfMissing(createIfMissing) + .setCreateMissingColumnFamilies(createMissingColumnFamilies); + } + + + }, + DISK { + @Override + public String toString() { + return "DBProfile.DISK"; + } + + @Override + public DBOptions getDBOptions() { + final long readAheadSize = toLong(StorageUnit.MB.toBytes(4.00)); + return SSD.getDBOptions().setCompactionReadaheadSize(readAheadSize); + } + + @Override + public ColumnFamilyOptions getColumnFamilyOptions() { + ColumnFamilyOptions columnFamilyOptions = SSD.getColumnFamilyOptions(); + columnFamilyOptions.setCompactionStyle(CompactionStyle.LEVEL); + return columnFamilyOptions; + } + + + }; + + private static long toLong(double value) { + BigDecimal temp = new BigDecimal(value); + return temp.longValue(); + } + + public abstract DBOptions getDBOptions(); + + public abstract ColumnFamilyOptions getColumnFamilyOptions(); +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStore.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStore.java new file mode 100644 index 00000000000..6947a83c8e0 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStore.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
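A sketch tying together the two classes above: DBConfigFromFile looks for an OzoneManager.db.ini under the directory named by HADOOP_CONF_DIR, and the caller falls back to one of the predefined DBProfile tunings when no such file exists. Method names come from this patch; the DB name and the single default column family used here are illustrative.

    import org.apache.hadoop.utils.db.DBConfigFromFile;
    import org.apache.hadoop.utils.db.DBProfile;
    import org.rocksdb.ColumnFamilyDescriptor;
    import org.rocksdb.DBOptions;
    import org.rocksdb.RocksDB;

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    public final class DbOptionsExample {
      public static DBOptions resolveOptions() throws IOException {
        List<ColumnFamilyDescriptor> cfDescs = new ArrayList<>();
        cfDescs.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));
        // Looks for OzoneManager.db.ini next to the normal Hadoop config files.
        DBOptions options =
            DBConfigFromFile.readFromFile("OzoneManager.db", cfDescs);
        if (options == null) {
          // No .ini override found; use the predefined SSD tuning profile.
          options = DBProfile.SSD.getDBOptions();
        }
        return options;
      }
    }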
+ * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.hadoop.classification.InterfaceStability; +import org.rocksdb.WriteBatch; + +import java.io.IOException; +import java.util.ArrayList; + +/** + * The DBStore interface provides the ability to create Tables, which store + * a specific type of Key-Value pair. Some DB interfaces like LevelDB will not + * be able to do this. In those case a Table creation will map to a default + * store. + * + */ +@InterfaceStability.Evolving +public interface DBStore extends AutoCloseable { + + /** + * Gets an existing TableStore. + * + * @param name - Name of the TableStore to get + * @return - TableStore. + * @throws IOException on Failure + */ + Table getTable(String name) throws IOException; + + /** + * Lists the Known list of Tables in a DB. + * + * @return List of Tables, in case of Rocks DB and LevelDB we will return at + * least one entry called DEFAULT. + * @throws IOException on Failure + */ + ArrayList listTables() throws IOException; + + /** + * Compact the entire database. + * + * @throws IOException on Failure + */ + void compactDB() throws IOException; + + /** + * Moves a key from the Source Table to the destination Table. + * + * @param key - Key to move. + * @param source - Source Table. + * @param dest - Destination Table. + * @throws IOException on Failure + */ + void move(byte[] key, Table source, Table dest) throws IOException; + + /** + * Moves a key from the Source Table to the destination Table and updates the + * destination to the new value. + * + * @param key - Key to move. + * @param value - new value to write to the destination table. + * @param source - Source Table. + * @param dest - Destination Table. + * @throws IOException on Failure + */ + void move(byte[] key, byte[] value, Table source, Table dest) + throws IOException; + + /** + * Moves a key from the Source Table to the destination Table and updates the + * destination with the new key name and value. + * This is similar to deleting an entry in one table and adding an entry in + * another table, here it is done atomically. + * + * @param sourceKey - Key to move. + * @param destKey - Destination key name. + * @param value - new value to write to the destination table. + * @param source - Source Table. + * @param dest - Destination Table. + * @throws IOException on Failure + */ + void move(byte[] sourceKey, byte[] destKey, byte[] value, + Table source, Table dest) throws IOException; + + /** + * Returns an estimated count of keys in this DB. + * + * @return long, estimate of keys in the DB. + */ + long getEstimatedKeyCount() throws IOException; + + /** + * Writes a transaction into the DB using the default write Options. + * @param batch - Batch to write. + */ + void write(WriteBatch batch) throws IOException; + +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStoreBuilder.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStoreBuilder.java new file mode 100644 index 00000000000..5ba9b9c87d3 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStoreBuilder.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; + +import org.apache.hadoop.hdfs.DFSUtil; +import org.eclipse.jetty.util.StringUtil; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.DBOptions; +import org.rocksdb.RocksDB; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DEFAULT_DB_PROFILE; + +/** + * DBStore Builder. + */ +public final class DBStoreBuilder { + private static final Logger LOG = + LoggerFactory.getLogger(DBStoreBuilder.class); + private Set tables; + private DBProfile dbProfile; + private DBOptions rocksDBOption; + private String dbname; + private Path dbPath; + private List tableNames; + private Configuration configuration; + + private DBStoreBuilder(Configuration configuration) { + tables = new HashSet<>(); + tableNames = new LinkedList<>(); + this.configuration = configuration; + } + + public static DBStoreBuilder newBuilder(Configuration configuration) { + return new DBStoreBuilder(configuration); + } + + public DBStoreBuilder setProfile(DBProfile profile) { + dbProfile = profile; + return this; + } + + public DBStoreBuilder setName(String name) { + dbname = name; + return this; + } + + public DBStoreBuilder addTable(String tableName) { + tableNames.add(tableName); + return this; + } + + public DBStoreBuilder addTable(String tableName, ColumnFamilyOptions option) + throws IOException { + TableConfig tableConfig = new TableConfig(tableName, option); + if (!tables.add(tableConfig)) { + String message = "Unable to add the table: " + tableName + + ". Please check if this table name is already in use."; + LOG.error(message); + throw new IOException(message); + } + LOG.info("using custom profile for table: {}", tableName); + return this; + } + + public DBStoreBuilder setDBOption(DBOptions option) { + rocksDBOption = option; + return this; + } + + public DBStoreBuilder setPath(Path path) { + Preconditions.checkNotNull(path); + dbPath = path; + return this; + } + + /** + * Builds a DBStore instance and returns that. + * + * @return DBStore + */ + public DBStore build() throws IOException { + if(StringUtil.isBlank(dbname) || (dbPath == null)) { + LOG.error("Required Parameter missing."); + throw new IOException("Required parameter is missing. 
Please make sure " + + "sure Path and DB name is provided."); + } + processDBProfile(); + processTables(); + DBOptions options = getDbProfile(); + File dbFile = getDBFile(); + if (!dbFile.getParentFile().exists()) { + throw new IOException("The DB destination directory should exist."); + } + return new RDBStore(dbFile, options, tables); + } + + /** + * if the DBProfile is not set, we will default to using default from the + * config file. + */ + private void processDBProfile() { + if (dbProfile == null) { + dbProfile = this.configuration.getEnum(HDDS_DB_PROFILE, + HDDS_DEFAULT_DB_PROFILE); + } + } + + private void processTables() throws IOException { + if (tableNames.size() > 0) { + for (String name : tableNames) { + addTable(name, dbProfile.getColumnFamilyOptions()); + LOG.info("Using default column profile:{} for Table:{}", + dbProfile.toString(), name); + } + } + addTable(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY), + dbProfile.getColumnFamilyOptions()); + LOG.info("Using default column profile:{} for Table:{}", + dbProfile.toString(), + DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY)); + } + + private DBOptions getDbProfile() { + if (rocksDBOption != null) { + return rocksDBOption; + } + DBOptions option = null; + if (StringUtil.isNotBlank(dbname)) { + List columnFamilyDescriptors = new LinkedList<>(); + + for (TableConfig tc : tables) { + columnFamilyDescriptors.add(tc.getDescriptor()); + } + + if (columnFamilyDescriptors.size() > 0) { + try { + option = DBConfigFromFile.readFromFile(dbname, + columnFamilyDescriptors); + if(option != null) { + LOG.info("Using Configs from {}.ini file", dbname); + } + } catch (IOException ex) { + LOG.info("Unable to read ROCKDB config", ex); + } + } + } + + if (option == null) { + LOG.info("Using default options. {}", dbProfile.toString()); + return dbProfile.getDBOptions(); + } + return option; + } + + private File getDBFile() throws IOException { + if (dbPath == null) { + LOG.error("DB path is required."); + throw new IOException("A Path to for DB file is needed."); + } + + if (StringUtil.isBlank(dbname)) { + LOG.error("DBName is a required."); + throw new IOException("A valid DB name is required."); + } + return Paths.get(dbPath.toString(), dbname).toFile(); + } + +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStore.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStore.java new file mode 100644 index 00000000000..5078b3e96cf --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStore.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.utils.db; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.metrics2.util.MBeans; +import org.apache.hadoop.utils.RocksDBStoreMBean; +import org.apache.ratis.shaded.com.google.common.annotations.VisibleForTesting; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; + +import org.rocksdb.DBOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.WriteBatch; +import org.rocksdb.WriteOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.management.ObjectName; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * RocksDB Store that supports creating Tables in DB. + */ +public class RDBStore implements DBStore { + private static final Logger LOG = + LoggerFactory.getLogger(RDBStore.class); + private final RocksDB db; + private final File dbLocation; + private final WriteOptions writeOptions; + private final DBOptions dbOptions; + private final Hashtable handleTable; + private ObjectName statMBeanName; + + public RDBStore(File dbFile, DBOptions options, Set families) + throws IOException { + Preconditions.checkNotNull(dbFile, "DB file location cannot be null"); + Preconditions.checkNotNull(families); + Preconditions.checkArgument(families.size() > 0); + handleTable = new Hashtable<>(); + + final List columnFamilyDescriptors = + new ArrayList<>(); + final List columnFamilyHandles = new ArrayList<>(); + + for (TableConfig family : families) { + columnFamilyDescriptors.add(family.getDescriptor()); + } + + dbOptions = options; + dbLocation = dbFile; + // TODO: Read from the next Config. + writeOptions = new WriteOptions(); + + try { + db = RocksDB.open(dbOptions, dbLocation.getAbsolutePath(), + columnFamilyDescriptors, columnFamilyHandles); + + for (int x = 0; x < columnFamilyHandles.size(); x++) { + handleTable.put( + DFSUtil.bytes2String(columnFamilyHandles.get(x).getName()), + columnFamilyHandles.get(x)); + } + + if (dbOptions.statistics() != null) { + Map jmxProperties = new HashMap<>(); + jmxProperties.put("dbName", dbFile.getName()); + statMBeanName = MBeans.register("Ozone", "RocksDbStore", jmxProperties, + new RocksDBStoreMBean(dbOptions.statistics())); + if (statMBeanName == null) { + LOG.warn("jmx registration failed during RocksDB init, db path :{}", + dbFile.getAbsolutePath()); + } + } + + } catch (RocksDBException e) { + throw toIOException( + "Failed init RocksDB, db path : " + dbFile.getAbsolutePath(), e); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("RocksDB successfully opened."); + LOG.debug("[Option] dbLocation= {}", dbLocation.getAbsolutePath()); + LOG.debug("[Option] createIfMissing = {}", options.createIfMissing()); + LOG.debug("[Option] maxOpenFiles= {}", options.maxOpenFiles()); + } + } + + public static IOException toIOException(String msg, RocksDBException e) { + String statusCode = e.getStatus() == null ? "N/A" : + e.getStatus().getCodeString(); + String errMessage = e.getMessage() == null ? 
"Unknown error" : + e.getMessage(); + String output = msg + "; status : " + statusCode + + "; message : " + errMessage; + return new IOException(output, e); + } + + @Override + public void compactDB() throws IOException { + if (db != null) { + try { + db.compactRange(); + } catch (RocksDBException e) { + throw toIOException("Failed to compact db", e); + } + } + } + + @Override + public void close() throws IOException { + + for (final ColumnFamilyHandle handle : handleTable.values()) { + handle.close(); + } + + if (statMBeanName != null) { + MBeans.unregister(statMBeanName); + statMBeanName = null; + } + + if (db != null) { + db.close(); + } + + if (dbOptions != null) { + dbOptions.close(); + } + + if (writeOptions != null) { + writeOptions.close(); + } + } + + @Override + public void move(byte[] key, Table source, Table dest) throws IOException { + RDBTable sourceTable; + RDBTable destTable; + if (source instanceof RDBTable) { + sourceTable = (RDBTable) source; + } else { + LOG.error("Unexpected Table type. Expected RocksTable Store for Source."); + throw new IOException("Unexpected TableStore Type in source. Expected " + + "RocksDBTable."); + } + + if (dest instanceof RDBTable) { + destTable = (RDBTable) dest; + } else { + LOG.error("Unexpected Table type. Expected RocksTable Store for Dest."); + throw new IOException("Unexpected TableStore Type in dest. Expected " + + "RocksDBTable."); + } + try (WriteBatch batch = new WriteBatch()) { + byte[] value = sourceTable.get(key); + batch.put(destTable.getHandle(), key, value); + batch.delete(sourceTable.getHandle(), key); + db.write(writeOptions, batch); + } catch (RocksDBException rockdbException) { + LOG.error("Move of key failed. Key:{}", DFSUtil.bytes2String(key)); + throw toIOException("Unable to move key: " + DFSUtil.bytes2String(key), + rockdbException); + } + } + + + @Override + public void move(byte[] key, byte[] value, Table source, + Table dest) throws IOException { + move(key, key, value, source, dest); + } + + @Override + public void move(byte[] sourceKey, byte[] destKey, byte[] value, Table source, + Table dest) throws IOException { + RDBTable sourceTable; + RDBTable destTable; + if (source instanceof RDBTable) { + sourceTable = (RDBTable) source; + } else { + LOG.error("Unexpected Table type. Expected RocksTable Store for Source."); + throw new IOException("Unexpected TableStore Type in source. Expected " + + "RocksDBTable."); + } + + if (dest instanceof RDBTable) { + destTable = (RDBTable) dest; + } else { + LOG.error("Unexpected Table type. Expected RocksTable Store for Dest."); + throw new IOException("Unexpected TableStore Type in dest. Expected " + + "RocksDBTable."); + } + try (WriteBatch batch = new WriteBatch()) { + batch.put(destTable.getHandle(), destKey, value); + batch.delete(sourceTable.getHandle(), sourceKey); + db.write(writeOptions, batch); + } catch (RocksDBException rockdbException) { + LOG.error("Move of key failed. 
Key:{}", DFSUtil.bytes2String(sourceKey)); + throw toIOException("Unable to move key: " + + DFSUtil.bytes2String(sourceKey), rockdbException); + } + } + + @Override + public long getEstimatedKeyCount() throws IOException { + try { + return db.getLongProperty("rocksdb.estimate-num-keys"); + } catch (RocksDBException e) { + throw toIOException("Unable to get the estimated count.", e); + } + } + + @Override + public void write(WriteBatch batch) throws IOException { + try { + db.write(writeOptions, batch); + } catch (RocksDBException e) { + throw toIOException("Unable to write the batch.", e); + } + } + + @VisibleForTesting + protected ObjectName getStatMBeanName() { + return statMBeanName; + } + + @Override + public Table getTable(String name) throws IOException { + ColumnFamilyHandle handle = handleTable.get(name); + if (handle == null) { + throw new IOException("No such table in this DB. TableName : " + name); + } + return new RDBTable(this.db, handle, this.writeOptions); + } + + @Override + public ArrayList
<Table> listTables() throws IOException { + ArrayList<Table>
returnList = new ArrayList<>(); + for (ColumnFamilyHandle handle : handleTable.values()) { + returnList.add(new RDBTable(db, handle, writeOptions)); + } + return returnList; + } +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStoreIterator.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStoreIterator.java new file mode 100644 index 00000000000..f1f2df62242 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStoreIterator.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.hadoop.utils.db.Table.KeyValue; +import org.rocksdb.RocksIterator; + +import java.io.IOException; +import java.util.NoSuchElementException; +import java.util.function.Consumer; + +/** + * RocksDB store iterator. + */ +public class RDBStoreIterator implements TableIterator { + + private RocksIterator rocksDBIterator; + + public RDBStoreIterator(RocksIterator iterator) { + this.rocksDBIterator = iterator; + rocksDBIterator.seekToFirst(); + } + + @Override + public void forEachRemaining(Consumer action) { + while(hasNext()) { + action.accept(next()); + } + } + + @Override + public boolean hasNext() { + return rocksDBIterator.isValid(); + } + + @Override + public Table.KeyValue next() { + if (rocksDBIterator.isValid()) { + KeyValue value = KeyValue.create(rocksDBIterator.key(), rocksDBIterator + .value()); + rocksDBIterator.next(); + return value; + } + throw new NoSuchElementException("RocksDB Store has no more elements"); + } + + @Override + public void seekToFirst() { + rocksDBIterator.seekToFirst(); + } + + @Override + public void seekToLast() { + rocksDBIterator.seekToLast(); + } + + @Override + public KeyValue seek(byte[] key) { + rocksDBIterator.seek(key); + if (rocksDBIterator.isValid()) { + return KeyValue.create(rocksDBIterator.key(), + rocksDBIterator.value()); + } + return null; + } + + @Override + public void close() throws IOException { + rocksDBIterator.close(); + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBTable.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBTable.java new file mode 100644 index 00000000000..8cf6b3533fb --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBTable.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
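A minimal sketch of walking a table with the iterator above; the raw TableIterator type and the helper class are illustrative only and not part of the patch. The table is assumed to come from DBStore#getTable.

    package org.apache.hadoop.utils.db;

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    /**
     * Minimal sketch: scan every entry of a Table using the iterator contract
     * above.
     */
    public final class TableScanSketch {
      private TableScanSketch() {
      }

      public static void dump(Table table) throws IOException {
        // The iterator starts positioned at the first entry; next() returns
        // the current key/value pair and then advances the cursor.
        try (TableIterator iter = table.iterator()) {
          while (iter.hasNext()) {
            Table.KeyValue kv = (Table.KeyValue) iter.next();
            System.out.println(
                new String(kv.getKey(), StandardCharsets.UTF_8)
                    + " -> " + kv.getValue().length + " bytes");
          }
        }
      }
    }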
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.hadoop.hdfs.DFSUtil; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.ReadOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.WriteBatch; +import org.rocksdb.WriteOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +/** + * RocksDB implementation of ozone metadata store. + */ +public class RDBTable implements Table { + + private static final Logger LOG = + LoggerFactory.getLogger(RDBTable.class); + + private final RocksDB db; + private final ColumnFamilyHandle handle; + private final WriteOptions writeOptions; + + /** + * Constructs a TableStore. + * + * @param db - DBstore that we are using. + * @param handle - ColumnFamily Handle. + * @param writeOptions - RocksDB write Options. + */ + public RDBTable(RocksDB db, ColumnFamilyHandle handle, + WriteOptions writeOptions) { + this.db = db; + this.handle = handle; + this.writeOptions = writeOptions; + } + + /** + * Converts RocksDB exception to IOE. + * @param msg - Message to add to exception. + * @param e - Original Exception. + * @return IOE. + */ + public static IOException toIOException(String msg, RocksDBException e) { + String statusCode = e.getStatus() == null ? "N/A" : + e.getStatus().getCodeString(); + String errMessage = e.getMessage() == null ? "Unknown error" : + e.getMessage(); + String output = msg + "; status : " + statusCode + + "; message : " + errMessage; + return new IOException(output, e); + } + + /** + * Returns the Column family Handle. + * + * @return ColumnFamilyHandle. + */ + @Override + public ColumnFamilyHandle getHandle() { + return handle; + } + + @Override + public void put(byte[] key, byte[] value) throws IOException { + try { + db.put(handle, writeOptions, key, value); + } catch (RocksDBException e) { + LOG.error("Failed to write to DB. 
Key: {}", new String(key, + StandardCharsets.UTF_8)); + throw toIOException("Failed to put key-value to metadata " + + "store", e); + } + } + + @Override + public boolean isEmpty() throws IOException { + try (TableIterator keyIter = iterator()) { + keyIter.seekToFirst(); + return !keyIter.hasNext(); + } + } + + @Override + public byte[] get(byte[] key) throws IOException { + try { + return db.get(handle, key); + } catch (RocksDBException e) { + throw toIOException( + "Failed to get the value for the given key", e); + } + } + + @Override + public void delete(byte[] key) throws IOException { + try { + db.delete(handle, key); + } catch (RocksDBException e) { + throw toIOException("Failed to delete the given key", e); + } + } + + @Override + public void writeBatch(WriteBatch operation) throws IOException { + try { + db.write(writeOptions, operation); + } catch (RocksDBException e) { + throw toIOException("Batch write operation failed", e); + } + } + +// @Override +// public void iterate(byte[] from, EntryConsumer consumer) +// throws IOException { +// +// try (RocksIterator it = db.newIterator(handle)) { +// if (from != null) { +// it.seek(from); +// } else { +// it.seekToFirst(); +// } +// while (it.isValid()) { +// if (!consumer.consume(it.key(), it.value())) { +// break; +// } +// it.next(); +// } +// } +// } + + @Override + public TableIterator iterator() { + ReadOptions readOptions = new ReadOptions(); + return new RDBStoreIterator(db.newIterator(handle, readOptions)); + } + + @Override + public String getName() throws IOException { + try { + return DFSUtil.bytes2String(this.getHandle().getName()); + } catch (RocksDBException rdbEx) { + throw toIOException("Unable to get the table name.", rdbEx); + } + } + + @Override + public void close() throws Exception { + // Nothing do for a Column Family. + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/Table.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/Table.java new file mode 100644 index 00000000000..39425852926 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/Table.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.hadoop.classification.InterfaceStability; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.WriteBatch; + +import java.io.IOException; + +/** + * Interface for key-value store that stores ozone metadata. Ozone metadata is + * stored as key value pairs, both key and value are arbitrary byte arrays. Each + * Table Stores a certain kind of keys and values. This allows a DB to have + * different kind of tables. 
+ */ +@InterfaceStability.Evolving +public interface Table extends AutoCloseable { + + /** + * Puts a key-value pair into the store. + * + * @param key metadata key + * @param value metadata value + */ + void put(byte[] key, byte[] value) throws IOException; + + /** + * @return true if the metadata store is empty. + * @throws IOException on Failure + */ + boolean isEmpty() throws IOException; + + /** + * Returns the value mapped to the given key in byte array or returns null + * if the key is not found. + * + * @param key metadata key + * @return value in byte array or null if the key is not found. + * @throws IOException on Failure + */ + byte[] get(byte[] key) throws IOException; + + /** + * Deletes a key from the metadata store. + * + * @param key metadata key + * @throws IOException on Failure + */ + void delete(byte[] key) throws IOException; + + /** + * Return the Column Family handle. TODO: This leaks an RockDB abstraction + * into Ozone code, cleanup later. + * + * @return ColumnFamilyHandle + */ + ColumnFamilyHandle getHandle(); + + /** + * A batch of PUT, DELETE operations handled as a single atomic write. + * + * @throws IOException write fails + */ + void writeBatch(WriteBatch operation) throws IOException; + + /** + * Returns the iterator for this metadata store. + * + * @return MetaStoreIterator + */ + TableIterator iterator(); + + /** + * Returns the Name of this Table. + * @return - Table Name. + * @throws IOException on failure. + */ + String getName() throws IOException; + + /** + * Class used to represent the key and value pair of a db entry. + */ + class KeyValue { + + private final byte[] key; + private final byte[] value; + + /** + * KeyValue Constructor, used to represent a key and value of a db entry. + * + * @param key - Key Bytes + * @param value - Value bytes + */ + private KeyValue(byte[] key, byte[] value) { + this.key = key; + this.value = value; + } + + /** + * Create a KeyValue pair. + * + * @param key - Key Bytes + * @param value - Value bytes + * @return KeyValue object. + */ + public static KeyValue create(byte[] key, byte[] value) { + return new KeyValue(key, value); + } + + /** + * Return key. + * + * @return byte[] + */ + public byte[] getKey() { + byte[] result = new byte[key.length]; + System.arraycopy(key, 0, result, 0, key.length); + return result; + } + + /** + * Return value. + * + * @return byte[] + */ + public byte[] getValue() { + byte[] result = new byte[value.length]; + System.arraycopy(value, 0, result, 0, value.length); + return result; + } + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TableConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TableConfig.java new file mode 100644 index 00000000000..897028a821f --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TableConfig.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
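Because DBStoreBuilder#addTable(String, ColumnFamilyOptions) records each such table in its own TableConfig, a single store can mix differently tuned tables; a minimal sketch with hypothetical table names, not part of the patch.

    package org.apache.hadoop.utils.db;

    import java.io.IOException;
    import java.nio.file.Path;

    import org.apache.hadoop.conf.Configuration;

    /**
     * Minimal sketch: give one table its own column family tuning while the
     * remaining tables pick up the configured profile. Names are hypothetical.
     */
    public final class PerTableTuningSketch {
      private PerTableTuningSketch() {
      }

      public static DBStore open(Configuration conf, Path dir)
          throws IOException {
        return DBStoreBuilder.newBuilder(conf)
            .setName("tuned.db")
            .setPath(dir)
            // Plain addTable: options come from the configured DBProfile.
            .addTable("smallTable")
            // Explicit options: this table is tracked through a TableConfig
            // carrying its own ColumnFamilyOptions.
            .addTable("bulkTable", DBProfile.DISK.getColumnFamilyOptions())
            .build();
      }
    }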
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.hdfs.DFSUtil; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyOptions; + +/** + * Class that maintains Table Configuration. + */ +public class TableConfig { + private final String name; + private final ColumnFamilyOptions columnFamilyOptions; + + + /** + * Constructs a Table Config. + * @param name - Name of the Table. + * @param columnFamilyOptions - Column Family options. + */ + public TableConfig(String name, ColumnFamilyOptions columnFamilyOptions) { + this.name = name; + this.columnFamilyOptions = columnFamilyOptions; + } + + /** + * Returns the Name for this Table. + * @return - Name String + */ + public String getName() { + return name; + } + + /** + * Returns a ColumnFamilyDescriptor for this table. + * @return ColumnFamilyDescriptor + */ + public ColumnFamilyDescriptor getDescriptor() { + return new ColumnFamilyDescriptor(DFSUtil.string2Bytes(name), + columnFamilyOptions); + } + + /** + * Returns Column family options for this Table. + * @return ColumnFamilyOptions used for the Table. + */ + public ColumnFamilyOptions getColumnFamilyOptions() { + return columnFamilyOptions; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + TableConfig that = (TableConfig) o; + return new EqualsBuilder() + .append(getName(), that.getName()) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(17, 37) + .append(getName()) + .toHashCode(); + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TableIterator.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TableIterator.java new file mode 100644 index 00000000000..83a8f3c8385 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TableIterator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import java.io.Closeable; +import java.util.Iterator; + +/** + * Iterator for MetaDataStore DB. 
+ * + * @param + */ +public interface TableIterator extends Iterator, Closeable { + + /** + * seek to first entry. + */ + void seekToFirst(); + + /** + * seek to last entry. + */ + void seekToLast(); + + /** + * Seek to the specific key. + * + * @param key - Bytes that represent the key. + * @return T. + */ + T seek(byte[] key); + +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/package-info.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/package-info.java new file mode 100644 index 00000000000..17d676d9283 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +/** + * Database interfaces for Ozone. + */ +package org.apache.hadoop.utils.db; \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java b/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java index df831161ca4..04bfeb2e848 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java +++ b/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java @@ -18,6 +18,7 @@ package org.apache.ratis; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.ozone.OzoneConfigKeys; @@ -28,9 +29,14 @@ import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.retry.RetryPolicies; +import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.shaded.com.google.protobuf.ByteString; +import org.apache.ratis.shaded.proto.RaftProtos; +import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.SizeInBytes; +import org.apache.ratis.util.TimeDuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,8 +45,13 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY; + /** * Ratis helper methods. 
*/ @@ -48,8 +59,19 @@ Logger LOG = LoggerFactory.getLogger(RatisHelper.class); static String toRaftPeerIdString(DatanodeDetails id) { - return id.getUuidString() + "_" + - id.getPort(DatanodeDetails.Port.Name.RATIS).getValue(); + return id.getUuidString(); + } + + static UUID toDatanodeId(String peerIdString) { + return UUID.fromString(peerIdString); + } + + static UUID toDatanodeId(RaftPeerId peerId) { + return toDatanodeId(peerId.toString()); + } + + static UUID toDatanodeId(RaftProtos.RaftPeerProto peerId) { + return toDatanodeId(RaftPeerId.valueOf(peerId.getId())); } static String toRaftPeerAddressString(DatanodeDetails id) { @@ -88,37 +110,49 @@ static RaftGroup emptyRaftGroup() { return EMPTY_GROUP; } - static RaftGroup newRaftGroup(List datanodes) { - final List newPeers = datanodes.stream() - .map(RatisHelper::toRaftPeer) - .collect(Collectors.toList()); - return RatisHelper.newRaftGroup(newPeers); - } - static RaftGroup newRaftGroup(Collection peers) { return peers.isEmpty()? emptyRaftGroup() : new RaftGroup(DUMMY_GROUP_ID, peers); } + static RaftGroup newRaftGroup(RaftGroupId groupId, + Collection peers) { + final List newPeers = peers.stream() + .map(RatisHelper::toRaftPeer) + .collect(Collectors.toList()); + return peers.isEmpty() ? new RaftGroup(groupId, Collections.emptyList()) + : new RaftGroup(groupId, newPeers); + } + static RaftGroup newRaftGroup(Pipeline pipeline) { - return newRaftGroup(toRaftPeers(pipeline)); + return new RaftGroup(pipeline.getId().getRaftGroupID(), + toRaftPeers(pipeline)); } - static RaftClient newRaftClient(RpcType rpcType, Pipeline pipeline) { + static RaftClient newRaftClient(RpcType rpcType, Pipeline pipeline, + RetryPolicy retryPolicy) { return newRaftClient(rpcType, toRaftPeerId(pipeline.getLeader()), - newRaftGroup(pipeline)); + newRaftGroup(pipeline.getId().getRaftGroupID(), pipeline.getMachines()), + retryPolicy); } - static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader) { + static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader, + RetryPolicy retryPolicy) { return newRaftClient(rpcType, leader.getId(), - newRaftGroup(new ArrayList<>(Arrays.asList(leader)))); + newRaftGroup(new ArrayList<>(Arrays.asList(leader))), retryPolicy); } - static RaftClient newRaftClient( - RpcType rpcType, RaftPeerId leader, RaftGroup group) { + static RaftClient newRaftClient(RpcType rpcType, RaftPeer leader, + RaftGroup group, RetryPolicy retryPolicy) { + return newRaftClient(rpcType, leader.getId(), group, retryPolicy); + } + + static RaftClient newRaftClient(RpcType rpcType, RaftPeerId leader, + RaftGroup group, RetryPolicy retryPolicy) { LOG.trace("newRaftClient: {}, leader={}, group={}", rpcType, leader, group); final RaftProperties properties = new RaftProperties(); RaftConfigKeys.Rpc.setType(properties, rpcType); + GrpcConfigKeys.setMessageSizeMax(properties, SizeInBytes.valueOf(OzoneConfigKeys.DFS_CONTAINER_CHUNK_MAX_SIZE)); @@ -126,6 +160,48 @@ static RaftClient newRaftClient( .setRaftGroup(group) .setLeaderId(leader) .setProperties(properties) + .setRetryPolicy(retryPolicy) .build(); } + + static RetryPolicy createRetryPolicy(Configuration conf) { + int maxRetryCount = + conf.getInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, + OzoneConfigKeys. + DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT); + long retryInterval = conf.getTimeDuration(OzoneConfigKeys. + DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY, OzoneConfigKeys. 
+ DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT + .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + long leaderElectionTimeout = conf.getTimeDuration( + DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, + DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT + .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + long clientRequestTimeout = conf.getTimeDuration( + OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY, + OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT + .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + long retryCacheTimeout = conf.getTimeDuration( + OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY, + OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT + .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); + Preconditions + .assertTrue(maxRetryCount * retryInterval > 5 * leaderElectionTimeout, + "Please make sure dfs.ratis.client.request.max.retries * " + + "dfs.ratis.client.request.retry.interval > " + + "5 * dfs.ratis.leader.election.minimum.timeout.duration"); + Preconditions.assertTrue( + maxRetryCount * (retryInterval + clientRequestTimeout) + < retryCacheTimeout, + "Please make sure " + + "(dfs.ratis.client.request.max.retries * " + + "(dfs.ratis.client.request.retry.interval + " + + "dfs.ratis.client.request.timeout.duration)) " + + "< dfs.ratis.server.retry-cache.timeout.duration"); + TimeDuration sleepDuration = + TimeDuration.valueOf(retryInterval, TimeUnit.MILLISECONDS); + RetryPolicy retryPolicy = RetryPolicies + .retryUpToMaximumCountWithFixedSleep(maxRetryCount, sleepDuration); + return retryPolicy; + } } diff --git a/hadoop-hdds/common/src/main/proto/DatanodeContainerProtocol.proto b/hadoop-hdds/common/src/main/proto/DatanodeContainerProtocol.proto index 53da18abb54..ba0d2d445eb 100644 --- a/hadoop-hdds/common/src/main/proto/DatanodeContainerProtocol.proto +++ b/hadoop-hdds/common/src/main/proto/DatanodeContainerProtocol.proto @@ -99,7 +99,7 @@ enum Type { PutSmallFile = 15; GetSmallFile = 16; CloseContainer = 17; - + GetCommittedBlockLength = 18; } @@ -132,6 +132,13 @@ enum Result { DELETE_ON_OPEN_CONTAINER = 26; CLOSED_CONTAINER_RETRY = 27; INVALID_CONTAINER_STATE = 28; + DISK_OUT_OF_SPACE = 29; + CONTAINER_ALREADY_EXISTS = 30; + CONTAINER_METADATA_ERROR = 31; + CONTAINER_FILES_CREATE_ERROR = 32; + CONTAINER_CHECKSUM_ERROR = 33; + UNKNOWN_CONTAINER_TYPE = 34; + BLOCK_NOT_COMMITTED = 35; } /** @@ -158,78 +165,81 @@ enum ContainerLifeCycleState { } message ContainerCommandRequestProto { - required Type cmdType = 1; // Type of the command + required Type cmdType = 1; // Type of the command // A string that identifies this command, we generate Trace ID in Ozone // frontend and this allows us to trace that command all over ozone. - optional string traceID = 2; + optional string traceID = 2; + + required int64 containerID = 3; + required string datanodeUuid = 4; // One of the following command is available when the corresponding // cmdType is set. At the protocol level we allow only // one command in each packet. // TODO : Upgrade to Protobuf 2.6 or later. 
- optional CreateContainerRequestProto createContainer = 3; - optional ReadContainerRequestProto readContainer = 4; - optional UpdateContainerRequestProto updateContainer = 5; - optional DeleteContainerRequestProto deleteContainer = 6; - optional ListContainerRequestProto listContainer = 7; + optional CreateContainerRequestProto createContainer = 5; + optional ReadContainerRequestProto readContainer = 6; + optional UpdateContainerRequestProto updateContainer = 7; + optional DeleteContainerRequestProto deleteContainer = 8; + optional ListContainerRequestProto listContainer = 9; + optional CloseContainerRequestProto closeContainer = 10; + + optional PutKeyRequestProto putKey = 11; + optional GetKeyRequestProto getKey = 12; + optional DeleteKeyRequestProto deleteKey = 13; + optional ListKeyRequestProto listKey = 14; - optional PutKeyRequestProto putKey = 8; - optional GetKeyRequestProto getKey = 9; - optional DeleteKeyRequestProto deleteKey = 10; - optional ListKeyRequestProto listKey = 11; + optional ReadChunkRequestProto readChunk = 15; + optional WriteChunkRequestProto writeChunk = 16; + optional DeleteChunkRequestProto deleteChunk = 17; + optional ListChunkRequestProto listChunk = 18; - optional ReadChunkRequestProto readChunk = 12; - optional WriteChunkRequestProto writeChunk = 13; - optional DeleteChunkRequestProto deleteChunk = 14; - optional ListChunkRequestProto listChunk = 15; + optional PutSmallFileRequestProto putSmallFile = 19; + optional GetSmallFileRequestProto getSmallFile = 20; - optional PutSmallFileRequestProto putSmallFile = 16; - optional GetSmallFileRequestProto getSmallFile = 17; - optional CloseContainerRequestProto closeContainer = 18; - required string datanodeUuid = 19; + optional GetCommittedBlockLengthRequestProto getCommittedBlockLength = 21; } message ContainerCommandResponseProto { - required Type cmdType = 1; - optional string traceID = 2; + required Type cmdType = 1; + optional string traceID = 2; - optional CreateContainerResponseProto createContainer = 3; - optional ReadContainerResponseProto readContainer = 4; - optional UpdateContainerResponseProto updateContainer = 5; - optional DeleteContainerResponseProto deleteContainer = 6; - optional ListContainerResponseProto listContainer = 7; + required Result result = 3; + optional string message = 4; - optional PutKeyResponseProto putKey = 8; - optional GetKeyResponseProto getKey = 9; - optional DeleteKeyResponseProto deleteKey = 10; - optional ListKeyResponseProto listKey = 11; + optional CreateContainerResponseProto createContainer = 5; + optional ReadContainerResponseProto readContainer = 6; + optional UpdateContainerResponseProto updateContainer = 7; + optional DeleteContainerResponseProto deleteContainer = 8; + optional ListContainerResponseProto listContainer = 9; + optional CloseContainerResponseProto closeContainer = 10; - optional WriteChunkResponseProto writeChunk = 12; - optional ReadChunkResponseProto readChunk = 13; - optional DeleteChunkResponseProto deleteChunk = 14; - optional ListChunkResponseProto listChunk = 15; + optional PutKeyResponseProto putKey = 11; + optional GetKeyResponseProto getKey = 12; + optional DeleteKeyResponseProto deleteKey = 13; + optional ListKeyResponseProto listKey = 14; - required Result result = 17; - optional string message = 18; + optional WriteChunkResponseProto writeChunk = 15; + optional ReadChunkResponseProto readChunk = 16; + optional DeleteChunkResponseProto deleteChunk = 17; + optional ListChunkResponseProto listChunk = 18; - optional PutSmallFileResponseProto 
putSmallFile = 19; - optional GetSmallFileResponseProto getSmallFile = 20; - optional CloseContainerResponseProto closeContainer = 21; + optional PutSmallFileResponseProto putSmallFile = 19; + optional GetSmallFileResponseProto getSmallFile = 20; + optional GetCommittedBlockLengthResponseProto getCommittedBlockLength = 21; } message ContainerData { required int64 containerID = 1; repeated KeyValue metadata = 2; - optional string dbPath = 3; optional string containerPath = 4; optional int64 bytesUsed = 6; optional int64 size = 7; - optional int64 keyCount = 8; + optional int64 blockCount = 8; optional ContainerLifeCycleState state = 9 [default = OPEN]; optional ContainerType containerType = 10 [default = KeyValueContainer]; - optional string containerDBType = 11; } enum ContainerType { @@ -239,14 +249,14 @@ enum ContainerType { // Container Messages. message CreateContainerRequestProto { - required ContainerData containerData = 1; + repeated KeyValue metadata = 2; + optional ContainerType containerType = 3 [default = KeyValueContainer]; } message CreateContainerResponseProto { } message ReadContainerRequestProto { - required int64 containerID = 1; } message ReadContainerResponseProto { @@ -254,15 +264,14 @@ message ReadContainerResponseProto { } message UpdateContainerRequestProto { - required ContainerData containerData = 1; - optional bool forceUpdate = 2 [default = false]; + repeated KeyValue metadata = 2; + optional bool forceUpdate = 3 [default = false]; } message UpdateContainerResponseProto { } message DeleteContainerRequestProto { - required int64 containerID = 1; optional bool forceDelete = 2 [default = false]; } @@ -270,7 +279,6 @@ message DeleteContainerResponseProto { } message ListContainerRequestProto { - required int64 startContainerID = 1; optional uint32 count = 2; // Max Results to return } @@ -279,7 +287,6 @@ message ListContainerResponseProto { } message CloseContainerRequestProto { - required int64 containerID = 1; } message CloseContainerResponseProto { @@ -292,6 +299,7 @@ message KeyData { optional int64 flags = 2; // for future use. repeated KeyValue metadata = 3; repeated ChunkInfo chunks = 4; + optional int64 size = 5; } // Key Messages. 
@@ -300,10 +308,11 @@ message PutKeyRequestProto { } message PutKeyResponseProto { + required GetCommittedBlockLengthResponseProto committedBlockLength = 1; } message GetKeyRequestProto { - required KeyData keyData = 1; + required DatanodeBlockID blockID = 1; } message GetKeyResponseProto { @@ -315,11 +324,19 @@ message DeleteKeyRequestProto { required DatanodeBlockID blockID = 1; } +message GetCommittedBlockLengthRequestProto { + required DatanodeBlockID blockID = 1; +} + +message GetCommittedBlockLengthResponseProto { + required DatanodeBlockID blockID = 1; + required int64 blockLength = 2; +} + message DeleteKeyResponseProto { } message ListKeyRequestProto { - required int64 containerID = 1; optional int64 startLocalID = 2; required uint32 count = 3; @@ -413,16 +430,22 @@ message CopyContainerRequestProto { } message CopyContainerResponseProto { - required string archiveName = 1; + required int64 containerID = 1; required uint64 readOffset = 2; required uint64 len = 3; required bool eof = 4; - repeated bytes data = 5; + required bytes data = 5; optional int64 checksum = 6; } service XceiverClientProtocolService { // A client-to-datanode RPC to send container commands rpc send(stream ContainerCommandRequestProto) returns - (stream ContainerCommandResponseProto) {} + (stream ContainerCommandResponseProto) {}; + +} + +service IntraDatanodeProtocolService { + // An intradatanode service to copy the raw containerdata betwen nodes + rpc download (CopyContainerRequestProto) returns (stream CopyContainerResponseProto); } \ No newline at end of file diff --git a/hadoop-hdds/common/src/main/proto/hdds.proto b/hadoop-hdds/common/src/main/proto/hdds.proto index a5ce9949402..dedc57bb76c 100644 --- a/hadoop-hdds/common/src/main/proto/hdds.proto +++ b/hadoop-hdds/common/src/main/proto/hdds.proto @@ -40,13 +40,17 @@ message Port { required uint32 value = 2; } +message PipelineID { + required string id = 1; +} + message Pipeline { required string leaderID = 1; repeated DatanodeDetailsProto members = 2; optional LifeCycleState state = 3 [default = OPEN]; optional ReplicationType type = 4 [default = STAND_ALONE]; optional ReplicationFactor factor = 5 [default = ONE]; - optional string name = 6; + required PipelineID id = 6; } message KeyValue { @@ -129,7 +133,7 @@ enum LifeCycleEvent { message SCMContainerInfo { required int64 containerID = 1; required LifeCycleState state = 2; - optional string pipelineName = 3; + optional PipelineID pipelineID = 3; // This is not total size of container, but space allocated by SCM for // clients to write blocks required uint64 allocatedBytes = 4; @@ -167,6 +171,20 @@ enum ReplicationFactor { THREE = 3; } +enum ScmOps { + allocateBlock = 1; + keyBlocksInfoList = 2; + getScmInfo = 3; + deleteBlock = 4; + createReplicationPipeline = 5; + allocateContainer = 6; + getContainer = 7; + getContainerWithPipeline = 8; + listContainer = 9; + deleteContainer = 10; + queryNode = 11; +} + /** * Block ID that uniquely identify a block by SCM. */ diff --git a/hadoop-hdds/common/src/main/resources/hdds-version-info.properties b/hadoop-hdds/common/src/main/resources/hdds-version-info.properties new file mode 100644 index 00000000000..2cbd817ebbf --- /dev/null +++ b/hadoop-hdds/common/src/main/resources/hdds-version-info.properties @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +version=${declared.hdds.version} +revision=${version-info.scm.commit} +branch=${version-info.scm.branch} +user=${user.name} +date=${version-info.build.time} +url=${version-info.scm.uri} +srcChecksum=${version-info.source.md5} +protocVersion=${protobuf.version} diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 530fb09b24a..a74124e30e0 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -62,6 +62,29 @@ this not set. Ideally, this should be mapped to a fast disk like an SSD. + + hdds.datanode.dir + + OZONE, CONTAINER, STORAGE, MANAGEMENT + Determines where on the local filesystem HDDS data will be + stored. Defaults to dfs.datanode.data.dir if not specified. + The directories should be tagged with corresponding storage types + ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for storage policies. The default + storage type will be DISK if the directory does not have a storage type + tagged explicitly. + + + + hdds.datanode.volume.choosing.policy + + OZONE, CONTAINER, STORAGE, MANAGEMENT + + The class name of the policy for choosing volumes in the list of + directories. Defaults to + org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy. + This volume choosing policy selects volumes in a round-robin order. + + dfs.container.ratis.enabled false @@ -71,15 +94,6 @@ the replication pipeline supported by ozone. - - dfs.container.grpc.enabled - false - OZONE, MANAGEMENT, PIPELINE, RATIS - Ozone supports different kinds of replication pipelines - protocols. grpc is one of the replication pipeline protocol supported by - ozone. - - dfs.container.ratis.ipc 9858 @@ -112,6 +126,15 @@ will use for writing chunks (60 by default). + + dfs.container.ratis.replication.level + MAJORITY + OZONE, RATIS + Replication level to be used by datanode for submitting a + container command to ratis. Available replication levels are ALL and + MAJORTIY, MAJORITY is used as the default replication level. + + dfs.container.ratis.segment.size 1073741824 @@ -134,6 +157,25 @@ OZONE, RATIS, MANAGEMENT The timeout duration for ratis client request. + + dfs.ratis.client.request.max.retries + 180 + OZONE, RATIS, MANAGEMENT + Number of retries for ratis client request. + + + dfs.ratis.client.request.retry.interval + 100ms + OZONE, RATIS, MANAGEMENT + Interval between successive retries for a ratis client request. + + + + dfs.ratis.server.retry-cache.timeout.duration + 600000ms + OZONE, RATIS, MANAGEMENT + Retry Cache entry timeout for ratis server. + dfs.ratis.server.request.timeout.duration 3s @@ -141,13 +183,46 @@ The timeout duration for ratis server request. - ozone.container.report.interval + dfs.ratis.leader.election.minimum.timeout.duration + 1s + OZONE, RATIS, MANAGEMENT + The minimum timeout duration for ratis leader election. + Default is 1s. 
+ + + + dfs.ratis.server.failure.duration + 120s + OZONE, RATIS, MANAGEMENT + The timeout duration for ratis server failure detection, + once the threshold has reached, the ratis state machine will be informed + about the failure in the ratis ring + + + + hdds.node.report.interval + 60000ms + OZONE, CONTAINER, MANAGEMENT + Time interval of the datanode to send node report. Each + datanode periodically send node report to SCM. Unit could be + defined with postfix (ns,ms,s,m,h,d) + + + hdds.container.report.interval 60000ms OZONE, CONTAINER, MANAGEMENT Time interval of the datanode to send container report. Each - datanode periodically send container report upon receive - sendContainerReport from SCM. Unit could be defined with - postfix (ns,ms,s,m,h,d) + datanode periodically send container report to SCM. Unit could be + defined with postfix (ns,ms,s,m,h,d) + + + hdds.command.status.report.interval + 60000ms + OZONE, CONTAINER, MANAGEMENT + Time interval of the datanode to send status of command + execution. Each datanode periodically the execution status of commands + received from SCM to SCM. Unit could be defined with postfix + (ns,ms,s,m,h,d) @@ -213,6 +288,22 @@ Connection timeout for Ozone client in milliseconds. + + ozone.client.max.retries + 50 + OZONE, CLIENT + Maximum number of retries by Ozone Client on encountering + exception while fetching committed block length. + + + + ozone.client.retry.interval + 200ms + OZONE, CLIENT + Interval between retries by Ozone Client on encountering + exception while fetching committed block length. + + ozone.client.protocol org.apache.hadoop.ozone.client.rpc.RpcClient @@ -247,17 +338,6 @@ Please note: By default ozone is disabled on a hadoop cluster. - - ozone.handler.type - distributed - OZONE, REST - - Tells ozone which storage handler to use. The possible values are: - distributed - The Ozone distributed storage handler, which speaks to - OM/SCM on the backend and provides REST services to clients. - local - Local Storage handler strictly for testing - To be removed. - - ozone.key.deleting.limit.per.task 1000 @@ -386,16 +466,6 @@ Default user permissions used in OM. - - ozone.localstorage.root - ${hadoop.tmp.dir}/ozone - OZONE, DEBUG - - This is used only for testing purposes. This value is used by the local - storage handler to simulate a REST backend. This is useful only when - debugging the REST front end independent of OM and SCM. To be removed. - - ozone.metadata.dirs @@ -529,13 +599,13 @@ The port number of the Ozone SCM client service. - ozone.scm.container.deletion-choosing.policy + ozone.scm.keyvalue.container.deletion-choosing.policy org.apache.hadoop.ozone.container.common.impl.TopNOrderedContainerDeletionChoosingPolicy OZONE, MANAGEMENT - The policy used for choosing desire containers for block deletion. + The policy used for choosing desired keyvalue containers for block deletion. Datanode selects some containers to process block deletion in a certain interval defined by ozone.block.deleting.service.interval. The number of containers to process in each interval is defined @@ -572,12 +642,11 @@ - ozone.scm.container.size.gb - 5 + ozone.scm.container.size + 5GB OZONE, PERFORMANCE, MANAGEMENT - Default container size used by Ozone. This value is specified - in GB. + Default container size used by Ozone. There are two considerations while picking this number. The speed at which a container can be replicated, determined by the network speed and the metadata that each container generates. 
So selecting a large number @@ -665,7 +734,7 @@ - ozone.scm.heartbeat.interval + hdds.heartbeat.interval 30s OZONE, MANAGEMENT @@ -776,7 +845,7 @@ ozone.scm.names - OZONE + OZONE, REQUIRED The value of this property is a set of DNS | DNS:PORT | IP Address | IP:PORT. Written as a comma separated string. e.g. scm1, @@ -906,9 +975,9 @@ - ozone.scm.container.close.threshold + hdds.container.close.threshold 0.9f - OZONE, SCM + OZONE, DATANODE This determines the threshold to be used for closing a container. When the container used percentage reaches this threshold, @@ -1028,4 +1097,121 @@ - \ No newline at end of file + + hdds.write.lock.reporting.threshold.ms + 5000 + OZONE, DATANODE, MANAGEMENT + + When a write lock is held for a long time, this will be logged as the + lock is released. This sets how long the lock must be held for logging + to occur. + + + + + hdds.lock.suppress.warning.interval.ms + 10000 + OZONE, DATANODE, MANAGEMENT + + Instrumentation reporting long critical sections will suppress + consecutive warnings within this interval. + + + + + hdds.command.status.report.interval + 30s + OZONE, DATANODE, MANAGEMENT + Time interval of the datanode to send status of commands + executed since last report. Unit could be defined with + postfix (ns,ms,s,m,h,d) + + + ozone.scm.pipeline.creation.lease.timeout + 60s + OZONE, SCM, PIPELINE + + Pipeline creation timeout in milliseconds to be used by SCM. When + BEGIN_CREATE event happens the pipeline is moved from ALLOCATED to + CREATING state, SCM will now wait for the configured amount of time + to get COMPLETE_CREATE event if it doesn't receive it will move the + pipeline to DELETING. + + + + + hdds.scm.chillmode.threshold.pct + 0.99 + HDDS,SCM,OPERATION + % of containers which should have at least one + reported replica before SCM comes out of chill mode. + + + + + hdds.scm.chillmode.enabled + true + HDDS,SCM,OPERATION + Boolean value to enable or disable SCM chill mode. + + + + + hdds.container.action.max.limit + 20 + DATANODE + + Maximum number of Container Actions sent by the datanode to SCM in a + single heartbeat. + + + + + hdds.pipeline.action.max.limit + 20 + DATANODE + + Maximum number of Pipeline Actions sent by the datanode to SCM in a + single heartbeat. + + + + hdds.scm.watcher.timeout + 10m + OZONE, SCM, MANAGEMENT + + Timeout for the watchers of the HDDS SCM CommandWatchers. After this + duration the Copy/Delete container commands will be sent again to the + datanode unless the datanode confirms the completion. + + + + + hdds.db.profile + SSD + OZONE, OM, PERFORMANCE, REQUIRED + This property allows user to pick a configuration + that tunes the RocksDB settings for the hardware it is running + on. Right now, we have SSD and DISK as profile options. + + + + hdds.datanode.replication.work.dir + DATANODE + Temporary which is used during the container replication + betweeen datanodes. Should have enough space to store multiple container + (in compressed format), but doesn't require fast io access such as SSD. + + + + + hdds.lock.max.concurrency + 100 + HDDS + Locks in HDDS/Ozone uses object pool to maintain active locks + in the system, this property defines the max limit for the locks that + will be maintained in the pool. 
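The Ratis retry keys above feed RatisHelper#createRetryPolicy earlier in this patch, which asserts that max.retries times retry.interval exceeds five times the minimum leader election timeout; with the shipped defaults that is 180 x 100ms = 18s against 5 x 1s. A minimal sketch of building the policy follows; the class name is illustrative and an unset key simply falls back to the compiled-in OzoneConfigKeys default.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.ratis.RatisHelper;
    import org.apache.ratis.retry.RetryPolicy;

    /**
     * Minimal sketch: derive the Ratis client retry policy from configuration.
     * With no keys set, createRetryPolicy uses the OzoneConfigKeys defaults,
     * which satisfy its precondition checks.
     */
    public final class RetryPolicySketch {
      private RetryPolicySketch() {
      }

      public static RetryPolicy defaultPolicy() {
        // 180 retries at a fixed 100ms sleep by default.
        return RatisHelper.createRetryPolicy(new Configuration());
      }
    }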
+ + + + diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/audit/TestOzoneAuditLogger.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/audit/TestOzoneAuditLogger.java index d3cc9e4ce8a..6c59de61ac8 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/audit/TestOzoneAuditLogger.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/audit/TestOzoneAuditLogger.java @@ -28,6 +28,7 @@ import java.io.File; import java.io.IOException; import java.util.List; +import java.util.Map; import static org.junit.Assert.assertTrue; @@ -36,10 +37,29 @@ */ public class TestOzoneAuditLogger { - private static final Logger LOG = LoggerFactory.getLogger - (TestOzoneAuditLogger.class.getName()); - private static AuditLogger AUDIT = new AuditLogger(AuditLoggerType.OMLOGGER); - public DummyEntity auditableObj = new DummyEntity(); + private static final Logger LOG = + LoggerFactory.getLogger(TestOzoneAuditLogger.class.getName()); + + private static final AuditLogger AUDIT = + new AuditLogger(AuditLoggerType.OMLOGGER); + + private static final String SUCCESS = AuditEventStatus.SUCCESS.name(); + private static final String FAILURE = AuditEventStatus.FAILURE.name(); + + private static final Map PARAMS = + new DummyEntity().toAuditMap(); + + private static final AuditMessage WRITE_FAIL_MSG = new AuditMessage("john", + "192.168.0.1", DummyAction.CREATE_VOLUME.name(), PARAMS, FAILURE); + + private static final AuditMessage WRITE_SUCCESS_MSG = new AuditMessage("john", + "192.168.0.1", DummyAction.CREATE_VOLUME.name(), PARAMS, SUCCESS); + + private static final AuditMessage READ_FAIL_MSG = new AuditMessage("john", + "192.168.0.1", DummyAction.READ_VOLUME.name(), PARAMS, FAILURE); + + private static final AuditMessage READ_SUCCESS_MSG = new AuditMessage("john", + "192.168.0.1", DummyAction.READ_VOLUME.name(), PARAMS, SUCCESS); @BeforeClass public static void setUp(){ @@ -48,13 +68,13 @@ public static void setUp(){ @AfterClass public static void tearDown() { - File file = new File("audit.log"); - if (FileUtils.deleteQuietly(file)) { - LOG.info(file.getName() + - " has been deleted as all tests have completed."); - } else { - LOG.info("audit.log could not be deleted."); - } + File file = new File("audit.log"); + if (FileUtils.deleteQuietly(file)) { + LOG.info(file.getName() + + " has been deleted as all tests have completed."); + } else { + LOG.info("audit.log could not be deleted."); + } } /** @@ -62,20 +82,31 @@ public static void tearDown() { */ @Test public void logInfoWriteSuccess() throws IOException { - AUDIT.logWriteSuccess(DummyAction.CREATE_VOLUME, auditableObj.toAuditMap(), Level.INFO); - String expected = "[INFO ] OMAudit - CREATE_VOLUME [ key1=\"value1\" " + - "key2=\"value2\"] SUCCESS"; + AUDIT.logWriteSuccess(Level.INFO, WRITE_SUCCESS_MSG); + String expected = + "[INFO ] OMAudit - " + WRITE_SUCCESS_MSG.getFormattedMessage(); verifyLog(expected); } /** - * Test to verify default log level is INFO + * Test to verify default log level is INFO when logging success events. 
*/ @Test - public void verifyDefaultLogLevel() throws IOException { - AUDIT.logWriteSuccess(DummyAction.CREATE_VOLUME, auditableObj.toAuditMap()); - String expected = "[INFO ] OMAudit - CREATE_VOLUME [ key1=\"value1\" " + - "key2=\"value2\"] SUCCESS"; + public void verifyDefaultLogLevelForSuccess() throws IOException { + AUDIT.logWriteSuccess(WRITE_SUCCESS_MSG); + String expected = + "[INFO ] OMAudit - " + WRITE_SUCCESS_MSG.getFormattedMessage(); + verifyLog(expected); + } + + /** + * Test to verify default log level is ERROR when logging failure events. + */ + @Test + public void verifyDefaultLogLevelForFailure() throws IOException { + AUDIT.logWriteFailure(WRITE_FAIL_MSG); + String expected = + "[ERROR] OMAudit - " + WRITE_FAIL_MSG.getFormattedMessage(); verifyLog(expected); } @@ -84,9 +115,9 @@ public void verifyDefaultLogLevel() throws IOException { */ @Test public void logErrorWriteFailure() throws IOException { - AUDIT.logWriteFailure(DummyAction.CREATE_VOLUME, auditableObj.toAuditMap(), Level.ERROR); - String expected = "[ERROR] OMAudit - CREATE_VOLUME [ key1=\"value1\" " + - "key2=\"value2\"] FAILURE"; + AUDIT.logWriteFailure(Level.ERROR, WRITE_FAIL_MSG); + String expected = + "[ERROR] OMAudit - " + WRITE_FAIL_MSG.getFormattedMessage(); verifyLog(expected); } @@ -95,12 +126,11 @@ public void logErrorWriteFailure() throws IOException { */ @Test public void notLogReadEvents() throws IOException { - AUDIT.logReadSuccess(DummyAction.READ_VOLUME, auditableObj.toAuditMap(), Level.INFO); - AUDIT.logReadFailure(DummyAction.READ_VOLUME, auditableObj.toAuditMap(), Level.INFO); - AUDIT.logReadFailure(DummyAction.READ_VOLUME, auditableObj.toAuditMap(), Level.ERROR); - AUDIT.logReadFailure(DummyAction.READ_VOLUME, auditableObj.toAuditMap(), Level.ERROR, - new Exception("test")); - verifyLog(null); + AUDIT.logReadSuccess(Level.INFO, READ_SUCCESS_MSG); + AUDIT.logReadFailure(Level.INFO, READ_FAIL_MSG); + AUDIT.logReadFailure(Level.ERROR, READ_FAIL_MSG); + AUDIT.logReadFailure(Level.ERROR, READ_FAIL_MSG, new Exception("test")); + verifyNoLog(); } /** @@ -108,24 +138,40 @@ public void notLogReadEvents() throws IOException { */ @Test public void notLogDebugEvents() throws IOException { - AUDIT.logWriteSuccess(DummyAction.CREATE_VOLUME, auditableObj.toAuditMap(), Level.DEBUG); - AUDIT.logReadSuccess(DummyAction.READ_VOLUME, auditableObj.toAuditMap(), Level.DEBUG); - verifyLog(null); + AUDIT.logWriteSuccess(Level.DEBUG, WRITE_SUCCESS_MSG); + AUDIT.logReadSuccess(Level.DEBUG, READ_SUCCESS_MSG); + verifyNoLog(); } - public void verifyLog(String expected) throws IOException { - File file = new File("audit.log"); - List lines = FileUtils.readLines(file, (String)null); - if(expected == null){ - // When no log entry is expected, the log file must be empty - assertTrue(lines.size() == 0); - } else { - // When log entry is expected, the log file will contain one line and - // that must be equal to the expected string - assertTrue(expected.equalsIgnoreCase(lines.get(0))); - //empty the file - lines.remove(0); - FileUtils.writeLines(file, lines, false); + private void verifyLog(String expected) throws IOException { + File file = new File("audit.log"); + List lines = FileUtils.readLines(file, (String)null); + final int retry = 5; + int i = 0; + while (lines.isEmpty() && i < retry) { + lines = FileUtils.readLines(file, (String)null); + try { + Thread.sleep(500 * (i + 1)); + } catch(InterruptedException ie) { + Thread.currentThread().interrupt(); + break; } + i++; + } + + // When log entry is expected, the log 
file will contain one line and + // that must be equal to the expected string + assertTrue(lines.size() != 0); + assertTrue(expected.equalsIgnoreCase(lines.get(0))); + //empty the file + lines.remove(0); + FileUtils.writeLines(file, lines, false); + } + + private void verifyNoLog() throws IOException { + File file = new File("audit.log"); + List lines = FileUtils.readLines(file, (String)null); + // When no log entry is expected, the log file must be empty + assertTrue(lines.size() == 0); } } diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lease/TestLeaseManager.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lease/TestLeaseManager.java index 517c1a7c47f..bdc70fc983a 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lease/TestLeaseManager.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lease/TestLeaseManager.java @@ -67,7 +67,7 @@ public boolean equals(Object obj) { public void testLeaseAcquireAndRelease() throws LeaseException { //It is assumed that the test case execution won't take more than 5 seconds, //if it takes more time increase the defaultTimeout value of LeaseManager. - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); DummyResource resourceTwo = new DummyResource("two"); @@ -93,7 +93,7 @@ public void testLeaseAcquireAndRelease() throws LeaseException { @Test public void testLeaseAlreadyExist() throws LeaseException { - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); DummyResource resourceTwo = new DummyResource("two"); @@ -113,7 +113,7 @@ public void testLeaseAlreadyExist() throws LeaseException { @Test public void testLeaseNotFound() throws LeaseException, InterruptedException { - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); DummyResource resourceTwo = new DummyResource("two"); @@ -154,7 +154,7 @@ public void testLeaseNotFound() throws LeaseException, InterruptedException { @Test public void testCustomLeaseTimeout() throws LeaseException { - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); DummyResource resourceTwo = new DummyResource("two"); @@ -179,7 +179,7 @@ public void testCustomLeaseTimeout() throws LeaseException { @Test public void testLeaseCallback() throws LeaseException, InterruptedException { Map leaseStatus = new HashMap<>(); - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); Lease leaseOne = manager.acquire(resourceOne); @@ -209,7 +209,7 @@ public void testCallbackExecutionInCaseOfLeaseRelease() throws LeaseException, InterruptedException { // Callbacks should not be executed in case of lease release Map leaseStatus = new HashMap<>(); - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); Lease leaseOne = manager.acquire(resourceOne); @@ -231,7 +231,7 @@ public void testCallbackExecutionInCaseOfLeaseRelease() public void 
testLeaseCallbackWithMultipleLeases() throws LeaseException, InterruptedException { Map leaseStatus = new HashMap<>(); - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); DummyResource resourceTwo = new DummyResource("two"); @@ -302,7 +302,7 @@ public void testLeaseCallbackWithMultipleLeases() @Test public void testReuseReleasedLease() throws LeaseException { - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); Lease leaseOne = manager.acquire(resourceOne); @@ -324,13 +324,12 @@ public void testReuseReleasedLease() throws LeaseException { @Test public void testReuseTimedOutLease() throws LeaseException, InterruptedException { - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); Lease leaseOne = manager.acquire(resourceOne); Assert.assertEquals(leaseOne, manager.get(resourceOne)); Assert.assertFalse(leaseOne.hasExpired()); - // wait for lease to expire long sleepTime = leaseOne.getRemainingTime() + 1000; try { @@ -352,7 +351,7 @@ public void testReuseTimedOutLease() @Test public void testRenewLease() throws LeaseException, InterruptedException { - LeaseManager manager = new LeaseManager<>(5000); + LeaseManager manager = new LeaseManager<>("Test", 5000); manager.start(); DummyResource resourceOne = new DummyResource("one"); Lease leaseOne = manager.acquire(resourceOne); diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lock/TestLockManager.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lock/TestLockManager.java new file mode 100644 index 00000000000..fa3030d0c35 --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lock/TestLockManager.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.lock; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.junit.Assert; +import org.junit.Test; + +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Test-cases to test LockManager. + */ +public class TestLockManager { + + @Test(timeout = 1000) + public void testWithDifferentResource() { + LockManager manager = new LockManager<>(new OzoneConfiguration()); + manager.lock("/resourceOne"); + // This should work, as they are different resource. + manager.lock("/resourceTwo"); + manager.unlock("/resourceOne"); + manager.unlock("/resourceTwo"); + Assert.assertTrue(true); + } + + @Test + public void testWithSameResource() throws Exception { + LockManager manager = new LockManager<>(new OzoneConfiguration()); + manager.lock("/resourceOne"); + AtomicBoolean gotLock = new AtomicBoolean(false); + new Thread(() -> { + manager.lock("/resourceOne"); + gotLock.set(true); + manager.unlock("/resourceOne"); + }).start(); + // Let's give some time for the new thread to run + Thread.sleep(100); + // Since the new thread is trying to get lock on same object, it will wait. + Assert.assertFalse(gotLock.get()); + manager.unlock("/resourceOne"); + // Since we have released the lock, the new thread should have the lock + // now + // Let's give some time for the new thread to run + Thread.sleep(100); + Assert.assertTrue(gotLock.get()); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lock/package-info.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lock/package-info.java new file mode 100644 index 00000000000..a96bc16248c --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/lock/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.lock; +/* + This package contains the lock related test classes. + */ \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/TestHddsIdFactory.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/TestHddsIdFactory.java new file mode 100644 index 00000000000..a341ccc223a --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/TestHddsIdFactory.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.utils; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import org.apache.hadoop.hdds.HddsIdFactory; +import org.junit.After; +import static org.junit.Assert.assertEquals; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Test the JMX interface for the rocksdb metastore implementation. + */ +public class TestHddsIdFactory { + + private static final Set ID_SET = ConcurrentHashMap.newKeySet(); + private static final int IDS_PER_THREAD = 10000; + private static final int NUM_OF_THREADS = 5; + + @After + public void cleanup() { + ID_SET.clear(); + } + + @Test + public void testGetLongId() throws Exception { + + ExecutorService executor = Executors.newFixedThreadPool(5); + List> tasks = new ArrayList<>(5); + addTasks(tasks); + List> result = executor.invokeAll(tasks); + assertEquals(IDS_PER_THREAD * NUM_OF_THREADS, ID_SET.size()); + for (Future r : result) { + assertEquals(r.get().intValue(), IDS_PER_THREAD); + } + } + + private void addTasks(List> tasks) { + for (int i = 0; i < NUM_OF_THREADS; i++) { + Callable task = () -> { + for (int idNum = 0; idNum < IDS_PER_THREAD; idNum++) { + long var = HddsIdFactory.getLongId(); + if (ID_SET.contains(var)) { + Assert.fail("Duplicate id found"); + } + ID_SET.add(var); + } + return IDS_PER_THREAD; + }; + tasks.add(task); + } + } +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/TestMetadataStore.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/TestMetadataStore.java similarity index 63% rename from hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/TestMetadataStore.java rename to hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/TestMetadataStore.java index a5f2f93a044..30fc7f313a0 100644 --- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/ozone/TestMetadataStore.java +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/TestMetadataStore.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.ozone; +package org.apache.hadoop.utils; import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; @@ -26,20 +26,19 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtilClient; +import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.utils.BatchOperation; +import org.apache.hadoop.utils.MetadataStore.KeyValue; import org.apache.hadoop.utils.MetadataKeyFilters.KeyPrefixFilter; import org.apache.hadoop.utils.MetadataKeyFilters.MetadataKeyFilter; -import org.apache.hadoop.utils.MetadataStore; -import org.apache.hadoop.utils.MetadataStoreBuilder; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.slf4j.event.Level; import java.io.File; import java.io.IOException; @@ -49,9 +48,16 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import java.util.UUID; + import java.util.concurrent.atomic.AtomicInteger; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import static org.junit.runners.Parameterized.Parameters; /** @@ -109,6 +115,107 @@ public void init() throws IOException { } } + @Test + public void testIterator() throws Exception { + Configuration conf = new OzoneConfiguration(); + conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, storeImpl); + File dbDir = GenericTestUtils.getRandomizedTestDir(); + MetadataStore dbStore = MetadataStoreBuilder.newBuilder() + .setConf(conf) + .setCreateIfMissing(true) + .setDbFile(dbDir) + .build(); + + //As database is empty, check whether iterator is working as expected or + // not. + MetaStoreIterator metaStoreIterator = dbStore.iterator(); + assertFalse(metaStoreIterator.hasNext()); + try { + metaStoreIterator.next(); + fail("testIterator failed"); + } catch (NoSuchElementException ex) { + GenericTestUtils.assertExceptionContains("Store has no more elements", + ex); + } + + for (int i = 0; i < 10; i++) { + store.put(getBytes("a" + i), getBytes("a-value" + i)); + } + + metaStoreIterator = dbStore.iterator(); + + int i = 0; + while (metaStoreIterator.hasNext()) { + KeyValue val = metaStoreIterator.next(); + assertEquals("a" + i, getString(val.getKey())); + assertEquals("a-value" + i, getString(val.getValue())); + i++; + } + + // As we have iterated all the keys in database, hasNext should return + // false and next() should throw NoSuchElement exception. 
+ + assertFalse(metaStoreIterator.hasNext()); + try { + metaStoreIterator.next(); + fail("testIterator failed"); + } catch (NoSuchElementException ex) { + GenericTestUtils.assertExceptionContains("Store has no more elements", + ex); + } + FileUtils.deleteDirectory(dbDir); + + } + + @Test + public void testMetaStoreConfigDifferentFromType() throws IOException { + + Configuration conf = new OzoneConfiguration(); + conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, storeImpl); + String dbType; + GenericTestUtils.setLogLevel(MetadataStoreBuilder.LOG, Level.DEBUG); + GenericTestUtils.LogCapturer logCapturer = + GenericTestUtils.LogCapturer.captureLogs(MetadataStoreBuilder.LOG); + if(storeImpl.equals(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB)) { + dbType = "RocksDB"; + } else { + dbType = "LevelDB"; + } + + File dbDir = GenericTestUtils.getTestDir(getClass().getSimpleName() + + "-" + dbType.toLowerCase() + "-test"); + MetadataStore dbStore = MetadataStoreBuilder.newBuilder().setConf(conf) + .setCreateIfMissing(true).setDbFile(dbDir).setDBType(dbType).build(); + assertTrue(logCapturer.getOutput().contains("Using dbType " + dbType + "" + + " for metastore")); + dbStore.close(); + dbStore.destroy(); + FileUtils.deleteDirectory(dbDir); + + } + + @Test + public void testdbTypeNotSet() throws IOException { + + Configuration conf = new OzoneConfiguration(); + conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, storeImpl); + GenericTestUtils.setLogLevel(MetadataStoreBuilder.LOG, Level.DEBUG); + GenericTestUtils.LogCapturer logCapturer = + GenericTestUtils.LogCapturer.captureLogs(MetadataStoreBuilder.LOG); + + + File dbDir = GenericTestUtils.getTestDir(getClass().getSimpleName() + + "-" + storeImpl.toLowerCase() + "-test"); + MetadataStore dbStore = MetadataStoreBuilder.newBuilder().setConf(conf) + .setCreateIfMissing(true).setDbFile(dbDir).build(); + assertTrue(logCapturer.getOutput().contains("dbType is null, using dbType" + + " " + storeImpl)); + dbStore.close(); + dbStore.destroy(); + FileUtils.deleteDirectory(dbDir); + + } + @After public void cleanup() throws IOException { if (store != null) { @@ -134,17 +241,17 @@ private String getString(byte[] bytes) { public void testGetDelete() throws IOException { for (int i=0; i<10; i++) { byte[] va = store.get(getBytes("a" + i)); - Assert.assertEquals("a-value" + i, getString(va)); + assertEquals("a-value" + i, getString(va)); byte[] vb = store.get(getBytes("b" + i)); - Assert.assertEquals("b-value" + i, getString(vb)); + assertEquals("b-value" + i, getString(vb)); } String keyToDel = "del-" + UUID.randomUUID().toString(); store.put(getBytes(keyToDel), getBytes(keyToDel)); - Assert.assertEquals(keyToDel, getString(store.get(getBytes(keyToDel)))); + assertEquals(keyToDel, getString(store.get(getBytes(keyToDel)))); store.delete(getBytes(keyToDel)); - Assert.assertEquals(null, store.get(getBytes(keyToDel))); + assertEquals(null, store.get(getBytes(keyToDel))); } @Test @@ -179,8 +286,8 @@ private void testPeek(String peekKey, String prevKey, String nextKey) k = getString(current.getKey()); v = getString(current.getValue()); } - Assert.assertEquals(peekKey, k); - Assert.assertEquals(v, getExpectedValue(peekKey)); + assertEquals(peekKey, k); + assertEquals(v, getExpectedValue(peekKey)); // Look for prev k = null; @@ -191,8 +298,8 @@ private void testPeek(String peekKey, String prevKey, String nextKey) k = getString(prev.getKey()); v = getString(prev.getValue()); } - Assert.assertEquals(prevKey, k); - Assert.assertEquals(v, getExpectedValue(prevKey)); + 
assertEquals(prevKey, k); + assertEquals(v, getExpectedValue(prevKey)); // Look for next k = null; @@ -203,8 +310,8 @@ private void testPeek(String peekKey, String prevKey, String nextKey) k = getString(next.getKey()); v = getString(next.getValue()); } - Assert.assertEquals(nextKey, k); - Assert.assertEquals(v, getExpectedValue(nextKey)); + assertEquals(nextKey, k); + assertEquals(v, getExpectedValue(nextKey)); } @Test @@ -222,9 +329,9 @@ public void testIterateKeys() throws IOException { return true; }); - Assert.assertFalse(result.isEmpty()); + assertFalse(result.isEmpty()); for (int i=0; i + assertEquals(10, result.size()); + assertTrue(result.stream().allMatch(entry -> new String(entry.getKey()).startsWith("b") )); - Assert.assertEquals(20, filter1.getKeysScannedNum()); - Assert.assertEquals(10, filter1.getKeysHintedNum()); + assertEquals(20, filter1.getKeysScannedNum()); + assertEquals(10, filter1.getKeysHintedNum()); result = store.getRangeKVs(null, 3, filter1); - Assert.assertEquals(3, result.size()); + assertEquals(3, result.size()); result = store.getRangeKVs(getBytes("b3"), 1, filter1); - Assert.assertEquals("b-value3", getString(result.get(0).getValue())); + assertEquals("b-value3", getString(result.get(0).getValue())); // Define a customized filter that filters keys by suffix. // Returns all "*2" entries. MetadataKeyFilter filter2 = (preKey, currentKey, nextKey) -> getString(currentKey).endsWith("2"); result = store.getRangeKVs(null, MAX_GETRANGE_LENGTH, filter2); - Assert.assertEquals(2, result.size()); - Assert.assertEquals("a2", getString(result.get(0).getKey())); - Assert.assertEquals("b2", getString(result.get(1).getKey())); + assertEquals(2, result.size()); + assertEquals("a2", getString(result.get(0).getKey())); + assertEquals("b2", getString(result.get(1).getKey())); result = store.getRangeKVs(null, 1, filter2); - Assert.assertEquals(1, result.size()); - Assert.assertEquals("a2", getString(result.get(0).getKey())); + assertEquals(1, result.size()); + assertEquals("a2", getString(result.get(0).getKey())); // Apply multiple filters. result = store.getRangeKVs(null, MAX_GETRANGE_LENGTH, filter1, filter2); - Assert.assertEquals(1, result.size()); - Assert.assertEquals("b2", getString(result.get(0).getKey())); - Assert.assertEquals("b-value2", getString(result.get(0).getValue())); + assertEquals(1, result.size()); + assertEquals("b2", getString(result.get(0).getKey())); + assertEquals("b-value2", getString(result.get(0).getValue())); // If filter is null, no effect. result = store.getRangeKVs(null, 1, null); - Assert.assertEquals(1, result.size()); - Assert.assertEquals("a0", getString(result.get(0).getKey())); + assertEquals(1, result.size()); + assertEquals("a0", getString(result.get(0).getKey())); } @Test @@ -319,16 +426,16 @@ public void testGetSequentialRangeKVs() throws IOException { // Suppose to return a2 and b2 List> result = store.getRangeKVs(null, MAX_GETRANGE_LENGTH, suffixFilter); - Assert.assertEquals(2, result.size()); - Assert.assertEquals("a2", DFSUtil.bytes2String(result.get(0).getKey())); - Assert.assertEquals("b2", DFSUtil.bytes2String(result.get(1).getKey())); + assertEquals(2, result.size()); + assertEquals("a2", DFSUtil.bytes2String(result.get(0).getKey())); + assertEquals("b2", DFSUtil.bytes2String(result.get(1).getKey())); // Suppose to return just a2, because when it iterates to a3, // the filter no long matches and it should stop from there. 
result = store.getSequentialRangeKVs(null, MAX_GETRANGE_LENGTH, suffixFilter); - Assert.assertEquals(1, result.size()); - Assert.assertEquals("a2", DFSUtil.bytes2String(result.get(0).getKey())); + assertEquals(1, result.size()); + assertEquals("a2", DFSUtil.bytes2String(result.get(0).getKey())); } @Test @@ -336,10 +443,10 @@ public void testGetRangeLength() throws IOException { List> result = null; result = store.getRangeKVs(null, 0); - Assert.assertEquals(0, result.size()); + assertEquals(0, result.size()); result = store.getRangeKVs(null, 1); - Assert.assertEquals(1, result.size()); + assertEquals(1, result.size()); // Count less than zero is invalid. expectedException.expect(IllegalArgumentException.class); @@ -352,7 +459,7 @@ public void testInvalidStartKey() throws IOException { // If startKey is invalid, the returned list should be empty. List> kvs = store.getRangeKVs(getBytes("unknownKey"), MAX_GETRANGE_LENGTH); - Assert.assertEquals(kvs.size(), 0); + assertEquals(kvs.size(), 0); } @Test @@ -372,13 +479,13 @@ public void testDestroyDB() throws IOException { dbStore.put(getBytes("key1"), getBytes("value1")); dbStore.put(getBytes("key2"), getBytes("value2")); - Assert.assertFalse(dbStore.isEmpty()); - Assert.assertTrue(dbDir.exists()); - Assert.assertTrue(dbDir.listFiles().length > 0); + assertFalse(dbStore.isEmpty()); + assertTrue(dbDir.exists()); + assertTrue(dbDir.listFiles().length > 0); dbStore.destroy(); - Assert.assertFalse(dbDir.exists()); + assertFalse(dbDir.exists()); } @Test @@ -420,7 +527,7 @@ public void testBatchWrite() throws IOException { return it.hasNext() && it.next().equals(getString(key)); }); - Assert.assertEquals(8, count.get()); + assertEquals(8, count.get()); } @Test @@ -433,52 +540,51 @@ public void testKeyPrefixFilter() throws IOException { } catch (IllegalArgumentException e) { exception = e; } - Assert.assertTrue( - exception.getMessage().contains("KeyPrefix: b already rejected")); + assertTrue(exception.getMessage().contains("KeyPrefix: b already " + + "rejected")); try { new KeyPrefixFilter().addFilter("b0").addFilter("b", true); } catch (IllegalArgumentException e) { exception = e; } - Assert.assertTrue( - exception.getMessage().contains("KeyPrefix: b already accepted")); + assertTrue(exception.getMessage().contains("KeyPrefix: b already " + + "accepted")); try { new KeyPrefixFilter().addFilter("b", true).addFilter("b0"); } catch (IllegalArgumentException e) { exception = e; } - Assert.assertTrue( - exception.getMessage().contains("KeyPrefix: b0 already rejected")); + assertTrue(exception.getMessage().contains("KeyPrefix: b0 already " + + "rejected")); try { new KeyPrefixFilter().addFilter("b").addFilter("b0", true); } catch (IllegalArgumentException e) { exception = e; } - Assert.assertTrue( - exception.getMessage().contains("KeyPrefix: b0 already accepted")); + assertTrue(exception.getMessage().contains("KeyPrefix: b0 already " + + "accepted")); MetadataKeyFilter filter1 = new KeyPrefixFilter(true) .addFilter("a0") .addFilter("a1") .addFilter("b", true); result = store.getRangeKVs(null, 100, filter1); - Assert.assertEquals(2, result.size()); - Assert.assertTrue(result.stream() - .anyMatch(entry -> new String(entry.getKey()).startsWith("a0")) - && result.stream() - .anyMatch(entry -> new String(entry.getKey()).startsWith("a1"))); + assertEquals(2, result.size()); + assertTrue(result.stream().anyMatch(entry -> new String(entry.getKey()) + .startsWith("a0")) && result.stream().anyMatch(entry -> new String( + entry.getKey()).startsWith("a1"))); filter1 = 
new KeyPrefixFilter(true).addFilter("b", true); result = store.getRangeKVs(null, 100, filter1); - Assert.assertEquals(0, result.size()); + assertEquals(0, result.size()); filter1 = new KeyPrefixFilter().addFilter("b", true); result = store.getRangeKVs(null, 100, filter1); - Assert.assertEquals(10, result.size()); - Assert.assertTrue(result.stream() - .allMatch(entry -> new String(entry.getKey()).startsWith("a"))); + assertEquals(10, result.size()); + assertTrue(result.stream().allMatch(entry -> new String(entry.getKey()) + .startsWith("a"))); } } diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestDBConfigFromFile.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestDBConfigFromFile.java new file mode 100644 index 00000000000..b20ca70d33b --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestDBConfigFromFile.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdfs.DFSUtil; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.DBOptions; +import org.rocksdb.RocksDB; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.apache.hadoop.utils.db.DBConfigFromFile.getOptionsFileNameFromDB; + +/** + * DBConf tests. 
+ */ +public class TestDBConfigFromFile { + private final static String DB_FILE = "test.db"; + private final static String INI_FILE = getOptionsFileNameFromDB(DB_FILE); + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + @Before + public void setUp() throws Exception { + System.setProperty(DBConfigFromFile.CONFIG_DIR, + folder.newFolder().toString()); + ClassLoader classLoader = getClass().getClassLoader(); + File testData = new File(classLoader.getResource(INI_FILE).getFile()); + File dest = Paths.get( + System.getProperty(DBConfigFromFile.CONFIG_DIR), INI_FILE).toFile(); + FileUtils.copyFile(testData, dest); + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void readFromFile() throws IOException { + final List families = + Arrays.asList(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY), + "First", "Second", "Third", + "Fourth", "Fifth", + "Sixth"); + final List columnFamilyDescriptors = + new ArrayList<>(); + for (String family : families) { + columnFamilyDescriptors.add( + new ColumnFamilyDescriptor(family.getBytes(StandardCharsets.UTF_8), + new ColumnFamilyOptions())); + } + + final DBOptions options = DBConfigFromFile.readFromFile(DB_FILE, + columnFamilyDescriptors); + + // Some Random Values Defined in the test.db.ini, we verify that we are + // able to get values that are defined in the test.db.ini. + Assert.assertNotNull(options); + Assert.assertEquals(551615L, options.maxManifestFileSize()); + Assert.assertEquals(1000L, options.keepLogFileNum()); + Assert.assertEquals(1048576, options.writableFileMaxBufferSize()); + } + + @Test + public void readFromFileInvalidConfig() throws IOException { + final List families = + Arrays.asList(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY), + "First", "Second", "Third", + "Fourth", "Fifth", + "Sixth"); + final List columnFamilyDescriptors = + new ArrayList<>(); + for (String family : families) { + columnFamilyDescriptors.add( + new ColumnFamilyDescriptor(family.getBytes(StandardCharsets.UTF_8), + new ColumnFamilyOptions())); + } + + final DBOptions options = DBConfigFromFile.readFromFile("badfile.db.ini", + columnFamilyDescriptors); + + // This has to return a Null, since we have config defined for badfile.db + Assert.assertNull(options); + } +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestDBStoreBuilder.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestDBStoreBuilder.java new file mode 100644 index 00000000000..3e1f364f090 --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestDBStoreBuilder.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * Tests RDBStore creation. + */ +public class TestDBStoreBuilder { + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Before + public void setUp() throws Exception { + System.setProperty(DBConfigFromFile.CONFIG_DIR, + folder.newFolder().toString()); + } + + @Test + public void builderWithoutAnyParams() throws IOException { + Configuration conf = new Configuration(); + thrown.expect(IOException.class); + DBStoreBuilder.newBuilder(conf).build(); + } + + @Test + public void builderWithOneParamV1() throws IOException { + Configuration conf = new Configuration(); + thrown.expect(IOException.class); + DBStoreBuilder.newBuilder(conf) + .setName("Test.db") + .build(); + } + + @Test + public void builderWithOneParamV2() throws IOException { + Configuration conf = new Configuration(); + File newFolder = folder.newFolder(); + if(!newFolder.exists()) { + Assert.assertTrue(newFolder.mkdirs()); + } + thrown.expect(IOException.class); + DBStoreBuilder.newBuilder(conf) + .setPath(newFolder.toPath()) + .build(); + } + + @Test + public void builderWithOpenClose() throws Exception { + Configuration conf = new Configuration(); + File newFolder = folder.newFolder(); + if(!newFolder.exists()) { + Assert.assertTrue(newFolder.mkdirs()); + } + DBStore dbStore = DBStoreBuilder.newBuilder(conf) + .setName("Test.db") + .setPath(newFolder.toPath()) + .build(); + // Nothing to do just open and Close. + dbStore.close(); + } + + @Test + public void builderWithDoubleTableName() throws Exception { + Configuration conf = new Configuration(); + File newFolder = folder.newFolder(); + if(!newFolder.exists()) { + Assert.assertTrue(newFolder.mkdirs()); + } + thrown.expect(IOException.class); + DBStoreBuilder.newBuilder(conf) + .setName("Test.db") + .setPath(newFolder.toPath()) + .addTable("FIRST") + .addTable("FIRST") + .build(); + // Nothing to do , This will throw so we do not have to close. 
+ + } + + @Test + public void builderWithDataWrites() throws Exception { + Configuration conf = new Configuration(); + File newFolder = folder.newFolder(); + if(!newFolder.exists()) { + Assert.assertTrue(newFolder.mkdirs()); + } + try (DBStore dbStore = DBStoreBuilder.newBuilder(conf) + .setName("Test.db") + .setPath(newFolder.toPath()) + .addTable("First") + .addTable("Second") + .build()) { + try (Table firstTable = dbStore.getTable("First")) { + byte[] key = + RandomStringUtils.random(9).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(9).getBytes(StandardCharsets.UTF_8); + firstTable.put(key, value); + byte[] temp = firstTable.get(key); + Arrays.equals(value, temp); + } + + try (Table secondTable = dbStore.getTable("Second")) { + Assert.assertTrue(secondTable.isEmpty()); + } + } + } + + @Test + public void builderWithDiskProfileWrites() throws Exception { + Configuration conf = new Configuration(); + File newFolder = folder.newFolder(); + if(!newFolder.exists()) { + Assert.assertTrue(newFolder.mkdirs()); + } + try (DBStore dbStore = DBStoreBuilder.newBuilder(conf) + .setName("Test.db") + .setPath(newFolder.toPath()) + .addTable("First") + .addTable("Second") + .setProfile(DBProfile.DISK) + .build()) { + try (Table firstTable = dbStore.getTable("First")) { + byte[] key = + RandomStringUtils.random(9).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(9).getBytes(StandardCharsets.UTF_8); + firstTable.put(key, value); + byte[] temp = firstTable.get(key); + Arrays.equals(value, temp); + } + + try (Table secondTable = dbStore.getTable("Second")) { + Assert.assertTrue(secondTable.isEmpty()); + } + } + } + + +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBStore.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBStore.java new file mode 100644 index 00000000000..0b673d38d29 --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBStore.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdfs.DFSUtil; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.DBOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.Statistics; +import org.rocksdb.StatsLevel; + +import javax.management.MBeanServer; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * RDBStore Tests. + */ +public class TestRDBStore { + private final List families = + Arrays.asList(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY), + "First", "Second", "Third", + "Fourth", "Fifth", + "Sixth"); + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + @Rule + public ExpectedException thrown = ExpectedException.none(); + private RDBStore rdbStore = null; + private DBOptions options = null; + private Set configSet; + + @Before + public void setUp() throws Exception { + options = new DBOptions(); + options.setCreateIfMissing(true); + options.setCreateMissingColumnFamilies(true); + + Statistics statistics = new Statistics(); + statistics.setStatsLevel(StatsLevel.ALL); + options = options.setStatistics(statistics); + configSet = new HashSet<>(); + for(String name : families) { + TableConfig newConfig = new TableConfig(name, new ColumnFamilyOptions()); + configSet.add(newConfig); + } + rdbStore = new RDBStore(folder.newFolder(), options, configSet); + } + + @After + public void tearDown() throws Exception { + if (rdbStore != null) { + rdbStore.close(); + } + } + + @Test + public void compactDB() throws Exception { + try (RDBStore newStore = + new RDBStore(folder.newFolder(), options, configSet)) { + Assert.assertNotNull("DB Store cannot be null", newStore); + try (Table firstTable = newStore.getTable(families.get(1))) { + Assert.assertNotNull("Table cannot be null", firstTable); + for (int x = 0; x < 100; x++) { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + firstTable.put(key, value); + } + } + // This test does not assert anything if there is any error this test + // will throw and fail. + newStore.compactDB(); + } + } + + @Test + public void close() throws Exception { + RDBStore newStore = + new RDBStore(folder.newFolder(), options, configSet); + Assert.assertNotNull("DBStore cannot be null", newStore); + // This test does not assert anything if there is any error this test + // will throw and fail. 
+ newStore.close(); + } + + @Test + public void moveKey() throws Exception { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + + try (Table firstTable = rdbStore.getTable(families.get(1))) { + firstTable.put(key, value); + try (Table secondTable = rdbStore.getTable(families.get(2))) { + rdbStore.move(key, firstTable, secondTable); + byte[] newvalue = secondTable.get(key); + // Make sure we have value in the second table + Assert.assertNotNull(newvalue); + //and it is same as what we wrote to the FirstTable + Assert.assertArrayEquals(value, newvalue); + } + // After move this key must not exist in the first table. + Assert.assertNull(firstTable.get(key)); + } + } + + @Test + public void moveWithValue() throws Exception { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + + byte[] nextValue = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + try (Table firstTable = rdbStore.getTable(families.get(1))) { + firstTable.put(key, value); + try (Table secondTable = rdbStore.getTable(families.get(2))) { + rdbStore.move(key, nextValue, firstTable, secondTable); + byte[] newvalue = secondTable.get(key); + // Make sure we have value in the second table + Assert.assertNotNull(newvalue); + //and it is not same as what we wrote to the FirstTable, and equals + // the new value. + Assert.assertArrayEquals(nextValue, nextValue); + } + } + + } + + @Test + public void getEstimatedKeyCount() throws Exception { + try (RDBStore newStore = + new RDBStore(folder.newFolder(), options, configSet)) { + Assert.assertNotNull("DB Store cannot be null", newStore); + // Write 100 keys to the first table. + try (Table firstTable = newStore.getTable(families.get(1))) { + Assert.assertNotNull("Table cannot be null", firstTable); + for (int x = 0; x < 100; x++) { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + firstTable.put(key, value); + } + } + + // Write 100 keys to the secondTable table. 
+ try (Table secondTable = newStore.getTable(families.get(2))) { + Assert.assertNotNull("Table cannot be null", secondTable); + for (int x = 0; x < 100; x++) { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + secondTable.put(key, value); + } + } + // Let us make sure that our estimate is not off by 10% + Assert.assertTrue(newStore.getEstimatedKeyCount() > 180 + || newStore.getEstimatedKeyCount() < 220); + } + } + + @Test + public void getStatMBeanName() throws Exception { + + try (Table firstTable = rdbStore.getTable(families.get(1))) { + for (int y = 0; y < 100; y++) { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + firstTable.put(key, value); + } + } + MBeanServer platformMBeanServer = + ManagementFactory.getPlatformMBeanServer(); + Thread.sleep(2000); + + Object keysWritten = platformMBeanServer + .getAttribute(rdbStore.getStatMBeanName(), "NUMBER_KEYS_WRITTEN"); + + Assert.assertTrue(((Long) keysWritten) >= 99L); + + Object dbWriteAverage = platformMBeanServer + .getAttribute(rdbStore.getStatMBeanName(), "DB_WRITE_AVERAGE"); + Assert.assertTrue((double) dbWriteAverage > 0); + } + + @Test + public void getTable() throws Exception { + for (String tableName : families) { + try (Table table = rdbStore.getTable(tableName)) { + Assert.assertNotNull(tableName + "is null", table); + } + } + thrown.expect(IOException.class); + rdbStore.getTable("ATableWithNoName"); + } + + @Test + public void listTables() throws Exception { + List

tableList = rdbStore.listTables(); + Assert.assertNotNull("Table list cannot be null", tableList); + Map hashTable = new HashMap<>(); + + for (Table t : tableList) { + hashTable.put(t.getName(), t); + } + + int count = families.size(); + // Assert that we have all the tables in the list and no more. + for (String name : families) { + Assert.assertTrue(hashTable.containsKey(name)); + count--; + } + Assert.assertEquals(0, count); + } +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBTableStore.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBTableStore.java new file mode 100644 index 00000000000..cd25548403a --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/TestRDBTableStore.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.utils.db; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdfs.DFSUtil; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.DBOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.Statistics; +import org.rocksdb.StatsLevel; +import org.rocksdb.WriteBatch; + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +/** + * Tests for RocksDBTable Store. 
+ */ +public class TestRDBTableStore { + private static int count = 0; + private final List families = + Arrays.asList(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY), + "First", "Second", "Third", + "Fourth", "Fifth", + "Sixth"); + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + private RDBStore rdbStore = null; + private DBOptions options = null; + + @Before + public void setUp() throws Exception { + options = new DBOptions(); + options.setCreateIfMissing(true); + options.setCreateMissingColumnFamilies(true); + + Statistics statistics = new Statistics(); + statistics.setStatsLevel(StatsLevel.ALL); + options = options.setStatistics(statistics); + + Set configSet = new HashSet<>(); + for(String name : families) { + TableConfig newConfig = new TableConfig(name, new ColumnFamilyOptions()); + configSet.add(newConfig); + } + rdbStore = new RDBStore(folder.newFolder(), options, configSet); + } + + @After + public void tearDown() throws Exception { + if (rdbStore != null) { + rdbStore.close(); + } + } + + @Test + public void toIOException() { + } + + @Test + public void getHandle() throws Exception { + try (Table testTable = rdbStore.getTable("First")) { + Assert.assertNotNull(testTable); + Assert.assertNotNull(testTable.getHandle()); + } + } + + @Test + public void putGetAndEmpty() throws Exception { + try (Table testTable = rdbStore.getTable("First")) { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + testTable.put(key, value); + Assert.assertFalse(testTable.isEmpty()); + byte[] readValue = testTable.get(key); + Assert.assertArrayEquals(value, readValue); + } + try (Table secondTable = rdbStore.getTable("Second")) { + Assert.assertTrue(secondTable.isEmpty()); + } + } + + @Test + public void delete() throws Exception { + List deletedKeys = new LinkedList<>(); + List validKeys = new LinkedList<>(); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + for (int x = 0; x < 100; x++) { + deletedKeys.add( + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8)); + } + + for (int x = 0; x < 100; x++) { + validKeys.add( + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8)); + } + + // Write all the keys and delete the keys scheduled for delete. + //Assert we find only expected keys in the Table. 
+ try (Table testTable = rdbStore.getTable("Fourth")) { + for (int x = 0; x < deletedKeys.size(); x++) { + testTable.put(deletedKeys.get(x), value); + testTable.delete(deletedKeys.get(x)); + } + + for (int x = 0; x < validKeys.size(); x++) { + testTable.put(validKeys.get(x), value); + } + + for (int x = 0; x < validKeys.size(); x++) { + Assert.assertNotNull(testTable.get(validKeys.get(0))); + } + + for (int x = 0; x < deletedKeys.size(); x++) { + Assert.assertNull(testTable.get(deletedKeys.get(0))); + } + } + } + + @Test + public void writeBatch() throws Exception { + WriteBatch batch = new WriteBatch(); + try (Table testTable = rdbStore.getTable("Fifth")) { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + batch.put(testTable.getHandle(), key, value); + testTable.writeBatch(batch); + Assert.assertNotNull(testTable.get(key)); + } + batch.close(); + } + + private static boolean consume(Table.KeyValue keyValue) { + count++; + Assert.assertNotNull(keyValue.getKey()); + return true; + } + + @Test + public void forEachAndIterator() throws Exception { + final int iterCount = 100; + try (Table testTable = rdbStore.getTable("Sixth")) { + for (int x = 0; x < iterCount; x++) { + byte[] key = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + byte[] value = + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8); + testTable.put(key, value); + } + int localCount = 0; + try (TableIterator iter = testTable.iterator()) { + while (iter.hasNext()) { + Table.KeyValue keyValue = iter.next(); + localCount++; + } + + Assert.assertEquals(iterCount, localCount); + iter.seekToFirst(); + iter.forEachRemaining(TestRDBTableStore::consume); + Assert.assertEquals(iterCount, count); + + } + } + } +} \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/package-info.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/package-info.java new file mode 100644 index 00000000000..f06855e038a --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +/** + * Tests for the DB Utilities. + */ +package org.apache.hadoop.utils.db; \ No newline at end of file diff --git a/hadoop-hdds/common/src/test/resources/test.db.ini b/hadoop-hdds/common/src/test/resources/test.db.ini new file mode 100644 index 00000000000..6666cd28b2d --- /dev/null +++ b/hadoop-hdds/common/src/test/resources/test.db.ini @@ -0,0 +1,145 @@ +# This is a RocksDB option file. 
+# +# A typical RocksDB options file has four sections, which are +# Version section, DBOptions section, at least one CFOptions +# section, and one TableOptions section for each column family. +# The RocksDB options file in general follows the basic INI +# file format with the following extensions / modifications: +# +# * Escaped characters +# We escaped the following characters: +# - \n -- line feed - new line +# - \r -- carriage return +# - \\ -- backslash \ +# - \: -- colon symbol : +# - \# -- hash tag # +# * Comments +# We support # style comments. Comments can appear at the ending +# part of a line. +# * Statements +# A statement is of the form option_name = value. +# Each statement contains a '=', where extra white-spaces +# are supported. However, we don't support multi-lined statement. +# Furthermore, each line can only contain at most one statement. +# * Sections +# Sections are of the form [SecitonTitle "SectionArgument"], +# where section argument is optional. +# * List +# We use colon-separated string to represent a list. +# For instance, n1:n2:n3:n4 is a list containing four values. +# +# Below is an example of a RocksDB options file: + + +#----------------------IMPORTANT------------------------------------# +### FAKE VALUES FOR TESTING ONLY ### DO NOT USE THESE FOR PRODUCTION. +#----------------------IMPORTANT------------------------------------# +[DBOptions] + stats_dump_period_sec=600 + max_manifest_file_size=551615 + bytes_per_sync=8388608 + delayed_write_rate=2097152 + WAL_ttl_seconds=0 + WAL_size_limit_MB=0 + max_subcompactions=1 + wal_dir= + wal_bytes_per_sync=0 + db_write_buffer_size=0 + keep_log_file_num=1000 + table_cache_numshardbits=4 + max_file_opening_threads=1 + writable_file_max_buffer_size=1048576 + random_access_max_buffer_size=1048576 + use_fsync=false + max_total_wal_size=0 + max_open_files=-1 + skip_stats_update_on_db_open=false + max_background_compactions=16 + manifest_preallocation_size=4194304 + max_background_flushes=7 + is_fd_close_on_exec=true + max_log_file_size=0 + advise_random_on_open=true + create_missing_column_families=false + paranoid_checks=true + delete_obsolete_files_period_micros=21600000000 + log_file_time_to_roll=0 + compaction_readahead_size=0 + create_if_missing=false + use_adaptive_mutex=false + enable_thread_tracking=false + allow_fallocate=true + error_if_exists=false + recycle_log_file_num=0 + skip_log_error_on_recovery=false + db_log_dir= + new_table_reader_for_compaction_inputs=true + allow_mmap_reads=false + allow_mmap_writes=false + use_direct_reads=false + use_direct_writes=false + + +[CFOptions "default"] + compaction_style=kCompactionStyleLevel + compaction_filter=nullptr + num_levels=6 + table_factory=BlockBasedTable + comparator=leveldb.BytewiseComparator + max_sequential_skip_in_iterations=8 + soft_rate_limit=0.000000 + max_bytes_for_level_base=1073741824 + memtable_prefix_bloom_probes=6 + memtable_prefix_bloom_bits=0 + memtable_prefix_bloom_huge_page_tlb_size=0 + max_successive_merges=0 + arena_block_size=16777216 + min_write_buffer_number_to_merge=1 + target_file_size_multiplier=1 + source_compaction_factor=1 + max_bytes_for_level_multiplier=8 + max_bytes_for_level_multiplier_additional=2:3:5 + compaction_filter_factory=nullptr + max_write_buffer_number=8 + level0_stop_writes_trigger=20 + compression=kSnappyCompression + level0_file_num_compaction_trigger=4 + purge_redundant_kvs_while_flush=true + max_write_buffer_number_to_maintain=0 + memtable_factory=SkipListFactory + max_grandparent_overlap_factor=8 + 
expanded_compaction_factor=25 + hard_pending_compaction_bytes_limit=137438953472 + inplace_update_num_locks=10000 + level_compaction_dynamic_level_bytes=true + level0_slowdown_writes_trigger=12 + filter_deletes=false + verify_checksums_in_compaction=true + min_partial_merge_operands=2 + paranoid_file_checks=false + target_file_size_base=134217728 + optimize_filters_for_hits=false + merge_operator=PutOperator + compression_per_level=kNoCompression:kNoCompression:kNoCompression:kSnappyCompression:kSnappyCompression:kSnappyCompression + compaction_measure_io_stats=false + prefix_extractor=nullptr + bloom_locality=0 + write_buffer_size=134217728 + disable_auto_compactions=false + inplace_update_support=false + +[TableOptions/BlockBasedTable "default"] + format_version=2 + whole_key_filtering=true + no_block_cache=false + checksum=kCRC32c + filter_policy=rocksdb.BuiltinBloomFilter + block_size_deviation=10 + block_size=8192 + block_restart_interval=16 + cache_index_and_filter_blocks=false + pin_l0_filter_and_index_blocks_in_cache=false + pin_top_level_index_and_filter=false + index_type=kBinarySearch + hash_index_allow_collision=true + flush_block_policy_factory=FlushBlockBySizePolicyFactory \ No newline at end of file diff --git a/hadoop-hdds/container-service/pom.xml b/hadoop-hdds/container-service/pom.xml index 542462e8c7d..3d4e58185cf 100644 --- a/hadoop-hdds/container-service/pom.xml +++ b/hadoop-hdds/container-service/pom.xml @@ -20,10 +20,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-hdds-container-service - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Distributed Data Store Container Service Apache Hadoop HDDS Container Service jar @@ -37,12 +37,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds-common - provided org.apache.hadoop hadoop-hdds-server-framework - provided @@ -52,6 +50,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> test + + org.yaml + snakeyaml + 1.8 + + io.dropwizard.metrics metrics-core diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/hdds/scm/HddsServerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/hdds/scm/HddsServerUtil.java index cc7adbf8fd3..580d0279ad6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/hdds/scm/HddsServerUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/hdds/scm/HddsServerUtil.java @@ -29,12 +29,14 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_HEARTBEAT_INTERVAL_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_DEADNODE_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_DEADNODE_INTERVAL_DEFAULT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_HEARTBEAT_LOG_WARN_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys @@ -178,12 +180,11 @@ public static long getScmheartbeatCheckerInterval(Configuration conf) { * SCM. * * @param conf - Ozone Config - * @return - HB interval in seconds. + * @return - HB interval in milli seconds. 
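+ * (The value is read via getTimeDuration and is already converted to + * milliseconds, so callers such as getStaleNodeInterval no longer multiply + * by 1000.)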
*/ public static long getScmHeartbeatInterval(Configuration conf) { - return conf.getTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, - ScmConfigKeys.OZONE_SCM_HEARBEAT_INTERVAL_DEFAULT, - TimeUnit.SECONDS); + return conf.getTimeDuration(HDDS_HEARTBEAT_INTERVAL, + HDDS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); } /** @@ -201,7 +202,7 @@ public static long getStaleNodeInterval(Configuration conf) { long heartbeatThreadFrequencyMs = getScmheartbeatCheckerInterval(conf); - long heartbeatIntervalMs = getScmHeartbeatInterval(conf) * 1000; + long heartbeatIntervalMs = getScmHeartbeatInterval(conf); // Make sure that StaleNodeInterval is configured way above the frequency @@ -225,7 +226,7 @@ public static long getStaleNodeInterval(Configuration conf) { sanitizeUserArgs(staleNodeIntervalMs, heartbeatIntervalMs, 3, 1000); } catch (IllegalArgumentException ex) { LOG.error("Stale Node Interval MS is cannot be honored due to " + - "mis-configured {}. ex: {}", OZONE_SCM_HEARTBEAT_INTERVAL, ex); + "mis-configured {}. ex: {}", HDDS_HEARTBEAT_INTERVAL, ex); throw ex; } return staleNodeIntervalMs; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java index ddeec873bce..348196cbe75 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.statemachine .DatanodeStateMachine; @@ -231,7 +232,8 @@ public static HddsDatanodeService createHddsDatanodeService( public static void main(String[] args) { try { - if (DFSUtil.parseHelpArgument(args, "Starts HDDS Datanode", System.out, false)) { + if (DFSUtil.parseHelpArgument( + args, "Starts HDDS Datanode", System.out, false)) { System.exit(0); } Configuration conf = new OzoneConfiguration(); @@ -241,6 +243,7 @@ public static void main(String[] args) { System.exit(1); } StringUtils.startupShutdownMessage(HddsDatanodeService.class, args, LOG); + DefaultMetricsSystem.initialize("HddsDatanode"); HddsDatanodeService hddsDatanodeService = createHddsDatanodeService(conf); hddsDatanodeService.start(null); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/DataNodeLayoutVersion.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/DataNodeLayoutVersion.java new file mode 100644 index 00000000000..2d58c39a151 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/DataNodeLayoutVersion.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.common; + +/** + * Datanode layout version which describes information about the layout version + * on the datanode. + */ +public final class DataNodeLayoutVersion { + + // We will just be normal and use positive counting numbers for versions. + private final static DataNodeLayoutVersion[] VERSION_INFOS = + {new DataNodeLayoutVersion(1, "HDDS Datanode LayOut Version 1")}; + + private final String description; + private final int version; + + /** + * Never created outside this class. + * + * @param description -- description + * @param version -- version number + */ + private DataNodeLayoutVersion(int version, String description) { + this.description = description; + this.version = version; + } + + /** + * Returns all versions. + * + * @return Version info array. + */ + public static DataNodeLayoutVersion[] getAllVersions() { + return VERSION_INFOS.clone(); + } + + /** + * Returns the latest version. + * + * @return versionInfo + */ + public static DataNodeLayoutVersion getLatestVersion() { + return VERSION_INFOS[VERSION_INFOS.length - 1]; + } + + /** + * Return description. + * + * @return String + */ + public String getDescription() { + return description; + } + + /** + * Return the version. + * + * @return int. + */ + public int getVersion() { + return version; + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerData.java deleted file mode 100644 index 5767f76b3dc..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerData.java +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.ozone.container.common.helpers; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerType; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerLifeCycleState; -import org.apache.hadoop.ozone.OzoneConsts; - -import java.io.IOException; -import java.util.Collections; -import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.atomic.AtomicLong; - -import static java.lang.Math.max; - -/** - * This class maintains the information about a container in the ozone world. - *

- * A container is a name, along with metadata- which is a set of key value - * pair. - */ -public class ContainerData { - - private final Map metadata; - private String dbPath; // Path to Level DB Store. - // Path to Physical file system where container and checksum are stored. - private String containerFilePath; - private AtomicLong bytesUsed; - private long maxSize; - private long containerID; - private ContainerLifeCycleState state; - private ContainerType containerType; - private String containerDBType; - - - /** - * Number of pending deletion blocks in container. - */ - private int numPendingDeletionBlocks; - private long deleteTransactionId; - private AtomicLong readBytes; - private AtomicLong writeBytes; - private AtomicLong readCount; - private AtomicLong writeCount; - - - /** - * Constructs a ContainerData Object. - * - * @param containerID - ID - * @param conf - Configuration - */ - public ContainerData(long containerID, - Configuration conf) { - this.metadata = new TreeMap<>(); - this.maxSize = conf.getLong(ScmConfigKeys.SCM_CONTAINER_CLIENT_MAX_SIZE_KEY, - ScmConfigKeys.SCM_CONTAINER_CLIENT_MAX_SIZE_DEFAULT) * OzoneConsts.GB; - this.bytesUsed = new AtomicLong(0L); - this.containerID = containerID; - this.state = ContainerLifeCycleState.OPEN; - this.numPendingDeletionBlocks = 0; - this.deleteTransactionId = 0; - this.readCount = new AtomicLong(0L); - this.readBytes = new AtomicLong(0L); - this.writeCount = new AtomicLong(0L); - this.writeBytes = new AtomicLong(0L); - } - - /** - * Constructs a ContainerData Object. - * - * @param containerID - ID - * @param conf - Configuration - * @param state - ContainerLifeCycleState - * @param - */ - public ContainerData(long containerID, Configuration conf, - ContainerLifeCycleState state) { - this.metadata = new TreeMap<>(); - this.maxSize = conf.getLong(ScmConfigKeys.SCM_CONTAINER_CLIENT_MAX_SIZE_KEY, - ScmConfigKeys.SCM_CONTAINER_CLIENT_MAX_SIZE_DEFAULT) * OzoneConsts.GB; - this.bytesUsed = new AtomicLong(0L); - this.containerID = containerID; - this.state = state; - this.numPendingDeletionBlocks = 0; - this.deleteTransactionId = 0; - this.readCount = new AtomicLong(0L); - this.readBytes = new AtomicLong(0L); - this.writeCount = new AtomicLong(0L); - this.writeBytes = new AtomicLong(0L); - } - - /** - * Constructs a ContainerData object from ProtoBuf classes. 
- * - * @param protoData - ProtoBuf Message - * @throws IOException - */ - public static ContainerData getFromProtBuf( - ContainerProtos.ContainerData protoData, Configuration conf) - throws IOException { - ContainerData data = new ContainerData( - protoData.getContainerID(), conf); - for (int x = 0; x < protoData.getMetadataCount(); x++) { - data.addMetadata(protoData.getMetadata(x).getKey(), - protoData.getMetadata(x).getValue()); - } - - if (protoData.hasContainerPath()) { - data.setContainerPath(protoData.getContainerPath()); - } - - if (protoData.hasDbPath()) { - data.setDBPath(protoData.getDbPath()); - } - - if (protoData.hasState()) { - data.setState(protoData.getState()); - } - - if (protoData.hasBytesUsed()) { - data.setBytesUsed(protoData.getBytesUsed()); - } - - if (protoData.hasSize()) { - data.setMaxSize(protoData.getSize()); - } - - if(protoData.hasContainerType()) { - data.setContainerType(protoData.getContainerType()); - } - - if(protoData.hasContainerDBType()) { - data.setContainerDBType(protoData.getContainerDBType()); - } - - return data; - } - - public String getContainerDBType() { - return containerDBType; - } - - public void setContainerDBType(String containerDBType) { - this.containerDBType = containerDBType; - } - - /** - * Returns a ProtoBuf Message from ContainerData. - * - * @return Protocol Buffer Message - */ - public ContainerProtos.ContainerData getProtoBufMessage() { - ContainerProtos.ContainerData.Builder builder = ContainerProtos - .ContainerData.newBuilder(); - builder.setContainerID(this.getContainerID()); - - if (this.getDBPath() != null) { - builder.setDbPath(this.getDBPath()); - } - - if (this.getContainerPath() != null) { - builder.setContainerPath(this.getContainerPath()); - } - - builder.setState(this.getState()); - - for (Map.Entry entry : metadata.entrySet()) { - ContainerProtos.KeyValue.Builder keyValBuilder = - ContainerProtos.KeyValue.newBuilder(); - builder.addMetadata(keyValBuilder.setKey(entry.getKey()) - .setValue(entry.getValue()).build()); - } - - if (this.getBytesUsed() >= 0) { - builder.setBytesUsed(this.getBytesUsed()); - } - - if (this.getKeyCount() >= 0) { - builder.setKeyCount(this.getKeyCount()); - } - - if (this.getMaxSize() >= 0) { - builder.setSize(this.getMaxSize()); - } - - if(this.getContainerType() != null) { - builder.setContainerType(containerType); - } - - if(this.getContainerDBType() != null) { - builder.setContainerDBType(containerDBType); - } - - return builder.build(); - } - - public void setContainerType(ContainerType containerType) { - this.containerType = containerType; - } - - public ContainerType getContainerType() { - return this.containerType; - } - /** - * Adds metadata. - */ - public void addMetadata(String key, String value) throws IOException { - synchronized (this.metadata) { - if (this.metadata.containsKey(key)) { - throw new IOException("This key already exists. Key " + key); - } - metadata.put(key, value); - } - } - - /** - * Returns all metadata. - */ - public Map getAllMetadata() { - synchronized (this.metadata) { - return Collections.unmodifiableMap(this.metadata); - } - } - - /** - * Returns value of a key. - */ - public String getValue(String key) { - synchronized (this.metadata) { - return metadata.get(key); - } - } - - /** - * Deletes a metadata entry from the map. - * - * @param key - Key - */ - public void deleteKey(String key) { - synchronized (this.metadata) { - metadata.remove(key); - } - } - - /** - * Returns path. 
- * - * @return - path - */ - public String getDBPath() { - return dbPath; - } - - /** - * Sets path. - * - * @param path - String. - */ - public void setDBPath(String path) { - this.dbPath = path; - } - - /** - * This function serves as the generic key for ContainerCache class. Both - * ContainerData and ContainerKeyData overrides this function to appropriately - * return the right name that can be used in ContainerCache. - * - * @return String Name. - */ - // TODO: check the ContainerCache class to see if - // we are using the ContainerID instead. - /* - public String getName() { - return getContainerID(); - }*/ - - /** - * Get container file path. - * @return - Physical path where container file and checksum is stored. - */ - public String getContainerPath() { - return containerFilePath; - } - - /** - * Set container Path. - * @param containerPath - File path. - */ - public void setContainerPath(String containerPath) { - this.containerFilePath = containerPath; - } - - /** - * Get container ID. - * @return - container ID. - */ - public synchronized long getContainerID() { - return containerID; - } - - public synchronized void setState(ContainerLifeCycleState state) { - this.state = state; - } - - public synchronized ContainerLifeCycleState getState() { - return this.state; - } - - /** - * checks if the container is open. - * @return - boolean - */ - public synchronized boolean isOpen() { - return ContainerLifeCycleState.OPEN == state; - } - - /** - * checks if the container is invalid. - * @return - boolean - */ - public boolean isValid() { - return !(ContainerLifeCycleState.INVALID == state); - } - - /** - * checks if the container is closed. - * @return - boolean - */ - public synchronized boolean isClosed() { - return ContainerLifeCycleState.CLOSED == state; - } - - /** - * Marks this container as closed. - */ - public synchronized void closeContainer() { - // TODO: closed or closing here - setState(ContainerLifeCycleState.CLOSED); - - } - - public void setMaxSize(long maxSize) { - this.maxSize = maxSize; - } - - public long getMaxSize() { - return maxSize; - } - - public long getKeyCount() { - return metadata.size(); - } - - public void setBytesUsed(long used) { - this.bytesUsed.set(used); - } - - /** - * Get the number of bytes used by the container. - * @return the number of bytes used by the container. - */ - public long getBytesUsed() { - return bytesUsed.get(); - } - - /** - * Increase the number of bytes used by the container. - * @param used number of bytes used by the container. - * @return the current number of bytes used by the container afert increase. - */ - public long incrBytesUsed(long used) { - return this.bytesUsed.addAndGet(used); - } - - - /** - * Decrease the number of bytes used by the container. - * @param reclaimed the number of bytes reclaimed from the container. - * @return the current number of bytes used by the container after decrease. - */ - public long decrBytesUsed(long reclaimed) { - return this.bytesUsed.addAndGet(-1L * reclaimed); - } - - /** - * Increase the count of pending deletion blocks. - * - * @param numBlocks increment number - */ - public void incrPendingDeletionBlocks(int numBlocks) { - this.numPendingDeletionBlocks += numBlocks; - } - - /** - * Decrease the count of pending deletion blocks. - * - * @param numBlocks decrement number - */ - public void decrPendingDeletionBlocks(int numBlocks) { - this.numPendingDeletionBlocks -= numBlocks; - } - - /** - * Get the number of pending deletion blocks. 
- */ - public int getNumPendingDeletionBlocks() { - return this.numPendingDeletionBlocks; - } - - /** - * Sets deleteTransactionId to latest delete transactionId for the container. - * - * @param transactionId latest transactionId of the container. - */ - public void updateDeleteTransactionId(long transactionId) { - deleteTransactionId = max(transactionId, deleteTransactionId); - } - - /** - * Return the latest deleteTransactionId of the container. - */ - public long getDeleteTransactionId() { - return deleteTransactionId; - } - - /** - * Get the number of bytes read from the container. - * @return the number of bytes read from the container. - */ - public long getReadBytes() { - return readBytes.get(); - } - - /** - * Increase the number of bytes read from the container. - * @param bytes number of bytes read. - */ - public void incrReadBytes(long bytes) { - this.readBytes.addAndGet(bytes); - } - - /** - * Get the number of times the container is read. - * @return the number of times the container is read. - */ - public long getReadCount() { - return readCount.get(); - } - - /** - * Increase the number of container read count by 1. - */ - public void incrReadCount() { - this.readCount.incrementAndGet(); - } - - /** - * Get the number of bytes write into the container. - * @return the number of bytes write into the container. - */ - public long getWriteBytes() { - return writeBytes.get(); - } - - /** - * Increase the number of bytes write into the container. - * @param bytes the number of bytes write into the container. - */ - public void incrWriteBytes(long bytes) { - this.writeBytes.addAndGet(bytes); - } - - /** - * Get the number of writes into the container. - * @return the number of writes into the container. - */ - public long getWriteCount() { - return writeCount.get(); - } - - /** - * Increase the number of writes into the container by 1. 
- */ - public void incrWriteCount() { - this.writeCount.incrementAndGet(); - } - - -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java index 714db598d7c..2879001c28b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java @@ -94,7 +94,7 @@ public static ContainerMetrics create(Configuration conf) { new ContainerMetrics(intervals)); } - public void incContainerOpcMetrics(ContainerProtos.Type type){ + public void incContainerOpsMetrics(ContainerProtos.Type type) { numOps.incr(); numOpsArray[type.ordinal()].incr(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerReport.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerReport.java index b2427549cf6..a4c1f2f4678 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerReport.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerReport.java @@ -20,7 +20,6 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerInfo; -import static java.lang.Math.max; /** * Container Report iterates the closed containers and sends a container report @@ -37,7 +36,6 @@ private long readBytes; private long writeBytes; private long containerID; - private long deleteTransactionId; public long getContainerID() { return containerID; @@ -47,9 +45,6 @@ public void setContainerID(long containerID) { this.containerID = containerID; } - - - /** * Constructs the ContainerReport. * @@ -66,7 +61,6 @@ public ContainerReport(long containerID, String finalhash) { this.readBytes = 0L; this.writeCount = 0L; this.writeBytes = 0L; - this.deleteTransactionId = 0; } /** @@ -100,9 +94,6 @@ public static ContainerReport getFromProtoBuf(ContainerInfo info) { if (info.hasWriteBytes()) { report.setWriteBytes(info.getWriteBytes()); } - if (info.hasDeleteTransactionId()) { - report.updateDeleteTransactionId(info.getDeleteTransactionId()); - } report.setContainerID(info.getContainerID()); return report; @@ -193,10 +184,6 @@ public void setBytesUsed(long bytesUsed) { this.bytesUsed = bytesUsed; } - public void updateDeleteTransactionId(long transactionId) { - this.deleteTransactionId = max(transactionId, deleteTransactionId); - } - /** * Gets a containerInfo protobuf message from ContainerReports. 
* @@ -213,7 +200,6 @@ public ContainerInfo getProtoBufMessage() { .setWriteBytes(this.getWriteBytes()) .setFinalhash(this.getFinalhash()) .setContainerID(this.getContainerID()) - .setDeleteTransactionId(this.deleteTransactionId) .build(); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java index 9b5231664fb..d96849e3c35 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java @@ -19,18 +19,23 @@ package org.apache.hadoop.ozone.container.common.helpers; import com.google.common.base.Preconditions; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.fs.FileAlreadyExistsException; -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.common.impl.ContainerManagerImpl; -import org.apache.hadoop.utils.MetadataStore; -import org.apache.hadoop.utils.MetadataStoreBuilder; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,16 +43,16 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.nio.file.Path; import java.nio.file.Paths; +import org.yaml.snakeyaml.Yaml; import static org.apache.commons.io.FilenameUtils.removeExtension; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result - .INVALID_ARGUMENT; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result - .UNABLE_TO_FIND_DATA_DIR; -import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_EXTENSION; - +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CONTAINER_CHECKSUM_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.NO_SUCH_ALGORITHM; +import static org.apache.hadoop.ozone.container.common.impl.ContainerData + .CHARSET_ENCODING; /** * A set of helper functions to create proper responses. @@ -59,38 +64,49 @@ private ContainerUtils() { } /** - * Returns a CreateContainer Response. This call is used by create and delete - * containers which have null success responses. - * - * @param msg Request - * @return Response. 
+ * Returns a Container Command Response Builder with the specified result + * and message. + * @param request requestProto message. + * @param result result of the command. + * @param message response message. + * @return ContainerCommand Response Builder. */ - public static ContainerProtos.ContainerCommandResponseProto - getContainerResponse(ContainerProtos.ContainerCommandRequestProto msg) { - ContainerProtos.ContainerCommandResponseProto.Builder builder = - getContainerResponse(msg, ContainerProtos.Result.SUCCESS, ""); - return builder.build(); + public static ContainerCommandResponseProto.Builder + getContainerCommandResponse( + ContainerCommandRequestProto request, Result result, String message) { + return ContainerCommandResponseProto.newBuilder() + .setCmdType(request.getCmdType()) + .setTraceID(request.getTraceID()) + .setResult(result) + .setMessage(message); } /** - * Returns a ReadContainer Response. - * - * @param msg Request - * @param containerData - data - * @return Response. + * Returns a Container Command Response Builder. This call is used to build + * success responses. Calling function can add other fields to the response + * as required. + * @param request requestProto message. + * @return ContainerCommand Response Builder with result as SUCCESS. */ - public static ContainerProtos.ContainerCommandResponseProto - getReadContainerResponse(ContainerProtos.ContainerCommandRequestProto msg, - ContainerData containerData) { - Preconditions.checkNotNull(containerData); - - ContainerProtos.ReadContainerResponseProto.Builder response = - ContainerProtos.ReadContainerResponseProto.newBuilder(); - response.setContainerData(containerData.getProtoBufMessage()); + public static ContainerCommandResponseProto.Builder getSuccessResponseBuilder( + ContainerCommandRequestProto request) { + return + ContainerCommandResponseProto.newBuilder() + .setCmdType(request.getCmdType()) + .setTraceID(request.getTraceID()) + .setResult(Result.SUCCESS); + } - ContainerProtos.ContainerCommandResponseProto.Builder builder = - getContainerResponse(msg, ContainerProtos.Result.SUCCESS, ""); - builder.setReadContainer(response); + /** + * Returns a Container Command Response. This call is used for creating null + * success responses. + * @param request requestProto message. + * @return ContainerCommand Response with result as SUCCESS. + */ + public static ContainerCommandResponseProto getSuccessResponse( + ContainerCommandRequestProto request) { + ContainerCommandResponseProto.Builder builder = + getContainerCommandResponse(request, Result.SUCCESS, ""); return builder.build(); } @@ -98,37 +114,25 @@ private ContainerUtils() { * We found a command type but no associated payload for the command. Hence * return malformed Command as response. * - * @param msg - Protobuf message. - * @param result - result - * @param message - Error message. + * @param request - Protobuf message. * @return ContainerCommandResponseProto - MALFORMED_REQUEST. 
*/ - public static ContainerProtos.ContainerCommandResponseProto.Builder - getContainerResponse(ContainerProtos.ContainerCommandRequestProto msg, - ContainerProtos.Result result, String message) { - return - ContainerProtos.ContainerCommandResponseProto.newBuilder() - .setCmdType(msg.getCmdType()) - .setTraceID(msg.getTraceID()) - .setResult(result) - .setMessage(message); + public static ContainerCommandResponseProto malformedRequest( + ContainerCommandRequestProto request) { + return getContainerCommandResponse(request, Result.MALFORMED_REQUEST, + "Cmd type does not match the payload.").build(); } /** - * Logs the error and returns a response to the caller. + * We found a command type that is not supported yet. * - * @param log - Logger - * @param ex - Exception - * @param msg - Request Object - * @return Response + * @param request - Protobuf message. + * @return ContainerCommandResponseProto - UNSUPPORTED_REQUEST. */ - public static ContainerProtos.ContainerCommandResponseProto logAndReturnError( - Logger log, StorageContainerException ex, - ContainerProtos.ContainerCommandRequestProto msg) { - log.info("Operation: {} : Trace ID: {} : Message: {} : Result: {}", - msg.getCmdType().name(), msg.getTraceID(), - ex.getMessage(), ex.getResult().getValueDescriptor().getName()); - return getContainerResponse(msg, ex.getResult(), ex.getMessage()).build(); + public static ContainerCommandResponseProto unsupportedRequest( + ContainerCommandRequestProto request) { + return getContainerCommandResponse(request, Result.UNSUPPORTED_REQUEST, + "Server does not support this command yet.").build(); } /** @@ -136,40 +140,17 @@ private ContainerUtils() { * * @param log - Logger * @param ex - Exception - * @param msg - Request Object + * @param request - Request Object * @return Response */ - public static ContainerProtos.ContainerCommandResponseProto logAndReturnError( - Logger log, RuntimeException ex, - ContainerProtos.ContainerCommandRequestProto msg) { - log.info("Operation: {} : Trace ID: {} : Message: {} ", - msg.getCmdType().name(), msg.getTraceID(), ex.getMessage()); - return getContainerResponse(msg, INVALID_ARGUMENT, ex.getMessage()).build(); - } - - /** - * We found a command type but no associated payload for the command. Hence - * return malformed Command as response. - * - * @param msg - Protobuf message. - * @return ContainerCommandResponseProto - MALFORMED_REQUEST. - */ - public static ContainerProtos.ContainerCommandResponseProto - malformedRequest(ContainerProtos.ContainerCommandRequestProto msg) { - return getContainerResponse(msg, ContainerProtos.Result.MALFORMED_REQUEST, - "Cmd type does not match the payload.").build(); - } - - /** - * We found a command type that is not supported yet. - * - * @param msg - Protobuf message. - * @return ContainerCommandResponseProto - MALFORMED_REQUEST. 
- */ - public static ContainerProtos.ContainerCommandResponseProto - unsupportedRequest(ContainerProtos.ContainerCommandRequestProto msg) { - return getContainerResponse(msg, ContainerProtos.Result.UNSUPPORTED_REQUEST, - "Server does not support this command yet.").build(); + public static ContainerCommandResponseProto logAndReturnError( + Logger log, StorageContainerException ex, + ContainerCommandRequestProto request) { + log.info("Operation: {} : Trace ID: {} : Message: {} : Result: {}", + request.getCmdType().name(), request.getTraceID(), + ex.getMessage(), ex.getResult().getValueDescriptor().getName()); + return getContainerCommandResponse(request, ex.getResult(), ex.getMessage()) + .build(); } /** @@ -191,189 +172,28 @@ public static long getContainerIDFromFile(File containerFile) { } /** - * Verifies that this in indeed a new container. + * Verifies that this is indeed a new container. * * @param containerFile - Container File to verify * @throws IOException */ - public static void verifyIsNewContainer(File containerFile) - throws IOException { - Logger log = LoggerFactory.getLogger(ContainerManagerImpl.class); - if (containerFile.exists()) { - log.error("container already exists on disk. File: {}", - containerFile.toPath()); + public static void verifyIsNewContainer(File containerFile) throws + FileAlreadyExistsException { + Logger log = LoggerFactory.getLogger(ContainerSet.class); + Preconditions.checkNotNull(containerFile, "containerFile Should not be " + + "null"); + if (containerFile.getParentFile().exists()) { + log.error("Container already exists on disk. File: {}", containerFile + .toPath()); throw new FileAlreadyExistsException("container already exists on " + "disk."); } - - File parentPath = new File(containerFile.getParent()); - - if (!parentPath.exists() && !parentPath.mkdirs()) { - log.error("Unable to create parent path. Path: {}", - parentPath.toString()); - throw new IOException("Unable to create container directory."); - } - - if (!containerFile.createNewFile()) { - log.error("creation of a new container file failed. File: {}", - containerFile.toPath()); - throw new IOException("creation of a new container file failed."); - } - } public static String getContainerDbFileName(String containerName) { return containerName + OzoneConsts.DN_CONTAINER_DB; } - /** - * creates a Metadata DB for the specified container. - * - * @param containerPath - Container Path. - * @throws IOException - */ - public static Path createMetadata(Path containerPath, String containerName, - Configuration conf) - throws IOException { - Logger log = LoggerFactory.getLogger(ContainerManagerImpl.class); - Preconditions.checkNotNull(containerPath); - Path metadataPath = containerPath.resolve(OzoneConsts.CONTAINER_META_PATH); - if (!metadataPath.toFile().mkdirs()) { - log.error("Unable to create directory for metadata storage. Path: {}", - metadataPath); - throw new IOException("Unable to create directory for metadata storage." + - " Path: " + metadataPath); - } - MetadataStore store = MetadataStoreBuilder.newBuilder() - .setConf(conf) - .setCreateIfMissing(true) - .setDbFile(metadataPath - .resolve(getContainerDbFileName(containerName)).toFile()) - .build(); - - // we close since the SCM pre-creates containers. - // we will open and put Db handle into a cache when keys are being created - // in a container. 
- - store.close(); - - Path dataPath = containerPath.resolve(OzoneConsts.CONTAINER_DATA_PATH); - if (!dataPath.toFile().mkdirs()) { - - // If we failed to create data directory, we cleanup the - // metadata directory completely. That is, we will delete the - // whole directory including LevelDB file. - log.error("Unable to create directory for data storage. cleaning up the" + - " container path: {} dataPath: {}", - containerPath, dataPath); - FileUtils.deleteDirectory(containerPath.toFile()); - throw new IOException("Unable to create directory for data storage." + - " Path: " + dataPath); - } - return metadataPath; - } - - /** - * Returns container file location. - * - * @param containerData - Data - * @param location - Root path - * @return Path - */ - public static File getContainerFile(ContainerData containerData, - Path location) { - return location.resolve(Long.toString(containerData - .getContainerID()).concat(CONTAINER_EXTENSION)) - .toFile(); - } - - /** - * Container metadata directory -- here is where the level DB lives. - * - * @param cData - cData. - * @return Path to the parent directory where the DB lives. - */ - public static Path getMetadataDirectory(ContainerData cData) { - Path dbPath = Paths.get(cData.getDBPath()); - Preconditions.checkNotNull(dbPath); - Preconditions.checkState(dbPath.toString().length() > 0); - return dbPath.getParent(); - } - - /** - * Returns the path where data or chunks live for a given container. - * - * @param cData - cData container - * @return - Path - * @throws StorageContainerException - */ - public static Path getDataDirectory(ContainerData cData) - throws StorageContainerException { - Path path = getMetadataDirectory(cData); - Preconditions.checkNotNull(path); - Path parentPath = path.getParent(); - if (parentPath == null) { - throw new StorageContainerException("Unable to get Data directory." - + path, UNABLE_TO_FIND_DATA_DIR); - } - return parentPath.resolve(OzoneConsts.CONTAINER_DATA_PATH); - } - - /** - * remove Container if it is empty. - *

- * There are three things we need to delete. - *

- * 1. Container file and metadata file. 2. The Level DB file 3. The path that - * we created on the data location. - * - * @param containerData - Data of the container to remove. - * @param conf - configuration of the cluster. - * @param forceDelete - whether this container should be deleted forcibly. - * @throws IOException - */ - public static void removeContainer(ContainerData containerData, - Configuration conf, boolean forceDelete) throws IOException { - Preconditions.checkNotNull(containerData); - Path dbPath = Paths.get(containerData.getDBPath()); - - MetadataStore db = KeyUtils.getDB(containerData, conf); - // If the container is not empty and cannot be deleted forcibly, - // then throw a SCE to stop deleting. - if(!forceDelete && !db.isEmpty()) { - throw new StorageContainerException( - "Container cannot be deleted because it is not empty.", - ContainerProtos.Result.ERROR_CONTAINER_NOT_EMPTY); - } - // Close the DB connection and remove the DB handler from cache - KeyUtils.removeDB(containerData, conf); - - // Delete the DB File. - FileUtils.forceDelete(dbPath.toFile()); - dbPath = dbPath.getParent(); - - // Delete all Metadata in the Data directories for this containers. - if (dbPath != null) { - FileUtils.deleteDirectory(dbPath.toFile()); - dbPath = dbPath.getParent(); - } - - // now delete the container directory, this means that all key data dirs - // will be removed too. - if (dbPath != null) { - FileUtils.deleteDirectory(dbPath.toFile()); - } - - // Delete the container metadata from the metadata locations. - String rootPath = getContainerNameFromFile(new File(containerData - .getContainerPath())); - Path containerPath = Paths.get(rootPath.concat(CONTAINER_EXTENSION)); - - - FileUtils.forceDelete(containerPath.toFile()); - - } - /** * Persistent a {@link DatanodeDetails} to a local file. * @@ -418,4 +238,68 @@ public synchronized static DatanodeDetails readDatanodeDetailsFrom(File path) + path.getAbsolutePath(), e); } } + + /** + * Verify that the checksum stored in containerData is equal to the + * computed checksum. + * @param containerData + * @throws IOException + */ + public static void verifyChecksum(ContainerData containerData) + throws IOException { + String storedChecksum = containerData.getChecksum(); + + Yaml yaml = ContainerDataYaml.getYamlForContainerType( + containerData.getContainerType()); + containerData.computeAndSetChecksum(yaml); + String computedChecksum = containerData.getChecksum(); + + if (storedChecksum == null || !storedChecksum.equals(computedChecksum)) { + throw new StorageContainerException("Container checksum error for " + + "ContainerID: " + containerData.getContainerID() + ". " + + "\nStored Checksum: " + storedChecksum + + "\nExpected Checksum: " + computedChecksum, + CONTAINER_CHECKSUM_ERROR); + } + } + + /** + * Return the SHA-256 checksum of the containerData.
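+ * Any change to the serialized container data therefore yields a different + * digest, which is the property that verifyChecksum above relies on.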
+ * @param containerDataYamlStr ContainerData as a Yaml String + * @return Checksum of the container data + * @throws StorageContainerException + */ + public static String getChecksum(String containerDataYamlStr) + throws StorageContainerException { + MessageDigest sha; + try { + sha = MessageDigest.getInstance(OzoneConsts.FILE_HASH); + sha.update(containerDataYamlStr.getBytes(CHARSET_ENCODING)); + return DigestUtils.sha256Hex(sha.digest()); + } catch (NoSuchAlgorithmException e) { + throw new StorageContainerException("Unable to create Message Digest, " + + "usually this is a java configuration issue.", NO_SUCH_ALGORITHM); + } + } + + /** + * Get the .container file from the containerBaseDir. + * @param containerBaseDir container base directory. The name of this + * directory is same as the containerID + * @return the .container file + */ + public static File getContainerFile(File containerBaseDir) { + // Container file layout is + // .../<>/metadata/<>.container + String containerFilePath = OzoneConsts.CONTAINER_META_PATH + File.separator + + getContainerID(containerBaseDir) + OzoneConsts.CONTAINER_EXTENSION; + return new File(containerBaseDir, containerFilePath); + } + + /** + * ContainerID can be decoded from the container base directory name. + */ + public static long getContainerID(File containerBaseDir) { + return Long.parseLong(containerBaseDir.getName()); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeVersionFile.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeVersionFile.java new file mode 100644 index 00000000000..4db6d3120fd --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeVersionFile.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.container.common.helpers; + +import org.apache.hadoop.ozone.OzoneConsts; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.Properties; + +/** + * This is a utility class which helps to create the version file on datanode + * and also validate the content of the version file. + */ +public class DatanodeVersionFile { + + private final String storageId; + private final String clusterId; + private final String datanodeUuid; + private final long cTime; + private final int layOutVersion; + + public DatanodeVersionFile(String storageId, String clusterId, + String datanodeUuid, long cTime, int layOutVersion) { + this.storageId = storageId; + this.clusterId = clusterId; + this.datanodeUuid = datanodeUuid; + this.cTime = cTime; + this.layOutVersion = layOutVersion; + } + + private Properties createProperties() { + Properties properties = new Properties(); + properties.setProperty(OzoneConsts.STORAGE_ID, storageId); + properties.setProperty(OzoneConsts.CLUSTER_ID, clusterId); + properties.setProperty(OzoneConsts.DATANODE_UUID, datanodeUuid); + properties.setProperty(OzoneConsts.CTIME, String.valueOf(cTime)); + properties.setProperty(OzoneConsts.LAYOUTVERSION, String.valueOf( + layOutVersion)); + return properties; + } + + /** + * Creates a version File in specified path. + * @param path + * @throws IOException + */ + public void createVersionFile(File path) throws + IOException { + try (RandomAccessFile file = new RandomAccessFile(path, "rws"); + FileOutputStream out = new FileOutputStream(file.getFD())) { + file.getChannel().truncate(0); + Properties properties = createProperties(); + /* + * If server is interrupted before this line, + * the version file will remain unchanged. + */ + properties.store(out, null); + } + } + + + /** + * Creates a property object from the specified file content. + * @param versionFile + * @return Properties + * @throws IOException + */ + public static Properties readFrom(File versionFile) throws IOException { + try (RandomAccessFile file = new RandomAccessFile(versionFile, "rws"); + FileInputStream in = new FileInputStream(file.getFD())) { + Properties props = new Properties(); + props.load(in); + return props; + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyUtils.java deleted file mode 100644 index f831d455b6a..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyUtils.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.container.common.helpers; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; -import org.apache.hadoop.utils.MetadataStore; - -import java.io.IOException; -import java.nio.charset.Charset; - -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.NO_SUCH_KEY; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.UNABLE_TO_READ_METADATA_DB; - -/** - * Utils functions to help key functions. - */ -public final class KeyUtils { - public static final String ENCODING_NAME = "UTF-8"; - public static final Charset ENCODING = Charset.forName(ENCODING_NAME); - - /** - * Never Constructed. - */ - private KeyUtils() { - } - - /** - * Get a DB handler for a given container. - * If the handler doesn't exist in cache yet, first create one and - * add into cache. This function is called with containerManager - * ReadLock held. - * - * @param container container. - * @param conf configuration. - * @return MetadataStore handle. - * @throws StorageContainerException - */ - public static MetadataStore getDB(ContainerData container, - Configuration conf) throws StorageContainerException { - Preconditions.checkNotNull(container); - ContainerCache cache = ContainerCache.getInstance(conf); - Preconditions.checkNotNull(cache); - try { - return cache.getDB(container.getContainerID(), container.getDBPath()); - } catch (IOException ex) { - String message = - String.format("Unable to open DB. DB Name: %s, Path: %s. ex: %s", - container.getContainerID(), container.getDBPath(), ex.getMessage()); - throw new StorageContainerException(message, UNABLE_TO_READ_METADATA_DB); - } - } - - /** - * Remove a DB handler from cache. - * - * @param container - Container data. - * @param conf - Configuration. - */ - public static void removeDB(ContainerData container, - Configuration conf) { - Preconditions.checkNotNull(container); - ContainerCache cache = ContainerCache.getInstance(conf); - Preconditions.checkNotNull(cache); - cache.removeDB(container.getContainerID()); - } - /** - * Shutdown all DB Handles. - * - * @param cache - Cache for DB Handles. - */ - @SuppressWarnings("unchecked") - public static void shutdownCache(ContainerCache cache) { - cache.shutdownCache(); - } - - /** - * Returns successful keyResponse. - * @param msg - Request. - * @return Response. 
- */ - public static ContainerProtos.ContainerCommandResponseProto - getKeyResponse(ContainerProtos.ContainerCommandRequestProto msg) { - return ContainerUtils.getContainerResponse(msg); - } - - - public static ContainerProtos.ContainerCommandResponseProto - getKeyDataResponse(ContainerProtos.ContainerCommandRequestProto msg, - KeyData data) { - ContainerProtos.GetKeyResponseProto.Builder getKey = ContainerProtos - .GetKeyResponseProto.newBuilder(); - getKey.setKeyData(data.getProtoBufMessage()); - ContainerProtos.ContainerCommandResponseProto.Builder builder = - ContainerUtils.getContainerResponse(msg, ContainerProtos.Result - .SUCCESS, ""); - builder.setGetKey(getKey); - return builder.build(); - } - - /** - * Parses the key name from a bytes array. - * @param bytes key name in bytes. - * @return key name string. - */ - public static String getKeyName(byte[] bytes) { - return new String(bytes, ENCODING); - } - - /** - * Parses the {@link KeyData} from a bytes array. - * - * @param bytes key data in bytes. - * @return key data. - * @throws IOException if the bytes array is malformed or invalid. - */ - public static KeyData getKeyData(byte[] bytes) throws IOException { - try { - ContainerProtos.KeyData kd = ContainerProtos.KeyData.parseFrom(bytes); - KeyData data = KeyData.getFromProtoBuf(kd); - return data; - } catch (IOException e) { - throw new StorageContainerException("Failed to parse key data from the" + - " bytes array.", NO_SUCH_KEY); - } - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyValueContainerReport.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyValueContainerReport.java new file mode 100644 index 00000000000..b03487b5beb --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/KeyValueContainerReport.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.common.helpers; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerInfo; + +import static java.lang.Math.max; + +/** + * KeyValueContainer Report iterates the closed containers and sends a + * container report to SCM. + */ +public class KeyValueContainerReport extends ContainerReport{ + private long deleteTransactionId; + + /** + * Constructs the KeyValueContainerReport. + * + * @param containerID - Container ID. + * @param finalhash - Final Hash. 
+ */ + public KeyValueContainerReport(long containerID, String finalhash) { + super(containerID, finalhash); + this.deleteTransactionId = 0; + } + + /** + * Sets the deleteTransactionId if it is greater than existing. + * @param transactionId - deleteTransactionId + */ + public void updateDeleteTransactionId(long transactionId) { + this.deleteTransactionId = max(transactionId, deleteTransactionId); + } + + /** + * Gets the deleteTransactionId. + * @return - deleteTransactionId. + */ + public long getDeleteTransactionId() { + return this.deleteTransactionId; + } + + /** + * Gets a containerReport from protobuf class. + * + * @param info - ContainerInfo. + * @return - ContainerReport. + */ + public static KeyValueContainerReport getFromProtoBuf(ContainerInfo info) { + Preconditions.checkNotNull(info); + KeyValueContainerReport report = new KeyValueContainerReport( + info.getContainerID(), info.getFinalhash()); + if (info.hasSize()) { + report.setSize(info.getSize()); + } + if (info.hasKeyCount()) { + report.setKeyCount(info.getKeyCount()); + } + if (info.hasUsed()) { + report.setBytesUsed(info.getUsed()); + } + if (info.hasReadCount()) { + report.setReadCount(info.getReadCount()); + } + if (info.hasReadBytes()) { + report.setReadBytes(info.getReadBytes()); + } + if (info.hasWriteCount()) { + report.setWriteCount(info.getWriteCount()); + } + if (info.hasWriteBytes()) { + report.setWriteBytes(info.getWriteBytes()); + } + if (info.hasDeleteTransactionId()) { + report.updateDeleteTransactionId(info.getDeleteTransactionId()); + } + report.setContainerID(info.getContainerID()); + return report; + } + + /** + * Gets a containerInfo protobuf message from ContainerReports. + * + * @return ContainerInfo + */ + @Override + public ContainerInfo getProtoBufMessage() { + return ContainerInfo.newBuilder() + .setKeyCount(this.getKeyCount()) + .setSize(this.getSize()) + .setUsed(this.getBytesUsed()) + .setReadCount(this.getReadCount()) + .setReadBytes(this.getReadBytes()) + .setWriteCount(this.getWriteCount()) + .setWriteBytes(this.getWriteBytes()) + .setFinalhash(this.getFinalhash()) + .setContainerID(this.getContainerID()) + .setDeleteTransactionId(this.getDeleteTransactionId()) + .build(); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ChunkLayOutVersion.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ChunkLayOutVersion.java new file mode 100644 index 00000000000..d1b1bd66493 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ChunkLayOutVersion.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
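A hedged example (values invented, only methods defined in this file are used) of how a KeyValueContainerReport round-trips through the ContainerInfo protobuf; note that updateDeleteTransactionId() only ever keeps the largest id it has seen:

    import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerInfo;
    import org.apache.hadoop.ozone.container.common.helpers.KeyValueContainerReport;

    public final class ContainerReportSketch {
      private ContainerReportSketch() {
      }

      public static KeyValueContainerReport roundTrip() {
        KeyValueContainerReport report = new KeyValueContainerReport(42L, "finalHash");
        report.setKeyCount(100L);
        report.setBytesUsed(1L << 20);
        report.updateDeleteTransactionId(7L);
        report.updateDeleteTransactionId(5L);   // ignored, 7 is larger
        // Serialize for the datanode-to-SCM report protocol ...
        ContainerInfo wire = report.getProtoBufMessage();
        // ... and rebuild an equivalent report on the receiving side.
        return KeyValueContainerReport.getFromProtoBuf(wire);
      }
    }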
+ */ +package org.apache.hadoop.ozone.container.common.impl; + + +import com.google.common.base.Preconditions; + +/** + * Defines layout versions for the Chunks. + */ + +public final class ChunkLayOutVersion { + + private final static ChunkLayOutVersion[] CHUNK_LAYOUT_VERSION_INFOS = + {new ChunkLayOutVersion(1, "Data without checksums.")}; + + private int version; + private String description; + + + /** + * Never created outside this class. + * + * @param description -- description + * @param version -- version number + */ + private ChunkLayOutVersion(int version, String description) { + this.version = version; + this.description = description; + } + + /** + * Return ChunkLayOutVersion object for the chunkVersion. + * @param chunkVersion + * @return ChunkLayOutVersion + */ + public static ChunkLayOutVersion getChunkLayOutVersion(int chunkVersion) { + Preconditions.checkArgument((chunkVersion <= ChunkLayOutVersion + .getLatestVersion().getVersion())); + for(ChunkLayOutVersion chunkLayOutVersion : CHUNK_LAYOUT_VERSION_INFOS) { + if(chunkLayOutVersion.getVersion() == chunkVersion) { + return chunkLayOutVersion; + } + } + return null; + } + + /** + * Returns all versions. + * + * @return Version info array. + */ + public static ChunkLayOutVersion[] getAllVersions() { + return CHUNK_LAYOUT_VERSION_INFOS.clone(); + } + + /** + * Returns the latest version. + * + * @return versionInfo + */ + public static ChunkLayOutVersion getLatestVersion() { + return CHUNK_LAYOUT_VERSION_INFOS[CHUNK_LAYOUT_VERSION_INFOS.length - 1]; + } + + /** + * Return version. + * + * @return int + */ + public int getVersion() { + return version; + } + + /** + * Returns description. + * @return String + */ + public String getDescription() { + return description; + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ChunkManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ChunkManagerImpl.java deleted file mode 100644 index fa820266026..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ChunkManagerImpl.java +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
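A small illustrative sketch (not part of the patch) of resolving a persisted chunk layout version against the ChunkLayOutVersion registry above, falling back to the latest known layout when the lookup does not match a registered version:

    import org.apache.hadoop.ozone.container.common.impl.ChunkLayOutVersion;

    public final class LayoutVersionSketch {
      private LayoutVersionSketch() {
      }

      public static String describe(int persistedVersion) {
        // The lookup rejects versions newer than the latest one via a precondition
        // and returns null for unknown older versions.
        ChunkLayOutVersion layout =
            ChunkLayOutVersion.getChunkLayOutVersion(persistedVersion);
        if (layout == null) {
          layout = ChunkLayOutVersion.getLatestVersion();
        }
        return layout.getVersion() + ": " + layout.getDescription();
      }
    }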
- */ -package org.apache.hadoop.ozone.container.common.impl; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; -import org.apache.hadoop.ozone.container.common.helpers.ChunkUtils; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; -import org.apache.hadoop.ozone.container.common.interfaces.ChunkManager; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; -import java.security.NoSuchAlgorithmException; -import java.util.concurrent.ExecutionException; - -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CONTAINER_INTERNAL_ERROR; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.UNSUPPORTED_REQUEST; - -/** - * An implementation of ChunkManager that is used by default in ozone. - */ -public class ChunkManagerImpl implements ChunkManager { - static final Logger LOG = - LoggerFactory.getLogger(ChunkManagerImpl.class); - - private final ContainerManager containerManager; - - /** - * Constructs a ChunkManager. - * - * @param manager - ContainerManager. - */ - public ChunkManagerImpl(ContainerManager manager) { - this.containerManager = manager; - } - - /** - * writes a given chunk. - * - * @param blockID - ID of the block. - * @param info - ChunkInfo. - * @throws StorageContainerException - */ - @Override - public void writeChunk(BlockID blockID, ChunkInfo info, - byte[] data, ContainerProtos.Stage stage) - throws StorageContainerException { - // we don't want container manager to go away while we are writing chunks. - containerManager.readLock(); - - // TODO : Take keyManager Write lock here. - try { - Preconditions.checkNotNull(blockID, "Block ID cannot be null."); - long containerID = blockID.getContainerID(); - Preconditions.checkState(containerID >= 0, - "Container ID cannot be negative"); - ContainerData container = - containerManager.readContainer(containerID); - File chunkFile = ChunkUtils.validateChunk(container, info); - File tmpChunkFile = getTmpChunkFile(chunkFile, info); - - LOG.debug("writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file", - info.getChunkName(), stage, chunkFile, tmpChunkFile); - switch (stage) { - case WRITE_DATA: - ChunkUtils.writeData(tmpChunkFile, info, data); - break; - case COMMIT_DATA: - commitChunk(tmpChunkFile, chunkFile, containerID, info.getLen()); - break; - case COMBINED: - // directly write to the chunk file - long oldSize = chunkFile.length(); - ChunkUtils.writeData(chunkFile, info, data); - long newSize = chunkFile.length(); - containerManager.incrBytesUsed(containerID, newSize - oldSize); - containerManager.incrWriteCount(containerID); - containerManager.incrWriteBytes(containerID, info.getLen()); - break; - default: - throw new IOException("Can not identify write operation."); - } - } catch (ExecutionException | NoSuchAlgorithmException | IOException e) { - LOG.error("write data failed. 
error: {}", e); - throw new StorageContainerException("Internal error: ", e, - CONTAINER_INTERNAL_ERROR); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.error("write data failed. error: {}", e); - throw new StorageContainerException("Internal error: ", e, - CONTAINER_INTERNAL_ERROR); - } finally { - containerManager.readUnlock(); - } - } - - // Create a temporary file in the same container directory - // in the format ".tmp" - private static File getTmpChunkFile(File chunkFile, ChunkInfo info) - throws StorageContainerException { - return new File(chunkFile.getParent(), - chunkFile.getName() + - OzoneConsts.CONTAINER_CHUNK_NAME_DELIMITER + - OzoneConsts.CONTAINER_TEMPORARY_CHUNK_PREFIX); - } - - // Commit the chunk by renaming the temporary chunk file to chunk file - private void commitChunk(File tmpChunkFile, File chunkFile, - long containerID, long chunkLen) throws IOException { - long sizeDiff = tmpChunkFile.length() - chunkFile.length(); - // It is safe to replace here as the earlier chunk if existing should be - // caught as part of validateChunk - Files.move(tmpChunkFile.toPath(), chunkFile.toPath(), - StandardCopyOption.REPLACE_EXISTING); - containerManager.incrBytesUsed(containerID, sizeDiff); - containerManager.incrWriteCount(containerID); - containerManager.incrWriteBytes(containerID, chunkLen); - } - - /** - * reads the data defined by a chunk. - * - * @param blockID - ID of the block. - * @param info - ChunkInfo. - * @return byte array - * @throws StorageContainerException - * TODO: Right now we do not support partial reads and writes of chunks. - * TODO: Explore if we need to do that for ozone. - */ - @Override - public byte[] readChunk(BlockID blockID, ChunkInfo info) - throws StorageContainerException { - containerManager.readLock(); - try { - Preconditions.checkNotNull(blockID, "Block ID cannot be null."); - long containerID = blockID.getContainerID(); - Preconditions.checkState(containerID >= 0, - "Container ID cannot be negative"); - ContainerData container = - containerManager.readContainer(containerID); - File chunkFile = ChunkUtils.getChunkFile(container, info); - ByteBuffer data = ChunkUtils.readData(chunkFile, info); - containerManager.incrReadCount(containerID); - containerManager.incrReadBytes(containerID, chunkFile.length()); - return data.array(); - } catch (ExecutionException | NoSuchAlgorithmException e) { - LOG.error("read data failed. error: {}", e); - throw new StorageContainerException("Internal error: ", - e, CONTAINER_INTERNAL_ERROR); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.error("read data failed. error: {}", e); - throw new StorageContainerException("Internal error: ", - e, CONTAINER_INTERNAL_ERROR); - } finally { - containerManager.readUnlock(); - } - } - - /** - * Deletes a given chunk. - * - * @param blockID - ID of the block. 
- * @param info - Chunk Info - * @throws StorageContainerException - */ - @Override - public void deleteChunk(BlockID blockID, ChunkInfo info) - throws StorageContainerException { - containerManager.readLock(); - try { - Preconditions.checkNotNull(blockID, "Block ID cannot be null."); - long containerID = blockID.getContainerID(); - Preconditions.checkState(containerID >= 0, - "Container ID cannot be negative"); - - File chunkFile = ChunkUtils.getChunkFile(containerManager - .readContainer(containerID), info); - if ((info.getOffset() == 0) && (info.getLen() == chunkFile.length())) { - FileUtil.fullyDelete(chunkFile); - containerManager.decrBytesUsed(containerID, chunkFile.length()); - } else { - LOG.error("Not Supported Operation. Trying to delete a " + - "chunk that is in shared file. chunk info : " + info.toString()); - throw new StorageContainerException("Not Supported Operation. " + - "Trying to delete a chunk that is in shared file. chunk info : " - + info.toString(), UNSUPPORTED_REQUEST); - } - } finally { - containerManager.readUnlock(); - } - } - - /** - * Shutdown the chunkManager. - * - * In the chunkManager we haven't acquired any resources, so nothing to do - * here. This call is made with containerManager Writelock held. - */ - @Override - public void shutdown() { - Preconditions.checkState(this.containerManager.hasWriteLock()); - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java new file mode 100644 index 00000000000..efea20bdc4b --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java @@ -0,0 +1,435 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.common.impl; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import java.io.IOException; +import java.nio.charset.Charset; +import java.util.List; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos. + ContainerType; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos. 
+ ContainerLifeCycleState; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; + +import java.util.Collections; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicLong; +import org.yaml.snakeyaml.Yaml; + +import static org.apache.hadoop.ozone.OzoneConsts.CHECKSUM; +import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_ID; +import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_TYPE; +import static org.apache.hadoop.ozone.OzoneConsts.LAYOUTVERSION; +import static org.apache.hadoop.ozone.OzoneConsts.MAX_SIZE; +import static org.apache.hadoop.ozone.OzoneConsts.METADATA; +import static org.apache.hadoop.ozone.OzoneConsts.STATE; + +/** + * ContainerData is the in-memory representation of container metadata and is + * represented on disk by the .container file. + */ +public abstract class ContainerData { + + //Type of the container. + // For now, we support only KeyValueContainer. + private final ContainerType containerType; + + // Unique identifier for the container + private final long containerID; + + // Layout version of the container data + private final int layOutVersion; + + // Metadata of the container will be a key value pair. + // This can hold information like volume name, owner etc., + private final Map metadata; + + // State of the Container + private ContainerLifeCycleState state; + + private final long maxSize; + + /** parameters for read/write statistics on the container. **/ + private final AtomicLong readBytes; + private final AtomicLong writeBytes; + private final AtomicLong readCount; + private final AtomicLong writeCount; + private final AtomicLong bytesUsed; + private final AtomicLong keyCount; + + private HddsVolume volume; + + private String checksum; + public static final Charset CHARSET_ENCODING = Charset.forName("UTF-8"); + private static final String DUMMY_CHECKSUM = new String(new byte[64], + CHARSET_ENCODING); + + // Common Fields need to be stored in .container file. + protected static final List YAML_FIELDS = + Collections.unmodifiableList(Lists.newArrayList( + CONTAINER_TYPE, + CONTAINER_ID, + LAYOUTVERSION, + STATE, + METADATA, + MAX_SIZE, + CHECKSUM)); + + /** + * Creates a ContainerData Object, which holds metadata of the container. + * @param type - ContainerType + * @param containerId - ContainerId + * @param size - container maximum size in bytes + */ + protected ContainerData(ContainerType type, long containerId, long size) { + this(type, containerId, + ChunkLayOutVersion.getLatestVersion().getVersion(), size); + } + + /** + * Creates a ContainerData Object, which holds metadata of the container. + * @param type - ContainerType + * @param containerId - ContainerId + * @param layOutVersion - Container layOutVersion + * @param size - Container maximum size in bytes + */ + protected ContainerData(ContainerType type, long containerId, + int layOutVersion, long size) { + Preconditions.checkNotNull(type); + + this.containerType = type; + this.containerID = containerId; + this.layOutVersion = layOutVersion; + this.metadata = new TreeMap<>(); + this.state = ContainerLifeCycleState.OPEN; + this.readCount = new AtomicLong(0L); + this.readBytes = new AtomicLong(0L); + this.writeCount = new AtomicLong(0L); + this.writeBytes = new AtomicLong(0L); + this.bytesUsed = new AtomicLong(0L); + this.keyCount = new AtomicLong(0L); + this.maxSize = size; + setChecksumTo0ByteArray(); + } + + /** + * Returns the containerID. 
+ */ + public long getContainerID() { + return containerID; + } + + /** + * Returns the path to base dir of the container. + * @return Path to base dir. + */ + public abstract String getContainerPath(); + + /** + * Returns the type of the container. + * @return ContainerType + */ + public ContainerType getContainerType() { + return containerType; + } + + + /** + * Returns the state of the container. + * @return ContainerLifeCycleState + */ + public synchronized ContainerLifeCycleState getState() { + return state; + } + + /** + * Set the state of the container. + * @param state + */ + public synchronized void setState(ContainerLifeCycleState state) { + this.state = state; + } + + /** + * Returns the maximum size of the container in bytes. + * @return maxSize in bytes + */ + public long getMaxSize() { + return maxSize; + } + + /** + * Returns the layOutVersion of the actual container data format. + * @return layOutVersion + */ + public int getLayOutVersion() { + return ChunkLayOutVersion.getChunkLayOutVersion(layOutVersion).getVersion(); + } + + /** + * Add/Update metadata. + * We should hold the container lock before updating the metadata as this + * will be persisted on disk. Unless, we are reconstructing ContainerData + * from protoBuf or from on disk .container file in which case lock is not + * required. + */ + public void addMetadata(String key, String value) { + metadata.put(key, value); + } + + /** + * Returns the metadata of the container. + * @return metadata + */ + public Map<String, String> getMetadata() { + return Collections.unmodifiableMap(this.metadata); + } + + /** + * Set metadata. + * We should hold the container lock before updating the metadata as this + * will be persisted on disk. Unless, we are reconstructing ContainerData + * from protoBuf or from on disk .container file in which case lock is not + * required. + */ + public void setMetadata(Map<String, String> metadataMap) { + metadata.clear(); + metadata.putAll(metadataMap); + } + + /** + * Checks if the container is open. + * @return - boolean + */ + public synchronized boolean isOpen() { + return ContainerLifeCycleState.OPEN == state; + } + + /** + * Checks that the container is not in an invalid state. + * @return - boolean + */ + public synchronized boolean isValid() { + return !(ContainerLifeCycleState.INVALID == state); + } + + /** + * Checks if the container is closed. + * @return - boolean + */ + public synchronized boolean isClosed() { + return ContainerLifeCycleState.CLOSED == state; + } + + /** + * Marks this container as closed. + */ + public synchronized void closeContainer() { + setState(ContainerLifeCycleState.CLOSED); + } + + /** + * Get the number of bytes read from the container. + * @return the number of bytes read from the container. + */ + public long getReadBytes() { + return readBytes.get(); + } + + /** + * Increase the number of bytes read from the container. + * @param bytes number of bytes read. + */ + public void incrReadBytes(long bytes) { + this.readBytes.addAndGet(bytes); + } + + /** + * Get the number of times the container is read. + * @return the number of times the container is read. + */ + public long getReadCount() { + return readCount.get(); + } + + /** + * Increase the container read count by 1. + */ + public void incrReadCount() { + this.readCount.incrementAndGet(); + } + + /** + * Get the number of bytes written into the container. + * @return the number of bytes written into the container. + */ + public long getWriteBytes() { + return writeBytes.get(); + } + + /** + * Increase the number of bytes written into the container.
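A brief sketch of the metadata and lifecycle-state accessors above, using the KeyValueContainerData subclass that this patch wires into the YAML machinery further below; the metadata key/value and the maximum size are invented:

    import org.apache.hadoop.ozone.container.common.impl.ChunkLayOutVersion;
    import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;

    public final class ContainerDataSketch {
      private ContainerDataSketch() {
      }

      public static KeyValueContainerData newClosedContainer(long containerId) {
        long maxSize = 5L * 1024 * 1024 * 1024;   // arbitrary 5 GB limit
        KeyValueContainerData data = new KeyValueContainerData(containerId,
            ChunkLayOutVersion.getLatestVersion().getVersion(), maxSize);
        // Metadata is a plain string-to-string map that is persisted with the
        // container; callers normally hold the container lock while mutating it.
        data.addMetadata("owner", "example-volume");
        // The state starts as OPEN; closeContainer() moves it to CLOSED.
        data.closeContainer();
        return data;   // data.isClosed() and data.isValid() are now both true
      }
    }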
+ * @param bytes the number of bytes written into the container. + */ + public void incrWriteBytes(long bytes) { + this.writeBytes.addAndGet(bytes); + } + + /** + * Get the number of writes into the container. + * @return the number of writes into the container. + */ + public long getWriteCount() { + return writeCount.get(); + } + + /** + * Increase the number of writes into the container by 1. + */ + public void incrWriteCount() { + this.writeCount.incrementAndGet(); + } + + /** + * Sets the number of bytes used by the container. + * @param used + */ + public void setBytesUsed(long used) { + this.bytesUsed.set(used); + } + + /** + * Get the number of bytes used by the container. + * @return the number of bytes used by the container. + */ + public long getBytesUsed() { + return bytesUsed.get(); + } + + /** + * Increase the number of bytes used by the container. + * @param used number of bytes used by the container. + * @return the current number of bytes used by the container after increase. + */ + public long incrBytesUsed(long used) { + return this.bytesUsed.addAndGet(used); + } + + /** + * Decrease the number of bytes used by the container. + * @param reclaimed the number of bytes reclaimed from the container. + * @return the current number of bytes used by the container after decrease. + */ + public long decrBytesUsed(long reclaimed) { + return this.bytesUsed.addAndGet(-1L * reclaimed); + } + + /** + * Set the Volume for the Container. + * This should be called only from the createContainer. + * @param hddsVolume + */ + public void setVolume(HddsVolume hddsVolume) { + this.volume = hddsVolume; + } + + /** + * Returns the volume of the Container. + * @return HddsVolume + */ + public HddsVolume getVolume() { + return volume; + } + + /** + * Increments the number of keys in the container. + */ + public void incrKeyCount() { + this.keyCount.incrementAndGet(); + } + + /** + * Decrements the number of keys in the container. + */ + public void decrKeyCount() { + this.keyCount.decrementAndGet(); + } + + /** + * Returns the number of keys in the container. + * @return key count + */ + public long getKeyCount() { + return this.keyCount.get(); + } + + /** + * Sets the number of keys in the container. + * @param count + */ + public void setKeyCount(long count) { + this.keyCount.set(count); + } + + public void setChecksumTo0ByteArray() { + this.checksum = DUMMY_CHECKSUM; + } + + public void setChecksum(String checkSum) { + this.checksum = checkSum; + } + + public String getChecksum() { + return this.checksum; + } + + /** + * Compute the checksum for ContainerData using the specified Yaml (based + * on ContainerType) and set the checksum. + * + * Checksum of ContainerData is calculated by setting the + * {@link ContainerData#checksum} field to a 64-byte array with all 0's - + * {@link ContainerData#DUMMY_CHECKSUM}. After the checksum is calculated, + * the checksum field is updated with this value. + * + * @param yaml Yaml for ContainerType to get the ContainerData as Yaml String + * @throws IOException + */ + public void computeAndSetChecksum(Yaml yaml) throws IOException { + // Set checksum to dummy value - 0 byte array, to calculate the checksum + // of rest of the data. + setChecksumTo0ByteArray(); + + // Dump yaml data into a string to compute its checksum + String containerDataYamlStr = yaml.dump(this); + + this.checksum = ContainerUtils.getChecksum(containerDataYamlStr); + } + + /** + * Returns a ProtoBuf Message from ContainerData.
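Building on the computeAndSetChecksum() contract above, a hedged sketch (helper name invented, using the ContainerDataYaml class introduced later in this patch) of verifying a loaded container's checksum by recomputing it from the same YAML representation:

    import java.io.IOException;
    import org.apache.hadoop.ozone.container.common.impl.ContainerData;
    import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
    import org.yaml.snakeyaml.Yaml;

    public final class ChecksumCheckSketch {
      private ChecksumCheckSketch() {
      }

      /** Returns true if the stored checksum matches a freshly computed one. */
      public static boolean verify(ContainerData data) throws IOException {
        String stored = data.getChecksum();
        Yaml yaml = ContainerDataYaml.getYamlForContainerType(data.getContainerType());
        // computeAndSetChecksum() zeroes the checksum field, dumps the object to a
        // YAML string, hashes that string and stores the result back in the field.
        data.computeAndSetChecksum(yaml);
        return stored.equals(data.getChecksum());
      }
    }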
+ * + * @return Protocol Buffer Message + */ + public abstract ContainerProtos.ContainerData getProtoBufMessage(); +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java new file mode 100644 index 00000000000..65262d4b738 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.impl; + +import java.beans.IntrospectionException; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerType; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; + +import com.google.common.base.Preconditions; +import static org.apache.hadoop.ozone.container.keyvalue + .KeyValueContainerData.KEYVALUE_YAML_TAG; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.yaml.snakeyaml.Yaml; +import org.yaml.snakeyaml.constructor.AbstractConstruct; +import org.yaml.snakeyaml.constructor.Constructor; +import org.yaml.snakeyaml.introspector.BeanAccess; +import org.yaml.snakeyaml.introspector.Property; +import org.yaml.snakeyaml.introspector.PropertyUtils; +import org.yaml.snakeyaml.nodes.MappingNode; +import org.yaml.snakeyaml.nodes.Node; +import org.yaml.snakeyaml.nodes.ScalarNode; +import org.yaml.snakeyaml.nodes.Tag; +import org.yaml.snakeyaml.representer.Representer; + +/** + * Class for creating and reading .container files. + */ + +public final class ContainerDataYaml { + + private static final Logger LOG = + LoggerFactory.getLogger(ContainerDataYaml.class); + + private ContainerDataYaml() { + + } + + /** + * Creates a .container file in yaml format. 
+ * + * @param containerFile + * @param containerData + * @throws IOException + */ + public static void createContainerFile(ContainerType containerType, + ContainerData containerData, File containerFile) throws IOException { + Writer writer = null; + try { + // Create Yaml for given container type + Yaml yaml = getYamlForContainerType(containerType); + // Compute Checksum and update ContainerData + containerData.computeAndSetChecksum(yaml); + + // Write the ContainerData with checksum to Yaml file. + writer = new OutputStreamWriter(new FileOutputStream( + containerFile), "UTF-8"); + yaml.dump(containerData, writer); + + } finally { + try { + if (writer != null) { + writer.close(); + } + } catch (IOException ex) { + LOG.warn("Error occurred during closing the writer. ContainerID: " + + containerData.getContainerID()); + } + } + } + + /** + * Read the yaml file, and return containerData. + * + * @throws IOException + */ + public static ContainerData readContainerFile(File containerFile) + throws IOException { + Preconditions.checkNotNull(containerFile, "containerFile cannot be null"); + try (FileInputStream inputFileStream = new FileInputStream(containerFile)) { + return readContainer(inputFileStream); + } + + } + + /** + * Read the yaml file content, and return containerData. + * + * @throws IOException + */ + public static ContainerData readContainer(byte[] containerFileContent) + throws IOException { + return readContainer( + new ByteArrayInputStream(containerFileContent)); + } + + /** + * Read the yaml content, and return containerData. + * + * @throws IOException + */ + public static ContainerData readContainer(InputStream input) + throws IOException { + + ContainerData containerData; + PropertyUtils propertyUtils = new PropertyUtils(); + propertyUtils.setBeanAccess(BeanAccess.FIELD); + propertyUtils.setAllowReadOnlyProperties(true); + + Representer representer = new ContainerDataRepresenter(); + representer.setPropertyUtils(propertyUtils); + + Constructor containerDataConstructor = new ContainerDataConstructor(); + + Yaml yaml = new Yaml(containerDataConstructor, representer); + yaml.setBeanAccess(BeanAccess.FIELD); + + containerData = (ContainerData) + yaml.load(input); + + return containerData; + } + + /** + * Given a ContainerType this method returns a Yaml representation of + * the container properties. + * + * @param containerType type of container + * @return Yamal representation of container properties + * + * @throws StorageContainerException if the type is unrecognized + */ + public static Yaml getYamlForContainerType(ContainerType containerType) + throws StorageContainerException { + PropertyUtils propertyUtils = new PropertyUtils(); + propertyUtils.setBeanAccess(BeanAccess.FIELD); + propertyUtils.setAllowReadOnlyProperties(true); + + switch (containerType) { + case KeyValueContainer: + Representer representer = new ContainerDataRepresenter(); + representer.setPropertyUtils(propertyUtils); + representer.addClassTag( + KeyValueContainerData.class, + KeyValueContainerData.KEYVALUE_YAML_TAG); + + Constructor keyValueDataConstructor = new ContainerDataConstructor(); + + return new Yaml(keyValueDataConstructor, representer); + default: + throw new StorageContainerException("Unrecognized container Type " + + "format " + containerType, ContainerProtos.Result + .UNKNOWN_CONTAINER_TYPE); + } + } + + /** + * Representer class to define which fields need to be stored in yaml file. 
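A minimal round-trip sketch for the ContainerDataYaml API above, assuming a fully populated KeyValueContainerData: persist it as a .container file and parse it back:

    import java.io.File;
    import java.io.IOException;
    import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType;
    import org.apache.hadoop.ozone.container.common.impl.ContainerData;
    import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
    import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;

    public final class ContainerFileSketch {
      private ContainerFileSketch() {
      }

      public static ContainerData writeAndReload(KeyValueContainerData data,
          File containerFile) throws IOException {
        // Persist: computes the checksum and dumps only the whitelisted YAML fields.
        ContainerDataYaml.createContainerFile(
            ContainerType.KeyValueContainer, data, containerFile);
        // Reload: parses the YAML back into a KeyValueContainerData instance.
        return ContainerDataYaml.readContainerFile(containerFile);
      }
    }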
+ */ + private static class ContainerDataRepresenter extends Representer { + @Override + protected Set getProperties(Class type) + throws IntrospectionException { + Set set = super.getProperties(type); + Set filtered = new TreeSet(); + + // When a new Container type is added, we need to add what fields need + // to be filtered here + if (type.equals(KeyValueContainerData.class)) { + List yamlFields = KeyValueContainerData.getYamlFields(); + // filter properties + for (Property prop : set) { + String name = prop.getName(); + if (yamlFields.contains(name)) { + filtered.add(prop); + } + } + } + return filtered; + } + } + + /** + * Constructor class for KeyValueData, which will be used by Yaml. + */ + private static class ContainerDataConstructor extends Constructor { + ContainerDataConstructor() { + //Adding our own specific constructors for tags. + // When a new Container type is added, we need to add yamlConstructor + // for that + this.yamlConstructors.put( + KEYVALUE_YAML_TAG, new ConstructKeyValueContainerData()); + this.yamlConstructors.put(Tag.INT, new ConstructLong()); + } + + private class ConstructKeyValueContainerData extends AbstractConstruct { + public Object construct(Node node) { + MappingNode mnode = (MappingNode) node; + Map nodes = constructMapping(mnode); + + //Needed this, as TAG.INT type is by default converted to Long. + long layOutVersion = (long) nodes.get(OzoneConsts.LAYOUTVERSION); + int lv = (int) layOutVersion; + + long size = (long) nodes.get(OzoneConsts.MAX_SIZE); + + //When a new field is added, it needs to be added here. + KeyValueContainerData kvData = new KeyValueContainerData( + (long) nodes.get(OzoneConsts.CONTAINER_ID), lv, size); + + kvData.setContainerDBType((String)nodes.get( + OzoneConsts.CONTAINER_DB_TYPE)); + kvData.setMetadataPath((String) nodes.get( + OzoneConsts.METADATA_PATH)); + kvData.setChunksPath((String) nodes.get(OzoneConsts.CHUNKS_PATH)); + Map meta = (Map) nodes.get(OzoneConsts.METADATA); + kvData.setMetadata(meta); + kvData.setChecksum((String) nodes.get(OzoneConsts.CHECKSUM)); + String state = (String) nodes.get(OzoneConsts.STATE); + switch (state) { + case "OPEN": + kvData.setState(ContainerProtos.ContainerLifeCycleState.OPEN); + break; + case "CLOSING": + kvData.setState(ContainerProtos.ContainerLifeCycleState.CLOSING); + break; + case "CLOSED": + kvData.setState(ContainerProtos.ContainerLifeCycleState.CLOSED); + break; + default: + throw new IllegalStateException("Unexpected " + + "ContainerLifeCycleState " + state + " for the containerId " + + nodes.get(OzoneConsts.CONTAINER_ID)); + } + return kvData; + } + } + + //Below code is taken from snake yaml, as snakeyaml tries to fit the + // number if it fits in integer, otherwise returns long. So, slightly + // modified the code to return long in all cases. 
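A tiny standalone example of the SnakeYAML behaviour the comment above refers to, and the reason Tag.INT is overridden with ConstructLong:

    import org.yaml.snakeyaml.Yaml;

    public final class YamlIntSketch {
      public static void main(String[] args) {
        Yaml yaml = new Yaml();
        // Stock SnakeYAML narrows small integers: prints "Integer".
        System.out.println(yaml.load("123").getClass().getSimpleName());
        // It only widens when the value does not fit in an int: prints "Long".
        System.out.println(yaml.load("12345678901").getClass().getSimpleName());
        // ContainerDataConstructor maps Tag.INT to ConstructLong so that fields
        // such as layOutVersion and maxSize always deserialize as Long.
      }
    }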
+ private class ConstructLong extends AbstractConstruct { + public Object construct(Node node) { + String value = constructScalar((ScalarNode) node).toString() + .replaceAll("_", ""); + int sign = +1; + char first = value.charAt(0); + if (first == '-') { + sign = -1; + value = value.substring(1); + } else if (first == '+') { + value = value.substring(1); + } + int base = 10; + if ("0".equals(value)) { + return Long.valueOf(0); + } else if (value.startsWith("0b")) { + value = value.substring(2); + base = 2; + } else if (value.startsWith("0x")) { + value = value.substring(2); + base = 16; + } else if (value.startsWith("0")) { + value = value.substring(1); + base = 8; + } else if (value.indexOf(':') != -1) { + String[] digits = value.split(":"); + int bes = 1; + int val = 0; + for (int i = 0, j = digits.length; i < j; i++) { + val += (Long.parseLong(digits[(j - i) - 1]) * bes); + bes *= 60; + } + return createNumber(sign, String.valueOf(val), 10); + } else { + return createNumber(sign, value, 10); + } + return createNumber(sign, value, base); + } + } + + private Number createNumber(int sign, String number, int radix) { + Number result; + if (sign < 0) { + number = "-" + number; + } + result = Long.valueOf(number, radix); + return result; + } + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerLocationManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerLocationManagerImpl.java deleted file mode 100644 index 5f5b81f4808..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerLocationManagerImpl.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.container.common.impl; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.server.datanode.StorageLocation; -import org.apache.hadoop.metrics2.util.MBeans; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.common.interfaces - .ContainerLocationManager; -import org.apache.hadoop.ozone.container.common.interfaces - .ContainerLocationManagerMXBean; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.management.ObjectName; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.LinkedList; -import java.util.List; - -/** - * A class that tells the ContainerManager where to place the containers. - * Please note : There is *no* one-to-one correlation between metadata - * Locations and data Locations. 
- * - * For example : A user could map all container files to a - * SSD but leave data/metadata on bunch of other disks. - */ -public class ContainerLocationManagerImpl implements ContainerLocationManager, - ContainerLocationManagerMXBean { - private static final Logger LOG = - LoggerFactory.getLogger(ContainerLocationManagerImpl.class); - - private final List dataLocations; - private int currentIndex; - private final List metadataLocations; - private final ObjectName jmxbean; - - /** - * Constructs a Location Manager. - * @param metadataLocations - Refers to the metadataLocations - * where we store the container metadata. - * @param dataDirs - metadataLocations where we store the actual - * data or chunk files. - * @param conf - configuration. - * @throws IOException - */ - public ContainerLocationManagerImpl(List metadataLocations, - List dataDirs, Configuration conf) - throws IOException { - dataLocations = new LinkedList<>(); - for (StorageLocation dataDir : dataDirs) { - dataLocations.add(new ContainerStorageLocation(dataDir, conf)); - } - this.metadataLocations = metadataLocations; - jmxbean = MBeans.register("OzoneDataNode", - ContainerLocationManager.class.getSimpleName(), this); - } - - /** - * Returns the path where the container should be placed from a set of - * metadataLocations. - * - * @return A path where we should place this container and metadata. - * @throws IOException - */ - @Override - public Path getContainerPath() - throws IOException { - Preconditions.checkState(metadataLocations.size() > 0); - int index = currentIndex % metadataLocations.size(); - return Paths.get(metadataLocations.get(index).getNormalizedUri()); - } - - /** - * Returns the path where the container Data file are stored. - * - * @return a path where we place the LevelDB and data files of a container. - * @throws IOException - */ - @Override - public Path getDataPath(String containerName) throws IOException { - Path currentPath = Paths.get( - dataLocations.get(currentIndex++ % dataLocations.size()) - .getNormalizedUri()); - currentPath = currentPath.resolve(OzoneConsts.CONTAINER_PREFIX); - return currentPath.resolve(containerName); - } - - @Override - public StorageLocationReport[] getLocationReport() throws IOException { - boolean failed; - StorageLocationReport[] reports = - new StorageLocationReport[dataLocations.size()]; - for (int idx = 0; idx < dataLocations.size(); idx++) { - ContainerStorageLocation loc = dataLocations.get(idx); - long scmUsed = 0; - long remaining = 0; - failed = false; - try { - scmUsed = loc.getScmUsed(); - remaining = loc.getAvailable(); - } catch (IOException ex) { - LOG.warn("Failed to get scmUsed and remaining for container " + - "storage location {}", loc.getNormalizedUri()); - // reset scmUsed and remaining if df/du failed. - scmUsed = 0; - remaining = 0; - failed = true; - } - - StorageLocationReport.Builder builder = - StorageLocationReport.newBuilder(); - builder.setStorageLocation(loc.getStorageLocation()) - .setId(loc.getStorageUuId()) - .setFailed(failed) - .setCapacity(loc.getCapacity()) - .setRemaining(remaining) - .setScmUsed(scmUsed) - .setStorageType(loc.getStorageType()); - StorageLocationReport r = builder.build(); - reports[idx] = r; - } - return reports; - } - - /** - * Supports clean shutdown of container location du threads. 
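The removed location manager above spreads containers over its configured directories with a modulo round-robin on an incrementing index; a self-contained sketch of just that selection, with invented directory and subdirectory names:

    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.Arrays;
    import java.util.List;

    public final class RoundRobinSketch {
      private final List<Path> dataDirs;
      private int currentIndex;

      public RoundRobinSketch(List<Path> dataDirs) {
        this.dataDirs = dataDirs;
      }

      /** Mirrors getDataPath(): advance the index, pick a directory modulo the list size. */
      public Path next(String containerName) {
        Path dir = dataDirs.get(currentIndex++ % dataDirs.size());
        return dir.resolve("containers").resolve(containerName);
      }

      public static void main(String[] args) {
        RoundRobinSketch picker = new RoundRobinSketch(
            Arrays.asList(Paths.get("/data1"), Paths.get("/data2")));
        System.out.println(picker.next("1"));   // /data1/containers/1
        System.out.println(picker.next("2"));   // /data2/containers/2
      }
    }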
- * - * @throws IOException - */ - @Override - public void shutdown() throws IOException { - for (ContainerStorageLocation loc: dataLocations) { - loc.shutdown(); - } - MBeans.unregister(jmxbean); - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerManagerImpl.java deleted file mode 100644 index e81f1c6c725..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerManagerImpl.java +++ /dev/null @@ -1,1120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.container.common.impl; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Longs; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.hdfs.server.datanode.StorageLocation; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerLifeCycleState; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.NodeReportProto; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.StorageReportProto; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; -import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.helpers.KeyData; -import org.apache.hadoop.ozone.container.common.helpers.KeyUtils; -import org.apache.hadoop.ozone.container.common.interfaces.ChunkManager; -import org.apache.hadoop.ozone.container.common.interfaces - .ContainerDeletionChoosingPolicy; -import org.apache.hadoop.ozone.container.common.interfaces - .ContainerLocationManager; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; -import 
org.apache.hadoop.ozone.container.common.interfaces.KeyManager; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hadoop.utils.MetadataKeyFilters; -import org.apache.hadoop.utils.MetadataStore; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.FilenameFilter; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.security.DigestInputStream; -import java.security.DigestOutputStream; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentNavigableMap; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.locks.ReentrantReadWriteLock; -import java.util.stream.Collectors; - -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CONTAINER_EXISTS; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CONTAINER_INTERNAL_ERROR; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CONTAINER_NOT_FOUND; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.ERROR_IN_COMPACT_DB; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.INVALID_CONFIG; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.IO_EXCEPTION; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.NO_SUCH_ALGORITHM; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.UNABLE_TO_READ_METADATA_DB; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.UNCLOSED_CONTAINER_IO; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.UNSUPPORTED_REQUEST; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos. - Result.INVALID_CONTAINER_STATE; -import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_EXTENSION; - -/** - * A Generic ContainerManagerImpl that will be called from Ozone - * ContainerManagerImpl. This allows us to support delta changes to ozone - * version without having to rewrite the containerManager. - */ -public class ContainerManagerImpl implements ContainerManager { - static final Logger LOG = - LoggerFactory.getLogger(ContainerManagerImpl.class); - - // TODO: consider primitive collection like eclipse-collections - // to avoid autoboxing overhead - private final ConcurrentSkipListMap - containerMap = new ConcurrentSkipListMap<>(); - - // Use a non-fair RW lock for better throughput, we may revisit this decision - // if this causes fairness issues. - private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); - private ContainerLocationManager locationManager; - private ChunkManager chunkManager; - private KeyManager keyManager; - private Configuration conf; - - private ContainerDeletionChoosingPolicy containerDeletionChooser; - - /** - * Init call that sets up a container Manager. - * - * @param config - Configuration. - * @param containerDirs - List of Metadata Container locations. - * @param dnDetails - DatanodeDetails. 
- * @throws IOException - */ - @Override - public void init( - Configuration config, List containerDirs, - DatanodeDetails dnDetails) throws IOException { - Preconditions.checkNotNull(config, "Config must not be null"); - Preconditions.checkNotNull(containerDirs, "Container directories cannot " + - "be null"); - Preconditions.checkNotNull(dnDetails, "Datanode Details cannot " + - "be null"); - - Preconditions.checkState(containerDirs.size() > 0, "Number of container" + - " directories must be greater than zero."); - - this.conf = config; - - readLock(); - try { - containerDeletionChooser = ReflectionUtils.newInstance(conf.getClass( - ScmConfigKeys.OZONE_SCM_CONTAINER_DELETION_CHOOSING_POLICY, - TopNOrderedContainerDeletionChoosingPolicy.class, - ContainerDeletionChoosingPolicy.class), conf); - - for (StorageLocation path : containerDirs) { - File directory = Paths.get(path.getNormalizedUri()).toFile(); - if (!directory.exists() && !directory.mkdirs()) { - LOG.error("Container metadata directory doesn't exist " - + "and cannot be created. Path: {}", path.toString()); - throw new StorageContainerException("Container metadata " - + "directory doesn't exist and cannot be created " + path - .toString(), INVALID_CONFIG); - } - - // TODO: This will fail if any directory is invalid. - // We should fix this to handle invalid directories and continue. - // Leaving it this way to fail fast for time being. - if (!directory.isDirectory()) { - LOG.error("Invalid path to container metadata directory. path: {}", - path.toString()); - throw new StorageContainerException("Invalid path to container " + - "metadata directory." + path, INVALID_CONFIG); - } - LOG.info("Loading containers under {}", path); - File[] files = directory.listFiles(new ContainerFilter()); - if (files != null) { - for (File containerFile : files) { - LOG.debug("Loading container {}", containerFile); - String containerPath = - ContainerUtils.getContainerNameFromFile(containerFile); - Preconditions.checkNotNull(containerPath, "Container path cannot" + - " be null"); - readContainerInfo(containerPath); - } - } - } - - List dataDirs = new LinkedList<>(); - for (String dir : config.getStrings(DFS_DATANODE_DATA_DIR_KEY)) { - StorageLocation location = StorageLocation.parse(dir); - dataDirs.add(location); - } - this.locationManager = - new ContainerLocationManagerImpl(containerDirs, dataDirs, config); - } finally { - readUnlock(); - } - } - - /** - * Reads the Container Info from a file and verifies that checksum match. If - * the checksums match, then that file is added to containerMap. - * - * @param containerName - Name which points to the persisted container. 
- * @throws StorageContainerException - */ - private void readContainerInfo(String containerName) - throws StorageContainerException { - Preconditions.checkState(containerName.length() > 0, - "Container name length cannot be zero."); - FileInputStream containerStream = null; - DigestInputStream dis = null; - FileInputStream metaStream = null; - Path cPath = Paths.get(containerName).getFileName(); - String keyName = null; - if (cPath != null) { - keyName = cPath.toString(); - } - Preconditions.checkNotNull(keyName, - "Container Name to container key mapping is null"); - - long containerID = Long.parseLong(keyName); - try { - String containerFileName = containerName.concat(CONTAINER_EXTENSION); - - containerStream = new FileInputStream(containerFileName); - - ContainerProtos.ContainerData containerDataProto = - ContainerProtos.ContainerData.parseDelimitedFrom(containerStream); - ContainerData containerData; - if (containerDataProto == null) { - // Sometimes container metadata might have been created but empty, - // when loading the info we get a null, this often means last time - // SCM was ending up at some middle phase causing that the metadata - // was not populated. Such containers are marked as inactive. - ContainerData cData = new ContainerData(containerID, conf, - ContainerLifeCycleState.INVALID); - containerMap.put(containerID, cData); - return; - } - containerData = ContainerData.getFromProtBuf(containerDataProto, conf); - - // Initialize pending deletion blocks and deleted blocks count in - // in-memory containerData. - MetadataStore metadata = KeyUtils.getDB(containerData, conf); - List> underDeletionBlocks = metadata - .getSequentialRangeKVs(null, Integer.MAX_VALUE, - MetadataKeyFilters.getDeletingKeyFilter()); - byte[] transactionID = metadata.get(DFSUtil.string2Bytes( - OzoneConsts.DELETE_TRANSACTION_KEY_PREFIX + containerID)); - if (transactionID != null) { - containerData - .updateDeleteTransactionId(Longs.fromByteArray(transactionID)); - } - containerData.incrPendingDeletionBlocks(underDeletionBlocks.size()); - - List> liveKeys = metadata - .getRangeKVs(null, Integer.MAX_VALUE, - MetadataKeyFilters.getNormalKeyFilter()); - - // Get container bytesUsed upon loading container - // The in-memory state is updated upon key write or delete - // TODO: update containerDataProto and persist it into container MetaFile - long bytesUsed = 0; - bytesUsed = liveKeys.parallelStream().mapToLong(e-> { - KeyData keyData; - try { - keyData = KeyUtils.getKeyData(e.getValue()); - return keyData.getSize(); - } catch (IOException ex) { - return 0L; - } - }).sum(); - containerData.setBytesUsed(bytesUsed); - - containerMap.put(containerID, containerData); - } catch (IOException ex) { - LOG.error("read failed for file: {} ex: {}", containerName, - ex.getMessage()); - - // TODO : Add this file to a recovery Queue. - - // Remember that this container is busted and we cannot use it. - ContainerData cData = new ContainerData(containerID, conf, - ContainerLifeCycleState.INVALID); - containerMap.put(containerID, cData); - throw new StorageContainerException("Unable to read container info", - UNABLE_TO_READ_METADATA_DB); - } finally { - IOUtils.closeStream(dis); - IOUtils.closeStream(containerStream); - IOUtils.closeStream(metaStream); - } - } - - /** - * Creates a container with the given name. - * - * @param containerData - Container Name and metadata. 
- * @throws StorageContainerException - Exception - */ - @Override - public void createContainer(ContainerData containerData) - throws StorageContainerException { - Preconditions.checkNotNull(containerData, "Container data cannot be null"); - writeLock(); - try { - if (containerMap.containsKey(containerData.getContainerID())) { - LOG.debug("container already exists. {}", - containerData.getContainerID()); - throw new StorageContainerException("container already exists.", - CONTAINER_EXISTS); - } - - // This is by design. We first write and close the - // container Info and metadata to a directory. - // Then read back and put that info into the containerMap. - // This allows us to make sure that our write is consistent. - - writeContainerInfo(containerData, false); - File cFile = new File(containerData.getContainerPath()); - readContainerInfo(ContainerUtils.getContainerNameFromFile(cFile)); - } catch (NoSuchAlgorithmException ex) { - LOG.error("Internal error: We seem to be running a JVM without a " + - "needed hash algorithm."); - throw new StorageContainerException("failed to create container", - NO_SUCH_ALGORITHM); - } finally { - writeUnlock(); - } - - } - - /** - * Writes a container to a chosen location and updates the container Map. - * - * The file formats of ContainerData and Container Meta is the following. - * - * message ContainerData { - * required string name = 1; - * repeated KeyValue metadata = 2; - * optional string dbPath = 3; - * optional string containerPath = 4; - * optional int64 bytesUsed = 5; - * optional int64 size = 6; - * } - * - * message ContainerMeta { - * required string fileName = 1; - * required string hash = 2; - * } - * - * @param containerData - container Data - * @param overwrite - Whether we are overwriting. - * @throws StorageContainerException, NoSuchAlgorithmException - */ - private void writeContainerInfo(ContainerData containerData, - boolean overwrite) - throws StorageContainerException, NoSuchAlgorithmException { - - Preconditions.checkNotNull(this.locationManager, - "Internal error: location manager cannot be null"); - - FileOutputStream containerStream = null; - DigestOutputStream dos = null; - FileOutputStream metaStream = null; - - try { - Path metadataPath = null; - Path location = (!overwrite) ? 
locationManager.getContainerPath(): - Paths.get(containerData.getContainerPath()).getParent(); - if (location == null) { - throw new StorageContainerException( - "Failed to get container file path.", - CONTAINER_INTERNAL_ERROR); - } - - File containerFile = ContainerUtils.getContainerFile(containerData, - location); - String containerName = Long.toString(containerData.getContainerID()); - - if(!overwrite) { - ContainerUtils.verifyIsNewContainer(containerFile); - metadataPath = this.locationManager.getDataPath(containerName); - metadataPath = ContainerUtils.createMetadata(metadataPath, - containerName, conf); - } else { - metadataPath = ContainerUtils.getMetadataDirectory(containerData); - } - - containerStream = new FileOutputStream(containerFile); - - MessageDigest sha = MessageDigest.getInstance(OzoneConsts.FILE_HASH); - - dos = new DigestOutputStream(containerStream, sha); - containerData.setDBPath(metadataPath.resolve( - ContainerUtils.getContainerDbFileName(containerName)) - .toString()); - containerData.setContainerPath(containerFile.toString()); - - if(containerData.getContainerDBType() == null) { - String impl = conf.getTrimmed(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, - OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_DEFAULT); - containerData.setContainerDBType(impl); - } - - ContainerProtos.ContainerData protoData = containerData - .getProtoBufMessage(); - protoData.writeDelimitedTo(dos); - - } catch (IOException ex) { - // TODO : we need to clean up partially constructed files - // The proper way to do would be for a thread - // to read all these 3 artifacts and make sure they are - // sane. That info needs to come from the replication - // pipeline, and if not consistent delete these file. - - // In case of ozone this is *not* a deal breaker since - // SCM is guaranteed to generate unique container names. - // The saving grace is that we check if we have residue files - // lying around when creating a new container. We need to queue - // this information to a cleaner thread. - - LOG.error("Creation of container failed. Name: {}, we might need to " + - "cleanup partially created artifacts. ", - containerData.getContainerID(), ex); - throw new StorageContainerException("Container creation failed. ", - ex, CONTAINER_INTERNAL_ERROR); - } finally { - IOUtils.closeStream(dos); - IOUtils.closeStream(containerStream); - IOUtils.closeStream(metaStream); - } - } - - /** - * Deletes an existing container. - * - * @param containerID - ID of the container. - * @param forceDelete - whether this container should be deleted forcibly. - * @throws StorageContainerException - */ - @Override - public void deleteContainer(long containerID, - boolean forceDelete) throws StorageContainerException { - Preconditions.checkState(containerID >= 0, - "Container ID cannot be negative."); - writeLock(); - try { - if (isOpen(containerID)) { - throw new StorageContainerException( - "Deleting an open container is not allowed.", - UNCLOSED_CONTAINER_IO); - } - - ContainerData containerData = containerMap.get(containerID); - if (containerData == null) { - LOG.debug("No such container. ID: {}", containerID); - throw new StorageContainerException("No such container. ID : " + - containerID, CONTAINER_NOT_FOUND); - } - - if(!containerData.isValid()) { - LOG.debug("Invalid container data. ID: {}", containerID); - throw new StorageContainerException("Invalid container data. 
Name : " + - containerID, CONTAINER_NOT_FOUND); - } - ContainerUtils.removeContainer(containerData, conf, forceDelete); - containerMap.remove(containerID); - } catch (StorageContainerException e) { - throw e; - } catch (IOException e) { - // TODO : An I/O error during delete can leave partial artifacts on the - // disk. We will need the cleaner thread to cleanup this information. - String errMsg = String.format("Failed to cleanup container. ID: %d", - containerID); - LOG.error(errMsg, e); - throw new StorageContainerException(errMsg, e, IO_EXCEPTION); - } finally { - writeUnlock(); - } - } - - /** - * A simple interface for container Iterations. - *


- * This call make no guarantees about consistency of the data between - * different list calls. It just returns the best known data at that point of - * time. It is possible that using this iteration you can miss certain - * container from the listing. - * - * @param startContainerID - Return containers with ID >= startContainerID. - * @param count - how many to return - * @param data - Actual containerData - * @throws StorageContainerException - */ - @Override - public void listContainer(long startContainerID, long count, - List data) throws StorageContainerException { - Preconditions.checkNotNull(data, - "Internal assertion: data cannot be null"); - Preconditions.checkState(startContainerID >= 0, - "Start container ID cannot be negative"); - Preconditions.checkState(count > 0, - "max number of containers returned " + - "must be positive"); - - readLock(); - try { - ConcurrentNavigableMap map; - if (startContainerID == 0) { - map = containerMap.tailMap(containerMap.firstKey(), true); - } else { - map = containerMap.tailMap(startContainerID, false); - } - - int currentCount = 0; - for (ContainerData entry : map.values()) { - if (currentCount < count) { - data.add(entry); - currentCount++; - } else { - return; - } - } - } finally { - readUnlock(); - } - } - - /** - * Get metadata about a specific container. - * - * @param containerID - ID of the container - * @return ContainerData - Container Data. - * @throws StorageContainerException - */ - @Override - public ContainerData readContainer(long containerID) - throws StorageContainerException { - Preconditions.checkState(containerID >= 0, - "Container ID cannot be negative."); - if (!containerMap.containsKey(containerID)) { - throw new StorageContainerException("Unable to find the container. ID: " - + containerID, CONTAINER_NOT_FOUND); - } - ContainerData cData = containerMap.get(containerID); - if (cData == null) { - throw new StorageContainerException("Invalid container data. ID: " - + containerID, CONTAINER_INTERNAL_ERROR); - } - return cData; - } - - /** - * Closes a open container, if it is already closed or does not exist a - * StorageContainerException is thrown. - * - * @param containerID - ID of the container. - * @throws StorageContainerException - */ - @Override - public void closeContainer(long containerID) - throws StorageContainerException, NoSuchAlgorithmException { - ContainerData containerData = readContainer(containerID); - containerData.closeContainer(); - writeContainerInfo(containerData, true); - MetadataStore db = KeyUtils.getDB(containerData, conf); - - // It is ok if this operation takes a bit of time. - // Close container is not expected to be instantaneous. - try { - db.compactDB(); - } catch (IOException e) { - LOG.error("Error in DB compaction while closing container", e); - throw new StorageContainerException(e, ERROR_IN_COMPACT_DB); - } - - // Active is different from closed. Closed means it is immutable, active - // false means we have some internal error that is happening to this - // container. This is a way to track damaged containers if we have an - // I/O failure, this allows us to take quick action in case of container - // issues. 
- - containerMap.put(containerID, containerData); - } - - @Override - public void updateContainer(long containerID, ContainerData data, - boolean forceUpdate) throws StorageContainerException { - Preconditions.checkState(containerID >= 0, - "Container ID cannot be negative."); - Preconditions.checkNotNull(data, "Container data cannot be null"); - FileOutputStream containerStream = null; - DigestOutputStream dos = null; - MessageDigest sha = null; - File containerFileBK = null, containerFile = null; - boolean deleted = false; - - if(!containerMap.containsKey(containerID)) { - throw new StorageContainerException("Container doesn't exist. Name :" - + containerID, CONTAINER_NOT_FOUND); - } - - try { - sha = MessageDigest.getInstance(OzoneConsts.FILE_HASH); - } catch (NoSuchAlgorithmException e) { - throw new StorageContainerException("Unable to create Message Digest," - + " usually this is a java configuration issue.", - NO_SUCH_ALGORITHM); - } - - try { - Path location = locationManager.getContainerPath(); - ContainerData orgData = containerMap.get(containerID); - if (orgData == null) { - // updating a invalid container - throw new StorageContainerException("Update a container with invalid" + - "container meta data", CONTAINER_INTERNAL_ERROR); - } - - if (!forceUpdate && !orgData.isOpen()) { - throw new StorageContainerException( - "Update a closed container is not allowed. ID: " + containerID, - UNSUPPORTED_REQUEST); - } - - containerFile = ContainerUtils.getContainerFile(orgData, location); - // If forceUpdate is true, there is no need to check - // whether the container file exists. - if (!forceUpdate) { - if (!containerFile.exists() || !containerFile.canWrite()) { - throw new StorageContainerException( - "Container file not exists or corrupted. ID: " + containerID, - CONTAINER_INTERNAL_ERROR); - } - - // Backup the container file - containerFileBK = File.createTempFile( - "tmp_" + System.currentTimeMillis() + "_", - containerFile.getName(), containerFile.getParentFile()); - FileUtils.copyFile(containerFile, containerFileBK); - - deleted = containerFile.delete(); - containerStream = new FileOutputStream(containerFile); - dos = new DigestOutputStream(containerStream, sha); - - ContainerProtos.ContainerData protoData = data.getProtoBufMessage(); - protoData.writeDelimitedTo(dos); - } - - // Update the in-memory map - containerMap.replace(containerID, data); - } catch (IOException e) { - // Restore the container file from backup - if(containerFileBK != null && containerFileBK.exists() && deleted) { - if(containerFile.delete() - && containerFileBK.renameTo(containerFile)) { - throw new StorageContainerException("Container update failed," - + " container data restored from the backup.", - CONTAINER_INTERNAL_ERROR); - } else { - throw new StorageContainerException( - "Failed to restore container data from the backup. ID: " - + containerID, CONTAINER_INTERNAL_ERROR); - } - } else { - throw new StorageContainerException( - e.getMessage(), CONTAINER_INTERNAL_ERROR); - } - } finally { - if (containerFileBK != null && containerFileBK.exists()) { - if(!containerFileBK.delete()) { - LOG.warn("Unable to delete container file backup : {}.", - containerFileBK.getAbsolutePath()); - } - } - IOUtils.closeStream(dos); - IOUtils.closeStream(containerStream); - } - } - - @VisibleForTesting - protected File getContainerFile(ContainerData data) throws IOException { - return ContainerUtils.getContainerFile(data, - this.locationManager.getContainerPath()); - } - - /** - * Checks if a container exists. 
- * - * @param containerID - ID of the container. - * @return true if the container is open false otherwise. - * @throws StorageContainerException - Throws Exception if we are not able to - * find the container. - */ - @Override - public boolean isOpen(long containerID) throws StorageContainerException { - final ContainerData containerData = containerMap.get(containerID); - if (containerData == null) { - throw new StorageContainerException( - "Container not found: " + containerID, CONTAINER_NOT_FOUND); - } - return containerData.isOpen(); - } - - /** - * Returns LifeCycle State of the container. - * @param containerID - Id of the container - * @return LifeCycle State of the container - * @throws StorageContainerException - */ - private HddsProtos.LifeCycleState getState(long containerID) - throws StorageContainerException { - LifeCycleState state; - final ContainerData data = containerMap.get(containerID); - if (data == null) { - throw new StorageContainerException( - "Container status not found: " + containerID, CONTAINER_NOT_FOUND); - } - switch (data.getState()) { - case OPEN: - state = LifeCycleState.OPEN; - break; - case CLOSING: - state = LifeCycleState.CLOSING; - break; - case CLOSED: - state = LifeCycleState.CLOSED; - break; - default: - throw new StorageContainerException( - "Invalid Container state found: " + containerID, - INVALID_CONTAINER_STATE); - } - - return state; - } - - /** - * Supports clean shutdown of container. - * - * @throws IOException - */ - @Override - public void shutdown() throws IOException { - Preconditions.checkState(this.hasWriteLock(), - "Assumption that we are holding the lock violated."); - this.containerMap.clear(); - this.locationManager.shutdown(); - } - - - @VisibleForTesting - public ConcurrentSkipListMap getContainerMap() { - return containerMap; - } - - /** - * Acquire read lock. - */ - @Override - public void readLock() { - this.lock.readLock().lock(); - - } - - @Override - public void readLockInterruptibly() throws InterruptedException { - this.lock.readLock().lockInterruptibly(); - } - - /** - * Release read lock. - */ - @Override - public void readUnlock() { - this.lock.readLock().unlock(); - } - - /** - * Check if the current thread holds read lock. - */ - @Override - public boolean hasReadLock() { - return this.lock.readLock().tryLock(); - } - - /** - * Acquire write lock. - */ - @Override - public void writeLock() { - this.lock.writeLock().lock(); - } - - /** - * Acquire write lock, unless interrupted while waiting. - */ - @Override - public void writeLockInterruptibly() throws InterruptedException { - this.lock.writeLock().lockInterruptibly(); - - } - - /** - * Release write lock. - */ - @Override - public void writeUnlock() { - this.lock.writeLock().unlock(); - - } - - /** - * Check if the current thread holds write lock. - */ - @Override - public boolean hasWriteLock() { - return this.lock.writeLock().isHeldByCurrentThread(); - } - - public ChunkManager getChunkManager() { - return this.chunkManager; - } - - /** - * Sets the chunk Manager. - * - * @param chunkManager - Chunk Manager - */ - public void setChunkManager(ChunkManager chunkManager) { - this.chunkManager = chunkManager; - } - - /** - * Gets the Key Manager. - * - * @return KeyManager. - */ - @Override - public KeyManager getKeyManager() { - return this.keyManager; - } - - /** - * Get the node report. - * @return node report. 
- */ - @Override - public NodeReportProto getNodeReport() throws IOException { - StorageLocationReport[] reports = locationManager.getLocationReport(); - NodeReportProto.Builder nrb = NodeReportProto.newBuilder(); - for (int i = 0; i < reports.length; i++) { - StorageReportProto.Builder srb = StorageReportProto.newBuilder(); - nrb.addStorageReport(reports[i].getProtoBufMessage()); - } - return nrb.build(); - } - - - /** - * Gets container reports. - * - * @return List of all closed containers. - * @throws IOException - */ - @Override - public List getClosedContainerReports() throws IOException { - LOG.debug("Starting container report iteration."); - // No need for locking since containerMap is a ConcurrentSkipListMap - // And we can never get the exact state since close might happen - // after we iterate a point. - return containerMap.entrySet().stream() - .filter(containerData -> - containerData.getValue().isClosed()) - .map(containerData -> containerData.getValue()) - .collect(Collectors.toList()); - } - - /** - * Get container report. - * - * @return The container report. - * @throws IOException - */ - @Override - public ContainerReportsProto getContainerReport() throws IOException { - LOG.debug("Starting container report iteration."); - // No need for locking since containerMap is a ConcurrentSkipListMap - // And we can never get the exact state since close might happen - // after we iterate a point. - List containers = containerMap.values().stream() - .collect(Collectors.toList()); - - ContainerReportsProto.Builder crBuilder = - ContainerReportsProto.newBuilder(); - - for (ContainerData container: containers) { - long containerId = container.getContainerID(); - StorageContainerDatanodeProtocolProtos.ContainerInfo.Builder ciBuilder = - StorageContainerDatanodeProtocolProtos.ContainerInfo.newBuilder(); - ciBuilder.setContainerID(container.getContainerID()) - .setSize(container.getMaxSize()) - .setUsed(container.getBytesUsed()) - .setKeyCount(container.getKeyCount()) - .setReadCount(container.getReadCount()) - .setWriteCount(container.getWriteCount()) - .setReadBytes(container.getReadBytes()) - .setWriteBytes(container.getWriteBytes()) - .setState(getState(containerId)) - .setDeleteTransactionId(container.getDeleteTransactionId()); - - crBuilder.addReports(ciBuilder.build()); - } - - return crBuilder.build(); - } - - /** - * Sets the Key Manager. - * - * @param keyManager - Key Manager. - */ - @Override - public void setKeyManager(KeyManager keyManager) { - this.keyManager = keyManager; - } - - /** - * Filter out only container files from the container metadata dir. - */ - private static class ContainerFilter implements FilenameFilter { - /** - * Tests if a specified file should be included in a file list. - * - * @param dir the directory in which the file was found. - * @param name the name of the file. - * @return true if and only if the name should be included in - * the file list; false otherwise. 
- */ - @Override - public boolean accept(File dir, String name) { - return name.endsWith(CONTAINER_EXTENSION); - } - } - - @Override - public List chooseContainerForBlockDeletion( - int count) throws StorageContainerException { - readLock(); - try { - return containerDeletionChooser.chooseContainerForBlockDeletion( - count, containerMap); - } finally { - readUnlock(); - } - } - - @VisibleForTesting - public ContainerDeletionChoosingPolicy getContainerDeletionChooser() { - return containerDeletionChooser; - } - - @Override - public void incrPendingDeletionBlocks(int numBlocks, long containerId) { - writeLock(); - try { - ContainerData cData = containerMap.get(containerId); - cData.incrPendingDeletionBlocks(numBlocks); - } finally { - writeUnlock(); - } - } - - @Override - public void decrPendingDeletionBlocks(int numBlocks, long containerId) { - writeLock(); - try { - ContainerData cData = containerMap.get(containerId); - cData.decrPendingDeletionBlocks(numBlocks); - } finally { - writeUnlock(); - } - } - - /** - * Increase the read count of the container. - * - * @param containerId - ID of the container. - */ - @Override - public void incrReadCount(long containerId) { - ContainerData cData = containerMap.get(containerId); - cData.incrReadCount(); - } - - public long getReadCount(long containerId) { - ContainerData cData = containerMap.get(containerId); - return cData.getReadCount(); - } - - /** - * Increase the read counter for bytes read from the container. - * - * @param containerId - ID of the container. - * @param readBytes - bytes read from the container. - */ - @Override - public void incrReadBytes(long containerId, long readBytes) { - ContainerData cData = containerMap.get(containerId); - cData.incrReadBytes(readBytes); - } - - /** - * Returns number of bytes read from the container. - * @param containerId - * @return - */ - public long getReadBytes(long containerId) { - readLock(); - try { - ContainerData cData = containerMap.get(containerId); - return cData.getReadBytes(); - } finally { - readUnlock(); - } - } - - /** - * Increase the write count of the container. - * - * @param containerId - Name of the container. - */ - @Override - public void incrWriteCount(long containerId) { - ContainerData cData = containerMap.get(containerId); - cData.incrWriteCount(); - } - - public long getWriteCount(long containerId) { - ContainerData cData = containerMap.get(containerId); - return cData.getWriteCount(); - } - - /** - * Increse the write counter for bytes write into the container. - * - * @param containerId - ID of the container. - * @param writeBytes - bytes write into the container. - */ - @Override - public void incrWriteBytes(long containerId, long writeBytes) { - ContainerData cData = containerMap.get(containerId); - cData.incrWriteBytes(writeBytes); - } - - public long getWriteBytes(long containerId) { - ContainerData cData = containerMap.get(containerId); - return cData.getWriteBytes(); - } - - /** - * Increase the bytes used by the container. - * - * @param containerId - ID of the container. - * @param used - additional bytes used by the container. - * @return the current bytes used. - */ - @Override - public long incrBytesUsed(long containerId, long used) { - ContainerData cData = containerMap.get(containerId); - return cData.incrBytesUsed(used); - } - - /** - * Decrease the bytes used by the container. - * - * @param containerId - ID of the container. - * @param used - additional bytes reclaimed by the container. - * @return the current bytes used. 
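The ContainerFilter above only tests the file extension; a loader would apply it through File.listFiles(). A hedged sketch of that pattern, where the ".container" literal stands in for CONTAINER_EXTENSION and the directory is a placeholder:

```java
import java.io.File;
import java.io.FilenameFilter;

/**
 * Hedged sketch: applying an extension-based FilenameFilter the way the
 * private ContainerFilter above is intended to be used. ".container" is
 * assumed to be the value of CONTAINER_EXTENSION.
 */
public final class ContainerFileLister {
  private ContainerFileLister() { }

  static File[] listContainerFiles(File containerMetaDir) {
    FilenameFilter filter = (dir, name) -> name.endsWith(".container");
    // listFiles returns null if the path is not a readable directory.
    File[] files = containerMetaDir.listFiles(filter);
    return files == null ? new File[0] : files;
  }
}
```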
- */ - @Override - public long decrBytesUsed(long containerId, long used) { - ContainerData cData = containerMap.get(containerId); - return cData.decrBytesUsed(used); - } - - public long getBytesUsed(long containerId) { - ContainerData cData = containerMap.get(containerId); - return cData.getBytesUsed(); - } - - /** - * Get the number of keys in the container. - * - * @param containerId - ID of the container. - * @return the current key count. - */ - @Override - public long getNumKeys(long containerId) { - ContainerData cData = containerMap.get(containerId); - return cData.getKeyCount(); - } - - @Override - public void updateDeleteTransactionId(long containerId, - long deleteTransactionId) { - containerMap.get(containerId) - .updateDeleteTransactionId(deleteTransactionId); - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java new file mode 100644 index 00000000000..7f4f147fe39 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.impl; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common + .interfaces.ContainerDeletionChoosingPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.stream.Collectors; + + +/** + * Class that manages Containers created on the datanode. + */ +public class ContainerSet { + + private static final Logger LOG = LoggerFactory.getLogger(ContainerSet.class); + + private final ConcurrentSkipListMap containerMap = new + ConcurrentSkipListMap<>(); + + /** + * Add Container to container map. 
+   * @param container
+   * @return true if the container is added to containerMap, otherwise
+   * false
+   */
+  public boolean addContainer(Container container) throws
+      StorageContainerException {
+    Preconditions.checkNotNull(container, "container cannot be null");
+
+    long containerId = container.getContainerData().getContainerID();
+    if(containerMap.putIfAbsent(containerId, container) == null) {
+      LOG.debug("Container with container Id {} is added to containerMap",
+          containerId);
+      return true;
+    } else {
+      LOG.warn("Container already exists with container Id {}", containerId);
+      throw new StorageContainerException("Container already exists with " +
+          "container Id " + containerId,
+          ContainerProtos.Result.CONTAINER_EXISTS);
+    }
+  }
+
+  /**
+   * Returns the Container with the specified containerId.
+   * @param containerId
+   * @return Container
+   */
+  public Container getContainer(long containerId) {
+    Preconditions.checkState(containerId >= 0,
+        "Container Id cannot be negative.");
+    return containerMap.get(containerId);
+  }
+
+  /**
+   * Removes the Container matching the specified containerId.
+   * @param containerId
+   * @return true if the container is removed from containerMap, otherwise
+   * false
+   */
+  public boolean removeContainer(long containerId) {
+    Preconditions.checkState(containerId >= 0,
+        "Container Id cannot be negative.");
+    Container removed = containerMap.remove(containerId);
+    if(removed == null) {
+      LOG.debug("Container with containerId {} is not present in " +
+          "containerMap", containerId);
+      return false;
+    } else {
+      LOG.debug("Container with containerId {} is removed from containerMap",
+          containerId);
+      return true;
+    }
+  }
+
+  /**
+   * Return number of containers in container map.
+   * @return container count
+   */
+  @VisibleForTesting
+  public int containerCount() {
+    return containerMap.size();
+  }
+
+  /**
+   * Return a container Iterator over {@link ContainerSet#containerMap}.
+   * @return Iterator
+   */
+  public Iterator<Container> getContainerIterator() {
+    return containerMap.values().iterator();
+  }
+
+  /**
+   * Return a containerMap iterator over {@link ContainerSet#containerMap}.
+   * @return containerMap Iterator
+   */
+  public Iterator<Map.Entry<Long, Container>> getContainerMapIterator() {
+    return containerMap.entrySet().iterator();
+  }
+
+  /**
+   * Return a copy of the containerMap.
+   * @return containerMap
+   */
+  public Map<Long, Container> getContainerMap() {
+    return ImmutableMap.copyOf(containerMap);
+  }
+
+  /**
+   * A simple interface for container Iterations.
+   *


+ * This call make no guarantees about consistency of the data between + * different list calls. It just returns the best known data at that point of + * time. It is possible that using this iteration you can miss certain + * container from the listing. + * + * @param startContainerId - Return containers with Id >= startContainerId. + * @param count - how many to return + * @param data - Actual containerData + * @throws StorageContainerException + */ + public void listContainer(long startContainerId, long count, + List data) throws + StorageContainerException { + Preconditions.checkNotNull(data, + "Internal assertion: data cannot be null"); + Preconditions.checkState(startContainerId >= 0, + "Start container Id cannot be negative"); + Preconditions.checkState(count > 0, + "max number of containers returned " + + "must be positive"); + LOG.debug("listContainer returns containerData starting from {} of count " + + "{}", startContainerId, count); + ConcurrentNavigableMap map; + if (startContainerId == 0) { + map = containerMap.tailMap(containerMap.firstKey(), true); + } else { + map = containerMap.tailMap(startContainerId, true); + } + int currentCount = 0; + for (Container entry : map.values()) { + if (currentCount < count) { + data.add(entry.getContainerData()); + currentCount++; + } else { + return; + } + } + } + + /** + * Get container report. + * + * @return The container report. + * @throws IOException + */ + public ContainerReportsProto getContainerReport() throws IOException { + LOG.debug("Starting container report iteration."); + + // No need for locking since containerMap is a ConcurrentSkipListMap + // And we can never get the exact state since close might happen + // after we iterate a point. + List containers = containerMap.values().stream().collect( + Collectors.toList()); + + ContainerReportsProto.Builder crBuilder = + ContainerReportsProto.newBuilder(); + + for (Container container: containers) { + crBuilder.addReports(container.getContainerReport()); + } + + return crBuilder.build(); + } + + public List chooseContainerForBlockDeletion(int count, + ContainerDeletionChoosingPolicy deletionPolicy) + throws StorageContainerException { + Map containerDataMap = containerMap.entrySet().stream() + .filter(e -> deletionPolicy.isValidContainerType( + e.getValue().getContainerType())) + .collect(Collectors.toMap(Map.Entry::getKey, + e -> e.getValue().getContainerData())); + return deletionPolicy + .chooseContainerForBlockDeletion(count, containerDataMap); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/Dispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/Dispatcher.java deleted file mode 100644 index 3b478cdb05a..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/Dispatcher.java +++ /dev/null @@ -1,687 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
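Before the rest of the removed Dispatcher, a hedged usage sketch of the ContainerSet API added above: register containers, then page through their ContainerData. It assumes Container instances are built elsewhere and that ContainerData lives in the same impl package (as the surrounding code suggests); because the new listContainer uses an inclusive tailMap, each page advances one past the last ID seen:

```java
package org.apache.hadoop.ozone.container.common.impl;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.ozone.container.common.interfaces.Container;

/**
 * Hedged sketch of the new ContainerSet API; Container construction is
 * assumed to happen elsewhere (e.g. in a concrete container implementation).
 */
public final class ContainerSetExample {
  private ContainerSetExample() { }

  static List<ContainerData> registerAndList(ContainerSet containerSet,
      List<Container> containers, long pageSize)
      throws StorageContainerException {
    List<ContainerData> all = new ArrayList<>();
    if (containers.isEmpty()) {
      // listContainer starting from 0 expects at least one registered
      // container, so bail out early for an empty input.
      return all;
    }
    for (Container container : containers) {
      // Throws CONTAINER_EXISTS if the ID is already registered.
      containerSet.addContainer(container);
    }

    List<ContainerData> page = new ArrayList<>();
    long start = 0;                      // 0 means "from the first container"
    do {
      page.clear();
      containerSet.listContainer(start, pageSize, page);
      for (ContainerData cd : page) {
        // The new listContainer uses an inclusive tailMap, so the next page
        // must start one past the last ID that was returned.
        start = cd.getContainerID() + 1;
      }
      all.addAll(page);
    } while (page.size() == pageSize);
    return all;
  }
}
```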
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.container.common.impl; - -import com.google.common.base.Preconditions; -import org.apache.ratis.shaded.com.google.protobuf.ByteString; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerCommandRequestProto; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerCommandResponseProto; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; -import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; -import org.apache.hadoop.ozone.container.common.helpers.ChunkUtils; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; -import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; -import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.helpers.FileUtils; -import org.apache.hadoop.ozone.container.common.helpers.KeyData; -import org.apache.hadoop.ozone.container.common.helpers.KeyUtils; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.security.NoSuchAlgorithmException; -import java.util.LinkedList; -import java.util.List; - -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CLOSED_CONTAINER_IO; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.GET_SMALL_FILE_ERROR; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.NO_SUCH_ALGORITHM; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.PUT_SMALL_FILE_ERROR; - -/** - * Ozone Container dispatcher takes a call from the netty server and routes it - * to the right handler function. - */ -public class Dispatcher implements ContainerDispatcher { - static final Logger LOG = LoggerFactory.getLogger(Dispatcher.class); - - private final ContainerManager containerManager; - private ContainerMetrics metrics; - private Configuration conf; - - /** - * Constructs an OzoneContainer that receives calls from - * XceiverServerHandler. - * - * @param containerManager - A class that manages containers. 
- */ - public Dispatcher(ContainerManager containerManager, Configuration config) { - Preconditions.checkNotNull(containerManager); - this.containerManager = containerManager; - this.metrics = null; - this.conf = config; - } - - @Override - public void init() { - this.metrics = ContainerMetrics.create(conf); - } - - @Override - public void shutdown() { - } - - @Override - public ContainerCommandResponseProto dispatch( - ContainerCommandRequestProto msg) { - LOG.trace("Command {}, trace ID: {} ", msg.getCmdType().toString(), - msg.getTraceID()); - long startNanos = System.nanoTime(); - ContainerCommandResponseProto resp = null; - try { - Preconditions.checkNotNull(msg); - Type cmdType = msg.getCmdType(); - metrics.incContainerOpcMetrics(cmdType); - if ((cmdType == Type.CreateContainer) || - (cmdType == Type.DeleteContainer) || - (cmdType == Type.ReadContainer) || - (cmdType == Type.ListContainer) || - (cmdType == Type.UpdateContainer) || - (cmdType == Type.CloseContainer)) { - resp = containerProcessHandler(msg); - } - - if ((cmdType == Type.PutKey) || - (cmdType == Type.GetKey) || - (cmdType == Type.DeleteKey) || - (cmdType == Type.ListKey)) { - resp = keyProcessHandler(msg); - } - - if ((cmdType == Type.WriteChunk) || - (cmdType == Type.ReadChunk) || - (cmdType == Type.DeleteChunk)) { - resp = chunkProcessHandler(msg); - } - - if ((cmdType == Type.PutSmallFile) || - (cmdType == Type.GetSmallFile)) { - resp = smallFileHandler(msg); - } - - if (resp != null) { - metrics.incContainerOpsLatencies(cmdType, - System.nanoTime() - startNanos); - return resp; - } - - return ContainerUtils.unsupportedRequest(msg); - } catch (StorageContainerException e) { - // This useful since the trace ID will allow us to correlate failures. - return ContainerUtils.logAndReturnError(LOG, e, msg); - } catch (IllegalStateException | NullPointerException e) { - return ContainerUtils.logAndReturnError(LOG, e, msg); - } - } - - public ContainerMetrics getContainerMetrics() { - return metrics; - } - - /** - * Handles the all Container related functionality. - * - * @param msg - command - * @return - response - * @throws StorageContainerException - */ - private ContainerCommandResponseProto containerProcessHandler( - ContainerCommandRequestProto msg) throws StorageContainerException { - try { - - switch (msg.getCmdType()) { - case CreateContainer: - return handleCreateContainer(msg); - - case DeleteContainer: - return handleDeleteContainer(msg); - - case ListContainer: - // TODO : Support List Container. - return ContainerUtils.unsupportedRequest(msg); - - case UpdateContainer: - return handleUpdateContainer(msg); - - case ReadContainer: - return handleReadContainer(msg); - - case CloseContainer: - return handleCloseContainer(msg); - - default: - return ContainerUtils.unsupportedRequest(msg); - } - } catch (StorageContainerException e) { - return ContainerUtils.logAndReturnError(LOG, e, msg); - } catch (IOException ex) { - LOG.warn("Container operation failed. " + - "Container: {} Operation: {} trace ID: {} Error: {}", - msg.getCreateContainer().getContainerData().getContainerID(), - msg.getCmdType().name(), - msg.getTraceID(), - ex.toString(), ex); - - // TODO : Replace with finer error codes. - return ContainerUtils.getContainerResponse(msg, - ContainerProtos.Result.CONTAINER_INTERNAL_ERROR, - ex.toString()).build(); - } - } - - /** - * Handles the all key related functionality. 
- * - * @param msg - command - * @return - response - * @throws StorageContainerException - */ - private ContainerCommandResponseProto keyProcessHandler( - ContainerCommandRequestProto msg) throws StorageContainerException { - try { - switch (msg.getCmdType()) { - case PutKey: - return handlePutKey(msg); - - case GetKey: - return handleGetKey(msg); - - case DeleteKey: - return handleDeleteKey(msg); - - case ListKey: - return ContainerUtils.unsupportedRequest(msg); - - default: - return ContainerUtils.unsupportedRequest(msg); - - } - } catch (StorageContainerException e) { - return ContainerUtils.logAndReturnError(LOG, e, msg); - } catch (IOException ex) { - LOG.warn("Container operation failed. " + - "Container: {} Operation: {} trace ID: {} Error: {}", - msg.getCreateContainer().getContainerData().getContainerID(), - msg.getCmdType().name(), - msg.getTraceID(), - ex.toString(), ex); - - // TODO : Replace with finer error codes. - return ContainerUtils.getContainerResponse(msg, - ContainerProtos.Result.CONTAINER_INTERNAL_ERROR, - ex.toString()).build(); - } - } - - /** - * Handles the all chunk related functionality. - * - * @param msg - command - * @return - response - * @throws StorageContainerException - */ - private ContainerCommandResponseProto chunkProcessHandler( - ContainerCommandRequestProto msg) throws StorageContainerException { - try { - switch (msg.getCmdType()) { - case WriteChunk: - return handleWriteChunk(msg); - - case ReadChunk: - return handleReadChunk(msg); - - case DeleteChunk: - return handleDeleteChunk(msg); - - case ListChunk: - return ContainerUtils.unsupportedRequest(msg); - - default: - return ContainerUtils.unsupportedRequest(msg); - } - } catch (StorageContainerException e) { - return ContainerUtils.logAndReturnError(LOG, e, msg); - } catch (IOException ex) { - LOG.warn("Container operation failed. " + - "Container: {} Operation: {} trace ID: {} Error: {}", - msg.getCreateContainer().getContainerData().getContainerID(), - msg.getCmdType().name(), - msg.getTraceID(), - ex.toString(), ex); - - // TODO : Replace with finer error codes. - return ContainerUtils.getContainerResponse(msg, - ContainerProtos.Result.CONTAINER_INTERNAL_ERROR, - ex.toString()).build(); - } - } - - /** - * Dispatch calls to small file hanlder. - * @param msg - request - * @return response - * @throws StorageContainerException - */ - private ContainerCommandResponseProto smallFileHandler( - ContainerCommandRequestProto msg) throws StorageContainerException { - switch (msg.getCmdType()) { - case PutSmallFile: - return handlePutSmallFile(msg); - case GetSmallFile: - return handleGetSmallFile(msg); - default: - return ContainerUtils.unsupportedRequest(msg); - } - } - - /** - * Update an existing container with the new container data. - * - * @param msg Request - * @return ContainerCommandResponseProto - * @throws IOException - */ - private ContainerCommandResponseProto handleUpdateContainer( - ContainerCommandRequestProto msg) - throws IOException { - if (!msg.hasUpdateContainer()) { - LOG.debug("Malformed read container request. 
trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - long containerID = msg.getUpdateContainer() - .getContainerData().getContainerID(); - - ContainerData data = ContainerData.getFromProtBuf( - msg.getUpdateContainer().getContainerData(), conf); - boolean forceUpdate = msg.getUpdateContainer().getForceUpdate(); - this.containerManager.updateContainer(containerID, - data, forceUpdate); - return ContainerUtils.getContainerResponse(msg); - } - - /** - * Calls into container logic and returns appropriate response. - * - * @param msg - Request - * @return ContainerCommandResponseProto - * @throws IOException - */ - private ContainerCommandResponseProto handleReadContainer( - ContainerCommandRequestProto msg) - throws IOException { - - if (!msg.hasReadContainer()) { - LOG.debug("Malformed read container request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - - long containerID = msg.getReadContainer().getContainerID(); - ContainerData container = this.containerManager. - readContainer(containerID); - return ContainerUtils.getReadContainerResponse(msg, container); - } - - /** - * Calls into container logic and returns appropriate response. - * - * @param msg - Request - * @return Response. - * @throws IOException - */ - private ContainerCommandResponseProto handleDeleteContainer( - ContainerCommandRequestProto msg) throws IOException { - - if (!msg.hasDeleteContainer()) { - LOG.debug("Malformed delete container request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - - long containerID = msg.getDeleteContainer().getContainerID(); - boolean forceDelete = msg.getDeleteContainer().getForceDelete(); - this.containerManager.deleteContainer(containerID, forceDelete); - return ContainerUtils.getContainerResponse(msg); - } - - /** - * Calls into container logic and returns appropriate response. - * - * @param msg - Request - * @return Response. - * @throws IOException - */ - private ContainerCommandResponseProto handleCreateContainer( - ContainerCommandRequestProto msg) throws IOException { - if (!msg.hasCreateContainer()) { - LOG.debug("Malformed create container request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - ContainerData cData = ContainerData.getFromProtBuf( - msg.getCreateContainer().getContainerData(), conf); - Preconditions.checkNotNull(cData, "Container data is null"); - - this.containerManager.createContainer(cData); - return ContainerUtils.getContainerResponse(msg); - } - - /** - * closes an open container. - * - * @param msg - - * @return - * @throws IOException - */ - private ContainerCommandResponseProto handleCloseContainer( - ContainerCommandRequestProto msg) throws IOException { - try { - if (!msg.hasCloseContainer()) { - LOG.debug("Malformed close Container request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - long containerID = msg.getCloseContainer().getContainerID(); - if (!this.containerManager.isOpen(containerID)) { - throw new StorageContainerException("Attempting to close a closed " + - "container.", CLOSED_CONTAINER_IO); - } - this.containerManager.closeContainer(containerID); - return ContainerUtils.getContainerResponse(msg); - } catch (NoSuchAlgorithmException e) { - throw new StorageContainerException("No such Algorithm", e, - NO_SUCH_ALGORITHM); - } - } - - /** - * Calls into chunk manager to write a chunk. - * - * @param msg - Request. - * @return Response. 
- * @throws IOException - */ - private ContainerCommandResponseProto handleWriteChunk( - ContainerCommandRequestProto msg) throws IOException { - if (!msg.hasWriteChunk()) { - LOG.debug("Malformed write chunk request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - BlockID blockID = BlockID.getFromProtobuf( - msg.getWriteChunk().getBlockID()); - if (!this.containerManager.isOpen(blockID.getContainerID())) { - throw new StorageContainerException("Write to closed container.", - CLOSED_CONTAINER_IO); - } - - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(msg.getWriteChunk() - .getChunkData()); - Preconditions.checkNotNull(chunkInfo); - byte[] data = null; - if (msg.getWriteChunk().getStage() == ContainerProtos.Stage.WRITE_DATA - || msg.getWriteChunk().getStage() == ContainerProtos.Stage.COMBINED) { - data = msg.getWriteChunk().getData().toByteArray(); - metrics.incContainerBytesStats(Type.WriteChunk, data.length); - - } - this.containerManager.getChunkManager() - .writeChunk(blockID, chunkInfo, - data, msg.getWriteChunk().getStage()); - - return ChunkUtils.getChunkResponse(msg); - } - - /** - * Calls into chunk manager to read a chunk. - * - * @param msg - Request. - * @return - Response. - * @throws IOException - */ - private ContainerCommandResponseProto handleReadChunk( - ContainerCommandRequestProto msg) throws IOException { - if (!msg.hasReadChunk()) { - LOG.debug("Malformed read chunk request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - BlockID blockID = BlockID.getFromProtobuf( - msg.getReadChunk().getBlockID()); - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(msg.getReadChunk() - .getChunkData()); - Preconditions.checkNotNull(chunkInfo); - byte[] data = this.containerManager.getChunkManager(). - readChunk(blockID, chunkInfo); - metrics.incContainerBytesStats(Type.ReadChunk, data.length); - return ChunkUtils.getReadChunkResponse(msg, data, chunkInfo); - } - - /** - * Calls into chunk manager to write a chunk. - * - * @param msg - Request. - * @return Response. - * @throws IOException - */ - private ContainerCommandResponseProto handleDeleteChunk( - ContainerCommandRequestProto msg) throws IOException { - if (!msg.hasDeleteChunk()) { - LOG.debug("Malformed delete chunk request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - - BlockID blockID = BlockID.getFromProtobuf(msg.getDeleteChunk() - .getBlockID()); - long containerID = blockID.getContainerID(); - if (!this.containerManager.isOpen(containerID)) { - throw new StorageContainerException("Write to closed container.", - CLOSED_CONTAINER_IO); - } - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(msg.getDeleteChunk() - .getChunkData()); - Preconditions.checkNotNull(chunkInfo); - - this.containerManager.getChunkManager().deleteChunk(blockID, - chunkInfo); - return ChunkUtils.getChunkResponse(msg); - } - - /** - * Put Key handler. - * - * @param msg - Request. - * @return - Response. - * @throws IOException - */ - private ContainerCommandResponseProto handlePutKey( - ContainerCommandRequestProto msg) throws IOException { - if (!msg.hasPutKey()) { - LOG.debug("Malformed put key request. 
trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - BlockID blockID = BlockID.getFromProtobuf( - msg.getPutKey().getKeyData().getBlockID()); - long containerID = blockID.getContainerID(); - if (!this.containerManager.isOpen(containerID)) { - throw new StorageContainerException("Write to closed container.", - CLOSED_CONTAINER_IO); - } - KeyData keyData = KeyData.getFromProtoBuf(msg.getPutKey().getKeyData()); - Preconditions.checkNotNull(keyData); - this.containerManager.getKeyManager().putKey(keyData); - long numBytes = keyData.getProtoBufMessage().toByteArray().length; - metrics.incContainerBytesStats(Type.PutKey, numBytes); - return KeyUtils.getKeyResponse(msg); - } - - /** - * Handle Get Key. - * - * @param msg - Request. - * @return - Response. - * @throws IOException - */ - private ContainerCommandResponseProto handleGetKey( - ContainerCommandRequestProto msg) throws IOException { - if (!msg.hasGetKey()) { - LOG.debug("Malformed get key request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - KeyData keyData = KeyData.getFromProtoBuf(msg.getGetKey().getKeyData()); - Preconditions.checkNotNull(keyData); - KeyData responseData = - this.containerManager.getKeyManager().getKey(keyData); - long numBytes = responseData.getProtoBufMessage().toByteArray().length; - metrics.incContainerBytesStats(Type.GetKey, numBytes); - return KeyUtils.getKeyDataResponse(msg, responseData); - } - - /** - * Handle Delete Key. - * - * @param msg - Request. - * @return - Response. - * @throws IOException - */ - private ContainerCommandResponseProto handleDeleteKey( - ContainerCommandRequestProto msg) throws IOException { - if (!msg.hasDeleteKey()) { - LOG.debug("Malformed delete key request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - BlockID blockID = BlockID.getFromProtobuf(msg.getDeleteKey() - .getBlockID()); - Preconditions.checkNotNull(blockID); - long containerID = blockID.getContainerID(); - if (!this.containerManager.isOpen(containerID)) { - throw new StorageContainerException("Write to closed container.", - CLOSED_CONTAINER_IO); - } - this.containerManager.getKeyManager().deleteKey(blockID); - return KeyUtils.getKeyResponse(msg); - } - - /** - * Handles writing a chunk and associated key using single RPC. - * - * @param msg - Message. - * @return ContainerCommandResponseProto - * @throws StorageContainerException - */ - private ContainerCommandResponseProto handlePutSmallFile( - ContainerCommandRequestProto msg) throws StorageContainerException { - - if (!msg.hasPutSmallFile()) { - LOG.debug("Malformed put small file request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - try { - - BlockID blockID = BlockID.getFromProtobuf(msg. 
- getPutSmallFile().getKey().getKeyData().getBlockID()); - long containerID = blockID.getContainerID(); - - if (!this.containerManager.isOpen(containerID)) { - throw new StorageContainerException("Write to closed container.", - CLOSED_CONTAINER_IO); - } - KeyData keyData = KeyData.getFromProtoBuf(msg.getPutSmallFile().getKey() - .getKeyData()); - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(msg.getPutSmallFile() - .getChunkInfo()); - byte[] data = msg.getPutSmallFile().getData().toByteArray(); - - metrics.incContainerBytesStats(Type.PutSmallFile, data.length); - this.containerManager.getChunkManager().writeChunk(blockID, - chunkInfo, data, ContainerProtos.Stage.COMBINED); - List chunks = new LinkedList<>(); - chunks.add(chunkInfo.getProtoBufMessage()); - keyData.setChunks(chunks); - this.containerManager.getKeyManager().putKey(keyData); - return FileUtils.getPutFileResponse(msg); - } catch (StorageContainerException e) { - return ContainerUtils.logAndReturnError(LOG, e, msg); - } catch (IOException e) { - throw new StorageContainerException("Put Small File Failed.", e, - PUT_SMALL_FILE_ERROR); - } - } - - /** - * Handles getting a data stream using a key. This helps in reducing the RPC - * overhead for small files. - * - * @param msg - ContainerCommandRequestProto - * @return ContainerCommandResponseProto - * @throws StorageContainerException - */ - private ContainerCommandResponseProto handleGetSmallFile( - ContainerCommandRequestProto msg) throws StorageContainerException { - ByteString dataBuf = ByteString.EMPTY; - if (!msg.hasGetSmallFile()) { - LOG.debug("Malformed get small file request. trace ID: {}", - msg.getTraceID()); - return ContainerUtils.malformedRequest(msg); - } - try { - long bytes = 0; - KeyData keyData = KeyData.getFromProtoBuf(msg.getGetSmallFile() - .getKey().getKeyData()); - KeyData data = this.containerManager.getKeyManager().getKey(keyData); - ContainerProtos.ChunkInfo c = null; - for (ContainerProtos.ChunkInfo chunk : data.getChunks()) { - bytes += chunk.getSerializedSize(); - ByteString current = - ByteString.copyFrom(this.containerManager.getChunkManager() - .readChunk(keyData.getBlockID(), - ChunkInfo.getFromProtoBuf(chunk))); - dataBuf = dataBuf.concat(current); - c = chunk; - } - metrics.incContainerBytesStats(Type.GetSmallFile, bytes); - return FileUtils.getGetSmallFileResponse(msg, dataBuf.toByteArray(), - ChunkInfo.getFromProtoBuf(c)); - } catch (StorageContainerException e) { - return ContainerUtils.logAndReturnError(LOG, e, msg); - } catch (IOException e) { - throw new StorageContainerException("Get Small File Failed", e, - GET_SMALL_FILE_ERROR); - } - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java new file mode 100644 index 00000000000..bb5002ae69e --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.impl; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerAction; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.Handler; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerType; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerLifeCycleState; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.Optional; + +/** + * Ozone Container dispatcher takes a call from the netty server and routes it + * to the right handler function. + */ +public class HddsDispatcher implements ContainerDispatcher { + + static final Logger LOG = LoggerFactory.getLogger(HddsDispatcher.class); + + private final Map handlers; + private final Configuration conf; + private final ContainerSet containerSet; + private final VolumeSet volumeSet; + private final StateContext context; + private final float containerCloseThreshold; + private String scmID; + private ContainerMetrics metrics; + + /** + * Constructs an OzoneContainer that receives calls from + * XceiverServerHandler. 
+ */ + public HddsDispatcher(Configuration config, ContainerSet contSet, + VolumeSet volumes, StateContext context) { + this.conf = config; + this.containerSet = contSet; + this.volumeSet = volumes; + this.context = context; + this.handlers = Maps.newHashMap(); + this.metrics = ContainerMetrics.create(conf); + for (ContainerType containerType : ContainerType.values()) { + handlers.put(containerType, + Handler.getHandlerForContainerType( + containerType, conf, containerSet, volumeSet, metrics)); + } + this.containerCloseThreshold = conf.getFloat( + HddsConfigKeys.HDDS_CONTAINER_CLOSE_THRESHOLD, + HddsConfigKeys.HDDS_CONTAINER_CLOSE_THRESHOLD_DEFAULT); + + } + + @Override + public void init() { + } + + @Override + public void shutdown() { + // Shutdown the volumes + volumeSet.shutdown(); + } + + @Override + public ContainerCommandResponseProto dispatch( + ContainerCommandRequestProto msg) { + LOG.trace("Command {}, trace ID: {} ", msg.getCmdType().toString(), + msg.getTraceID()); + Preconditions.checkNotNull(msg); + + Container container = null; + ContainerType containerType = null; + ContainerCommandResponseProto responseProto = null; + long startTime = System.nanoTime(); + ContainerProtos.Type cmdType = msg.getCmdType(); + try { + long containerID = msg.getContainerID(); + + metrics.incContainerOpsMetrics(cmdType); + if (cmdType != ContainerProtos.Type.CreateContainer) { + container = getContainer(containerID); + containerType = getContainerType(container); + } else { + if (!msg.hasCreateContainer()) { + return ContainerUtils.malformedRequest(msg); + } + containerType = msg.getCreateContainer().getContainerType(); + } + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, msg); + } + // Small performance optimization. We check if the operation is of type + // write before trying to send CloseContainerAction. + if (!HddsUtils.isReadOnly(msg)) { + sendCloseContainerActionIfNeeded(container); + } + Handler handler = getHandler(containerType); + if (handler == null) { + StorageContainerException ex = new StorageContainerException("Invalid " + + "ContainerType " + containerType, + ContainerProtos.Result.CONTAINER_INTERNAL_ERROR); + return ContainerUtils.logAndReturnError(LOG, ex, msg); + } + responseProto = handler.handle(msg, container); + if (responseProto != null) { + metrics.incContainerOpsLatencies(cmdType, System.nanoTime() - startTime); + return responseProto; + } else { + return ContainerUtils.unsupportedRequest(msg); + } + } + + /** + * If the container usage reaches the close threshold we send Close + * ContainerAction to SCM. + * + * @param container current state of container + */ + private void sendCloseContainerActionIfNeeded(Container container) { + // We have to find a more efficient way to close a container. 
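The threshold test that follows in this method boils down to a single ratio of bytes used to maximum size. A hedged sketch of the arithmetic; the 0.9f value is illustrative only, the real default comes from HddsConfigKeys:

```java
/**
 * Hedged sketch of the usage test behind sendCloseContainerActionIfNeeded():
 * the container is nominated for closing once bytesUsed/maxSize reaches the
 * configured threshold.
 */
public final class CloseThresholdCheck {
  private CloseThresholdCheck() { }

  static boolean shouldSendCloseAction(long bytesUsed, long maxSize,
      float closeThreshold) {
    // Example: bytesUsed = 4_500_000_000L, maxSize = 5_000_000_000L,
    // closeThreshold = 0.9f  ->  usage = 0.9  ->  a CLOSE action is queued.
    double usage = 1.0 * bytesUsed / maxSize;
    return usage >= closeThreshold;
  }
}
```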
+ Boolean isOpen = Optional.ofNullable(container) + .map(cont -> cont.getContainerState() == ContainerLifeCycleState.OPEN) + .orElse(Boolean.FALSE); + if (isOpen) { + ContainerData containerData = container.getContainerData(); + double containerUsedPercentage = 1.0f * containerData.getBytesUsed() / + containerData.getMaxSize(); + if (containerUsedPercentage >= containerCloseThreshold) { + ContainerAction action = ContainerAction.newBuilder() + .setContainerID(containerData.getContainerID()) + .setAction(ContainerAction.Action.CLOSE) + .setReason(ContainerAction.Reason.CONTAINER_FULL) + .build(); + context.addContainerActionIfAbsent(action); + } + } + } + + @Override + public Handler getHandler(ContainerProtos.ContainerType containerType) { + return handlers.get(containerType); + } + + @Override + public void setScmId(String scmId) { + Preconditions.checkNotNull(scmId, "scmId Cannot be null"); + if (this.scmID == null) { + this.scmID = scmId; + for (Map.Entry handlerMap : handlers.entrySet()) { + handlerMap.getValue().setScmID(scmID); + } + } + } + + @VisibleForTesting + public Container getContainer(long containerID) + throws StorageContainerException { + Container container = containerSet.getContainer(containerID); + if (container == null) { + throw new StorageContainerException( + "ContainerID " + containerID + " does not exist", + ContainerProtos.Result.CONTAINER_NOT_FOUND); + } + return container; + } + + private ContainerType getContainerType(Container container) { + return container.getContainerType(); + } + + @VisibleForTesting + public void setMetricsForTesting(ContainerMetrics containerMetrics) { + this.metrics = containerMetrics; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/KeyManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/KeyManagerImpl.java deleted file mode 100644 index 40ae1c70a7d..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/KeyManagerImpl.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
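sendCloseContainerActionIfNeeded() above compares the container's used-space ratio against the configured close threshold before queueing a CLOSE action for SCM. Below is a minimal standalone sketch of that decision; the class name, threshold value and sizes are illustrative, not the Ozone API.

```java
/** Standalone illustration of the close-threshold check; not the Ozone API. */
public class CloseThresholdSketch {

  /** Assumed value for illustration; the real default comes from HddsConfigKeys. */
  private static final float CLOSE_THRESHOLD = 0.9f;

  /** Returns true when the container is full enough to ask SCM to close it. */
  static boolean shouldClose(long bytesUsed, long maxSize) {
    // Same arithmetic as the dispatcher: promote to floating point before dividing.
    double usedRatio = 1.0 * bytesUsed / maxSize;
    return usedRatio >= CLOSE_THRESHOLD;
  }

  public static void main(String[] args) {
    long maxSize = 5L * 1024 * 1024 * 1024;                   // 5 GiB container
    System.out.println(shouldClose(4_831_838_208L, maxSize)); // 90% used -> true
    System.out.println(shouldClose(1_073_741_824L, maxSize)); // 20% used -> false
  }
}
```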
- */ - -package org.apache.hadoop.ozone.container.common.impl; - -import com.google.common.base.Preconditions; -import com.google.common.primitives.Longs; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; -import org.apache.hadoop.ozone.container.common.helpers.KeyData; -import org.apache.hadoop.ozone.container.common.helpers.KeyUtils; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; -import org.apache.hadoop.ozone.container.common.interfaces.KeyManager; -import org.apache.hadoop.ozone.container.common.utils.ContainerCache; -import org.apache.hadoop.utils.MetadataStore; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.NO_SUCH_KEY; - -/** - * Key Manager impl. - */ -public class KeyManagerImpl implements KeyManager { - static final Logger LOG = - LoggerFactory.getLogger(KeyManagerImpl.class); - - private static final float LOAD_FACTOR = 0.75f; - private final ContainerManager containerManager; - private final Configuration conf; - - /** - * Constructs a key Manager. - * - * @param containerManager - Container Manager. - */ - public KeyManagerImpl(ContainerManager containerManager, Configuration conf) { - Preconditions.checkNotNull(containerManager, "Container manager cannot be" + - " null"); - Preconditions.checkNotNull(conf, "Config cannot be null"); - this.containerManager = containerManager; - this.conf = conf; - } - - /** - * {@inheritDoc} - */ - @Override - public void putKey(KeyData data) throws IOException { - Preconditions.checkNotNull(data, - "KeyData cannot be null for put operation."); - Preconditions.checkState(data.getContainerID() >= 0, - "Container ID cannot be negative"); - containerManager.readLock(); - try { - // We are not locking the key manager since LevelDb serializes all actions - // against a single DB. We rely on DB level locking to avoid conflicts. - ContainerData cData = containerManager.readContainer( - data.getContainerID()); - MetadataStore db = KeyUtils.getDB(cData, conf); - - // This is a post condition that acts as a hint to the user. - // Should never fail. - Preconditions.checkNotNull(db, "DB cannot be null here"); - db.put(Longs.toByteArray(data.getLocalID()), data - .getProtoBufMessage().toByteArray()); - } finally { - containerManager.readUnlock(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public KeyData getKey(KeyData data) throws IOException { - containerManager.readLock(); - try { - Preconditions.checkNotNull(data, "Key data cannot be null"); - Preconditions.checkNotNull(data.getContainerID(), - "Container name cannot be null"); - ContainerData cData = containerManager.readContainer(data - .getContainerID()); - MetadataStore db = KeyUtils.getDB(cData, conf); - - // This is a post condition that acts as a hint to the user. - // Should never fail. 
- Preconditions.checkNotNull(db, "DB cannot be null here"); - - byte[] kData = db.get(Longs.toByteArray(data.getLocalID())); - if (kData == null) { - throw new StorageContainerException("Unable to find the key.", - NO_SUCH_KEY); - } - ContainerProtos.KeyData keyData = - ContainerProtos.KeyData.parseFrom(kData); - return KeyData.getFromProtoBuf(keyData); - } finally { - containerManager.readUnlock(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void deleteKey(BlockID blockID) - throws IOException { - Preconditions.checkNotNull(blockID, "block ID cannot be null."); - Preconditions.checkState(blockID.getContainerID() >= 0, - "Container ID cannot be negative."); - Preconditions.checkState(blockID.getLocalID() >= 0, - "Local ID cannot be negative."); - - containerManager.readLock(); - try { - - ContainerData cData = containerManager - .readContainer(blockID.getContainerID()); - MetadataStore db = KeyUtils.getDB(cData, conf); - - // This is a post condition that acts as a hint to the user. - // Should never fail. - Preconditions.checkNotNull(db, "DB cannot be null here"); - // Note : There is a race condition here, since get and delete - // are not atomic. Leaving it here since the impact is refusing - // to delete a key which might have just gotten inserted after - // the get check. - - byte[] kKey = Longs.toByteArray(blockID.getLocalID()); - byte[] kData = db.get(kKey); - if (kData == null) { - throw new StorageContainerException("Unable to find the key.", - NO_SUCH_KEY); - } - db.delete(kKey); - } finally { - containerManager.readUnlock(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public List listKey( - long containerID, long startLocalID, int count) - throws IOException { - Preconditions.checkState(containerID >= 0, - "Container ID cannot be negative"); - Preconditions.checkState(startLocalID >= 0, - "startLocal ID cannot be negative"); - Preconditions.checkArgument(count > 0, - "Count must be a positive number."); - ContainerData cData = containerManager.readContainer(containerID); - MetadataStore db = KeyUtils.getDB(cData, conf); - - List result = new ArrayList<>(); - byte[] startKeyInBytes = Longs.toByteArray(startLocalID); - List> range = - db.getSequentialRangeKVs(startKeyInBytes, count, null); - for (Map.Entry entry : range) { - KeyData value = KeyUtils.getKeyData(entry.getValue()); - KeyData data = new KeyData(value.getBlockID()); - result.add(data); - } - return result; - } - - /** - * Shutdown keyManager. - */ - @Override - public void shutdown() { - Preconditions.checkState(this.containerManager.hasWriteLock(), "asserts " + - "that we are holding the container manager lock when shutting down."); - KeyUtils.shutdownCache(ContainerCache.getInstance(conf)); - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/OpenContainerBlockMap.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/OpenContainerBlockMap.java new file mode 100644 index 00000000000..1ef3d0dc966 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/OpenContainerBlockMap.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
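The deleted KeyManagerImpl above addressed each block in the per-container metadata store by its localID serialized as an 8-byte big-endian key (Guava's Longs.toByteArray). A JDK-only sketch of that encoding, shown here purely for illustration:

```java
import java.nio.ByteBuffer;
import java.util.Arrays;

/** Illustration of the 8-byte big-endian block key used by the key manager. */
public class BlockKeySketch {

  /** Equivalent to Guava's Longs.toByteArray(localId); ByteBuffer is big-endian by default. */
  static byte[] toKey(long localId) {
    return ByteBuffer.allocate(Long.BYTES).putLong(localId).array();
  }

  /** Inverse mapping, useful when scanning a key range sequentially. */
  static long fromKey(byte[] key) {
    return ByteBuffer.wrap(key).getLong();
  }

  public static void main(String[] args) {
    byte[] key = toKey(42L);
    System.out.println(Arrays.toString(key)); // [0, 0, 0, 0, 0, 0, 0, 42]
    System.out.println(fromKey(key));         // 42
  }
}
```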
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.impl; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.Function; + +/** + * Map: containerId -> (localId -> {@link KeyData}). + * The outer container map does not entail locking for a better performance. + * The inner {@link KeyDataMap} is synchronized. + * + * This class will maintain list of open keys per container when closeContainer + * command comes, it should autocommit all open keys of a open container before + * marking the container as closed. + */ +public class OpenContainerBlockMap { + /** + * Map: localId -> KeyData. + * + * In order to support {@link #getAll()}, the update operations are + * synchronized. + */ + static class KeyDataMap { + private final ConcurrentMap blocks = + new ConcurrentHashMap<>(); + + KeyData get(long localId) { + return blocks.get(localId); + } + + synchronized int removeAndGetSize(long localId) { + blocks.remove(localId); + return blocks.size(); + } + + synchronized KeyData computeIfAbsent( + long localId, Function f) { + return blocks.computeIfAbsent(localId, f); + } + + synchronized List getAll() { + return new ArrayList<>(blocks.values()); + } + } + + /** + * TODO : We may construct the openBlockMap by reading the Block Layout + * for each block inside a container listing all chunk files and reading the + * sizes. This will help to recreate the openKeys Map once the DataNode + * restarts. + * + * For now, we will track all open blocks of a container in the blockMap. + */ + private final ConcurrentMap containers = + new ConcurrentHashMap<>(); + + /** + * Removes the Container matching with specified containerId. + * @param containerId containerId + */ + public void removeContainer(long containerId) { + Preconditions + .checkState(containerId >= 0, "Container Id cannot be negative."); + containers.remove(containerId); + } + + public void addChunk(BlockID blockID, ChunkInfo info) { + Preconditions.checkNotNull(info); + containers.computeIfAbsent(blockID.getContainerID(), id -> new KeyDataMap()) + .computeIfAbsent(blockID.getLocalID(), id -> new KeyData(blockID)) + .addChunk(info); + } + + /** + * Removes the chunk from the chunkInfo list for the given block. + * @param blockID id of the block + * @param chunkInfo chunk info. 
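The two-level bookkeeping described above, a lock-free outer ConcurrentMap keyed by containerId holding a per-container block map keyed by localId, can be exercised with a JDK-only toy; KeyData is reduced to a String here, and the empty-map cleanup mirrors the removeFromKeyMap logic shown further below.

```java
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

/** Toy version of the containerId -> (localId -> block) bookkeeping. */
public class OpenBlockMapSketch {

  private final ConcurrentMap<Long, ConcurrentMap<Long, String>> containers =
      new ConcurrentHashMap<>();

  /** Record a chunk for (containerId, localId), creating the inner map lazily. */
  void addChunk(long containerId, long localId, String chunkName) {
    containers.computeIfAbsent(containerId, id -> new ConcurrentHashMap<>())
        .merge(localId, chunkName, (oldVal, newVal) -> oldVal + "," + newVal);
  }

  /** Drop a block; remove the whole container entry once its inner map is empty. */
  void removeBlock(long containerId, long localId) {
    containers.computeIfPresent(containerId, (id, blocks) -> {
      blocks.remove(localId);
      return blocks.isEmpty() ? null : blocks;   // returning null removes the outer entry
    });
  }

  public static void main(String[] args) {
    OpenBlockMapSketch map = new OpenBlockMapSketch();
    map.addChunk(1L, 100L, "chunk-0");
    map.addChunk(1L, 100L, "chunk-1");
    map.removeBlock(1L, 100L);
    System.out.println(map.containers.isEmpty()); // true: container 1 was cleaned up
  }
}
```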
+ */ + public void removeChunk(BlockID blockID, ChunkInfo chunkInfo) { + Preconditions.checkNotNull(chunkInfo); + Preconditions.checkNotNull(blockID); + Optional.ofNullable(containers.get(blockID.getContainerID())) + .map(blocks -> blocks.get(blockID.getLocalID())) + .ifPresent(keyData -> keyData.removeChunk(chunkInfo)); + } + + /** + * Returns the list of open to the openContainerBlockMap. + * @param containerId container id + * @return List of open Keys(blocks) + */ + public List getOpenKeys(long containerId) { + return Optional.ofNullable(containers.get(containerId)) + .map(KeyDataMap::getAll) + .orElseGet(Collections::emptyList); + } + + /** + * removes the block from the block map. + * @param blockID + */ + public void removeFromKeyMap(BlockID blockID) { + Preconditions.checkNotNull(blockID); + containers.computeIfPresent(blockID.getContainerID(), (containerId, blocks) + -> blocks.removeAndGetSize(blockID.getLocalID()) == 0? null: blocks); + } + + /** + * Returns true if the block exists in the map, false otherwise. + * + * @param blockID + * @return True, if it exists, false otherwise + */ + public boolean checkIfBlockExists(BlockID blockID) { + KeyDataMap keyDataMap = containers.get(blockID.getContainerID()); + return keyDataMap != null && keyDataMap.get(blockID.getLocalID()) != null; + } + + @VisibleForTesting + KeyDataMap getKeyDataMap(long containerId) { + return containers.get(containerId); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/RandomContainerDeletionChoosingPolicy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/RandomContainerDeletionChoosingPolicy.java index 97fdb9ecfce..5c6c319600e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/RandomContainerDeletionChoosingPolicy.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/RandomContainerDeletionChoosingPolicy.java @@ -21,9 +21,9 @@ import org.apache.hadoop.hdds.scm.container.common.helpers .StorageContainerException; import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces .ContainerDeletionChoosingPolicy; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,7 +59,7 @@ LOG.debug("Select container {} for block deletion, " + "pending deletion blocks num: {}.", entry.getContainerID(), - entry.getNumPendingDeletionBlocks()); + ((KeyValueContainerData)entry).getNumPendingDeletionBlocks()); } else { break; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/TopNOrderedContainerDeletionChoosingPolicy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/TopNOrderedContainerDeletionChoosingPolicy.java index 9a109e8c99c..b17680c4198 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/TopNOrderedContainerDeletionChoosingPolicy.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/TopNOrderedContainerDeletionChoosingPolicy.java @@ -20,9 +20,9 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.scm.container.common.helpers .StorageContainerException; -import 
org.apache.hadoop.ozone.container.common.helpers.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces .ContainerDeletionChoosingPolicy; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,14 +42,11 @@ LoggerFactory.getLogger(TopNOrderedContainerDeletionChoosingPolicy.class); /** customized comparator used to compare differentiate container data. **/ - private static final Comparator CONTAINER_DATA_COMPARATOR - = new Comparator() { - @Override - public int compare(ContainerData c1, ContainerData c2) { - return Integer.compare(c2.getNumPendingDeletionBlocks(), - c1.getNumPendingDeletionBlocks()); - } - }; + private static final Comparator + KEY_VALUE_CONTAINER_DATA_COMPARATOR = (KeyValueContainerData c1, + KeyValueContainerData c2) -> + Integer.compare(c2.getNumPendingDeletionBlocks(), + c1.getNumPendingDeletionBlocks()); @Override public List chooseContainerForBlockDeletion(int count, @@ -59,13 +56,15 @@ public int compare(ContainerData c1, ContainerData c2) { "Internal assertion: candidate containers cannot be null"); List result = new LinkedList<>(); - List orderedList = new LinkedList<>(); - orderedList.addAll(candidateContainers.values()); - Collections.sort(orderedList, CONTAINER_DATA_COMPARATOR); + List orderedList = new LinkedList<>(); + for (ContainerData entry : candidateContainers.values()) { + orderedList.add((KeyValueContainerData)entry); + } + Collections.sort(orderedList, KEY_VALUE_CONTAINER_DATA_COMPARATOR); // get top N list ordered by pending deletion blocks' number int currentCount = 0; - for (ContainerData entry : orderedList) { + for (KeyValueContainerData entry : orderedList) { if (currentCount < count) { if (entry.getNumPendingDeletionBlocks() > 0) { result.add(entry); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/BlockIterator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/BlockIterator.java new file mode 100644 index 00000000000..f6931e37a4c --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/BlockIterator.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.interfaces; + + +import java.io.IOException; +import java.util.NoSuchElementException; + +/** + * Block Iterator for container. Each container type need to implement this + * interface. + * @param + */ +public interface BlockIterator { + + /** + * This checks if iterator has next element. If it has returns true, + * otherwise false. 
+ * @return boolean + */ + boolean hasNext() throws IOException; + + /** + * Seek to first entry. + */ + void seekToFirst(); + + /** + * Seek to last entry. + */ + void seekToLast(); + + /** + * Get next block in the container. + * @return next block or null if there are no blocks + * @throws IOException + */ + T nextBlock() throws IOException, NoSuchElementException; + + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java new file mode 100644 index 00000000000..9380f0cb36f --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Container.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.interfaces; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Map; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerLifeCycleState; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; + +import org.apache.hadoop.hdfs.util.RwLock; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; + +/** + * Interface for Container Operations. + */ +public interface Container extends RwLock { + + /** + * Creates a container. + * + * @throws StorageContainerException + */ + void create(VolumeSet volumeSet, VolumeChoosingPolicy volumeChoosingPolicy, + String scmId) throws StorageContainerException; + + /** + * Deletes the container. + * + * @param forceDelete - whether this container should be deleted forcibly. + * @throws StorageContainerException + */ + void delete(boolean forceDelete) throws StorageContainerException; + + /** + * Update the container. + * + * @param metaData + * @param forceUpdate if true, update container forcibly. + * @throws StorageContainerException + */ + void update(Map metaData, boolean forceUpdate) + throws StorageContainerException; + + /** + * Get metadata about the container. + * + * @return ContainerData - Container Data. + * @throws StorageContainerException + */ + CONTAINERDATA getContainerData(); + + /** + * Get the Container Lifecycle state. + * + * @return ContainerLifeCycleState - Container State. 
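BlockIterator, added above and returned by Container.blockIterator() below, is a small seekable-cursor contract. Here is a hedged in-memory sketch of an implementation over a List, assuming the interface carries a type parameter T as its nextBlock() signature suggests; the real implementations iterate a container's metadata store instead.

```java
import java.io.IOException;
import java.util.List;
import java.util.NoSuchElementException;

import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator;

/** In-memory BlockIterator over a fixed list; for illustration only. */
public class ListBlockIterator<T> implements BlockIterator<T> {

  private final List<T> blocks;
  private int position;

  public ListBlockIterator(List<T> blocks) {
    this.blocks = blocks;
    this.position = 0;
  }

  @Override
  public boolean hasNext() throws IOException {
    return position < blocks.size();
  }

  @Override
  public void seekToFirst() {
    position = 0;
  }

  @Override
  public void seekToLast() {
    // Position on the last element so nextBlock() returns it.
    position = blocks.isEmpty() ? 0 : blocks.size() - 1;
  }

  @Override
  public T nextBlock() throws IOException, NoSuchElementException {
    if (position >= blocks.size()) {
      throw new NoSuchElementException("No more blocks");
    }
    return blocks.get(position++);
  }
}
```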
+ * @throws StorageContainerException + */ + ContainerLifeCycleState getContainerState(); + + /** + * Closes a open container, if it is already closed or does not exist a + * StorageContainerException is thrown. + * + * @throws StorageContainerException + */ + void close() throws StorageContainerException; + + /** + * Return the ContainerType for the container. + */ + ContainerProtos.ContainerType getContainerType(); + + /** + * Returns containerFile. + */ + File getContainerFile(); + + /** + * updates the DeleteTransactionId. + * @param deleteTransactionId + */ + void updateDeleteTransactionId(long deleteTransactionId); + + /** + * Returns blockIterator for the container. + * @return BlockIterator + * @throws IOException + */ + BlockIterator blockIterator() throws IOException; + + /** + * Import the container from an external archive. + */ + void importContainerData(InputStream stream, + ContainerPacker packer) throws IOException; + + /** + * Export all the data of the container to one output archive with the help + * of the packer. + * + */ + void exportContainerData(OutputStream stream, + ContainerPacker packer) throws IOException; + + /** + * Returns containerReport for the container. + */ + StorageContainerDatanodeProtocolProtos.ContainerInfo getContainerReport() + throws StorageContainerException; +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicy.java index 1ed50fb03b5..84c4f903f37 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicy.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDeletionChoosingPolicy.java @@ -17,9 +17,10 @@ */ package org.apache.hadoop.ozone.container.common.interfaces; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.container.common.helpers .StorageContainerException; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; import java.util.List; import java.util.Map; @@ -42,4 +43,16 @@ List chooseContainerForBlockDeletion(int count, Map candidateContainers) throws StorageContainerException; + + /** + * Determine if the container has suitable type for this policy. + * @param type type of the container + * @return whether the container type suitable for this policy. 
+ */ + default boolean isValidContainerType(ContainerProtos.ContainerType type) { + if (type == ContainerProtos.ContainerType.KeyValueContainer) { + return true; + } + return false; + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java index 7e12614c6e3..18644bb9c03 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerDispatcher.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.container.common.interfaces; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos @@ -48,4 +49,17 @@ * Shutdown Dispatcher services. */ void shutdown(); + + /** + * Returns the handler for the specified containerType. + * @param containerType + * @return + */ + Handler getHandler(ContainerProtos.ContainerType containerType); + + /** + * If scmId is not set, this will set scmId, otherwise it is a no-op. + * @param scmId + */ + void setScmId(String scmId); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerManager.java deleted file mode 100644 index cf68b08a23f..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerManager.java +++ /dev/null @@ -1,269 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.ozone.container.common.interfaces; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdfs.server.datanode.StorageLocation; -import org.apache.hadoop.hdfs.util.RwLock; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.NodeReportProto; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; - -import java.io.IOException; -import java.security.NoSuchAlgorithmException; -import java.util.List; - -/** - * Interface for container operations. - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public interface ContainerManager extends RwLock { - - /** - * Init call that sets up a container Manager. - * - * @param config - Configuration. - * @param containerDirs - List of Metadata Container locations. - * @param datanodeDetails - DatanodeDetails - * @throws StorageContainerException - */ - void init(Configuration config, List containerDirs, - DatanodeDetails datanodeDetails) throws IOException; - - /** - * Creates a container with the given name. - * - * @param containerData - Container Name and metadata. - * @throws StorageContainerException - */ - void createContainer(ContainerData containerData) - throws StorageContainerException; - - /** - * Deletes an existing container. - * - * @param containerID - ID of the container. - * @param forceDelete - whether this container should be deleted forcibly. - * @throws StorageContainerException - */ - void deleteContainer(long containerID, - boolean forceDelete) throws StorageContainerException; - - /** - * Update an existing container. - * - * @param containerID ID of the container - * @param data container data - * @param forceUpdate if true, update container forcibly. - * @throws StorageContainerException - */ - void updateContainer(long containerID, ContainerData data, - boolean forceUpdate) throws StorageContainerException; - - /** - * As simple interface for container Iterations. - * - * @param startContainerID - Return containers with ID >= startContainerID. - * @param count - how many to return - * @param data - Actual containerData - * @throws StorageContainerException - */ - void listContainer(long startContainerID, long count, - List data) throws StorageContainerException; - - /** - * Choose containers for block deletion. - * - * @param count - how many to return - * @throws StorageContainerException - */ - List chooseContainerForBlockDeletion(int count) - throws StorageContainerException; - - /** - * Get metadata about a specific container. - * - * @param containerID - ID of the container. - * @return ContainerData - Container Data. - * @throws StorageContainerException - */ - ContainerData readContainer(long containerID) - throws StorageContainerException; - - /** - * Closes a open container, if it is already closed or does not exist a - * StorageContainerException is thrown. - * @param containerID - ID of the container. - * @throws StorageContainerException - */ - void closeContainer(long containerID) - throws StorageContainerException, NoSuchAlgorithmException; - - /** - * Checks if a container exists. - * @param containerID - ID of the container. 
- * @return true if the container is open false otherwise. - * @throws StorageContainerException - Throws Exception if we are not - * able to find the container. - */ - boolean isOpen(long containerID) throws StorageContainerException; - - /** - * Supports clean shutdown of container. - * - * @throws StorageContainerException - */ - void shutdown() throws IOException; - - /** - * Sets the Chunk Manager. - * - * @param chunkManager - ChunkManager. - */ - void setChunkManager(ChunkManager chunkManager); - - /** - * Gets the Chunk Manager. - * - * @return ChunkManager. - */ - ChunkManager getChunkManager(); - - /** - * Sets the Key Manager. - * - * @param keyManager - Key Manager. - */ - void setKeyManager(KeyManager keyManager); - - /** - * Gets the Key Manager. - * - * @return KeyManager. - */ - KeyManager getKeyManager(); - - /** - * Get the Node Report of container storage usage. - * @return node report. - */ - NodeReportProto getNodeReport() throws IOException; - - /** - * Gets container report. - * @return container report. - * @throws IOException - */ - ContainerReportsProto getContainerReport() throws IOException; - - /** - * Gets container reports. - * @return List of all closed containers. - * @throws IOException - */ - List getClosedContainerReports() throws IOException; - - /** - * Increase pending deletion blocks count number of specified container. - * - * @param numBlocks - * increment count number - * @param containerId - * container id - */ - void incrPendingDeletionBlocks(int numBlocks, long containerId); - - /** - * Decrease pending deletion blocks count number of specified container. - * - * @param numBlocks - * decrement count number - * @param containerId - * container id - */ - void decrPendingDeletionBlocks(int numBlocks, long containerId); - - /** - * Increase the read count of the container. - * @param containerId - ID of the container. - */ - void incrReadCount(long containerId); - - /** - * Increse the read counter for bytes read from the container. - * @param containerId - ID of the container. - * @param readBytes - bytes read from the container. - */ - void incrReadBytes(long containerId, long readBytes); - - - /** - * Increase the write count of the container. - * @param containerId - ID of the container. - */ - void incrWriteCount(long containerId); - - /** - * Increase the write counter for bytes write into the container. - * @param containerId - ID of the container. - * @param writeBytes - bytes write into the container. - */ - void incrWriteBytes(long containerId, long writeBytes); - - /** - * Increase the bytes used by the container. - * @param containerId - ID of the container. - * @param used - additional bytes used by the container. - * @return the current bytes used. - */ - long incrBytesUsed(long containerId, long used); - - /** - * Decrease the bytes used by the container. - * @param containerId - ID of the container. - * @param used - additional bytes reclaimed by the container. - * @return the current bytes used. - */ - long decrBytesUsed(long containerId, long used); - - /** - * Get the bytes used by the container. - * @param containerId - ID of the container. - * @return the current bytes used by the container. - */ - long getBytesUsed(long containerId); - - /** - * Get the number of keys in the container. - * @param containerId - ID of the container. - * @return the current key count. 
- */ - long getNumKeys(long containerId); - - void updateDeleteTransactionId(long containerId, long deleteTransactionId); - -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerPacker.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerPacker.java new file mode 100644 index 00000000000..8308c23866b --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ContainerPacker.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.interfaces; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.ozone.container.common.impl.ContainerData; + +/** + * Service to pack/unpack ContainerData container data to/from a single byte + * stream. + */ +public interface ContainerPacker { + + /** + * Extract the container data to the path defined by the container. + *
* <p>
+ * This doesn't contain the extraction of the container descriptor file. + * + * @return the byte content of the descriptor (which won't be written to a + * file but returned). + */ + byte[] unpackContainerData(Container container, + InputStream inputStream) + throws IOException; + + /** + * Compress all the container data (chunk data, metadata db AND container + * descriptor) to one single archive. + */ + void pack(Container container, OutputStream destination) + throws IOException; + + /** + * Read the descriptor from the finished archive to get the data before + * importing the container. + */ + byte[] unpackContainerDescriptor(InputStream inputStream) + throws IOException; +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java new file mode 100644 index 00000000000..53e1c68a4ec --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.interfaces; + + +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerType; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; +import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; + +/** + * Dispatcher sends ContainerCommandRequests to Handler. Each Container Type + * should have an implementation for Handler. 
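The ContainerPacker contract above (stream the whole container into one archive, and be able to pull just the descriptor back out) can be illustrated with a JDK-only ZIP sketch; the real implementation is tar based (TarContainerPacker), and the entry names below are made up.

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

/** Toy single-archive packer (ZIP instead of tar); not the Ozone TarContainerPacker. */
public class ZipPackerSketch {

  static final String DESCRIPTOR_ENTRY = "container.descriptor";

  /** Pack a descriptor plus one data file into a single archive stream. */
  static void pack(byte[] descriptor, Path dataFile, OutputStream destination)
      throws IOException {
    try (ZipOutputStream zip = new ZipOutputStream(destination)) {
      zip.putNextEntry(new ZipEntry(DESCRIPTOR_ENTRY));
      zip.write(descriptor);
      zip.closeEntry();

      zip.putNextEntry(new ZipEntry("data/" + dataFile.getFileName()));
      Files.copy(dataFile, zip);
      zip.closeEntry();
    }
  }

  /** Read only the descriptor entry back, as unpackContainerDescriptor() promises. */
  static byte[] readDescriptor(InputStream archive) throws IOException {
    try (ZipInputStream zip = new ZipInputStream(archive)) {
      for (ZipEntry entry = zip.getNextEntry(); entry != null;
           entry = zip.getNextEntry()) {
        if (DESCRIPTOR_ENTRY.equals(entry.getName())) {
          ByteArrayOutputStream bytes = new ByteArrayOutputStream();
          byte[] buffer = new byte[4096];
          int read;
          while ((read = zip.read(buffer)) != -1) {
            bytes.write(buffer, 0, read);
          }
          return bytes.toByteArray();
        }
      }
    }
    throw new IOException("Descriptor entry not found in archive");
  }
}
```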
+ */ +public abstract class Handler { + + protected final Configuration conf; + protected final ContainerSet containerSet; + protected final VolumeSet volumeSet; + protected String scmID; + protected final ContainerMetrics metrics; + + protected Handler(Configuration config, ContainerSet contSet, + VolumeSet volumeSet, ContainerMetrics containerMetrics) { + conf = config; + containerSet = contSet; + this.volumeSet = volumeSet; + this.metrics = containerMetrics; + } + + public static Handler getHandlerForContainerType(ContainerType containerType, + Configuration config, ContainerSet contSet, VolumeSet volumeSet, + ContainerMetrics metrics) { + switch (containerType) { + case KeyValueContainer: + return new KeyValueHandler(config, contSet, volumeSet, metrics); + default: + throw new IllegalArgumentException("Handler for ContainerType: " + + containerType + "doesn't exist."); + } + } + + public abstract ContainerCommandResponseProto handle( + ContainerCommandRequestProto msg, Container container); + + /** + * Import container data from a raw input stream. + */ + public abstract Container importContainer( + long containerID, + long maxSize, + FileInputStream rawContainerStream, + TarContainerPacker packer) + throws IOException; + + public void setScmID(String scmId) { + this.scmID = scmId; + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/KeyManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/KeyManager.java deleted file mode 100644 index 158ce38efc7..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/KeyManager.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.container.common.interfaces; - -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.ozone.container.common.helpers.KeyData; - -import java.io.IOException; -import java.util.List; - -/** - * KeyManager deals with Key Operations in the container Level. - */ -public interface KeyManager { - /** - * Puts or overwrites a key. - * - * @param data - Key Data. - * @throws IOException - */ - void putKey(KeyData data) throws IOException; - - /** - * Gets an existing key. - * - * @param data - Key Data. - * @return Key Data. - * @throws IOException - */ - KeyData getKey(KeyData data) throws IOException; - - /** - * Deletes an existing Key. - * - * @param blockID - ID of the block. - * @throws StorageContainerException - */ - void deleteKey(BlockID blockID) - throws IOException; - - /** - * List keys in a container. 
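Handler.getHandlerForContainerType() above is a plain enum-dispatched factory. A self-contained toy of the same pattern, with illustrative names only:

```java
/** Toy enum-dispatched factory mirroring Handler.getHandlerForContainerType(). */
public class HandlerFactorySketch {

  enum ContainerType { KeyValueContainer }

  interface Handler { String name(); }

  static Handler getHandlerForContainerType(ContainerType type) {
    switch (type) {
    case KeyValueContainer:
      return () -> "key-value handler";
    default:
      throw new IllegalArgumentException(
          "Handler for ContainerType: " + type + " doesn't exist.");
    }
  }

  public static void main(String[] args) {
    System.out.println(
        getHandlerForContainerType(ContainerType.KeyValueContainer).name());
  }
}
```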
- * - * @param containerID - ID of the container. - * @param startLocalID - Key to start from, 0 to begin. - * @param count - Number of keys to return. - * @return List of Keys that match the criteria. - */ - List listKey(long containerID, long startLocalID, - int count) throws IOException; - - /** - * Shutdown keyManager. - */ - void shutdown(); -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/VolumeChoosingPolicy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/VolumeChoosingPolicy.java new file mode 100644 index 00000000000..7de0e2a967d --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/VolumeChoosingPolicy.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.common.interfaces; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; + +import java.io.IOException; +import java.util.List; + +/** + * This interface specifies the policy for choosing volumes to store replicas. + */ +@InterfaceAudience.Private +public interface VolumeChoosingPolicy { + + /** + * Choose a volume to place a container, + * given a list of volumes and the max container size sought for storage. + * + * The implementations of this interface must be thread-safe. + * + * @param volumes - a list of available volumes. + * @param maxContainerSize - the maximum size of the container for which a + * volume is sought. + * @return the chosen volume. + * @throws IOException when disks are unavailable or are full. + */ + HddsVolume chooseVolume(List volumes, long maxContainerSize) + throws IOException; +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/CommandStatusReportPublisher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/CommandStatusReportPublisher.java new file mode 100644 index 00000000000..4cf6321e4ce --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/CommandStatusReportPublisher.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.container.common.report; + +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatus.Status; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; +import org.apache.hadoop.hdds.scm.HddsServerUtil; +import org.apache.hadoop.ozone.protocol.commands.CommandStatus; + +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_COMMAND_STATUS_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_COMMAND_STATUS_REPORT_INTERVAL_DEFAULT; + +/** + * Publishes CommandStatusReport which will be sent to SCM as part of + * heartbeat. CommandStatusReport consist of the following information: + * - type : type of command. + * - status : status of command execution (PENDING, EXECUTED, FAILURE). + * - cmdId : Command id. + * - msg : optional message. + */ +public class CommandStatusReportPublisher extends + ReportPublisher { + + private long cmdStatusReportInterval = -1; + + @Override + protected long getReportFrequency() { + if (cmdStatusReportInterval == -1) { + cmdStatusReportInterval = getConf().getTimeDuration( + HDDS_COMMAND_STATUS_REPORT_INTERVAL, + HDDS_COMMAND_STATUS_REPORT_INTERVAL_DEFAULT, + TimeUnit.MILLISECONDS); + + long heartbeatFrequency = HddsServerUtil.getScmHeartbeatInterval( + getConf()); + + Preconditions.checkState( + heartbeatFrequency <= cmdStatusReportInterval, + HDDS_COMMAND_STATUS_REPORT_INTERVAL + + " cannot be configured lower than heartbeat frequency."); + } + return cmdStatusReportInterval; + } + + @Override + protected CommandStatusReportsProto getReport() { + Map map = this.getContext() + .getCommandStatusMap(); + Iterator iterator = map.keySet().iterator(); + CommandStatusReportsProto.Builder builder = CommandStatusReportsProto + .newBuilder(); + + iterator.forEachRemaining(key -> { + CommandStatus cmdStatus = map.get(key); + builder.addCmdStatus(cmdStatus.getProtoBufMessage()); + // If status is still pending then don't remove it from map as + // CommandHandler will change its status when it works on this command. 
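getReport() here snapshots every tracked command status into the report and, as the lines that follow show, drops entries that are no longer pending so only in-flight commands stay in the map. A JDK-only toy of that pattern; the names are illustrative, and the map must be a concurrent one so removal during iteration is safe.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/** Toy model of building a status report and pruning finished commands. */
public class CommandReportSketch {

  enum Status { PENDING, EXECUTED, FAILURE }

  /** Snapshot every status into the report; keep only PENDING entries in the map. */
  static List<String> buildReport(Map<Long, Status> commandStatus) {
    List<String> report = new ArrayList<>();
    // Relies on the weakly consistent iterators of a concurrent map.
    for (Map.Entry<Long, Status> entry : commandStatus.entrySet()) {
      report.add("cmd " + entry.getKey() + " -> " + entry.getValue());
      // PENDING commands stay in the map so the handler can update them later.
      if (entry.getValue() != Status.PENDING) {
        commandStatus.remove(entry.getKey());
      }
    }
    return report;
  }

  public static void main(String[] args) {
    Map<Long, Status> statuses = new ConcurrentHashMap<>();
    statuses.put(1L, Status.PENDING);
    statuses.put(2L, Status.EXECUTED);
    System.out.println(buildReport(statuses)); // both commands reported
    System.out.println(statuses.keySet());     // only [1] (the pending one) remains
  }
}
```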
+ if (!cmdStatus.getStatus().equals(Status.PENDING)) { + map.remove(key); + } + }); + return builder.build(); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ContainerReportPublisher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ContainerReportPublisher.java index ea2b987036a..ccb9a9aadac 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ContainerReportPublisher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ContainerReportPublisher.java @@ -17,13 +17,20 @@ package org.apache.hadoop.ozone.container.common.report; +import com.google.common.base.Preconditions; import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; -import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.hdds.scm.HddsServerUtil; +import java.io.IOException; import java.util.concurrent.TimeUnit; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_CONTAINER_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_CONTAINER_REPORT_INTERVAL_DEFAULT; + /** * Publishes ContainerReport which will be sent to SCM as part of heartbeat. @@ -49,9 +56,17 @@ protected long getReportFrequency() { if (containerReportInterval == null) { containerReportInterval = getConf().getTimeDuration( - OzoneConfigKeys.OZONE_CONTAINER_REPORT_INTERVAL, - OzoneConfigKeys.OZONE_CONTAINER_REPORT_INTERVAL_DEFAULT, + HDDS_CONTAINER_REPORT_INTERVAL, + HDDS_CONTAINER_REPORT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + + long heartbeatFrequency = HddsServerUtil.getScmHeartbeatInterval( + getConf()); + + Preconditions.checkState( + heartbeatFrequency <= containerReportInterval, + HDDS_CONTAINER_REPORT_INTERVAL + + " cannot be configured lower than heartbeat frequency."); } // Add a random delay (0~30s) on top of the container report // interval (60s) so tha the SCM is overwhelmed by the container reports @@ -64,7 +79,7 @@ private long getRandomReportDelay() { } @Override - protected ContainerReportsProto getReport() { - return ContainerReportsProto.getDefaultInstance(); + protected ContainerReportsProto getReport() throws IOException { + return getContext().getParent().getContainer().getContainerReport(); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/NodeReportPublisher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/NodeReportPublisher.java index 704b1f5b19d..6ac99dd4d32 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/NodeReportPublisher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/NodeReportPublisher.java @@ -17,8 +17,18 @@ package org.apache.hadoop.ozone.container.common.report; +import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.NodeReportProto; +import org.apache.hadoop.hdds.scm.HddsServerUtil; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_NODE_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_NODE_REPORT_INTERVAL_DEFAULT; /** * Publishes NodeReport which will be sent to SCM as 
part of heartbeat. @@ -28,13 +38,29 @@ */ public class NodeReportPublisher extends ReportPublisher { + private Long nodeReportInterval; + @Override protected long getReportFrequency() { - return 90000L; + if (nodeReportInterval == null) { + nodeReportInterval = getConf().getTimeDuration( + HDDS_NODE_REPORT_INTERVAL, + HDDS_NODE_REPORT_INTERVAL_DEFAULT, + TimeUnit.MILLISECONDS); + + long heartbeatFrequency = HddsServerUtil.getScmHeartbeatInterval( + getConf()); + + Preconditions.checkState( + heartbeatFrequency <= nodeReportInterval, + HDDS_NODE_REPORT_INTERVAL + + " cannot be configured lower than heartbeat frequency."); + } + return nodeReportInterval; } @Override - protected NodeReportProto getReport() { - return NodeReportProto.getDefaultInstance(); + protected NodeReportProto getReport() throws IOException { + return getContext().getParent().getContainer().getNodeReport(); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java index 4ff47a05232..e3910dbda1a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java @@ -23,7 +23,10 @@ import org.apache.hadoop.ozone.container.common.statemachine .DatanodeStateMachine.DatanodeStates; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.IOException; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -34,6 +37,9 @@ public abstract class ReportPublisher implements Configurable, Runnable { + private static final Logger LOG = LoggerFactory.getLogger( + ReportPublisher.class); + private Configuration config; private StateContext context; private ScheduledExecutorService executor; @@ -76,7 +82,11 @@ public void run() { * Generates and publishes the report to datanode state context. */ private void publishReport() { - context.addReport(getReport()); + try { + context.addReport(getReport()); + } catch (IOException e) { + LOG.error("Exception while publishing report.", e); + } } /** @@ -91,6 +101,15 @@ private void publishReport() { * * @return datanode report */ - protected abstract T getReport(); + protected abstract T getReport() throws IOException; + + /** + * Returns {@link StateContext}. + * + * @return stateContext report + */ + protected StateContext getContext() { + return context; + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisherFactory.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisherFactory.java index dc246d9428c..ea892807291 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisherFactory.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisherFactory.java @@ -19,6 +19,8 @@ import com.google.protobuf.GeneratedMessage; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.proto. 
+ StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol.proto @@ -49,6 +51,8 @@ public ReportPublisherFactory(Configuration conf) { report2publisher.put(NodeReportProto.class, NodeReportPublisher.class); report2publisher.put(ContainerReportsProto.class, ContainerReportPublisher.class); + report2publisher.put(CommandStatusReportsProto.class, + CommandStatusReportPublisher.class); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index b073d7b81d8..875d0638d5c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -16,11 +16,17 @@ */ package org.apache.hadoop.ozone.container.common.statemachine; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol.proto @@ -38,17 +44,12 @@ import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.concurrent.HadoopExecutors; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Closeable; -import java.io.IOException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - -import static org.apache.hadoop.hdds.scm.HddsServerUtil.getScmHeartbeatInterval; - /** * State Machine Class. 
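Any additional datanode report follows the same three-step wiring that CommandStatusReportsProto gets in this patch: a ReportPublisher subclass, a mapping in ReportPublisherFactory, and an addPublisherFor(...) registration on the report manager builder in DatanodeStateMachine (further below). A minimal publisher skeleton, modelled on the two publishers above; MyReportProto and the interval key are placeholders, and the generic parameter is assumed to be the type returned by getReport():

    public class MyReportPublisher extends ReportPublisher<MyReportProto> {
      @Override
      protected long getReportFrequency() {
        // Same pattern as Node/ContainerReportPublisher: read an interval from the
        // configuration; those publishers additionally verify it is not shorter
        // than the SCM heartbeat interval.
        return getConf().getTimeDuration("my.report.interval.placeholder",
            60000, TimeUnit.MILLISECONDS);
      }

      @Override
      protected MyReportProto getReport() throws IOException {
        // The real publishers pull their payload from the OzoneContainer via
        // getContext().getParent().getContainer().
        return MyReportProto.getDefaultInstance();
      }
    }
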
*/ @@ -59,7 +60,6 @@ private final ExecutorService executorService; private final Configuration conf; private final SCMConnectionManager connectionManager; - private final long heartbeatFrequency; private StateContext context; private final OzoneContainer container; private DatanodeDetails datanodeDetails; @@ -85,19 +85,18 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, .setNameFormat("Datanode State Machine Thread - %d").build()); connectionManager = new SCMConnectionManager(conf); context = new StateContext(this.conf, DatanodeStates.getInitState(), this); - heartbeatFrequency = TimeUnit.SECONDS.toMillis( - getScmHeartbeatInterval(conf)); container = new OzoneContainer(this.datanodeDetails, - new OzoneConfiguration(conf)); + new OzoneConfiguration(conf), context); nextHB = new AtomicLong(Time.monotonicNow()); - // When we add new handlers just adding a new handler here should do the + // When we add new handlers just adding a new handler here should do the // trick. commandDispatcher = CommandDispatcher.newBuilder() .addHandler(new CloseContainerCommandHandler()) - .addHandler(new DeleteBlocksCommandHandler( - container.getContainerManager(), conf)) - .addHandler(new ReplicateContainerCommandHandler()) + .addHandler(new DeleteBlocksCommandHandler(container.getContainerSet(), + conf)) + .addHandler(new ReplicateContainerCommandHandler(conf, + container.getContainerSet(), container.getDispatcher())) .setConnectionManager(connectionManager) .setContainer(container) .setContext(context) @@ -107,6 +106,7 @@ public DatanodeStateMachine(DatanodeDetails datanodeDetails, .setStateContext(context) .addPublisherFor(NodeReportProto.class) .addPublisherFor(ContainerReportsProto.class) + .addPublisherFor(CommandStatusReportsProto.class) .build(); } @@ -146,6 +146,7 @@ private void start() throws IOException { while (context.getState() != DatanodeStates.SHUTDOWN) { try { LOG.debug("Executing cycle Number : {}", context.getExecutionCount()); + long heartbeatFrequency = context.getHeartbeatFrequency(); nextHB.set(Time.monotonicNow() + heartbeatFrequency); context.execute(executorService, heartbeatFrequency, TimeUnit.MILLISECONDS); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java index 7e85923d315..5f78a333d98 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.time.ZonedDateTime; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -203,11 +204,11 @@ public void logIfNeeded(Exception ex) { this.incMissed(); if (this.getMissedCount() % getLogWarnInterval(conf) == 0) { - LOG.warn("Unable to communicate to SCM server at {}. 
We have not been " + - "able to communicate to this SCM server for past {} seconds.", + LOG.error( + "Unable to communicate to SCM server at {} for past {} seconds.", this.getAddress().getHostString() + ":" + this.getAddress().getPort(), - this.getMissedCount() * getScmHeartbeatInterval( - this.conf)); + TimeUnit.MILLISECONDS.toSeconds( + this.getMissedCount() * getScmHeartbeatInterval(this.conf)), ex); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java index 19722f04a52..85fb5805241 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java @@ -67,7 +67,7 @@ public SCMConnectionManager(Configuration conf) { this.rpcTimeout = timeOut.intValue(); this.scmMachines = new HashMap<>(); this.conf = conf; - jmxBean = MBeans.register("OzoneDataNode", + jmxBean = MBeans.register("HddsDatanode", "SCMConnectionManager", this); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java index 98eb7a05f64..9d5a7781b03 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java @@ -17,17 +17,29 @@ package org.apache.hadoop.ozone.container.common.statemachine; import com.google.protobuf.GeneratedMessage; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.PipelineAction; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerAction; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatus.Status; import org.apache.hadoop.ozone.container.common.states.DatanodeState; import org.apache.hadoop.ozone.container.common.states.datanode .InitDatanodeState; import org.apache.hadoop.ozone.container.common.states.datanode .RunningDatanodeState; +import org.apache.hadoop.ozone.protocol.commands.CommandStatus; +import org.apache.hadoop.ozone.protocol.commands.CommandStatus + .CommandStatusBuilder; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; + +import static org.apache.hadoop.hdds.scm.HddsServerUtil.getScmHeartbeatInterval; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.Queue; @@ -38,6 +50,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; import static org.apache.hadoop.ozone.OzoneConsts.INVALID_PORT; @@ -48,13 +61,23 @@ static final Logger LOG = LoggerFactory.getLogger(StateContext.class); private final Queue commandQueue; + private final Map cmdStatusMap; private final Lock lock; private final DatanodeStateMachine parent; private final AtomicLong 
stateExecutionCount; private final Configuration conf; private final Queue reports; + private final Queue containerActions; + private final Queue pipelineActions; private DatanodeStateMachine.DatanodeStates state; + /** + * Starting with a 2 sec heartbeat frequency which will be updated to the + * real HB frequency after scm registration. With this method the + * initial registration could be significant faster. + */ + private AtomicLong heartbeatFrequency = new AtomicLong(2000); + /** * Constructs a StateContext. * @@ -68,7 +91,10 @@ public StateContext(Configuration conf, DatanodeStateMachine.DatanodeStates this.state = state; this.parent = parent; commandQueue = new LinkedList<>(); + cmdStatusMap = new ConcurrentHashMap<>(); reports = new LinkedList<>(); + containerActions = new LinkedList<>(); + pipelineActions = new LinkedList<>(); lock = new ReentrantLock(); stateExecutionCount = new AtomicLong(0); } @@ -180,15 +206,99 @@ public GeneratedMessage getNextReport() { * @return List */ public List getReports(int maxLimit) { - List results = new ArrayList<>(); synchronized (reports) { - GeneratedMessage report = reports.poll(); - while(results.size() < maxLimit && report != null) { - results.add(report); - report = reports.poll(); + return reports.parallelStream().limit(maxLimit) + .collect(Collectors.toList()); + } + } + + + /** + * Adds the ContainerAction to ContainerAction queue. + * + * @param containerAction ContainerAction to be added + */ + public void addContainerAction(ContainerAction containerAction) { + synchronized (containerActions) { + containerActions.add(containerAction); + } + } + + /** + * Add ContainerAction to ContainerAction queue if it's not present. + * + * @param containerAction ContainerAction to be added + */ + public void addContainerActionIfAbsent(ContainerAction containerAction) { + synchronized (containerActions) { + if (!containerActions.contains(containerAction)) { + containerActions.add(containerAction); + } + } + } + + /** + * Returns all the pending ContainerActions from the ContainerAction queue, + * or empty list if the queue is empty. + * + * @return List + */ + public List getAllPendingContainerActions() { + return getPendingContainerAction(Integer.MAX_VALUE); + } + + /** + * Returns pending ContainerActions from the ContainerAction queue with a + * max limit on list size, or empty list if the queue is empty. + * + * @return List + */ + public List getPendingContainerAction(int maxLimit) { + synchronized (containerActions) { + return containerActions.parallelStream().limit(maxLimit) + .collect(Collectors.toList()); + } + } + + /** + * Add PipelineAction to PipelineAction queue if it's not present. + * + * @param pipelineAction PipelineAction to be added + */ + public void addPipelineActionIfAbsent(PipelineAction pipelineAction) { + synchronized (pipelineActions) { + /** + * If pipelineAction queue already contains entry for the pipeline id + * with same action, we should just return. + * Note: We should not use pipelineActions.contains(pipelineAction) here + * as, pipelineAction has a msg string. So even if two msgs differ though + * action remains same on the given pipeline, it will end up adding it + * multiple times here. 
+ */ + for (PipelineAction pipelineActionIter : pipelineActions) { + if (pipelineActionIter.getAction() == pipelineAction.getAction() + && pipelineActionIter.hasClosePipeline() && pipelineAction + .hasClosePipeline() + && pipelineActionIter.getClosePipeline().getPipelineID() + .equals(pipelineAction.getClosePipeline().getPipelineID())) { + return; + } } + pipelineActions.add(pipelineAction); + } + } + + /** + * Returns pending PipelineActions from the PipelineAction queue with a + * max limit on list size, or empty list if the queue is empty. + * + * @return List + */ + public List getPendingPipelineAction(int maxLimit) { + synchronized (pipelineActions) { + return pipelineActions.parallelStream().limit(maxLimit) + .collect(Collectors.toList()); } - return results; } /** @@ -269,6 +379,7 @@ public void addCommand(SCMCommand command) { } finally { lock.unlock(); } + this.addCmdStatus(command); } /** @@ -279,4 +390,77 @@ public long getExecutionCount() { return stateExecutionCount.get(); } + /** + * Returns the next {@link CommandStatus} or null if it is empty. + * + * @return {@link CommandStatus} or Null. + */ + public CommandStatus getCmdStatus(Long key) { + return cmdStatusMap.get(key); + } + + /** + * Adds a {@link CommandStatus} to the State Machine. + * + * @param status - {@link CommandStatus}. + */ + public void addCmdStatus(Long key, CommandStatus status) { + cmdStatusMap.put(key, status); + } + + /** + * Adds a {@link CommandStatus} to the State Machine for given SCMCommand. + * + * @param cmd - {@link SCMCommand}. + */ + public void addCmdStatus(SCMCommand cmd) { + this.addCmdStatus(cmd.getId(), + CommandStatusBuilder.newBuilder() + .setCmdId(cmd.getId()) + .setStatus(Status.PENDING) + .setType(cmd.getType()) + .build()); + } + + /** + * Get map holding all {@link CommandStatus} objects. + * + */ + public Map getCommandStatusMap() { + return cmdStatusMap; + } + + /** + * Remove object from cache in StateContext#cmdStatusMap. + * + */ + public void removeCommandStatus(Long cmdId) { + cmdStatusMap.remove(cmdId); + } + + /** + * Updates status of a pending status command. + * @param cmdId command id + * @param cmdExecuted SCMCommand + * @return true if command status updated successfully else false. + */ + public boolean updateCommandStatus(Long cmdId, boolean cmdExecuted) { + if(cmdStatusMap.containsKey(cmdId)) { + cmdStatusMap.get(cmdId) + .setStatus(cmdExecuted ? Status.EXECUTED : Status.FAILED); + return true; + } + return false; + } + + public void configureHeartbeatFrequency(){ + heartbeatFrequency.set(getScmHeartbeatInterval(conf)); + } + + /** + * Return current heartbeat frequency in ms. 
+ */ + public long getHeartbeatFrequency() { + return heartbeatFrequency.get(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java index 45f2bbd145f..030a35788fb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java @@ -41,6 +41,7 @@ LoggerFactory.getLogger(CloseContainerCommandHandler.class); private int invocationCount; private long totalTime; + private boolean cmdExecuted; /** * Constructs a ContainerReport handler. @@ -61,6 +62,7 @@ public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { LOG.debug("Processing Close Container command."); invocationCount++; + cmdExecuted = false; long startTime = Time.monotonicNow(); // TODO: define this as INVALID_CONTAINER_ID in HddsConsts.java (TBA) long containerID = -1; @@ -71,27 +73,27 @@ public void handle(SCMCommand command, OzoneContainer container, CloseContainerCommandProto .parseFrom(command.getProtoBufMessage()); containerID = closeContainerProto.getContainerID(); + HddsProtos.PipelineID pipelineID = closeContainerProto.getPipelineID(); HddsProtos.ReplicationType replicationType = closeContainerProto.getReplicationType(); - ContainerProtos.CloseContainerRequestProto.Builder closeRequest = - ContainerProtos.CloseContainerRequestProto.newBuilder(); - closeRequest.setContainerID(containerID); - ContainerProtos.ContainerCommandRequestProto.Builder request = ContainerProtos.ContainerCommandRequestProto.newBuilder(); request.setCmdType(ContainerProtos.Type.CloseContainer); - request.setCloseContainer(closeRequest); + request.setContainerID(containerID); + request.setCloseContainer( + ContainerProtos.CloseContainerRequestProto.getDefaultInstance()); request.setTraceID(UUID.randomUUID().toString()); request.setDatanodeUuid( context.getParent().getDatanodeDetails().getUuidString()); // submit the close container request for the XceiverServer to handle container.submitContainerRequest( - request.build(), replicationType); - + request.build(), replicationType, pipelineID); + cmdExecuted = true; } catch (Exception e) { LOG.error("Can't close container " + containerID, e); } finally { + updateCommandStatus(context, command, cmdExecuted, LOG); long endTime = Time.monotonicNow(); totalTime += endTime - startTime; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java index 60e2dc479da..71c25b5a5bf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CommandHandler.java @@ -24,6 +24,7 @@ import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import 
org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import org.slf4j.Logger; /** * Generic interface for handlers. @@ -58,4 +59,14 @@ void handle(SCMCommand command, OzoneContainer container, */ long getAverageRunTime(); + /** + * Default implementation for updating command status. + */ + default void updateCommandStatus(StateContext context, SCMCommand command, + boolean cmdExecuted, Logger log) { + if (!context.updateCommandStatus(command.getId(), cmdExecuted)) { + log.debug("{} with Id:{} not found.", command.getType(), + command.getId()); + } + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java index d215da98656..b0d4cbc0c22 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java @@ -18,8 +18,11 @@ import com.google.common.primitives.Longs; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMCommandProto; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto; @@ -29,11 +32,12 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; import org.apache.hadoop.ozone.container.common.helpers .DeletedContainerBlocksSummary; -import org.apache.hadoop.ozone.container.common.helpers.KeyUtils; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyUtils; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.apache.hadoop.ozone.container.common.statemachine .EndpointStateMachine; import org.apache.hadoop.ozone.container.common.statemachine @@ -51,6 +55,9 @@ import java.io.IOException; import java.util.List; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CONTAINER_NOT_FOUND; + /** * Handle block deletion commands. 
*/ @@ -59,114 +66,140 @@ private static final Logger LOG = LoggerFactory.getLogger(DeleteBlocksCommandHandler.class); - private ContainerManager containerManager; - private Configuration conf; + private final ContainerSet containerSet; + private final Configuration conf; private int invocationCount; private long totalTime; + private boolean cmdExecuted; - public DeleteBlocksCommandHandler(ContainerManager containerManager, + public DeleteBlocksCommandHandler(ContainerSet cset, Configuration conf) { - this.containerManager = containerManager; + this.containerSet = cset; this.conf = conf; } @Override public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { - if (command.getType() != SCMCommandProto.Type.deleteBlocksCommand) { - LOG.warn("Skipping handling command, expected command " - + "type {} but found {}", - SCMCommandProto.Type.deleteBlocksCommand, command.getType()); - return; - } - LOG.debug("Processing block deletion command."); - invocationCount++; + cmdExecuted = false; long startTime = Time.monotonicNow(); - - // move blocks to deleting state. - // this is a metadata update, the actual deletion happens in another - // recycling thread. - DeleteBlocksCommand cmd = (DeleteBlocksCommand) command; - List containerBlocks = cmd.blocksTobeDeleted(); - - - DeletedContainerBlocksSummary summary = - DeletedContainerBlocksSummary.getFrom(containerBlocks); - LOG.info("Start to delete container blocks, TXIDs={}, " - + "numOfContainers={}, numOfBlocks={}", - summary.getTxIDSummary(), - summary.getNumOfContainers(), - summary.getNumOfBlocks()); - - ContainerBlocksDeletionACKProto.Builder resultBuilder = - ContainerBlocksDeletionACKProto.newBuilder(); - containerBlocks.forEach(entry -> { - DeleteBlockTransactionResult.Builder txResultBuilder = - DeleteBlockTransactionResult.newBuilder(); - txResultBuilder.setTxID(entry.getTxID()); - try { - deleteContainerBlocks(entry, conf); - txResultBuilder.setSuccess(true); - } catch (IOException e) { - LOG.warn("Failed to delete blocks for container={}, TXID={}", - entry.getContainerID(), entry.getTxID(), e); - txResultBuilder.setSuccess(false); + try { + if (command.getType() != SCMCommandProto.Type.deleteBlocksCommand) { + LOG.warn("Skipping handling command, expected command " + + "type {} but found {}", + SCMCommandProto.Type.deleteBlocksCommand, command.getType()); + return; } - resultBuilder.addResults(txResultBuilder.build()); - }); - ContainerBlocksDeletionACKProto blockDeletionACK = resultBuilder.build(); - - // Send ACK back to SCM as long as meta updated - // TODO Or we should wait until the blocks are actually deleted? - if (!containerBlocks.isEmpty()) { - for (EndpointStateMachine endPoint : connectionManager.getValues()) { + LOG.debug("Processing block deletion command."); + invocationCount++; + + // move blocks to deleting state. + // this is a metadata update, the actual deletion happens in another + // recycling thread. 
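CloseContainerCommandHandler above and this handler now share the same bookkeeping shape around their actual work, relying on the updateCommandStatus default method added to CommandHandler and on the PENDING CommandStatus that StateContext.addCommand() records for every queued command. Condensed into one sketch rather than copied from either handler:

    public void handle(SCMCommand command, OzoneContainer container,
        StateContext context, SCMConnectionManager connectionManager) {
      cmdExecuted = false;
      invocationCount++;
      long startTime = Time.monotonicNow();
      try {
        // ... command-type check and the real work ...
        cmdExecuted = true;                    // only reached on success
      } finally {
        // Marks the CommandStatus as EXECUTED or FAILED so the next
        // CommandStatusReportsProto heartbeat reflects the outcome.
        updateCommandStatus(context, command, cmdExecuted, LOG);
        totalTime += Time.monotonicNow() - startTime;
      }
    }
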
+ DeleteBlocksCommand cmd = (DeleteBlocksCommand) command; + List containerBlocks = cmd.blocksTobeDeleted(); + + DeletedContainerBlocksSummary summary = + DeletedContainerBlocksSummary.getFrom(containerBlocks); + LOG.info("Start to delete container blocks, TXIDs={}, " + + "numOfContainers={}, numOfBlocks={}", + summary.getTxIDSummary(), + summary.getNumOfContainers(), + summary.getNumOfBlocks()); + + ContainerBlocksDeletionACKProto.Builder resultBuilder = + ContainerBlocksDeletionACKProto.newBuilder(); + containerBlocks.forEach(entry -> { + DeleteBlockTransactionResult.Builder txResultBuilder = + DeleteBlockTransactionResult.newBuilder(); + txResultBuilder.setTxID(entry.getTxID()); + long containerId = entry.getContainerID(); try { - if (LOG.isDebugEnabled()) { - LOG.debug("Sending following block deletion ACK to SCM"); - for (DeleteBlockTransactionResult result : - blockDeletionACK.getResultsList()) { - LOG.debug(result.getTxID() + " : " + result.getSuccess()); - } + Container cont = containerSet.getContainer(containerId); + if (cont == null) { + throw new StorageContainerException("Unable to find the container " + + containerId, CONTAINER_NOT_FOUND); + } + ContainerProtos.ContainerType containerType = cont.getContainerType(); + switch (containerType) { + case KeyValueContainer: + KeyValueContainerData containerData = (KeyValueContainerData) + cont.getContainerData(); + deleteKeyValueContainerBlocks(containerData, entry); + txResultBuilder.setContainerID(containerId) + .setSuccess(true); + break; + default: + LOG.error( + "Delete Blocks Command Handler is not implemented for " + + "containerType {}", containerType); } - endPoint.getEndPoint() - .sendContainerBlocksDeletionACK(blockDeletionACK); } catch (IOException e) { - LOG.error("Unable to send block deletion ACK to SCM {}", - endPoint.getAddress().toString(), e); + LOG.warn("Failed to delete blocks for container={}, TXID={}", + entry.getContainerID(), entry.getTxID(), e); + txResultBuilder.setContainerID(containerId) + .setSuccess(false); + } + resultBuilder.addResults(txResultBuilder.build()) + .setDnId(context.getParent().getDatanodeDetails() + .getUuid().toString()); + }); + ContainerBlocksDeletionACKProto blockDeletionACK = resultBuilder.build(); + + // Send ACK back to SCM as long as meta updated + // TODO Or we should wait until the blocks are actually deleted? + if (!containerBlocks.isEmpty()) { + for (EndpointStateMachine endPoint : connectionManager.getValues()) { + try { + if (LOG.isDebugEnabled()) { + LOG.debug("Sending following block deletion ACK to SCM"); + for (DeleteBlockTransactionResult result : + blockDeletionACK.getResultsList()) { + LOG.debug(result.getTxID() + " : " + result.getSuccess()); + } + } + endPoint.getEndPoint() + .sendContainerBlocksDeletionACK(blockDeletionACK); + } catch (IOException e) { + LOG.error("Unable to send block deletion ACK to SCM {}", + endPoint.getAddress().toString(), e); + } } } + cmdExecuted = true; + } finally { + updateCommandStatus(context, command, cmdExecuted, LOG); + long endTime = Time.monotonicNow(); + totalTime += endTime - startTime; } - - long endTime = Time.monotonicNow(); - totalTime += endTime - startTime; } /** - * Move a bunch of blocks from a container to deleting state. - * This is a meta update, the actual deletes happen in async mode. + * Move a bunch of blocks from a container to deleting state. This is a meta + * update, the actual deletes happen in async mode. * + * @param containerData - KeyValueContainerData * @param delTX a block deletion transaction. 
- * @param config configuration. * @throws IOException if I/O error occurs. */ - private void deleteContainerBlocks(DeletedBlocksTransaction delTX, - Configuration config) throws IOException { + private void deleteKeyValueContainerBlocks( + KeyValueContainerData containerData, DeletedBlocksTransaction delTX) + throws IOException { long containerId = delTX.getContainerID(); - ContainerData containerInfo = containerManager.readContainer(containerId); if (LOG.isDebugEnabled()) { LOG.debug("Processing Container : {}, DB path : {}", containerId, - containerInfo.getDBPath()); + containerData.getMetadataPath()); } - if (delTX.getTxID() < containerInfo.getDeleteTransactionId()) { + if (delTX.getTxID() < containerData.getDeleteTransactionId()) { LOG.debug(String.format("Ignoring delete blocks for containerId: %d." + " Outdated delete transactionId %d < %d", containerId, - delTX.getTxID(), containerInfo.getDeleteTransactionId())); + delTX.getTxID(), containerData.getDeleteTransactionId())); return; } int newDeletionBlocks = 0; - MetadataStore containerDB = KeyUtils.getDB(containerInfo, config); + MetadataStore containerDB = KeyUtils.getDB(containerData, conf); for (Long blk : delTX.getLocalIDList()) { BatchOperation batch = new BatchOperation(); byte[] blkBytes = Longs.toByteArray(blk); @@ -201,17 +234,17 @@ private void deleteContainerBlocks(DeletedBlocksTransaction delTX, } } else { LOG.debug("Block {} not found or already under deletion in" - + " container {}, skip deleting it.", blk, containerId); + + " container {}, skip deleting it.", blk, containerId); } } containerDB.put(DFSUtil.string2Bytes( OzoneConsts.DELETE_TRANSACTION_KEY_PREFIX + delTX.getContainerID()), Longs.toByteArray(delTX.getTxID())); - containerManager - .updateDeleteTransactionId(delTX.getContainerID(), delTX.getTxID()); + containerData + .updateDeleteTransactionId(delTX.getTxID()); // update pending deletion blocks count in in-memory container status - containerManager.incrPendingDeletionBlocks(newDeletionBlocks, containerId); + containerData.incrPendingDeletionBlocks(newDeletionBlocks); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java index b4e83b7d40c..cb677c272cc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/ReplicateContainerCommandHandler.java @@ -16,16 +16,37 @@ */ package org.apache.hadoop.ozone.container.common.statemachine.commandhandler; +import java.io.FileInputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import 
org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; +import org.apache.hadoop.ozone.container.common.interfaces.Handler; import org.apache.hadoop.ozone.container.common.statemachine .SCMConnectionManager; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; +import org.apache.hadoop.ozone.container.replication.ContainerDownloader; +import org.apache.hadoop.ozone.container.replication.SimpleContainerDownloader; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,18 +54,127 @@ * Command handler to copy containers from sources. */ public class ReplicateContainerCommandHandler implements CommandHandler { + static final Logger LOG = LoggerFactory.getLogger(ReplicateContainerCommandHandler.class); + private ContainerDispatcher containerDispatcher; + private int invocationCount; private long totalTime; + private ContainerDownloader downloader; + + private Configuration conf; + + private TarContainerPacker packer = new TarContainerPacker(); + + private ContainerSet containerSet; + + private Lock lock = new ReentrantLock(); + + public ReplicateContainerCommandHandler( + Configuration conf, + ContainerSet containerSet, + ContainerDispatcher containerDispatcher, + ContainerDownloader downloader) { + this.conf = conf; + this.containerSet = containerSet; + this.downloader = downloader; + this.containerDispatcher = containerDispatcher; + } + + public ReplicateContainerCommandHandler( + Configuration conf, + ContainerSet containerSet, + ContainerDispatcher containerDispatcher) { + this(conf, containerSet, containerDispatcher, + new SimpleContainerDownloader(conf)); + } + @Override public void handle(SCMCommand command, OzoneContainer container, StateContext context, SCMConnectionManager connectionManager) { - LOG.warn("Replicate command is not yet handled"); + ReplicateContainerCommand replicateCommand = + (ReplicateContainerCommand) command; + try { + List sourceDatanodes = + replicateCommand.getSourceDatanodes(); + long containerID = replicateCommand.getContainerID(); + + Preconditions.checkArgument(sourceDatanodes.size() > 0, + String.format("Replication command is received for container %d " + + "but the size of source datanodes was 0.", containerID)); + + LOG.info("Starting replication of container {} from {}", containerID, + sourceDatanodes); + CompletableFuture tempTarFile = downloader + .getContainerDataFromReplicas(containerID, + sourceDatanodes); + + CompletableFuture result = + tempTarFile.thenAccept(path -> { + LOG.info("Container {} is downloaded, starting to import.", + containerID); + importContainer(containerID, path); + }); + + result.whenComplete((aVoid, throwable) -> { + if (throwable != null) { + LOG.error("Container replication was unsuccessful .", throwable); + } else { + LOG.info("Container {} is replicated successfully", containerID); + } + }); + } finally { + updateCommandStatus(context, command, true, LOG); + + } + } + + protected void importContainer(long containerID, Path tarFilePath) { + 
lock.lock(); + try { + ContainerData originalContainerData; + try (FileInputStream tempContainerTarStream = new FileInputStream( + tarFilePath.toFile())) { + byte[] containerDescriptorYaml = + packer.unpackContainerDescriptor(tempContainerTarStream); + originalContainerData = ContainerDataYaml.readContainer( + containerDescriptorYaml); + } + + try (FileInputStream tempContainerTarStream = new FileInputStream( + tarFilePath.toFile())) { + + Handler handler = containerDispatcher.getHandler( + originalContainerData.getContainerType()); + + Container container = handler.importContainer(containerID, + originalContainerData.getMaxSize(), + tempContainerTarStream, + packer); + + containerSet.addContainer(container); + } + + } catch (Exception e) { + LOG.error( + "Can't import the downloaded container data id=" + containerID, + e); + try { + Files.delete(tarFilePath); + } catch (Exception ex) { + LOG.error( + "Container import is failed and the downloaded file can't be " + + "deleted: " + + tarFilePath.toAbsolutePath().toString()); + } + } finally { + lock.unlock(); + } } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java index f04d3925321..995f172d273 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java @@ -92,6 +92,16 @@ public InitDatanodeState(Configuration conf, LOG.error("Null or empty SCM address list found."); return DatanodeStateMachine.DatanodeStates.SHUTDOWN; } else { + for (InetSocketAddress addr : addresses) { + if (addr.isUnresolved()) { + LOG.warn("One SCM address ({}) can't (yet?) be resolved. Postpone " + + "initialization.", addr); + + //skip any further initialization. DatanodeStateMachine will try it + // again after the hb frequency + return this.context.getState(); + } + } for (InetSocketAddress addr : addresses) { connectionManager.addSCMServer(addr); } @@ -106,7 +116,7 @@ public InitDatanodeState(Configuration conf, /** * Persist DatanodeDetails to datanode.id file. */ - private void persistContainerDatanodeDetails() throws IOException { + private void persistContainerDatanodeDetails() { String dataNodeIDPath = HddsUtils.getDatanodeIdFilePath(conf); if (Strings.isNullOrEmpty(dataNodeIDPath)) { LOG.error("A valid file path is needed for config setting {}", @@ -118,7 +128,15 @@ private void persistContainerDatanodeDetails() throws IOException { DatanodeDetails datanodeDetails = this.context.getParent() .getDatanodeDetails(); if (datanodeDetails != null && !idPath.exists()) { - ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath); + try { + ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath); + } catch (IOException ex) { + // As writing DatanodeDetails in to datanodeid file failed, which is + // a critical thing, so shutting down the state machine. 
+ LOG.error("Writing to {} failed {}", dataNodeIDPath, ex.getMessage()); + this.context.setState(DatanodeStateMachine.DatanodeStates.SHUTDOWN); + return; + } LOG.info("DatanodeDetails is persisted to {}", dataNodeIDPath); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/RunningDatanodeState.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/RunningDatanodeState.java index 3e11d1233ca..ec2358ae18b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/RunningDatanodeState.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/RunningDatanodeState.java @@ -95,11 +95,13 @@ public void execute(ExecutorService executor) { getEndPointTask(EndpointStateMachine endpoint) { switch (endpoint.getState()) { case GETVERSION: - return new VersionEndpointTask(endpoint, conf); + return new VersionEndpointTask(endpoint, conf, context.getParent() + .getContainer()); case REGISTER: return RegisterEndpointTask.newBuilder() .setConfig(conf) .setEndpointStateMachine(endpoint) + .setContext(context) .setDatanodeDetails(context.getParent().getDatanodeDetails()) .setOzoneContainer(context.getParent().getContainer()) .build(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java index 260a245ceb3..5769e6d2f64 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java @@ -24,6 +24,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.PipelineActionsProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.PipelineAction; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerActionsProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerAction; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; import org.apache.hadoop.hdds.protocol.proto @@ -46,8 +54,18 @@ import java.io.IOException; import java.time.ZonedDateTime; +import java.util.List; import java.util.concurrent.Callable; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_CONTAINER_ACTION_MAX_LIMIT; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_CONTAINER_ACTION_MAX_LIMIT_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_PIPELINE_ACTION_MAX_LIMIT; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_PIPELINE_ACTION_MAX_LIMIT_DEFAULT; + /** * Heartbeat class for SCMs. */ @@ -59,6 +77,8 @@ private final Configuration conf; private DatanodeDetailsProto datanodeDetailsProto; private StateContext context; + private int maxContainerActionsPerHB; + private int maxPipelineActionsPerHB; /** * Constructs a SCM heart beat. 
@@ -70,6 +90,10 @@ public HeartbeatEndpointTask(EndpointStateMachine rpcEndpoint, this.rpcEndpoint = rpcEndpoint; this.conf = conf; this.context = context; + this.maxContainerActionsPerHB = conf.getInt(HDDS_CONTAINER_ACTION_MAX_LIMIT, + HDDS_CONTAINER_ACTION_MAX_LIMIT_DEFAULT); + this.maxPipelineActionsPerHB = conf.getInt(HDDS_PIPELINE_ACTION_MAX_LIMIT, + HDDS_PIPELINE_ACTION_MAX_LIMIT_DEFAULT); } /** @@ -107,7 +131,8 @@ public void setDatanodeDetailsProto(DatanodeDetailsProto SCMHeartbeatRequestProto.newBuilder() .setDatanodeDetails(datanodeDetailsProto); addReports(requestBuilder); - + addContainerActions(requestBuilder); + addPipelineActions(requestBuilder); SCMHeartbeatResponseProto reponse = rpcEndpoint.getEndPoint() .sendHeartbeat(requestBuilder.build()); processResponse(reponse, datanodeDetailsProto); @@ -139,6 +164,40 @@ private void addReports(SCMHeartbeatRequestProto.Builder requestBuilder) { } } + /** + * Adds all the pending ContainerActions to the heartbeat. + * + * @param requestBuilder builder to which the report has to be added. + */ + private void addContainerActions( + SCMHeartbeatRequestProto.Builder requestBuilder) { + List actions = context.getPendingContainerAction( + maxContainerActionsPerHB); + if (!actions.isEmpty()) { + ContainerActionsProto cap = ContainerActionsProto.newBuilder() + .addAllContainerActions(actions) + .build(); + requestBuilder.setContainerActions(cap); + } + } + + /** + * Adds all the pending PipelineActions to the heartbeat. + * + * @param requestBuilder builder to which the report has to be added. + */ + private void addPipelineActions( + SCMHeartbeatRequestProto.Builder requestBuilder) { + List actions = context.getPendingPipelineAction( + maxPipelineActionsPerHB); + if (!actions.isEmpty()) { + PipelineActionsProto pap = PipelineActionsProto.newBuilder() + .addAllPipelineActions(actions) + .build(); + requestBuilder.setPipelineActions(pap); + } + } + /** * Returns a builder class for HeartbeatEndpointTask task. * @return Builder. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java index b3d2b620091..ccab0956e72 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/RegisterEndpointTask.java @@ -29,6 +29,7 @@ .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,6 +51,7 @@ private Future result; private DatanodeDetails datanodeDetails; private final OzoneContainer datanodeContainerManager; + private StateContext stateContext; /** * Creates a register endpoint task. 
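One change that is spread over three files and easy to miss: the heartbeat interval is no longer cached at DatanodeStateMachine construction time. StateContext starts with a 2 s bootstrap value, the run loop re-reads it every cycle, and the registration task below switches it to the configured SCM interval once registration succeeds. Condensed, not verbatim:

    // DatanodeStateMachine.start(): the interval is looked up on every cycle.
    long heartbeatFrequency = context.getHeartbeatFrequency(); // 2000 ms until registered
    nextHB.set(Time.monotonicNow() + heartbeatFrequency);
    context.execute(executorService, heartbeatFrequency, TimeUnit.MILLISECONDS);

    // RegisterEndpointTask.call(), after a successful registration:
    rpcEndPoint.zeroMissedCount();
    this.stateContext.configureHeartbeatFrequency(); // now getScmHeartbeatInterval(conf)
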
@@ -60,10 +62,12 @@ */ @VisibleForTesting public RegisterEndpointTask(EndpointStateMachine rpcEndPoint, - Configuration conf, OzoneContainer ozoneContainer) { + Configuration conf, OzoneContainer ozoneContainer, + StateContext context) { this.rpcEndPoint = rpcEndPoint; this.conf = conf; this.datanodeContainerManager = ozoneContainer; + this.stateContext = context; } @@ -124,9 +128,9 @@ public void setDatanodeDetails( rpcEndPoint.getState().getNextState(); rpcEndPoint.setState(nextState); rpcEndPoint.zeroMissedCount(); + this.stateContext.configureHeartbeatFrequency(); } catch (IOException ex) { - rpcEndPoint.logIfNeeded(ex - ); + rpcEndPoint.logIfNeeded(ex); } finally { rpcEndPoint.unlock(); } @@ -151,6 +155,7 @@ public static Builder newBuilder() { private Configuration conf; private DatanodeDetails datanodeDetails; private OzoneContainer container; + private StateContext context; /** * Constructs the builder class. @@ -201,6 +206,10 @@ public Builder setOzoneContainer(OzoneContainer ozoneContainer) { return this; } + public Builder setContext(StateContext stateContext) { + this.context = stateContext; + return this; + } public RegisterEndpointTask build() { if (endPointStateMachine == null) { @@ -211,8 +220,9 @@ public RegisterEndpointTask build() { if (conf == null) { LOG.error("No config specified."); - throw new IllegalArgumentException("A valid configration is needed to" + - " construct RegisterEndpoint task"); + throw new IllegalArgumentException( + "A valid configuration is needed to construct RegisterEndpoint " + + "task"); } if (datanodeDetails == null) { @@ -224,13 +234,20 @@ public RegisterEndpointTask build() { if (container == null) { LOG.error("Container is not specified"); throw new IllegalArgumentException("Container is not specified to " + - "constrict RegisterEndpoint task"); + "construct RegisterEndpoint task"); + } + + if (context == null) { + LOG.error("StateContext is not specified"); + throw new IllegalArgumentException("Container is not specified to " + + "construct RegisterEndpoint task"); } RegisterEndpointTask task = new RegisterEndpointTask(this - .endPointStateMachine, this.conf, this.container); + .endPointStateMachine, this.conf, this.container, this.context); task.setDatanodeDetails(datanodeDetails); return task; } + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index b048ee5b5ca..64e078d2967 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -16,14 +16,24 @@ */ package org.apache.hadoop.ozone.container.common.states.endpoint; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.statemachine .EndpointStateMachine; +import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import 
org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.hadoop.ozone.protocol.VersionResponse; +import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Map; import java.util.concurrent.Callable; /** @@ -31,13 +41,17 @@ */ public class VersionEndpointTask implements Callable { + public static final Logger LOG = LoggerFactory.getLogger(VersionEndpointTask + .class); private final EndpointStateMachine rpcEndPoint; private final Configuration configuration; + private final OzoneContainer ozoneContainer; public VersionEndpointTask(EndpointStateMachine rpcEndPoint, - Configuration conf) { + Configuration conf, OzoneContainer container) { this.rpcEndPoint = rpcEndPoint; this.configuration = conf; + this.ozoneContainer = container; } /** @@ -52,13 +66,43 @@ public VersionEndpointTask(EndpointStateMachine rpcEndPoint, try{ SCMVersionResponseProto versionResponse = rpcEndPoint.getEndPoint().getVersion(null); - rpcEndPoint.setVersion(VersionResponse.getFromProtobuf(versionResponse)); + VersionResponse response = VersionResponse.getFromProtobuf( + versionResponse); + rpcEndPoint.setVersion(response); + VolumeSet volumeSet = ozoneContainer.getVolumeSet(); + Map volumeMap = volumeSet.getVolumeMap(); + + String scmId = response.getValue(OzoneConsts.SCM_ID); + String clusterId = response.getValue(OzoneConsts.CLUSTER_ID); + + Preconditions.checkNotNull(scmId, "Reply from SCM: scmId cannot be " + + "null"); + Preconditions.checkNotNull(clusterId, "Reply from SCM: clusterId " + + "cannot be null"); + + // If version file does not exist create version file and also set scmId + for (Map.Entry entry : volumeMap.entrySet()) { + HddsVolume hddsVolume = entry.getValue(); + boolean result = HddsVolumeUtil.checkVolume(hddsVolume, scmId, + clusterId, LOG); + if (!result) { + volumeSet.failVolume(hddsVolume.getHddsRootDir().getPath()); + } + } + if (volumeSet.getVolumesList().size() == 0) { + // All volumes are inconsistent state + throw new DiskOutOfSpaceException("All configured Volumes are in " + + "Inconsistent State"); + } + ozoneContainer.getDispatcher().setScmId(scmId); EndpointStateMachine.EndPointStates nextState = rpcEndPoint.getState().getNextState(); rpcEndPoint.setState(nextState); rpcEndPoint.zeroMissedCount(); - } catch (IOException ex) { + } catch (DiskOutOfSpaceException ex) { + rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN); + } catch(IOException ex) { rpcEndPoint.logIfNeeded(ex); } finally { rpcEndPoint.unlock(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java index df6220cec7d..db4a86aa8c5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/GrpcXceiverService.java @@ -56,10 +56,8 @@ public void onNext(ContainerCommandRequestProto request) { ContainerCommandResponseProto resp = dispatcher.dispatch(request); responseObserver.onNext(resp); } catch (Throwable e) { - if (LOG.isDebugEnabled()) { - LOG.debug("{} got exception when processing" + LOG.error("{} got exception when processing" + " ContainerCommandRequestProto {}: {}", request, e); - 
} responseObserver.onError(e); } } @@ -67,13 +65,13 @@ public void onNext(ContainerCommandRequestProto request) { @Override public void onError(Throwable t) { // for now we just log a msg - LOG.info("{}: ContainerCommand send on error. Exception: {}", t); + LOG.error("{}: ContainerCommand send on error. Exception: {}", t); } @Override public void onCompleted() { if (isClosed.compareAndSet(false, true)) { - LOG.info("{}: ContainerCommand send completed"); + LOG.debug("{}: ContainerCommand send completed"); responseObserver.onCompleted(); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServer.java deleted file mode 100644 index 3a469de1fdd..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServer.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.container.common.transport.server; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.ratis.shaded.io.netty.bootstrap.ServerBootstrap; -import org.apache.ratis.shaded.io.netty.channel.Channel; -import org.apache.ratis.shaded.io.netty.channel.EventLoopGroup; -import org.apache.ratis.shaded.io.netty.channel.nio.NioEventLoopGroup; -import org.apache.ratis.shaded.io.netty.channel.socket.nio - .NioServerSocketChannel; -import org.apache.ratis.shaded.io.netty.handler.logging.LogLevel; -import org.apache.ratis.shaded.io.netty.handler.logging.LoggingHandler; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.InetSocketAddress; -import java.net.ServerSocket; -import java.net.SocketAddress; - -/** - * Creates a netty server endpoint that acts as the communication layer for - * Ozone containers. - */ -public final class XceiverServer implements XceiverServerSpi { - private static final Logger - LOG = LoggerFactory.getLogger(XceiverServer.class); - private int port; - private final ContainerDispatcher storageContainer; - - private EventLoopGroup bossGroup; - private EventLoopGroup workerGroup; - private Channel channel; - - /** - * Constructs a netty server class. 
- * - * @param conf - Configuration - */ - public XceiverServer(DatanodeDetails datanodeDetails, Configuration conf, - ContainerDispatcher dispatcher) { - Preconditions.checkNotNull(conf); - - this.port = conf.getInt(OzoneConfigKeys.DFS_CONTAINER_IPC_PORT, - OzoneConfigKeys.DFS_CONTAINER_IPC_PORT_DEFAULT); - // Get an available port on current node and - // use that as the container port - if (conf.getBoolean(OzoneConfigKeys.DFS_CONTAINER_IPC_RANDOM_PORT, - OzoneConfigKeys.DFS_CONTAINER_IPC_RANDOM_PORT_DEFAULT)) { - try (ServerSocket socket = new ServerSocket()) { - socket.setReuseAddress(true); - SocketAddress address = new InetSocketAddress(0); - socket.bind(address); - this.port = socket.getLocalPort(); - LOG.info("Found a free port for the server : {}", this.port); - } catch (IOException e) { - LOG.error("Unable find a random free port for the server, " - + "fallback to use default port {}", this.port, e); - } - } - datanodeDetails.setPort( - DatanodeDetails.newPort(DatanodeDetails.Port.Name.STANDALONE, port)); - this.storageContainer = dispatcher; - } - - @Override - public int getIPCPort() { - return this.port; - } - - /** - * Returns the Replication type supported by this end-point. - * - * @return enum -- {Stand_Alone, Ratis, Chained} - */ - @Override - public HddsProtos.ReplicationType getServerType() { - return HddsProtos.ReplicationType.STAND_ALONE; - } - - @Override - public void start() throws IOException { - bossGroup = new NioEventLoopGroup(); - workerGroup = new NioEventLoopGroup(); - channel = new ServerBootstrap() - .group(bossGroup, workerGroup) - .channel(NioServerSocketChannel.class) - .handler(new LoggingHandler(LogLevel.INFO)) - .childHandler(new XceiverServerInitializer(storageContainer)) - .bind(port) - .syncUninterruptibly() - .channel(); - } - - @Override - public void stop() { - if (storageContainer != null) { - storageContainer.shutdown(); - } - if (bossGroup != null) { - bossGroup.shutdownGracefully(); - } - if (workerGroup != null) { - workerGroup.shutdownGracefully(); - } - if (channel != null) { - channel.close().awaitUninterruptibly(); - } - } - - @Override - public void submitRequest( - ContainerProtos.ContainerCommandRequestProto request) throws IOException { - storageContainer.dispatch(request); - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java index 0a9e1dbba48..4a90144f4e9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java @@ -21,10 +21,13 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; + +import org.apache.ratis.shaded.io.grpc.BindableService; import org.apache.ratis.shaded.io.grpc.Server; import org.apache.ratis.shaded.io.grpc.ServerBuilder; import 
org.apache.ratis.shaded.io.grpc.netty.NettyServerBuilder; @@ -53,7 +56,7 @@ * @param conf - Configuration */ public XceiverServerGrpc(DatanodeDetails datanodeDetails, Configuration conf, - ContainerDispatcher dispatcher) { + ContainerDispatcher dispatcher, BindableService... additionalServices) { Preconditions.checkNotNull(conf); this.port = conf.getInt(OzoneConfigKeys.DFS_CONTAINER_IPC_PORT, @@ -76,9 +79,17 @@ public XceiverServerGrpc(DatanodeDetails datanodeDetails, Configuration conf, datanodeDetails.setPort( DatanodeDetails.newPort(DatanodeDetails.Port.Name.STANDALONE, port)); server = ((NettyServerBuilder) ServerBuilder.forPort(port)) - .maxMessageSize(OzoneConfigKeys.DFS_CONTAINER_CHUNK_MAX_SIZE) + .maxInboundMessageSize(OzoneConfigKeys.DFS_CONTAINER_CHUNK_MAX_SIZE) .addService(new GrpcXceiverService(dispatcher)) .build(); + NettyServerBuilder nettyServerBuilder = + ((NettyServerBuilder) ServerBuilder.forPort(port)) + .maxInboundMessageSize(OzoneConfigKeys.DFS_CONTAINER_CHUNK_MAX_SIZE) + .addService(new GrpcXceiverService(dispatcher)); + for (BindableService service : additionalServices) { + nettyServerBuilder.addService(service); + } + server = nettyServerBuilder.build(); storageContainer = dispatcher; } @@ -108,8 +119,8 @@ public void stop() { } @Override - public void submitRequest( - ContainerProtos.ContainerCommandRequestProto request) throws IOException { + public void submitRequest(ContainerCommandRequestProto request, + HddsProtos.PipelineID pipelineID) { storageContainer.dispatch(request); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerHandler.java deleted file mode 100644 index 37652991c8e..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerHandler.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.container.common.transport.server; - -import org.apache.ratis.shaded.io.netty.channel.ChannelHandlerContext; -import org.apache.ratis.shaded.io.netty.channel.SimpleChannelInboundHandler; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerCommandRequestProto; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerCommandResponseProto; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Netty server handlers that respond to Network events. 
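The XceiverServerGrpc change above widens the constructor with a BindableService varargs parameter and registers each additional service on the Netty server builder before build() is called. A self-contained sketch of the same varargs-plus-builder pattern, with plain JDK types standing in for the shaded gRPC classes (Service and ServerBuilder below are illustrative stand-ins, not the real API):

import java.util.ArrayList;
import java.util.List;

public class VarargsServiceRegistrationSketch {

  /** Illustrative stand-in for a bindable gRPC service. */
  interface Service {
    String name();
  }

  /** Illustrative stand-in for the Netty server builder used in the patch. */
  static final class ServerBuilder {
    private final List<Service> services = new ArrayList<>();

    ServerBuilder addService(Service service) {
      services.add(service);
      return this;
    }

    List<Service> build() {
      return new ArrayList<>(services);
    }
  }

  static List<Service> createServer(Service coreService,
      Service... additionalServices) {
    ServerBuilder builder = new ServerBuilder().addService(coreService);
    // Mirrors the loop in XceiverServerGrpc: every optional service is added
    // to the same builder before the server is built.
    for (Service service : additionalServices) {
      builder.addService(service);
    }
    return builder.build();
  }

  public static void main(String[] args) {
    Service core = () -> "xceiver";
    Service extra = () -> "ratis-admin";   // illustrative extra service
    System.out.println(createServer(core, extra).size());   // prints 2
  }
}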
- */ -public class XceiverServerHandler extends - SimpleChannelInboundHandler { - - static final Logger LOG = LoggerFactory.getLogger(XceiverServerHandler.class); - private final ContainerDispatcher dispatcher; - - /** - * Constructor for server handler. - * @param dispatcher - Dispatcher interface - */ - public XceiverServerHandler(ContainerDispatcher dispatcher) { - this.dispatcher = dispatcher; - } - - /** - * Please keep in mind that this method will be renamed to {@code - * messageReceived(ChannelHandlerContext, I)} in 5.0. - *


- * Is called for each message of type {@link ContainerCommandRequestProto}. - * - * @param ctx the {@link ChannelHandlerContext} which this {@link - * SimpleChannelInboundHandler} belongs to - * @param msg the message to handle - * @throws Exception is thrown if an error occurred - */ - @Override - public void channelRead0(ChannelHandlerContext ctx, - ContainerCommandRequestProto msg) throws - Exception { - ContainerCommandResponseProto response = this.dispatcher.dispatch(msg); - LOG.debug("Writing the reponse back to client."); - ctx.writeAndFlush(response); - - } - - /** - * Calls {@link ChannelHandlerContext#fireExceptionCaught(Throwable)} - * Sub-classes may override this method to change behavior. - * - * @param ctx - Channel Handler Context - * @param cause - Exception - */ - @Override - public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) - throws Exception { - LOG.error("An exception caught in the pipeline : " + cause.toString()); - super.exceptionCaught(ctx, cause); - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerInitializer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerInitializer.java deleted file mode 100644 index e405cf99ccc..00000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerInitializer.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.container.common.transport.server; - -import com.google.common.base.Preconditions; -import org.apache.ratis.shaded.io.netty.channel.ChannelInitializer; -import org.apache.ratis.shaded.io.netty.channel.ChannelPipeline; -import org.apache.ratis.shaded.io.netty.channel.socket.SocketChannel; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf.ProtobufDecoder; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf.ProtobufEncoder; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf - .ProtobufVarint32FrameDecoder; -import org.apache.ratis.shaded.io.netty.handler.codec.protobuf - .ProtobufVarint32LengthFieldPrepender; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerCommandRequestProto; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; - -/** - * Creates a channel for the XceiverServer. 
- */ -public class XceiverServerInitializer extends ChannelInitializer{ - private final ContainerDispatcher dispatcher; - public XceiverServerInitializer(ContainerDispatcher dispatcher) { - Preconditions.checkNotNull(dispatcher); - this.dispatcher = dispatcher; - } - - /** - * This method will be called once the Channel is registered. After - * the method returns this instance will be removed from the {@link - * ChannelPipeline} - * - * @param ch the which was registered. - * @throws Exception is thrown if an error occurs. In that case the channel - * will be closed. - */ - @Override - protected void initChannel(SocketChannel ch) throws Exception { - ChannelPipeline pipeline = ch.pipeline(); - pipeline.addLast(new ProtobufVarint32FrameDecoder()); - pipeline.addLast(new ProtobufDecoder(ContainerCommandRequestProto - .getDefaultInstance())); - pipeline.addLast(new ProtobufVarint32LengthFieldPrepender()); - pipeline.addLast(new ProtobufEncoder()); - pipeline.addLast(new XceiverServerHandler(dispatcher)); - } -} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerSpi.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerSpi.java index 49579f2829b..1863f6d759a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerSpi.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerSpi.java @@ -18,7 +18,8 @@ package org.apache.hadoop.ozone.container.common.transport.server; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import java.io.IOException; @@ -45,6 +46,7 @@ * submits a containerRequest to be performed by the replication pipeline. * @param request ContainerCommandRequest */ - void submitRequest(ContainerProtos.ContainerCommandRequestProto request) + void submitRequest(ContainerCommandRequestProto request, + HddsProtos.PipelineID pipelineID) throws IOException; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/CSMMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/CSMMetrics.java new file mode 100644 index 00000000000..b6aed605a68 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/CSMMetrics.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.container.common.transport.server.ratis; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; + +/** + * This class is for maintaining Container State Machine statistics. + */ +@InterfaceAudience.Private +@Metrics(about="Container State Machine Metrics", context="dfs") +public class CSMMetrics { + public static final String SOURCE_NAME = + CSMMetrics.class.getSimpleName(); + + // ratis op metrics metrics + private @Metric MutableCounterLong numWriteStateMachineOps; + private @Metric MutableCounterLong numReadStateMachineOps; + private @Metric MutableCounterLong numApplyTransactionOps; + + // Failure Metrics + private @Metric MutableCounterLong numWriteStateMachineFails; + private @Metric MutableCounterLong numReadStateMachineFails; + private @Metric MutableCounterLong numApplyTransactionFails; + + public CSMMetrics() { + } + + public static CSMMetrics create() { + MetricsSystem ms = DefaultMetricsSystem.instance(); + return ms.register(SOURCE_NAME, + "Container State Machine", + new CSMMetrics()); + } + + public void incNumWriteStateMachineOps() { + numWriteStateMachineOps.incr(); + } + + public void incNumReadStateMachineOps() { + numReadStateMachineOps.incr(); + } + + public void incNumApplyTransactionsOps() { + numApplyTransactionOps.incr(); + } + + public void incNumWriteStateMachineFails() { + numWriteStateMachineFails.incr(); + } + + public void incNumReadStateMachineFails() { + numReadStateMachineFails.incr(); + } + + public void incNumApplyTransactionsFails() { + numApplyTransactionFails.incr(); + } + + @VisibleForTesting + public long getNumWriteStateMachineOps() { + return numWriteStateMachineOps.value(); + } + + @VisibleForTesting + public long getNumReadStateMachineOps() { + return numReadStateMachineOps.value(); + } + + @VisibleForTesting + public long getNumApplyTransactionsOps() { + return numApplyTransactionOps.value(); + } + + @VisibleForTesting + public long getNumWriteStateMachineFails() { + return numWriteStateMachineFails.value(); + } + + @VisibleForTesting + public long getNumReadStateMachineFails() { + return numReadStateMachineFails.value(); + } + + @VisibleForTesting + public long getNumApplyTransactionsFails() { + return numApplyTransactionFails.value(); + } + + public void unRegister() { + MetricsSystem ms = DefaultMetricsSystem.instance(); + ms.unregisterSource(SOURCE_NAME); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 176407d66d2..b84db668401 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -18,23 +18,32 @@ package org.apache.hadoop.ozone.container.common.transport.server.ratis; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import 
org.apache.hadoop.hdds.HddsUtils; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.server.RaftServer; import org.apache.ratis.shaded.com.google.protobuf .InvalidProtocolBufferException; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Stage; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ContainerCommandResponseProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .WriteChunkRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ReadChunkRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ReadChunkResponseProto; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; -import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientRequest; -import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.shaded.com.google.protobuf.ByteString; +import org.apache.ratis.shaded.proto.RaftProtos.RoleInfoProto; import org.apache.ratis.shaded.proto.RaftProtos.LogEntryProto; import org.apache.ratis.shaded.proto.RaftProtos.SMLogEntryProto; import org.apache.ratis.statemachine.StateMachineStorage; @@ -46,9 +55,13 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadPoolExecutor; +import java.util.stream.Collectors; /** A {@link org.apache.ratis.statemachine.StateMachine} for containers. * @@ -57,7 +70,7 @@ * requests. * * Read only requests are classified in - * {@link org.apache.hadoop.hdds.scm.XceiverClientRatis#isReadOnly} + * {@link org.apache.hadoop.hdds.HddsUtils#isReadOnly} * and these readonly requests are replied from the {@link #query(Message)}. * * The write requests can be divided into requests with user data @@ -84,6 +97,18 @@ * 2) Write chunk commit operation is executed after write chunk state machine * operation. This will ensure that commit operation is sync'd with the state * machine operation. + * + * Synchronization between {@link #writeStateMachineData} and + * {@link #applyTransaction} need to be enforced in the StateMachine + * implementation. For example, synchronization between writeChunk and + * createContainer in {@link ContainerStateMachine}. + * + * PutKey is synchronized with WriteChunk operations, PutKey for a block is + * executed only after all the WriteChunk preceding the PutKey have finished. + * + * CloseContainer is synchronized with WriteChunk and PutKey operations, + * CloseContainer for a container is processed after all the preceding write + * operations for the container have finished. 
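The class comment above pins down the ordering contract: WriteChunk data goes through writeStateMachineData, PutKey for a block runs only after the block's preceding WriteChunks, and CloseContainer runs only after every preceding write for the container. A JDK-only sketch of that contract built from chained CompletableFutures; it is not the Ozone implementation, and the names below are illustrative:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class WriteOrderingSketch {
  private final ExecutorService chunkExecutor = Executors.newFixedThreadPool(4);
  private final ConcurrentHashMap<Long, List<CompletableFuture<Void>>>
      chunksByBlock = new ConcurrentHashMap<>();
  // Mutated only from the single submitting thread in this sketch, the way
  // Ratis applies transactions sequentially.
  private final List<CompletableFuture<Void>> allWrites = new ArrayList<>();

  CompletableFuture<Void> writeChunk(long blockId, String chunk) {
    CompletableFuture<Void> future = CompletableFuture.runAsync(
        () -> System.out.println("wrote " + chunk + " of block " + blockId),
        chunkExecutor);
    chunksByBlock.computeIfAbsent(blockId, k -> new ArrayList<>()).add(future);
    allWrites.add(future);
    return future;
  }

  CompletableFuture<Void> putKey(long blockId) {
    List<CompletableFuture<Void>> chunks =
        chunksByBlock.getOrDefault(blockId, new ArrayList<>());
    // PutKey runs only after every preceding WriteChunk of the block is done.
    CompletableFuture<Void> future = CompletableFuture
        .allOf(chunks.toArray(new CompletableFuture[0]))
        .thenRun(() -> System.out.println("committed block " + blockId));
    allWrites.add(future);
    return future;
  }

  CompletableFuture<Void> closeContainer() {
    // CloseContainer runs only after all preceding writes of the container.
    return CompletableFuture
        .allOf(allWrites.toArray(new CompletableFuture[0]))
        .thenRun(() -> System.out.println("container closed"));
  }

  public static void main(String[] args) {
    WriteOrderingSketch sketch = new WriteOrderingSketch();
    sketch.writeChunk(1L, "chunk-1");
    sketch.writeChunk(1L, "chunk-2");
    sketch.putKey(1L);
    sketch.closeContainer().join();
    sketch.chunkExecutor.shutdown();
  }
}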
* */ public class ContainerStateMachine extends BaseStateMachine { static final Logger LOG = LoggerFactory.getLogger( @@ -91,18 +116,24 @@ private final SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); private final ContainerDispatcher dispatcher; - private ThreadPoolExecutor writeChunkExecutor; + private ThreadPoolExecutor chunkExecutor; + private final XceiverServerRatis ratisServer; private final ConcurrentHashMap> writeChunkFutureMap; - private final ConcurrentHashMap> - createContainerFutureMap; + private final ConcurrentHashMap stateMachineMap; + /** + * CSM metrics. + */ + private final CSMMetrics metrics; - ContainerStateMachine(ContainerDispatcher dispatcher, - ThreadPoolExecutor writeChunkExecutor) { + public ContainerStateMachine(ContainerDispatcher dispatcher, + ThreadPoolExecutor chunkExecutor, XceiverServerRatis ratisServer) { this.dispatcher = dispatcher; - this.writeChunkExecutor = writeChunkExecutor; + this.chunkExecutor = chunkExecutor; + this.ratisServer = ratisServer; this.writeChunkFutureMap = new ConcurrentHashMap<>(); - this.createContainerFutureMap = new ConcurrentHashMap<>(); + this.stateMachineMap = new ConcurrentHashMap<>(); + metrics = CSMMetrics.create(); } @Override @@ -110,11 +141,15 @@ public StateMachineStorage getStateMachineStorage() { return storage; } + public CSMMetrics getMetrics() { + return metrics; + } + @Override public void initialize( - RaftPeerId id, RaftProperties properties, RaftStorage raftStorage) + RaftServer server, RaftGroupId id, RaftStorage raftStorage) throws IOException { - super.initialize(id, properties, raftStorage); + super.initialize(server, id, raftStorage); storage.init(raftStorage); // TODO handle snapshots @@ -129,13 +164,13 @@ public TransactionContext startTransaction(RaftClientRequest request) getRequestProto(request.getMessage().getContent()); final SMLogEntryProto log; - if (proto.getCmdType() == ContainerProtos.Type.WriteChunk) { + if (proto.getCmdType() == Type.WriteChunk) { final WriteChunkRequestProto write = proto.getWriteChunk(); // create the state machine data proto final WriteChunkRequestProto dataWriteChunkProto = WriteChunkRequestProto .newBuilder(write) - .setStage(ContainerProtos.Stage.WRITE_DATA) + .setStage(Stage.WRITE_DATA) .build(); ContainerCommandRequestProto dataContainerCommandProto = ContainerCommandRequestProto @@ -150,7 +185,7 @@ public TransactionContext startTransaction(RaftClientRequest request) .setChunkData(write.getChunkData()) // skipping the data field as it is // already set in statemachine data proto - .setStage(ContainerProtos.Stage.COMMIT_DATA) + .setStage(Stage.COMMIT_DATA) .build(); ContainerCommandRequestProto commitContainerCommandProto = ContainerCommandRequestProto @@ -162,7 +197,7 @@ public TransactionContext startTransaction(RaftClientRequest request) .setData(commitContainerCommandProto.toByteString()) .setStateMachineData(dataContainerCommandProto.toByteString()) .build(); - } else if (proto.getCmdType() == ContainerProtos.Type.CreateContainer) { + } else if (proto.getCmdType() == Type.CreateContainer) { log = SMLogEntryProto.newBuilder() .setData(request.getMessage().getContent()) .setStateMachineData(request.getMessage().getContent()) @@ -180,56 +215,42 @@ private ContainerCommandRequestProto getRequestProto(ByteString request) return ContainerCommandRequestProto.parseFrom(request); } - private Message runCommand(ContainerCommandRequestProto requestProto) { + private ContainerCommandResponseProto dispatchCommand( + ContainerCommandRequestProto 
requestProto) { LOG.trace("dispatch {}", requestProto); ContainerCommandResponseProto response = dispatcher.dispatch(requestProto); LOG.trace("response {}", response); - return () -> response.toByteString(); - } - - private CompletableFuture handleWriteChunk( - ContainerCommandRequestProto requestProto, long entryIndex) { - final WriteChunkRequestProto write = requestProto.getWriteChunk(); - long containerID = write.getBlockID().getContainerID(); - CompletableFuture future = - createContainerFutureMap.get(containerID); - CompletableFuture writeChunkFuture; - if (future != null) { - writeChunkFuture = future.thenApplyAsync( - v -> runCommand(requestProto), writeChunkExecutor); - } else { - writeChunkFuture = CompletableFuture.supplyAsync( - () -> runCommand(requestProto), writeChunkExecutor); - } - writeChunkFutureMap.put(entryIndex, writeChunkFuture); - return writeChunkFuture; + return response; } - private CompletableFuture handleCreateContainer( - ContainerCommandRequestProto requestProto) { - long containerID = - requestProto.getCreateContainer().getContainerData().getContainerID(); - createContainerFutureMap. - computeIfAbsent(containerID, k -> new CompletableFuture<>()); - return CompletableFuture.completedFuture(() -> ByteString.EMPTY); + private Message runCommand(ContainerCommandRequestProto requestProto) { + return dispatchCommand(requestProto)::toByteString; } + /* + * writeStateMachineData calls are not synchronized with each other + * and also with applyTransaction. + */ @Override public CompletableFuture writeStateMachineData(LogEntryProto entry) { try { + metrics.incNumWriteStateMachineOps(); final ContainerCommandRequestProto requestProto = getRequestProto(entry.getSmLogEntry().getStateMachineData()); - ContainerProtos.Type cmdType = requestProto.getCmdType(); - switch (cmdType) { - case CreateContainer: - return handleCreateContainer(requestProto); - case WriteChunk: - return handleWriteChunk(requestProto, entry.getIndex()); - default: - throw new IllegalStateException("Cmd Type:" + cmdType - + " should not have state machine data"); + Type cmdType = requestProto.getCmdType(); + long containerId = requestProto.getContainerID(); + stateMachineMap + .computeIfAbsent(containerId, k -> new StateMachineHelper()); + CompletableFuture stateMachineFuture = + stateMachineMap.get(containerId) + .handleStateMachineData(requestProto, entry.getIndex()); + if (stateMachineFuture == null) { + throw new IllegalStateException( + "Cmd Type:" + cmdType + " should not have state machine data"); } + return stateMachineFuture; } catch (IOException e) { + metrics.incNumWriteStateMachineFails(); return completeExceptionally(e); } } @@ -237,41 +258,142 @@ private Message runCommand(ContainerCommandRequestProto requestProto) { @Override public CompletableFuture query(Message request) { try { + metrics.incNumReadStateMachineOps(); final ContainerCommandRequestProto requestProto = getRequestProto(request.getContent()); return CompletableFuture.completedFuture(runCommand(requestProto)); } catch (IOException e) { + metrics.incNumReadStateMachineFails(); return completeExceptionally(e); } } + private LogEntryProto readStateMachineData(LogEntryProto entry, + ContainerCommandRequestProto requestProto) { + WriteChunkRequestProto writeChunkRequestProto = + requestProto.getWriteChunk(); + // Assert that store log entry is for COMMIT_DATA, the WRITE_DATA is + // written through writeStateMachineData. 
+ Preconditions.checkArgument(writeChunkRequestProto.getStage() + == Stage.COMMIT_DATA); + + // prepare the chunk to be read + ReadChunkRequestProto.Builder readChunkRequestProto = + ReadChunkRequestProto.newBuilder() + .setBlockID(writeChunkRequestProto.getBlockID()) + .setChunkData(writeChunkRequestProto.getChunkData()); + ContainerCommandRequestProto dataContainerCommandProto = + ContainerCommandRequestProto.newBuilder(requestProto) + .setCmdType(Type.ReadChunk) + .setReadChunk(readChunkRequestProto) + .build(); + + // read the chunk + ContainerCommandResponseProto response = + dispatchCommand(dataContainerCommandProto); + ReadChunkResponseProto responseProto = response.getReadChunk(); + + // assert that the response has data in it. + Preconditions.checkNotNull(responseProto.getData()); + + // reconstruct the write chunk request + final WriteChunkRequestProto.Builder dataWriteChunkProto = + WriteChunkRequestProto.newBuilder(writeChunkRequestProto) + // adding the state machine data + .setData(responseProto.getData()) + .setStage(Stage.WRITE_DATA); + + ContainerCommandRequestProto.Builder newStateMachineProto = + ContainerCommandRequestProto.newBuilder(requestProto) + .setWriteChunk(dataWriteChunkProto); + + return recreateLogEntryProto(entry, + newStateMachineProto.build().toByteString()); + } + + private LogEntryProto recreateLogEntryProto(LogEntryProto entry, + ByteString stateMachineData) { + // recreate the log entry + final SMLogEntryProto log = + SMLogEntryProto.newBuilder(entry.getSmLogEntry()) + .setStateMachineData(stateMachineData) + .build(); + return LogEntryProto.newBuilder(entry).setSmLogEntry(log).build(); + } + + /** + * Returns the combined future of all the writeChunks till the given log + * index. The Raft log worker will wait for the stateMachineData to complete + * flush as well. + * + * @param index log index till which the stateMachine data needs to be flushed + * @return Combined future of all writeChunks till the log index given. + */ + @Override + public CompletableFuture flushStateMachineData(long index) { + List> futureList = + writeChunkFutureMap.entrySet().stream().filter(x -> x.getKey() <= index) + .map(x -> x.getValue()).collect(Collectors.toList()); + CompletableFuture combinedFuture = CompletableFuture.allOf( + futureList.toArray(new CompletableFuture[futureList.size()])); + return combinedFuture; + } + /* + * This api is used by the leader while appending logs to the follower + * This allows the leader to read the state machine data from the + * state machine implementation in case cached state machine data has been + * evicted. + */ + @Override + public CompletableFuture readStateMachineData( + LogEntryProto entry) { + SMLogEntryProto smLogEntryProto = entry.getSmLogEntry(); + if (!smLogEntryProto.getStateMachineData().isEmpty()) { + return CompletableFuture.completedFuture(entry); + } + + try { + final ContainerCommandRequestProto requestProto = + getRequestProto(entry.getSmLogEntry().getData()); + // readStateMachineData should only be called for "write" to Ratis. 
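flushStateMachineData above gathers every writeChunk future whose log index is at or below the requested index and waits on their combination, so the Raft log worker can be sure the state machine data is flushed. A self-contained JDK sketch of that pattern (names are illustrative):

import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

public class FlushUpToIndexSketch {
  private final ConcurrentHashMap<Long, CompletableFuture<String>> writeFutures =
      new ConcurrentHashMap<>();

  void record(long index, CompletableFuture<String> future) {
    writeFutures.put(index, future);
  }

  // Every future whose log index is at or below the requested index must
  // complete before the returned flush future completes.
  CompletableFuture<Void> flushUpTo(long index) {
    List<CompletableFuture<String>> pending = writeFutures.entrySet().stream()
        .filter(entry -> entry.getKey() <= index)
        .map(Map.Entry::getValue)
        .collect(Collectors.toList());
    return CompletableFuture.allOf(pending.toArray(new CompletableFuture[0]));
  }

  public static void main(String[] args) {
    FlushUpToIndexSketch sketch = new FlushUpToIndexSketch();
    sketch.record(1, CompletableFuture.completedFuture("chunk-1"));
    sketch.record(2, CompletableFuture.completedFuture("chunk-2"));
    sketch.record(5, new CompletableFuture<>());   // still in flight
    sketch.flushUpTo(2).join();                    // completes immediately
    System.out.println("all writes up to index 2 are flushed");
  }
}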
+ Preconditions.checkArgument(!HddsUtils.isReadOnly(requestProto)); + + if (requestProto.getCmdType() == Type.WriteChunk) { + return CompletableFuture.supplyAsync(() -> + readStateMachineData(entry, requestProto), + chunkExecutor); + } else if (requestProto.getCmdType() == Type.CreateContainer) { + LogEntryProto log = + recreateLogEntryProto(entry, requestProto.toByteString()); + return CompletableFuture.completedFuture(log); + } else { + throw new IllegalStateException("Cmd type:" + requestProto.getCmdType() + + " cannot have state machine data"); + } + } catch (Exception e) { + LOG.error("unable to read stateMachineData:" + e); + return completeExceptionally(e); + } + } + + /* + * ApplyTransaction calls in Ratis are sequential. + */ @Override public CompletableFuture applyTransaction(TransactionContext trx) { try { + metrics.incNumApplyTransactionsOps(); ContainerCommandRequestProto requestProto = getRequestProto(trx.getSMLogEntry().getData()); - ContainerProtos.Type cmdType = requestProto.getCmdType(); - - if (cmdType == ContainerProtos.Type.WriteChunk) { - WriteChunkRequestProto write = requestProto.getWriteChunk(); - // the data field has already been removed in start Transaction - Preconditions.checkArgument(!write.hasData()); - CompletableFuture stateMachineFuture = - writeChunkFutureMap.remove(trx.getLogEntry().getIndex()); - return stateMachineFuture - .thenComposeAsync(v -> - CompletableFuture.completedFuture(runCommand(requestProto))); - } else { - Message message = runCommand(requestProto); - if (cmdType == ContainerProtos.Type.CreateContainer) { - long containerID = - requestProto.getCreateContainer() - .getContainerData().getContainerID(); - createContainerFutureMap.remove(containerID).complete(message); - } - return CompletableFuture.completedFuture(message); - } + Preconditions.checkState(!HddsUtils.isReadOnly(requestProto)); + stateMachineMap.computeIfAbsent(requestProto.getContainerID(), + k -> new StateMachineHelper()); + long index = + trx.getLogEntry() == null ? -1 : trx.getLogEntry().getIndex(); + return stateMachineMap.get(requestProto.getContainerID()) + .executeContainerCommand(requestProto, index); } catch (IOException e) { + metrics.incNumApplyTransactionsFails(); return completeExceptionally(e); } } @@ -282,7 +404,253 @@ private Message runCommand(ContainerCommandRequestProto requestProto) { return future; } + @Override + public void notifySlowness(RaftGroup group, RoleInfoProto roleInfoProto) { + ratisServer.handleNodeSlowness(group, roleInfoProto); + } + + @Override + public void notifyExtendedNoLeader(RaftGroup group, + RoleInfoProto roleInfoProto) { + ratisServer.handleNoLeader(group, roleInfoProto); + } + @Override public void close() throws IOException { } + + /** + * Class to manage the future tasks for writeChunks. + */ + static class CommitChunkFutureMap { + private final ConcurrentHashMap> + block2ChunkMap = new ConcurrentHashMap<>(); + + synchronized int removeAndGetSize(long index) { + block2ChunkMap.remove(index); + return block2ChunkMap.size(); + } + + synchronized CompletableFuture add(long index, + CompletableFuture future) { + return block2ChunkMap.put(index, future); + } + + synchronized List> getAll() { + return new ArrayList<>(block2ChunkMap.values()); + } + } + + /** + * This class maintains maps and provide utilities to enforce synchronization + * among createContainer, writeChunk, putKey and closeContainer. 
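The CSMMetrics source added earlier in this diff, and the incNum*Ops and incNum*Fails calls threaded through writeStateMachineData, query and applyTransaction, follow one bookkeeping pattern: count the attempt up front and count the failure in the exception path. A JDK-only sketch of that pattern; the real class uses Hadoop metrics2 @Metric counters registered with DefaultMetricsSystem, and the AtomicLong fields below are illustrative stand-ins:

import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;

public class StateMachineMetricsSketch {
  private final AtomicLong numWriteStateMachineOps = new AtomicLong();
  private final AtomicLong numWriteStateMachineFails = new AtomicLong();

  // Count the attempt up front and the failure in the catch block, the same
  // shape as the ops/fails counters in ContainerStateMachine above.
  <T> T recordWrite(Supplier<T> op) {
    numWriteStateMachineOps.incrementAndGet();
    try {
      return op.get();
    } catch (RuntimeException e) {
      numWriteStateMachineFails.incrementAndGet();
      throw e;
    }
  }

  public static void main(String[] args) {
    StateMachineMetricsSketch metrics = new StateMachineMetricsSketch();
    metrics.recordWrite(() -> "ok");
    try {
      metrics.recordWrite(() -> { throw new IllegalStateException("boom"); });
    } catch (IllegalStateException expected) {
      // ignored: the failure is reflected in the counter below
    }
    System.out.println(metrics.numWriteStateMachineOps.get() + " ops, "
        + metrics.numWriteStateMachineFails.get() + " failures");
  }
}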
+ */ + private class StateMachineHelper { + + private CompletableFuture createContainerFuture; + + // Map for maintaining all writeChunk futures mapped to blockId + private final ConcurrentHashMap + block2ChunkMap; + + // Map for putKey futures + private final ConcurrentHashMap> + blockCommitMap; + + StateMachineHelper() { + createContainerFuture = null; + block2ChunkMap = new ConcurrentHashMap<>(); + blockCommitMap = new ConcurrentHashMap<>(); + } + + // The following section handles writeStateMachineData transactions + // on a container + + // enqueue the create container future during writeStateMachineData + // so that the write stateMachine data phase of writeChunk wait on + // create container to finish. + private CompletableFuture handleCreateContainer() { + createContainerFuture = new CompletableFuture<>(); + return CompletableFuture.completedFuture(() -> ByteString.EMPTY); + } + + // This synchronizes on create container to finish + private CompletableFuture handleWriteChunk( + ContainerCommandRequestProto requestProto, long entryIndex) { + CompletableFuture containerOpFuture; + + if (createContainerFuture != null) { + containerOpFuture = createContainerFuture + .thenApplyAsync(v -> runCommand(requestProto), chunkExecutor); + } else { + containerOpFuture = CompletableFuture + .supplyAsync(() -> runCommand(requestProto), chunkExecutor); + } + writeChunkFutureMap.put(entryIndex, containerOpFuture); + return containerOpFuture; + } + + CompletableFuture handleStateMachineData( + final ContainerCommandRequestProto requestProto, long index) { + Type cmdType = requestProto.getCmdType(); + if (cmdType == Type.CreateContainer) { + return handleCreateContainer(); + } else if (cmdType == Type.WriteChunk) { + return handleWriteChunk(requestProto, index); + } else { + return null; + } + } + + // The following section handles applyTransaction transactions + // on a container + + private CompletableFuture handlePutKey( + ContainerCommandRequestProto requestProto) { + List> futureList = new ArrayList<>(); + long localId = + requestProto.getPutKey().getKeyData().getBlockID().getLocalID(); + // Need not wait for create container future here as it has already + // finished. + if (block2ChunkMap.get(localId) != null) { + futureList.addAll(block2ChunkMap.get(localId).getAll()); + } + CompletableFuture effectiveFuture = + runCommandAfterFutures(futureList, requestProto); + + CompletableFuture putKeyFuture = + effectiveFuture.thenApply(message -> { + blockCommitMap.remove(localId); + return message; + }); + blockCommitMap.put(localId, putKeyFuture); + return putKeyFuture; + } + + // Close Container should be executed only if all pending WriteType + // container cmds get executed. Transactions which can return a future + // are WriteChunk and PutKey. + private CompletableFuture handleCloseContainer( + ContainerCommandRequestProto requestProto) { + List> futureList = new ArrayList<>(); + + // No need to wait for create container future here as it should have + // already finished. 
+ block2ChunkMap.values().forEach(b -> futureList.addAll(b.getAll())); + futureList.addAll(blockCommitMap.values()); + + // There are pending write Chunk/PutKey type requests + // Queue this closeContainer request behind all these requests + CompletableFuture closeContainerFuture = + runCommandAfterFutures(futureList, requestProto); + + return closeContainerFuture.thenApply(message -> { + stateMachineMap.remove(requestProto.getContainerID()); + return message; + }); + } + + private CompletableFuture handleChunkCommit( + ContainerCommandRequestProto requestProto, long index) { + WriteChunkRequestProto write = requestProto.getWriteChunk(); + // the data field has already been removed in start Transaction + Preconditions.checkArgument(!write.hasData()); + CompletableFuture stateMachineFuture = + writeChunkFutureMap.remove(index); + CompletableFuture commitChunkFuture = stateMachineFuture + .thenComposeAsync(v -> CompletableFuture + .completedFuture(runCommand(requestProto))); + + long localId = requestProto.getWriteChunk().getBlockID().getLocalID(); + // Put the applyTransaction Future again to the Map. + // closeContainer should synchronize with this. + block2ChunkMap + .computeIfAbsent(localId, id -> new CommitChunkFutureMap()) + .add(index, commitChunkFuture); + return commitChunkFuture.thenApply(message -> { + block2ChunkMap.computeIfPresent(localId, (containerId, chunks) + -> chunks.removeAndGetSize(index) == 0? null: chunks); + return message; + }); + } + + private CompletableFuture runCommandAfterFutures( + List> futureList, + ContainerCommandRequestProto requestProto) { + CompletableFuture effectiveFuture; + if (futureList.isEmpty()) { + effectiveFuture = CompletableFuture + .supplyAsync(() -> runCommand(requestProto)); + + } else { + CompletableFuture allFuture = CompletableFuture.allOf( + futureList.toArray(new CompletableFuture[futureList.size()])); + effectiveFuture = allFuture + .thenApplyAsync(v -> runCommand(requestProto)); + } + return effectiveFuture; + } + + CompletableFuture handleCreateContainer( + ContainerCommandRequestProto requestProto) { + CompletableFuture future = + CompletableFuture.completedFuture(runCommand(requestProto)); + future.thenAccept(m -> { + createContainerFuture.complete(m); + createContainerFuture = null; + }); + return future; + } + + CompletableFuture handleOtherCommands( + ContainerCommandRequestProto requestProto) { + return CompletableFuture.completedFuture(runCommand(requestProto)); + } + + CompletableFuture executeContainerCommand( + ContainerCommandRequestProto requestProto, long index) { + Type cmdType = requestProto.getCmdType(); + switch (cmdType) { + case WriteChunk: + return handleChunkCommit(requestProto, index); + case CloseContainer: + return handleCloseContainer(requestProto); + case PutKey: + return handlePutKey(requestProto); + case CreateContainer: + return handleCreateContainer(requestProto); + default: + return handleOtherCommands(requestProto); + } + } + } + + @VisibleForTesting + public ConcurrentHashMap getStateMachineMap() { + return stateMachineMap; + } + + @VisibleForTesting + public CompletableFuture getCreateContainerFuture(long containerId) { + StateMachineHelper helper = stateMachineMap.get(containerId); + return helper == null ? 
null : helper.createContainerFuture; + } + + @VisibleForTesting + public List> getCommitChunkFutureMap( + long containerId) { + StateMachineHelper helper = stateMachineMap.get(containerId); + if (helper != null) { + List> futureList = new ArrayList<>(); + stateMachineMap.get(containerId).block2ChunkMap.values() + .forEach(b -> futureList.addAll(b.getAll())); + return futureList; + } + return null; + } + + @VisibleForTesting + public Collection> getWriteChunkFutureMap() { + return writeChunkFutureMap.values(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index b9c7cae4938..24ea0b9a0db 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -23,10 +23,17 @@ import com.google.common.base.Strings; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ClosePipelineInfo; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.PipelineAction; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.common.transport.server .XceiverServerSpi; import org.apache.ratis.RaftConfigKeys; @@ -35,12 +42,22 @@ import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.grpc.GrpcConfigKeys; import org.apache.ratis.netty.NettyConfigKeys; -import org.apache.ratis.protocol.*; +import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.protocol.Message; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.NotLeaderException; +import org.apache.ratis.protocol.StateMachineException; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.shaded.proto.RaftProtos; +import org.apache.ratis.shaded.proto.RaftProtos.RoleInfoProto; +import org.apache.ratis.shaded.proto.RaftProtos.ReplicationLevel; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; import org.slf4j.Logger; @@ -52,6 +69,7 @@ import java.net.ServerSocket; import java.net.SocketAddress; import java.util.Objects; +import java.util.UUID; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ThreadPoolExecutor; @@ -64,33 +82,82 @@ */ public final class XceiverServerRatis 
implements XceiverServerSpi { static final Logger LOG = LoggerFactory.getLogger(XceiverServerRatis.class); - private static final AtomicLong callIdCounter = new AtomicLong(); + private static final AtomicLong CALL_ID_COUNTER = new AtomicLong(); private static long nextCallId() { - return callIdCounter.getAndIncrement() & Long.MAX_VALUE; + return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE; } private final int port; private final RaftServer server; - private ThreadPoolExecutor writeChunkExecutor; + private ThreadPoolExecutor chunkExecutor; + private ClientId clientId = ClientId.randomId(); + private final StateContext context; + private final ReplicationLevel replicationLevel; + private long nodeFailureTimeoutMs; private XceiverServerRatis(DatanodeDetails dd, int port, String storageDir, - ContainerDispatcher dispatcher, Configuration conf) throws IOException { + ContainerDispatcher dispatcher, Configuration conf, StateContext context) + throws IOException { + Objects.requireNonNull(dd, "id == null"); + this.port = port; + RaftProperties serverProperties = newRaftProperties(conf, storageDir); + final int numWriteChunkThreads = conf.getInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_KEY, + OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_DEFAULT); + chunkExecutor = + new ThreadPoolExecutor(numWriteChunkThreads, numWriteChunkThreads, + 100, TimeUnit.SECONDS, + new ArrayBlockingQueue<>(1024), + new ThreadPoolExecutor.CallerRunsPolicy()); + this.context = context; + this.replicationLevel = + conf.getEnum(OzoneConfigKeys.DFS_CONTAINER_RATIS_REPLICATION_LEVEL_KEY, + OzoneConfigKeys.DFS_CONTAINER_RATIS_REPLICATION_LEVEL_DEFAULT); + ContainerStateMachine stateMachine = + new ContainerStateMachine(dispatcher, chunkExecutor, this); + this.server = RaftServer.newBuilder() + .setServerId(RatisHelper.toRaftPeerId(dd)) + .setGroup(RatisHelper.emptyRaftGroup()) + .setProperties(serverProperties) + .setStateMachine(stateMachine) + .build(); + } + + private RaftProperties newRaftProperties(Configuration conf, + String storageDir) { + final RaftProperties properties = new RaftProperties(); + + // Set rpc type final String rpcType = conf.get( OzoneConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_KEY, OzoneConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_DEFAULT); final RpcType rpc = SupportedRpcType.valueOfIgnoreCase(rpcType); + RaftConfigKeys.Rpc.setType(properties, rpc); + + // set raft segment size final int raftSegmentSize = conf.getInt( OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY, OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT); + RaftServerConfigKeys.Log.setSegmentSizeMax(properties, + SizeInBytes.valueOf(raftSegmentSize)); + + // set raft segment pre-allocated size final int raftSegmentPreallocatedSize = conf.getInt( OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY, OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT); + RaftServerConfigKeys.Log.Appender.setBufferCapacity(properties, + SizeInBytes.valueOf(raftSegmentPreallocatedSize)); + RaftServerConfigKeys.Log.setPreallocatedSize(properties, + SizeInBytes.valueOf(raftSegmentPreallocatedSize)); + + // Set max write buffer size, which is the scm chunk size final int maxChunkSize = OzoneConfigKeys.DFS_CONTAINER_CHUNK_MAX_SIZE; - final int numWriteChunkThreads = conf.getInt( - OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_KEY, - OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_DEFAULT); + RaftServerConfigKeys.Log.setWriteBufferSize(properties, + 
SizeInBytes.valueOf(maxChunkSize)); + + // Set the client requestTimeout TimeUnit timeUnit = OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT .getUnit(); @@ -100,6 +167,10 @@ private XceiverServerRatis(DatanodeDetails dd, int port, String storageDir, .getDuration(), timeUnit); final TimeDuration clientRequestTimeout = TimeDuration.valueOf(duration, timeUnit); + RaftClientConfigKeys.Rpc + .setRequestTimeout(properties, clientRequestTimeout); + + // Set the server Request timeout timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT .getUnit(); duration = conf.getTimeDuration( @@ -108,61 +179,69 @@ private XceiverServerRatis(DatanodeDetails dd, int port, String storageDir, .getDuration(), timeUnit); final TimeDuration serverRequestTimeout = TimeDuration.valueOf(duration, timeUnit); + RaftServerConfigKeys.Rpc + .setRequestTimeout(properties, serverRequestTimeout); - Objects.requireNonNull(dd, "id == null"); - this.port = port; - RaftProperties serverProperties = - newRaftProperties(rpc, port, storageDir, maxChunkSize, raftSegmentSize, - raftSegmentPreallocatedSize); - setRequestTimeout(serverProperties, clientRequestTimeout, - serverRequestTimeout); - - writeChunkExecutor = - new ThreadPoolExecutor(numWriteChunkThreads, numWriteChunkThreads, - 100, TimeUnit.SECONDS, - new ArrayBlockingQueue<>(1024), - new ThreadPoolExecutor.CallerRunsPolicy()); - ContainerStateMachine stateMachine = - new ContainerStateMachine(dispatcher, writeChunkExecutor); - this.server = RaftServer.newBuilder() - .setServerId(RatisHelper.toRaftPeerId(dd)) - .setGroup(RatisHelper.emptyRaftGroup()) - .setProperties(serverProperties) - .setStateMachine(stateMachine) - .build(); - } + // set timeout for a retry cache entry + timeUnit = + OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT + .getUnit(); + duration = conf.getTimeDuration( + OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY, + OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT + .getDuration(), timeUnit); + final TimeDuration retryCacheTimeout = + TimeDuration.valueOf(duration, timeUnit); + RaftServerConfigKeys.RetryCache + .setExpiryTime(properties, retryCacheTimeout); - private static void setRequestTimeout(RaftProperties serverProperties, - TimeDuration clientRequestTimeout, TimeDuration serverRequestTimeout) { - RaftClientConfigKeys.Rpc - .setRequestTimeout(serverProperties, clientRequestTimeout); + // Set the ratis leader election timeout + TimeUnit leaderElectionMinTimeoutUnit = + OzoneConfigKeys. + DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT + .getUnit(); + duration = conf.getTimeDuration( + OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, + OzoneConfigKeys. 
+ DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT + .getDuration(), leaderElectionMinTimeoutUnit); + final TimeDuration leaderElectionMinTimeout = + TimeDuration.valueOf(duration, leaderElectionMinTimeoutUnit); RaftServerConfigKeys.Rpc - .setRequestTimeout(serverProperties, serverRequestTimeout); - } - - private static RaftProperties newRaftProperties( - RpcType rpc, int port, String storageDir, int scmChunkSize, - int raftSegmentSize, int raftSegmentPreallocatedSize) { - final RaftProperties properties = new RaftProperties(); + .setTimeoutMin(properties, leaderElectionMinTimeout); + long leaderElectionMaxTimeout = + leaderElectionMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200; + RaftServerConfigKeys.Rpc.setTimeoutMax(properties, + TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS)); + // Enable batch append on raft server RaftServerConfigKeys.Log.Appender.setBatchEnabled(properties, true); - RaftServerConfigKeys.Log.Appender.setBufferCapacity(properties, - SizeInBytes.valueOf(raftSegmentPreallocatedSize)); - RaftServerConfigKeys.Log.setWriteBufferSize(properties, - SizeInBytes.valueOf(scmChunkSize)); - RaftServerConfigKeys.Log.setPreallocatedSize(properties, - SizeInBytes.valueOf(raftSegmentPreallocatedSize)); - RaftServerConfigKeys.Log.setSegmentSizeMax(properties, - SizeInBytes.valueOf(raftSegmentSize)); - RaftServerConfigKeys.setStorageDir(properties, new File(storageDir)); - RaftConfigKeys.Rpc.setType(properties, rpc); + // Set the maximum cache segments RaftServerConfigKeys.Log.setMaxCachedSegmentNum(properties, 2); + + // set the node failure timeout + timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_DEFAULT + .getUnit(); + duration = conf.getTimeDuration( + OzoneConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_KEY, + OzoneConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_DEFAULT + .getDuration(), timeUnit); + final TimeDuration nodeFailureTimeout = + TimeDuration.valueOf(duration, timeUnit); + RaftServerConfigKeys.setLeaderElectionTimeout(properties, + nodeFailureTimeout); + RaftServerConfigKeys.Rpc.setSlownessTimeout(properties, + nodeFailureTimeout); + nodeFailureTimeoutMs = nodeFailureTimeout.toLong(TimeUnit.MILLISECONDS); + + // Set the ratis storage directory + RaftServerConfigKeys.setStorageDir(properties, new File(storageDir)); + + // For grpc set the maximum message size GrpcConfigKeys.setMessageSizeMax(properties, - SizeInBytes.valueOf(scmChunkSize + raftSegmentPreallocatedSize)); - RaftServerConfigKeys.Rpc.setTimeoutMin(properties, - TimeDuration.valueOf(800, TimeUnit.MILLISECONDS)); - RaftServerConfigKeys.Rpc.setTimeoutMax(properties, - TimeDuration.valueOf(1000, TimeUnit.MILLISECONDS)); + SizeInBytes.valueOf(maxChunkSize + raftSegmentPreallocatedSize)); + + // Set the ratis port number if (rpc == SupportedRpcType.GRPC) { GrpcConfigKeys.Server.setPort(properties, port); } else if (rpc == SupportedRpcType.NETTY) { @@ -173,7 +252,7 @@ private static RaftProperties newRaftProperties( public static XceiverServerRatis newXceiverServerRatis( DatanodeDetails datanodeDetails, Configuration ozoneConf, - ContainerDispatcher dispatcher) throws IOException { + ContainerDispatcher dispatcher, StateContext context) throws IOException { final String ratisDir = File.separator + "ratis"; int localPort = ozoneConf.getInt( OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_PORT, @@ -218,21 +297,21 @@ public static XceiverServerRatis newXceiverServerRatis( datanodeDetails.setPort( DatanodeDetails.newPort(DatanodeDetails.Port.Name.RATIS, localPort)); return new 
XceiverServerRatis(datanodeDetails, localPort, storageDir, - dispatcher, ozoneConf); + dispatcher, ozoneConf, context); } @Override public void start() throws IOException { LOG.info("Starting {} {} at port {}", getClass().getSimpleName(), server.getId(), getIPCPort()); - writeChunkExecutor.prestartAllCoreThreads(); + chunkExecutor.prestartAllCoreThreads(); server.start(); } @Override public void stop() { try { - writeChunkExecutor.shutdown(); + chunkExecutor.shutdown(); server.close(); } catch (IOException e) { throw new RuntimeException(e); @@ -282,17 +361,79 @@ private void processReply(RaftClientReply reply) { @Override public void submitRequest( - ContainerProtos.ContainerCommandRequestProto request) throws IOException { - ClientId clientId = ClientId.randomId(); + ContainerCommandRequestProto request, HddsProtos.PipelineID pipelineID) + throws IOException { + // ReplicationLevel.ALL ensures the transactions corresponding to + // the request here are applied on all the raft servers. RaftClientRequest raftClientRequest = - new RaftClientRequest(clientId, server.getId(), - RatisHelper.emptyRaftGroup().getGroupId(), nextCallId(), 0, - Message.valueOf(request.toByteString()), RaftClientRequest - // ReplicationLevel.ALL ensures the transactions corresponding to - // the request here are applied on all the raft servers. - .writeRequestType(RaftProtos.ReplicationLevel.ALL)); + createRaftClientRequest(request, pipelineID, + RaftClientRequest.writeRequestType(replicationLevel)); CompletableFuture reply = server.submitClientRequestAsync(raftClientRequest); reply.thenAccept(this::processReply); } + + private RaftClientRequest createRaftClientRequest( + ContainerCommandRequestProto request, HddsProtos.PipelineID pipelineID, + RaftClientRequest.Type type) { + return new RaftClientRequest(clientId, server.getId(), + PipelineID.getFromProtobuf(pipelineID).getRaftGroupID(), + nextCallId(), 0, Message.valueOf(request.toByteString()), type); + } + + private void handlePipelineFailure(RaftGroupId groupId, + RoleInfoProto roleInfoProto) { + String msg; + UUID datanode = RatisHelper.toDatanodeId(roleInfoProto.getSelf()); + RaftPeerId id = RaftPeerId.valueOf(roleInfoProto.getSelf().getId()); + switch (roleInfoProto.getRole()) { + case CANDIDATE: + msg = datanode + " is in candidate state for " + + roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() + "ms"; + break; + case LEADER: + StringBuilder sb = new StringBuilder(); + sb.append(datanode).append(" has not seen follower/s"); + for (RaftProtos.ServerRpcProto follower : roleInfoProto.getLeaderInfo() + .getFollowerInfoList()) { + if (follower.getLastRpcElapsedTimeMs() > nodeFailureTimeoutMs) { + sb.append(" ").append(RatisHelper.toDatanodeId(follower.getId())) + .append(" for ").append(follower.getLastRpcElapsedTimeMs()) + .append("ms"); + } + } + msg = sb.toString(); + break; + default: + LOG.error("unknown state:" + roleInfoProto.getRole()); + throw new IllegalStateException("node" + id + " is in illegal role " + + roleInfoProto.getRole()); + } + + PipelineID pipelineID = PipelineID.valueOf(groupId); + ClosePipelineInfo.Builder closePipelineInfo = + ClosePipelineInfo.newBuilder() + .setPipelineID(pipelineID.getProtobuf()) + .setReason(ClosePipelineInfo.Reason.PIPELINE_FAILED) + .setDetailedReason(msg); + + PipelineAction action = PipelineAction.newBuilder() + .setClosePipeline(closePipelineInfo) + .setAction(PipelineAction.Action.CLOSE) + .build(); + context.addPipelineActionIfAbsent(action); + LOG.debug( + "pipeline Action " + 
action.getAction() + " on pipeline " + pipelineID + + ".Reason : " + action.getClosePipeline().getDetailedReason()); + } + + void handleNodeSlowness( + RaftGroup group, RoleInfoProto roleInfoProto) { + handlePipelineFailure(group.getGroupId(), roleInfoProto); + } + + void handleNoLeader( + RaftGroup group, RoleInfoProto roleInfoProto) { + handlePipelineFailure(group.getGroupId(), roleInfoProto); + } } \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java index 7f611b92bcd..c63eb731c7a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerCache.java @@ -121,9 +121,12 @@ protected boolean removeLRU(LinkEntry entry) { * Returns a DB handle if available, create the handler otherwise. * * @param containerID - ID of the container. + * @param containerDBType - DB type of the container. + * @param containerDBPath - DB path of the container. * @return MetadataStore. */ - public MetadataStore getDB(long containerID, String containerDBPath) + public MetadataStore getDB(long containerID, String containerDBType, String + containerDBPath) throws IOException { Preconditions.checkState(containerID >= 0, "Container ID cannot be negative."); @@ -135,6 +138,7 @@ public MetadataStore getDB(long containerID, String containerDBPath) db = MetadataStoreBuilder.newBuilder() .setDbFile(new File(containerDBPath)) .setCreateIfMissing(false) + .setDBType(containerDBType) .build(); this.put(containerID, db); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/HddsVolumeUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/HddsVolumeUtil.java new file mode 100644 index 00000000000..bc0bd056b1d --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/HddsVolumeUtil.java @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
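ContainerCache.getDB above returns the cached per-container DB handle when one exists and otherwise opens one with the container's DB type and path, leaving eviction to the surrounding LRU map (note the removeLRU override visible in the hunk's context). A JDK-only sketch of that get-or-create LRU pattern; DbHandle below is an illustrative stand-in for the real MetadataStore handle:

import java.util.LinkedHashMap;
import java.util.Map;

public class ContainerDbCacheSketch {
  /** Illustrative stand-in for the real MetadataStore handle. */
  static final class DbHandle {
    final long containerId;
    final String dbType;
    final String dbPath;
    DbHandle(long containerId, String dbType, String dbPath) {
      this.containerId = containerId;
      this.dbType = dbType;
      this.dbPath = dbPath;
    }
  }

  private final Map<Long, DbHandle> cache;

  ContainerDbCacheSketch(int maxEntries) {
    // Access-order LinkedHashMap: the least recently used handle is evicted
    // once the cache grows past maxEntries.
    this.cache = new LinkedHashMap<Long, DbHandle>(16, 0.75f, true) {
      @Override
      protected boolean removeEldestEntry(Map.Entry<Long, DbHandle> eldest) {
        return size() > maxEntries;
      }
    };
  }

  // Same shape as ContainerCache.getDB: return the cached handle if present,
  // otherwise open one with the container's DB type and path and cache it.
  synchronized DbHandle getDB(long containerId, String dbType, String dbPath) {
    return cache.computeIfAbsent(containerId,
        id -> new DbHandle(id, dbType, dbPath));
  }

  public static void main(String[] args) {
    ContainerDbCacheSketch cache = new ContainerDbCacheSketch(2);
    cache.getDB(1L, "RocksDB", "/tmp/1/container.db");
    cache.getDB(2L, "LevelDB", "/tmp/2/container.db");
    cache.getDB(3L, "RocksDB", "/tmp/3/container.db");  // evicts container 1
    System.out.println(cache.cache.keySet());            // prints [2, 3]
  }
}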
+ */ + +package org.apache.hadoop.ozone.container.common.utils; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.common.InconsistentStorageStateException; +import org.apache.hadoop.ozone.container.common.DataNodeLayoutVersion; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; + +import java.io.File; +import java.io.IOException; +import java.util.Properties; +import java.util.UUID; + +/** + * A util class for {@link HddsVolume}. + */ +public final class HddsVolumeUtil { + + // Private constructor for Utility class. Unused. + private HddsVolumeUtil() { + } + + private static final String VERSION_FILE = "VERSION"; + private static final String STORAGE_ID_PREFIX = "DS-"; + + public static File getVersionFile(File rootDir) { + return new File(rootDir, VERSION_FILE); + } + + public static String generateUuid() { + return STORAGE_ID_PREFIX + UUID.randomUUID(); + } + + /** + * Get hddsRoot from volume root. If volumeRoot points to hddsRoot, it is + * returned as is. + * For a volumeRoot /data/disk1, the hddsRoot is /data/disk1/hdds. + * @param volumeRoot root of the volume. + * @return hddsRoot of the volume. + */ + public static String getHddsRoot(String volumeRoot) { + if (volumeRoot.endsWith(HddsVolume.HDDS_VOLUME_DIR)) { + return volumeRoot; + } else { + File hddsRoot = new File(volumeRoot, HddsVolume.HDDS_VOLUME_DIR); + return hddsRoot.getPath(); + } + } + + /** + * Returns storageID if it is valid. Throws an exception otherwise. + */ + @VisibleForTesting + public static String getStorageID(Properties props, File versionFile) + throws InconsistentStorageStateException { + return getProperty(props, OzoneConsts.STORAGE_ID, versionFile); + } + + /** + * Returns clusterID if it is valid. It should match the clusterID from the + * Datanode. Throws an exception otherwise. + */ + @VisibleForTesting + public static String getClusterID(Properties props, File versionFile, + String clusterID) throws InconsistentStorageStateException { + String cid = getProperty(props, OzoneConsts.CLUSTER_ID, versionFile); + + if (clusterID == null) { + return cid; + } + if (!clusterID.equals(cid)) { + throw new InconsistentStorageStateException("Mismatched " + + "ClusterIDs. Version File : " + versionFile + " has clusterID: " + + cid + " and Datanode has clusterID: " + clusterID); + } + return cid; + } + + /** + * Returns datanodeUuid if it is valid. It should match the UUID of the + * Datanode. Throws an exception otherwise. + */ + @VisibleForTesting + public static String getDatanodeUUID(Properties props, File versionFile, + String datanodeUuid) + throws InconsistentStorageStateException { + String datanodeID = getProperty(props, OzoneConsts.DATANODE_UUID, + versionFile); + + if (datanodeUuid != null && !datanodeUuid.equals(datanodeID)) { + throw new InconsistentStorageStateException("Mismatched " + + "DatanodeUUIDs. Version File : " + versionFile + " has datanodeUuid: " + + datanodeID + " and Datanode has datanodeUuid: " + datanodeUuid); + } + return datanodeID; + } + + /** + * Returns creationTime if it is valid. Throws an exception otherwise. 
+ */ + @VisibleForTesting + public static long getCreationTime(Properties props, File versionFile) + throws InconsistentStorageStateException { + String cTimeStr = getProperty(props, OzoneConsts.CTIME, versionFile); + + long cTime = Long.parseLong(cTimeStr); + long currentTime = Time.now(); + if (cTime > currentTime || cTime < 0) { + throw new InconsistentStorageStateException("Invalid Creation time in " + + "Version File : " + versionFile + " - " + cTime + ". Current system" + + " time is " + currentTime); + } + return cTime; + } + + /** + * Returns layOutVersion if it is valid. Throws an exception otherwise. + */ + @VisibleForTesting + public static int getLayOutVersion(Properties props, File versionFile) throws + InconsistentStorageStateException { + String lvStr = getProperty(props, OzoneConsts.LAYOUTVERSION, versionFile); + + int lv = Integer.parseInt(lvStr); + if(DataNodeLayoutVersion.getLatestVersion().getVersion() != lv) { + throw new InconsistentStorageStateException("Invalid layOutVersion. " + + "Version file has layOutVersion as " + lv + " and latest Datanode " + + "layOutVersion is " + + DataNodeLayoutVersion.getLatestVersion().getVersion()); + } + return lv; + } + + private static String getProperty(Properties props, String propName, File + versionFile) + throws InconsistentStorageStateException { + String value = props.getProperty(propName); + if (StringUtils.isBlank(value)) { + throw new InconsistentStorageStateException("Invalid " + propName + + ". Version File : " + versionFile + " has null or empty " + propName); + } + return value; + } + + /** + * Check Volume is consistent state or not. + * @param hddsVolume + * @param scmId + * @param clusterId + * @param logger + * @return true - if volume is in consistent state, otherwise false. + */ + public static boolean checkVolume(HddsVolume hddsVolume, String scmId, String + clusterId, Logger logger) { + File hddsRoot = hddsVolume.getHddsRootDir(); + String volumeRoot = hddsRoot.getPath(); + File scmDir = new File(hddsRoot, scmId); + + try { + hddsVolume.format(clusterId); + } catch (IOException ex) { + logger.error("Error during formatting volume {}, exception is {}", + volumeRoot, ex); + return false; + } + + File[] hddsFiles = hddsRoot.listFiles(); + + if(hddsFiles == null) { + // This is the case for IOException, where listFiles returns null. + // So, we fail the volume. + return false; + } else if (hddsFiles.length == 1) { + // DN started for first time or this is a newly added volume. + // So we create scm directory. + if (!scmDir.mkdir()) { + logger.error("Unable to create scmDir {}", scmDir); + return false; + } + return true; + } else if(hddsFiles.length == 2) { + // The files should be Version and SCM directory + if (scmDir.exists()) { + return true; + } else { + logger.error("Volume {} is in Inconsistent state, expected scm " + + "directory {} does not exist", volumeRoot, scmDir + .getAbsolutePath()); + return false; + } + } else { + // The hdds root dir should always have 2 files. One is Version file + // and other is SCM directory. 
+ return false; + } + + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java new file mode 100644 index 00000000000..6b90146753a --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -0,0 +1,408 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.GetSpaceUsed; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.server.datanode.StorageLocation; +import org.apache.hadoop.ozone.common.InconsistentStorageStateException; +import org.apache.hadoop.ozone.container.common.DataNodeLayoutVersion; +import org.apache.hadoop.ozone.container.common.helpers.DatanodeVersionFile; +import org.apache.hadoop.ozone.container.common.impl.ChunkLayOutVersion; +import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; + +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.Properties; +import java.util.UUID; + +/** + * HddsVolume represents volume in a datanode. {@link VolumeSet} maitains a + * list of HddsVolumes, one for each volume in the Datanode. + * {@link VolumeInfo} in encompassed by this class. + * + * The disk layout per volume is as follows: + * ../hdds/VERSION + * ../hdds/<>/current/<>/<>/metadata + * ../hdds/<>/current/<>/<>/<> + * + * Each hdds volume has its own VERSION file. The hdds volume will have one + * scmUuid directory for each SCM it is a part of (currently only one SCM is + * supported). + * + * During DN startup, if the VERSION file exists, we verify that the + * clusterID in the version file matches the clusterID from SCM. + */ +public final class HddsVolume { + + private static final Logger LOG = LoggerFactory.getLogger(HddsVolume.class); + + public static final String HDDS_VOLUME_DIR = "hdds"; + + private final File hddsRootDir; + private final VolumeInfo volumeInfo; + private VolumeState state; + private final VolumeIOStats volumeIOStats; + + // VERSION file properties + private String storageID; // id of the file system + private String clusterID; // id of the cluster + private String datanodeUuid; // id of the DataNode + private long cTime; // creation time of the file system state + private int layoutVersion; // layout version of the storage data + + /** + * Builder for HddsVolume. 
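A usage-oriented sketch of the utility class above: the datanode reads the per-volume VERSION file with DatanodeVersionFile, validates each field through the HddsVolumeUtil getters, and calls checkVolume() once SCM registration has supplied the SCM and cluster IDs. This is a rough illustration, not part of the patch; the paths and IDs are placeholders:

import java.io.File;
import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.ozone.container.common.helpers.DatanodeVersionFile;
import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class HddsVolumeUtilSketch {
  private static final Logger LOG =
      LoggerFactory.getLogger(HddsVolumeUtilSketch.class);

  /** Read and validate the VERSION file under a volume's hdds root. */
  static void readVersion(File hddsRoot) throws IOException {
    File versionFile = HddsVolumeUtil.getVersionFile(hddsRoot);
    Properties props = DatanodeVersionFile.readFrom(versionFile);

    // Each getter rejects blank values and mismatches against what the DN knows.
    String storageId = HddsVolumeUtil.getStorageID(props, versionFile);
    String clusterId = HddsVolumeUtil.getClusterID(props, versionFile, null);
    long cTime = HddsVolumeUtil.getCreationTime(props, versionFile);
    int layout = HddsVolumeUtil.getLayOutVersion(props, versionFile);
    LOG.info("{}: storageID={} clusterID={} cTime={} layout={}",
        versionFile, storageId, clusterId, cTime, layout);
  }

  /** Format/verify a volume once the SCM and cluster IDs are known. */
  static boolean verify(HddsVolume volume, String scmId, String clusterId) {
    // checkVolume() formats the volume if needed and creates or validates the
    // <hddsRoot>/<scmId> directory; false means the caller should fail it.
    return HddsVolumeUtil.checkVolume(volume, scmId, clusterId, LOG);
  }
}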
+ */ + public static class Builder { + private final String volumeRootStr; + private Configuration conf; + private StorageType storageType; + private long configuredCapacity; + + private String datanodeUuid; + private String clusterID; + private boolean failedVolume = false; + + public Builder(String rootDirStr) { + this.volumeRootStr = rootDirStr; + } + + public Builder conf(Configuration config) { + this.conf = config; + return this; + } + + public Builder storageType(StorageType st) { + this.storageType = st; + return this; + } + + public Builder configuredCapacity(long capacity) { + this.configuredCapacity = capacity; + return this; + } + + public Builder datanodeUuid(String datanodeUUID) { + this.datanodeUuid = datanodeUUID; + return this; + } + + public Builder clusterID(String cid) { + this.clusterID = cid; + return this; + } + + // This is added just to create failed volume objects, which will be used + // to create failed HddsVolume objects in the case of any exceptions caused + // during creating HddsVolume object. + public Builder failedVolume(boolean failed) { + this.failedVolume = failed; + return this; + } + + public HddsVolume build() throws IOException { + return new HddsVolume(this); + } + } + + private HddsVolume(Builder b) throws IOException { + if (!b.failedVolume) { + StorageLocation location = StorageLocation.parse(b.volumeRootStr); + hddsRootDir = new File(location.getUri().getPath(), HDDS_VOLUME_DIR); + this.state = VolumeState.NOT_INITIALIZED; + this.clusterID = b.clusterID; + this.datanodeUuid = b.datanodeUuid; + this.volumeIOStats = new VolumeIOStats(); + + VolumeInfo.Builder volumeBuilder = + new VolumeInfo.Builder(b.volumeRootStr, b.conf) + .storageType(b.storageType) + .configuredCapacity(b.configuredCapacity); + this.volumeInfo = volumeBuilder.build(); + + LOG.info("Creating Volume: " + this.hddsRootDir + " of storage type : " + + b.storageType + " and capacity : " + volumeInfo.getCapacity()); + + initialize(); + } else { + // Builder is called with failedVolume set, so create a failed volume + // HddsVolumeObject. + hddsRootDir = new File(b.volumeRootStr); + volumeIOStats = null; + volumeInfo = null; + storageID = UUID.randomUUID().toString(); + state = VolumeState.FAILED; + } + } + + public VolumeInfo getVolumeInfo() { + return volumeInfo; + } + + /** + * Initializes the volume. + * Creates the Version file if not present, + * otherwise returns with IOException. + * @throws IOException + */ + private void initialize() throws IOException { + VolumeState intialVolumeState = analyzeVolumeState(); + switch (intialVolumeState) { + case NON_EXISTENT: + // Root directory does not exist. Create it. + if (!hddsRootDir.mkdir()) { + throw new IOException("Cannot create directory " + hddsRootDir); + } + setState(VolumeState.NOT_FORMATTED); + createVersionFile(); + break; + case NOT_FORMATTED: + // Version File does not exist. Create it. + createVersionFile(); + break; + case NOT_INITIALIZED: + // Version File exists. Verify its correctness and update property fields. + readVersionFile(); + setState(VolumeState.NORMAL); + break; + case INCONSISTENT: + // Volume Root is in an inconsistent state. Skip loading this volume. + throw new IOException("Volume is in an " + VolumeState.INCONSISTENT + + " state. 
Skipped loading volume: " + hddsRootDir.getPath()); + default: + throw new IOException("Unrecognized initial state : " + + intialVolumeState + "of volume : " + hddsRootDir); + } + } + + private VolumeState analyzeVolumeState() { + if (!hddsRootDir.exists()) { + // Volume Root does not exist. + return VolumeState.NON_EXISTENT; + } + if (!hddsRootDir.isDirectory()) { + // Volume Root exists but is not a directory. + return VolumeState.INCONSISTENT; + } + File[] files = hddsRootDir.listFiles(); + if (files == null || files.length == 0) { + // Volume Root exists and is empty. + return VolumeState.NOT_FORMATTED; + } + if (!getVersionFile().exists()) { + // Volume Root is non empty but VERSION file does not exist. + return VolumeState.INCONSISTENT; + } + // Volume Root and VERSION file exist. + return VolumeState.NOT_INITIALIZED; + } + + public void format(String cid) throws IOException { + Preconditions.checkNotNull(cid, "clusterID cannot be null while " + + "formatting Volume"); + this.clusterID = cid; + initialize(); + } + + /** + * Create Version File and write property fields into it. + * @throws IOException + */ + private void createVersionFile() throws IOException { + this.storageID = HddsVolumeUtil.generateUuid(); + this.cTime = Time.now(); + this.layoutVersion = ChunkLayOutVersion.getLatestVersion().getVersion(); + + if (this.clusterID == null || datanodeUuid == null) { + // HddsDatanodeService does not have the cluster information yet. Wait + // for registration with SCM. + LOG.debug("ClusterID not available. Cannot format the volume {}", + this.hddsRootDir.getPath()); + setState(VolumeState.NOT_FORMATTED); + } else { + // Write the version file to disk. + writeVersionFile(); + setState(VolumeState.NORMAL); + } + } + + private void writeVersionFile() throws IOException { + Preconditions.checkNotNull(this.storageID, + "StorageID cannot be null in Version File"); + Preconditions.checkNotNull(this.clusterID, + "ClusterID cannot be null in Version File"); + Preconditions.checkNotNull(this.datanodeUuid, + "DatanodeUUID cannot be null in Version File"); + Preconditions.checkArgument(this.cTime > 0, + "Creation Time should be positive"); + Preconditions.checkArgument(this.layoutVersion == + DataNodeLayoutVersion.getLatestVersion().getVersion(), + "Version File should have the latest LayOutVersion"); + + File versionFile = getVersionFile(); + LOG.debug("Writing Version file to disk, {}", versionFile); + + DatanodeVersionFile dnVersionFile = new DatanodeVersionFile(this.storageID, + this.clusterID, this.datanodeUuid, this.cTime, this.layoutVersion); + dnVersionFile.createVersionFile(versionFile); + } + + /** + * Read Version File and update property fields. + * Get common storage fields. + * Should be overloaded if additional fields need to be read. 
+ * + * @throws IOException on error + */ + private void readVersionFile() throws IOException { + File versionFile = getVersionFile(); + Properties props = DatanodeVersionFile.readFrom(versionFile); + if (props.isEmpty()) { + throw new InconsistentStorageStateException( + "Version file " + versionFile + " is missing"); + } + + LOG.debug("Reading Version file from disk, {}", versionFile); + this.storageID = HddsVolumeUtil.getStorageID(props, versionFile); + this.clusterID = HddsVolumeUtil.getClusterID(props, versionFile, + this.clusterID); + this.datanodeUuid = HddsVolumeUtil.getDatanodeUUID(props, versionFile, + this.datanodeUuid); + this.cTime = HddsVolumeUtil.getCreationTime(props, versionFile); + this.layoutVersion = HddsVolumeUtil.getLayOutVersion(props, versionFile); + } + + private File getVersionFile() { + return HddsVolumeUtil.getVersionFile(hddsRootDir); + } + + public File getHddsRootDir() { + return hddsRootDir; + } + + public StorageType getStorageType() { + if(volumeInfo != null) { + return volumeInfo.getStorageType(); + } + return StorageType.DEFAULT; + } + + public String getStorageID() { + return storageID; + } + + public String getClusterID() { + return clusterID; + } + + public String getDatanodeUuid() { + return datanodeUuid; + } + + public long getCTime() { + return cTime; + } + + public int getLayoutVersion() { + return layoutVersion; + } + + public VolumeState getStorageState() { + return state; + } + + public long getCapacity() throws IOException { + if(volumeInfo != null) { + return volumeInfo.getCapacity(); + } + return 0; + } + + public long getAvailable() throws IOException { + if(volumeInfo != null) { + return volumeInfo.getAvailable(); + } + return 0; + } + + public void setState(VolumeState state) { + this.state = state; + } + + public boolean isFailed() { + return (state == VolumeState.FAILED); + } + + public VolumeIOStats getVolumeIOStats() { + return volumeIOStats; + } + + public void failVolume() { + setState(VolumeState.FAILED); + if (volumeInfo != null) { + volumeInfo.shutdownUsageThread(); + } + } + + public void shutdown() { + this.state = VolumeState.NON_EXISTENT; + if (volumeInfo != null) { + volumeInfo.shutdownUsageThread(); + } + } + + /** + * VolumeState represents the different states a HddsVolume can be in. + * NORMAL => Volume can be used for storage + * FAILED => Volume has failed due and can no longer be used for + * storing containers. + * NON_EXISTENT => Volume Root dir does not exist + * INCONSISTENT => Volume Root dir is not empty but VERSION file is + * missing or Volume Root dir is not a directory + * NOT_FORMATTED => Volume Root exists but not formatted (no VERSION file) + * NOT_INITIALIZED => VERSION file exists but has not been verified for + * correctness. + */ + public enum VolumeState { + NORMAL, + FAILED, + NON_EXISTENT, + INCONSISTENT, + NOT_FORMATTED, + NOT_INITIALIZED + } + + /** + * Only for testing. Do not use otherwise. 
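Putting the HddsVolume pieces above together: the Builder produces a volume whose initialize() classifies the root as NON_EXISTENT, NOT_FORMATTED, NOT_INITIALIZED or INCONSISTENT, and format(clusterID) is invoked later, once registration with SCM supplies the cluster ID, to write the VERSION file and move the volume to NORMAL. A rough usage sketch under those assumptions (the directory and IDs are made up, not from the patch):

import java.io.IOException;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;

public final class HddsVolumeSketch {
  public static void main(String[] args) throws IOException {
    // The cluster ID is unknown before SCM registration, so it is left unset.
    HddsVolume volume = new HddsVolume.Builder("/data/disk1")
        .conf(new OzoneConfiguration())
        .datanodeUuid("datanode-uuid-1234")   // illustrative UUID
        .build();                             // volume stays NOT_FORMATTED

    // After the datanode learns the cluster ID from SCM:
    volume.format("cluster-uuid-5678");       // writes the VERSION file
    System.out.println("state = " + volume.getStorageState()
        + ", storageID = " + volume.getStorageID());
  }
}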
+ */ + @VisibleForTesting + public void setScmUsageForTesting(GetSpaceUsed scmUsageForTest) { + if (volumeInfo != null) { + volumeInfo.setScmUsageForTesting(scmUsageForTest); + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/RoundRobinVolumeChoosingPolicy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/RoundRobinVolumeChoosingPolicy.java new file mode 100644 index 00000000000..75c92ec024b --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/RoundRobinVolumeChoosingPolicy.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; +import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Choose volumes in round-robin order. + * The caller should synchronize access to the list of volumes. + */ +public class RoundRobinVolumeChoosingPolicy implements VolumeChoosingPolicy { + + public static final Log LOG = LogFactory.getLog( + RoundRobinVolumeChoosingPolicy.class); + + // Stores the index of the next volume to be returned. + private AtomicInteger nextVolumeIndex = new AtomicInteger(0); + + @Override + public HddsVolume chooseVolume(List volumes, + long maxContainerSize) throws IOException { + + // No volumes available to choose from + if (volumes.size() < 1) { + throw new DiskOutOfSpaceException("No more available volumes"); + } + + // since volumes could've been removed because of the failure + // make sure we are not out of bounds + int nextIndex = nextVolumeIndex.get(); + int currentVolumeIndex = nextIndex < volumes.size() ? 
nextIndex : 0; + + int startVolumeIndex = currentVolumeIndex; + long maxAvailable = 0; + + while (true) { + final HddsVolume volume = volumes.get(currentVolumeIndex); + long availableVolumeSize = volume.getAvailable(); + + currentVolumeIndex = (currentVolumeIndex + 1) % volumes.size(); + + if (availableVolumeSize > maxContainerSize) { + nextVolumeIndex.compareAndSet(nextIndex, currentVolumeIndex); + return volume; + } + + if (availableVolumeSize > maxAvailable) { + maxAvailable = availableVolumeSize; + } + + if (currentVolumeIndex == startVolumeIndex) { + throw new DiskOutOfSpaceException("Out of space: " + + "The volume with the most available space (=" + maxAvailable + + " B) is less than the container size (=" + maxContainerSize + + " B)."); + } + + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeIOStats.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeIOStats.java new file mode 100644 index 00000000000..9e2eb221e81 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeIOStats.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * This class is used to track Volume IO stats for each HDDS Volume. + */ +public class VolumeIOStats { + + private final AtomicLong readBytes; + private final AtomicLong readOpCount; + private final AtomicLong writeBytes; + private final AtomicLong writeOpCount; + private final AtomicLong readTime; + private final AtomicLong writeTime; + + public VolumeIOStats() { + readBytes = new AtomicLong(0); + readOpCount = new AtomicLong(0); + writeBytes = new AtomicLong(0); + writeOpCount = new AtomicLong(0); + readTime = new AtomicLong(0); + writeTime = new AtomicLong(0); + } + + /** + * Increment number of bytes read from the volume. + * @param bytesRead + */ + public void incReadBytes(long bytesRead) { + readBytes.addAndGet(bytesRead); + } + + /** + * Increment the read operations performed on the volume. + */ + public void incReadOpCount() { + readOpCount.incrementAndGet(); + } + + /** + * Increment number of bytes written on to the volume. + * @param bytesWritten + */ + public void incWriteBytes(long bytesWritten) { + writeBytes.addAndGet(bytesWritten); + } + + /** + * Increment the write operations performed on the volume. + */ + public void incWriteOpCount() { + writeOpCount.incrementAndGet(); + } + + /** + * Increment the time taken by read operation on the volume. 
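A short sketch of how a caller is expected to feed these per-volume counters when servicing a read; the stats object would normally come from HddsVolume.getVolumeIOStats(), and the buffer and timing below are only illustrative:

import org.apache.hadoop.ozone.container.common.volume.VolumeIOStats;
import org.apache.hadoop.util.Time;

public final class VolumeIOStatsSketch {
  public static void main(String[] args) {
    VolumeIOStats stats = new VolumeIOStats();

    long start = Time.monotonicNow();
    byte[] chunk = new byte[4096];   // stand-in for an actual chunk read
    long elapsed = Time.monotonicNow() - start;

    // Record bytes, operation count and elapsed time for the read.
    stats.incReadBytes(chunk.length);
    stats.incReadOpCount();
    stats.incReadTime(elapsed);

    System.out.println("bytes=" + stats.getReadBytes()
        + " ops=" + stats.getReadOpCount()
        + " timeMs=" + stats.getReadTime());
  }
}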
+ * @param time + */ + public void incReadTime(long time) { + readTime.addAndGet(time); + } + + /** + * Increment the time taken by write operation on the volume. + * @param time + */ + public void incWriteTime(long time) { + writeTime.addAndGet(time); + } + + /** + * Returns total number of bytes read from the volume. + * @return long + */ + public long getReadBytes() { + return readBytes.get(); + } + + /** + * Returns total number of bytes written to the volume. + * @return long + */ + public long getWriteBytes() { + return writeBytes.get(); + } + + /** + * Returns total number of read operations performed on the volume. + * @return long + */ + public long getReadOpCount() { + return readOpCount.get(); + } + + /** + * Returns total number of write operations performed on the volume. + * @return long + */ + public long getWriteOpCount() { + return writeOpCount.get(); + } + + /** + * Returns total read operations time on the volume. + * @return long + */ + public long getReadTime() { + return readTime.get(); + } + + /** + * Returns total write operations time on the volume. + * @return long + */ + public long getWriteTime() { + return writeTime.get(); + } + + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java new file mode 100644 index 00000000000..62fca6312d8 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfo.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.GetSpaceUsed; +import org.apache.hadoop.fs.StorageType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; + +/** + * Stores information about a disk/volume. + */ +public class VolumeInfo { + + private static final Logger LOG = LoggerFactory.getLogger(VolumeInfo.class); + + private final String rootDir; + private final StorageType storageType; + + // Space usage calculator + private VolumeUsage usage; + // Capacity configured. This is useful when we want to + // limit the visible capacity for tests. If negative, then we just + // query from the filesystem. + private long configuredCapacity; + + /** + * Builder for VolumeInfo. 
+ */ + public static class Builder { + private final Configuration conf; + private final String rootDir; + private StorageType storageType; + private long configuredCapacity; + + public Builder(String root, Configuration config) { + this.rootDir = root; + this.conf = config; + } + + public Builder storageType(StorageType st) { + this.storageType = st; + return this; + } + + public Builder configuredCapacity(long capacity) { + this.configuredCapacity = capacity; + return this; + } + + public VolumeInfo build() throws IOException { + return new VolumeInfo(this); + } + } + + private VolumeInfo(Builder b) throws IOException { + + this.rootDir = b.rootDir; + File root = new File(this.rootDir); + + Boolean succeeded = root.isDirectory() || root.mkdirs(); + + if (!succeeded) { + LOG.error("Unable to create the volume root dir at : {}", root); + throw new IOException("Unable to create the volume root dir at " + root); + } + + this.storageType = (b.storageType != null ? + b.storageType : StorageType.DEFAULT); + + this.configuredCapacity = (b.configuredCapacity != 0 ? + b.configuredCapacity : -1); + + this.usage = new VolumeUsage(root, b.conf); + } + + public long getCapacity() { + return configuredCapacity < 0 ? usage.getCapacity() : configuredCapacity; + } + + public long getAvailable() throws IOException { + return usage.getAvailable(); + } + + public long getScmUsed() throws IOException { + return usage.getScmUsed(); + } + + protected void shutdownUsageThread() { + if (usage != null) { + usage.shutdown(); + } + usage = null; + } + + public String getRootDir() { + return this.rootDir; + } + + public StorageType getStorageType() { + return this.storageType; + } + + /** + * Only for testing. Do not use otherwise. + */ + @VisibleForTesting + public void setScmUsageForTesting(GetSpaceUsed scmUsageForTest) { + usage.setScmUsageForTesting(scmUsageForTest); + } + + /** + * Only for testing. Do not use otherwise. + */ + @VisibleForTesting + public VolumeUsage getUsageForTesting() { + return usage; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeSet.java new file mode 100644 index 00000000000..06f48fc2936 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeSet.java @@ -0,0 +1,406 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.common.volume; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; +import static org.apache.hadoop.util.RunJar.SHUTDOWN_HOOK_PRIORITY; + +import org.apache.hadoop.hdfs.server.datanode.StorageLocation; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.NodeReportProto; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.common.InconsistentStorageStateException; +import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport; +import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume.VolumeState; +import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; +import org.apache.hadoop.util.AutoCloseableLock; +import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; +import org.apache.hadoop.util.InstrumentedLock; +import org.apache.hadoop.util.ShutdownHookManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; + +/** + * VolumeSet to manage volumes in a DataNode. + */ +public class VolumeSet { + + private static final Logger LOG = LoggerFactory.getLogger(VolumeSet.class); + + private Configuration conf; + + /** + * {@link VolumeSet#volumeMap} maintains a map of all active volumes in the + * DataNode. Each volume has one-to-one mapping with a volumeInfo object. + */ + private Map volumeMap; + /** + * {@link VolumeSet#failedVolumeMap} maintains a map of volumes which have + * failed. The keys in this map and {@link VolumeSet#volumeMap} are + * mutually exclusive. + */ + private Map failedVolumeMap; + + /** + * {@link VolumeSet#volumeStateMap} maintains a list of active volumes per + * StorageType. + */ + private EnumMap> volumeStateMap; + + /** + * Lock to synchronize changes to the VolumeSet. Any update to + * {@link VolumeSet#volumeMap}, {@link VolumeSet#failedVolumeMap}, or + * {@link VolumeSet#volumeStateMap} should be done after acquiring this lock. 
+ */ + private final AutoCloseableLock volumeSetLock; + + private final String datanodeUuid; + private String clusterID; + + private Runnable shutdownHook; + + public VolumeSet(String dnUuid, Configuration conf) + throws IOException { + this(dnUuid, null, conf); + } + + public VolumeSet(String dnUuid, String clusterID, Configuration conf) + throws IOException { + this.datanodeUuid = dnUuid; + this.clusterID = clusterID; + this.conf = conf; + this.volumeSetLock = new AutoCloseableLock( + new InstrumentedLock(getClass().getName(), LOG, + new ReentrantLock(true), + conf.getTimeDuration( + OzoneConfigKeys.HDDS_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY, + OzoneConfigKeys.HDDS_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT, + TimeUnit.MILLISECONDS), + conf.getTimeDuration( + OzoneConfigKeys.HDDS_LOCK_SUPPRESS_WARNING_INTERVAL_MS_KEY, + OzoneConfigKeys.HDDS_LOCK_SUPPRESS_WARNING_INTERVAL_MS_DEAFULT, + TimeUnit.MILLISECONDS))); + + initializeVolumeSet(); + } + + // Add DN volumes configured through ConfigKeys to volumeMap. + private void initializeVolumeSet() throws IOException { + volumeMap = new ConcurrentHashMap<>(); + failedVolumeMap = new ConcurrentHashMap<>(); + volumeStateMap = new EnumMap<>(StorageType.class); + + Collection rawLocations = conf.getTrimmedStringCollection( + HDDS_DATANODE_DIR_KEY); + if (rawLocations.isEmpty()) { + rawLocations = conf.getTrimmedStringCollection(DFS_DATANODE_DATA_DIR_KEY); + } + if (rawLocations.isEmpty()) { + throw new IllegalArgumentException("No location configured in either " + + HDDS_DATANODE_DIR_KEY + " or " + DFS_DATANODE_DATA_DIR_KEY); + } + + for (StorageType storageType : StorageType.values()) { + volumeStateMap.put(storageType, new ArrayList()); + } + + for (String locationString : rawLocations) { + try { + StorageLocation location = StorageLocation.parse(locationString); + + HddsVolume hddsVolume = createVolume(location.getUri().getPath(), + location.getStorageType()); + + checkAndSetClusterID(hddsVolume.getClusterID()); + + volumeMap.put(hddsVolume.getHddsRootDir().getPath(), hddsVolume); + volumeStateMap.get(hddsVolume.getStorageType()).add(hddsVolume); + LOG.info("Added Volume : {} to VolumeSet", + hddsVolume.getHddsRootDir().getPath()); + } catch (IOException e) { + HddsVolume volume = new HddsVolume.Builder(locationString) + .failedVolume(true).build(); + failedVolumeMap.put(locationString, volume); + LOG.error("Failed to parse the storage location: " + locationString, e); + } + } + + if (volumeMap.size() == 0) { + throw new DiskOutOfSpaceException("No storage location configured"); + } + + // Ensure volume threads are stopped and scm df is saved during shutdown. + shutdownHook = () -> { + saveVolumeSetUsed(); + }; + ShutdownHookManager.get().addShutdownHook(shutdownHook, + SHUTDOWN_HOOK_PRIORITY); + } + + /** + * If Version file exists and the {@link VolumeSet#clusterID} is not set yet, + * assign it the value from Version file. Otherwise, check that the given + * id matches with the id from version file. + * @param idFromVersionFile value of the property from Version file + * @throws InconsistentStorageStateException + */ + private void checkAndSetClusterID(String idFromVersionFile) + throws InconsistentStorageStateException { + // If the clusterID is null (not set), assign it the value + // from version file. + if (this.clusterID == null) { + this.clusterID = idFromVersionFile; + return; + } + + // If the clusterID is already set, it should match with the value from the + // version file. 
+ if (!idFromVersionFile.equals(this.clusterID)) { + throw new InconsistentStorageStateException( + "Mismatched ClusterIDs. VolumeSet has: " + this.clusterID + + ", and version file has: " + idFromVersionFile); + } + } + + public void acquireLock() { + volumeSetLock.acquire(); + } + + public void releaseLock() { + volumeSetLock.release(); + } + + private HddsVolume createVolume(String locationString, + StorageType storageType) throws IOException { + HddsVolume.Builder volumeBuilder = new HddsVolume.Builder(locationString) + .conf(conf) + .datanodeUuid(datanodeUuid) + .clusterID(clusterID) + .storageType(storageType); + return volumeBuilder.build(); + } + + + // Add a volume to VolumeSet + public boolean addVolume(String dataDir) { + return addVolume(dataDir, StorageType.DEFAULT); + } + + // Add a volume to VolumeSet + public boolean addVolume(String volumeRoot, StorageType storageType) { + String hddsRoot = HddsVolumeUtil.getHddsRoot(volumeRoot); + boolean success; + + try (AutoCloseableLock lock = volumeSetLock.acquire()) { + if (volumeMap.containsKey(hddsRoot)) { + LOG.warn("Volume : {} already exists in VolumeMap", hddsRoot); + success = false; + } else { + if (failedVolumeMap.containsKey(hddsRoot)) { + failedVolumeMap.remove(hddsRoot); + } + + HddsVolume hddsVolume = createVolume(volumeRoot, storageType); + volumeMap.put(hddsVolume.getHddsRootDir().getPath(), hddsVolume); + volumeStateMap.get(hddsVolume.getStorageType()).add(hddsVolume); + + LOG.info("Added Volume : {} to VolumeSet", + hddsVolume.getHddsRootDir().getPath()); + success = true; + } + } catch (IOException ex) { + LOG.error("Failed to add volume " + volumeRoot + " to VolumeSet", ex); + success = false; + } + return success; + } + + // Mark a volume as failed + public void failVolume(String dataDir) { + String hddsRoot = HddsVolumeUtil.getHddsRoot(dataDir); + + try (AutoCloseableLock lock = volumeSetLock.acquire()) { + if (volumeMap.containsKey(hddsRoot)) { + HddsVolume hddsVolume = volumeMap.get(hddsRoot); + hddsVolume.failVolume(); + + volumeMap.remove(hddsRoot); + volumeStateMap.get(hddsVolume.getStorageType()).remove(hddsVolume); + failedVolumeMap.put(hddsRoot, hddsVolume); + + LOG.info("Moving Volume : {} to failed Volumes", hddsRoot); + } else if (failedVolumeMap.containsKey(hddsRoot)) { + LOG.info("Volume : {} is not active", hddsRoot); + } else { + LOG.warn("Volume : {} does not exist in VolumeSet", hddsRoot); + } + } + } + + // Remove a volume from the VolumeSet completely. 
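Tying the VolumeSet API together: the datanode builds one VolumeSet from its configured data directories (hdds.datanode.dir, falling back to dfs.datanode.data.dir), asks a VolumeChoosingPolicy for a volume with room for a new container, and marks volumes failed or removed as disks come and go. A rough usage sketch, not part of the patch; the UUID, paths and 5 GB size are placeholders:

import java.io.IOException;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy;
import org.apache.hadoop.ozone.container.common.volume.VolumeSet;

public final class VolumeSetSketch {
  public static void main(String[] args) throws IOException {
    OzoneConfiguration conf = new OzoneConfiguration();
    // Reads the configured datanode directories and creates one HddsVolume each.
    VolumeSet volumeSet = new VolumeSet("datanode-uuid-1234", conf);

    // Round-robin over the healthy volumes with enough space for a 5 GB container.
    HddsVolume chosen = volumeSet.chooseVolume(5L * 1024 * 1024 * 1024,
        new RoundRobinVolumeChoosingPolicy());
    System.out.println("Chosen volume: " + chosen.getHddsRootDir());

    volumeSet.failVolume("/data/disk2");   // hypothetical bad disk
    volumeSet.shutdown();                  // persists scmUsed, stops usage threads
  }
}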
+ public void removeVolume(String dataDir) throws IOException { + String hddsRoot = HddsVolumeUtil.getHddsRoot(dataDir); + + try (AutoCloseableLock lock = volumeSetLock.acquire()) { + if (volumeMap.containsKey(hddsRoot)) { + HddsVolume hddsVolume = volumeMap.get(hddsRoot); + hddsVolume.shutdown(); + + volumeMap.remove(hddsRoot); + volumeStateMap.get(hddsVolume.getStorageType()).remove(hddsVolume); + + LOG.info("Removed Volume : {} from VolumeSet", hddsRoot); + } else if (failedVolumeMap.containsKey(hddsRoot)) { + HddsVolume hddsVolume = failedVolumeMap.get(hddsRoot); + hddsVolume.setState(VolumeState.NON_EXISTENT); + + failedVolumeMap.remove(hddsRoot); + LOG.info("Removed Volume : {} from failed VolumeSet", hddsRoot); + } else { + LOG.warn("Volume : {} does not exist in VolumeSet", hddsRoot); + } + } + } + + public HddsVolume chooseVolume(long containerSize, + VolumeChoosingPolicy choosingPolicy) throws IOException { + return choosingPolicy.chooseVolume(getVolumesList(), containerSize); + } + + /** + * This method, call shutdown on each volume to shutdown volume usage + * thread and write scmUsed on each volume. + */ + private void saveVolumeSetUsed() { + for (HddsVolume hddsVolume : volumeMap.values()) { + try { + hddsVolume.shutdown(); + } catch (Exception ex) { + LOG.error("Failed to shutdown volume : " + hddsVolume.getHddsRootDir(), + ex); + } + } + } + + /** + * Shutdown's the volumeset, if saveVolumeSetUsed is false, call's + * {@link VolumeSet#saveVolumeSetUsed}. + */ + public void shutdown() { + saveVolumeSetUsed(); + if (shutdownHook != null) { + ShutdownHookManager.get().removeShutdownHook(shutdownHook); + } + } + + @VisibleForTesting + public List getVolumesList() { + return ImmutableList.copyOf(volumeMap.values()); + } + + @VisibleForTesting + public List getFailedVolumesList() { + return ImmutableList.copyOf(failedVolumeMap.values()); + } + + @VisibleForTesting + public Map getVolumeMap() { + return ImmutableMap.copyOf(volumeMap); + } + + @VisibleForTesting + public Map> getVolumeStateMap() { + return ImmutableMap.copyOf(volumeStateMap); + } + + public StorageContainerDatanodeProtocolProtos.NodeReportProto getNodeReport() + throws IOException { + boolean failed; + StorageLocationReport[] reports = new StorageLocationReport[volumeMap + .size() + failedVolumeMap.size()]; + int counter = 0; + HddsVolume hddsVolume; + for (Map.Entry entry : volumeMap.entrySet()) { + hddsVolume = entry.getValue(); + VolumeInfo volumeInfo = hddsVolume.getVolumeInfo(); + long scmUsed = 0; + long remaining = 0; + failed = false; + try { + scmUsed = volumeInfo.getScmUsed(); + remaining = volumeInfo.getAvailable(); + } catch (IOException ex) { + LOG.warn("Failed to get scmUsed and remaining for container " + + "storage location {}", volumeInfo.getRootDir()); + // reset scmUsed and remaining if df/du failed. 
+ scmUsed = 0; + remaining = 0; + failed = true; + } + + StorageLocationReport.Builder builder = + StorageLocationReport.newBuilder(); + builder.setStorageLocation(volumeInfo.getRootDir()) + .setId(hddsVolume.getStorageID()) + .setFailed(failed) + .setCapacity(hddsVolume.getCapacity()) + .setRemaining(remaining) + .setScmUsed(scmUsed) + .setStorageType(hddsVolume.getStorageType()); + StorageLocationReport r = builder.build(); + reports[counter++] = r; + } + for (Map.Entry entry : failedVolumeMap.entrySet()) { + hddsVolume = entry.getValue(); + StorageLocationReport.Builder builder = StorageLocationReport + .newBuilder(); + builder.setStorageLocation(hddsVolume.getHddsRootDir() + .getAbsolutePath()).setId(hddsVolume.getStorageID()).setFailed(true) + .setCapacity(0).setRemaining(0).setScmUsed(0).setStorageType( + hddsVolume.getStorageType()); + StorageLocationReport r = builder.build(); + reports[counter++] = r; + } + NodeReportProto.Builder nrb = NodeReportProto.newBuilder(); + for (int i = 0; i < reports.length; i++) { + nrb.addStorageReport(reports[i].getProtoBufMessage()); + } + return nrb.build(); + } +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerStorageLocation.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java similarity index 59% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerStorageLocation.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java index 7431baa9f26..2c7563e0859 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerStorageLocation.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeUsage.java @@ -16,18 +16,14 @@ * limitations under the License. */ -package org.apache.hadoop.ozone.container.common.impl; +package org.apache.hadoop.ozone.container.common.volume; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CachingGetSpaceUsed; import org.apache.hadoop.fs.DF; import org.apache.hadoop.fs.GetSpaceUsed; -import org.apache.hadoop.fs.StorageType; -import org.apache.hadoop.hdfs.server.datanode.StorageLocation; -import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,102 +33,67 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; -import java.net.URI; import java.nio.charset.StandardCharsets; -import java.nio.file.Paths; import java.util.Scanner; -import static org.apache.hadoop.util.RunJar.SHUTDOWN_HOOK_PRIORITY; - /** - * Class that wraps the space usage of the Datanode Container Storage Location - * by SCM containers. + * Class that wraps the space df of the Datanode Volumes used by SCM + * containers. 
*/ -public class ContainerStorageLocation { - private static final Logger LOG = - LoggerFactory.getLogger(ContainerStorageLocation.class); +public class VolumeUsage { + private static final Logger LOG = LoggerFactory.getLogger(VolumeUsage.class); + + private final File rootDir; + private final DF df; + private final File scmUsedFile; + private GetSpaceUsed scmUsage; private static final String DU_CACHE_FILE = "scmUsed"; private volatile boolean scmUsedSaved = false; - private final StorageLocation dataLocation; - private final String storageUuId; - private final DF usage; - private final GetSpaceUsed scmUsage; - private final File scmUsedFile; - - public ContainerStorageLocation(StorageLocation dataLoc, Configuration conf) + VolumeUsage(File dataLoc, Configuration conf) throws IOException { - this.dataLocation = dataLoc; - this.storageUuId = DatanodeStorage.generateUuid(); - File dataDir = Paths.get(dataLoc.getNormalizedUri()).resolve( - OzoneConsts.CONTAINER_PREFIX).toFile(); - // Initialize container data root if it does not exist as required by DF/DU - if (!dataDir.exists()) { - if (!dataDir.mkdirs()) { - LOG.error("Unable to create the container storage location at : {}", - dataDir); - throw new IllegalArgumentException("Unable to create the container" + - " storage location at : " + dataDir); - } - } - scmUsedFile = new File(dataDir, DU_CACHE_FILE); - // get overall disk usage - this.usage = new DF(dataDir, conf); - // get SCM specific usage - this.scmUsage = new CachingGetSpaceUsed.Builder().setPath(dataDir) - .setConf(conf) - .setInitialUsed(loadScmUsed()) - .build(); + this.rootDir = dataLoc; - // Ensure scm usage is saved during shutdown. - ShutdownHookManager.get().addShutdownHook( - new Runnable() { - @Override - public void run() { - if (!scmUsedSaved) { - saveScmUsed(); - } - } - }, SHUTDOWN_HOOK_PRIORITY); - } + // SCM used cache file + scmUsedFile = new File(rootDir, DU_CACHE_FILE); + // get overall disk df + this.df = new DF(rootDir, conf); - public URI getNormalizedUri() { - return dataLocation.getNormalizedUri(); + startScmUsageThread(conf); } - public String getStorageUuId() { - return storageUuId; + void startScmUsageThread(Configuration conf) throws IOException { + // get SCM specific df + this.scmUsage = new CachingGetSpaceUsed.Builder().setPath(rootDir) + .setConf(conf) + .setInitialUsed(loadScmUsed()) + .build(); } - public long getCapacity() { - long capacity = usage.getCapacity(); + + long getCapacity() { + long capacity = df.getCapacity(); return (capacity > 0) ? capacity : 0; } - public long getAvailable() throws IOException { + /* + * Calculate the available space in the volume. + */ + long getAvailable() throws IOException { long remaining = getCapacity() - getScmUsed(); - long available = usage.getAvailable(); + long available = df.getAvailable(); if (remaining > available) { remaining = available; } return (remaining > 0) ? remaining : 0; } - public long getScmUsed() throws IOException{ + long getScmUsed() throws IOException{ return scmUsage.getUsed(); } - public String getStorageLocation() { - return getNormalizedUri().getRawPath(); - } - - public StorageType getStorageType() { - return dataLocation.getStorageType(); - } - public void shutdown() { saveScmUsed(); - scmUsedSaved = true; if (scmUsage instanceof CachingGetSpaceUsed) { IOUtils.cleanupWithLogger(null, ((CachingGetSpaceUsed) scmUsage)); @@ -172,7 +133,7 @@ long loadScmUsed() { // Return the cached value if mtime is okay. 
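Two behaviours of VolumeUsage worth spelling out: getAvailable() returns the smaller of what df reports and capacity minus the SCM-tracked usage, floored at zero, and the scmUsed figure is persisted to a small "scmUsed" cache file as "<used> <timestamp>" so a restart within ten minutes can reuse it instead of waiting for a fresh scan. A condensed sketch of both, using plain JDK I/O in place of DF/CachingGetSpaceUsed:

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Scanner;

public final class ScmUsedCacheSketch {
  /** Mirrors getAvailable(): min(capacity - scmUsed, dfAvailable), floored at 0. */
  static long available(long capacity, long scmUsed, long dfAvailable) {
    long remaining = Math.min(capacity - scmUsed, dfAvailable);
    return Math.max(remaining, 0);
  }

  /** Writes the cache in the "<used> <timestamp>" shape the class expects. */
  static void save(File cacheFile, long used, long now) throws IOException {
    Files.write(cacheFile.toPath(),
        (used + " " + now).getBytes(StandardCharsets.UTF_8));
  }

  /** Returns the cached value only if it is younger than ten minutes, else -1. */
  static long load(File cacheFile, long now) throws IOException {
    try (Scanner in = new Scanner(cacheFile, StandardCharsets.UTF_8.name())) {
      long cachedUsed = in.nextLong();
      long mtime = in.nextLong();
      return (mtime > 0 && now - mtime < 600000L) ? cachedUsed : -1;
    }
  }
}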
if (mtime > 0 && (Time.now() - mtime < 600000L)) { - LOG.info("Cached ScmUsed found for {} : {} ", dataLocation, + LOG.info("Cached ScmUsed found for {} : {} ", rootDir, cachedScmUsed); return cachedScmUsed; } @@ -187,7 +148,7 @@ long loadScmUsed() { */ void saveScmUsed() { if (scmUsedFile.exists() && !scmUsedFile.delete()) { - LOG.warn("Failed to delete old scmUsed file in {}.", dataLocation); + LOG.warn("Failed to delete old scmUsed file in {}.", rootDir); } OutputStreamWriter out = null; try { @@ -209,4 +170,12 @@ void saveScmUsed() { IOUtils.cleanupWithLogger(null, out); } } + + /** + * Only for testing. Do not use otherwise. + */ + @VisibleForTesting + public void setScmUsageForTesting(GetSpaceUsed scmUsageForTest) { + this.scmUsage = scmUsageForTest; + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/package-info.java new file mode 100644 index 00000000000..86093c6015c --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + * <p>
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; +/** + This package contains volume/ disk related classes. + */ \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueBlockIterator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueBlockIterator.java new file mode 100644 index 00000000000..f800223fbd3 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueBlockIterator.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; +import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyUtils; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil; +import org.apache.hadoop.utils.MetaStoreIterator; +import org.apache.hadoop.utils.MetadataKeyFilters; +import org.apache.hadoop.utils.MetadataKeyFilters.KeyPrefixFilter; +import org.apache.hadoop.utils.MetadataStore; +import org.apache.hadoop.utils.MetadataStore.KeyValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.NoSuchElementException; + + +/** + * Block Iterator for KeyValue Container. This block iterator returns blocks + * which match with the {@link MetadataKeyFilters.KeyPrefixFilter}. 
If no + * filter is specified, then default filter used is + * {@link MetadataKeyFilters#getNormalKeyFilter()} + */ +@InterfaceAudience.Public +public class KeyValueBlockIterator implements BlockIterator { + + private static final Logger LOG = LoggerFactory.getLogger( + KeyValueBlockIterator.class); + + private MetaStoreIterator blockIterator; + private static KeyPrefixFilter defaultBlockFilter = MetadataKeyFilters + .getNormalKeyFilter(); + private KeyPrefixFilter blockFilter; + private KeyData nextBlock; + private long containerId; + + /** + * KeyValueBlockIterator to iterate blocks in a container. + * @param id - container id + * @param path - container base path + * @throws IOException + */ + + public KeyValueBlockIterator(long id, File path) + throws IOException { + this(id, path, defaultBlockFilter); + } + + /** + * KeyValueBlockIterator to iterate blocks in a container. + * @param id - container id + * @param path - container base path + * @param filter - Block filter, filter to be applied for blocks + * @throws IOException + */ + public KeyValueBlockIterator(long id, File path, KeyPrefixFilter filter) + throws IOException { + containerId = id; + File metdataPath = new File(path, OzoneConsts.METADATA); + File containerFile = ContainerUtils.getContainerFile(metdataPath + .getParentFile()); + ContainerData containerData = ContainerDataYaml.readContainerFile( + containerFile); + KeyValueContainerData keyValueContainerData = (KeyValueContainerData) + containerData; + keyValueContainerData.setDbFile(KeyValueContainerLocationUtil + .getContainerDBFile(metdataPath, containerId)); + MetadataStore metadataStore = KeyUtils.getDB(keyValueContainerData, new + OzoneConfiguration()); + blockIterator = metadataStore.iterator(); + blockFilter = filter; + } + + /** + * This method returns blocks matching with the filter. + * @return next block or null if no more blocks + * @throws IOException + */ + @Override + public KeyData nextBlock() throws IOException, NoSuchElementException { + if (nextBlock != null) { + KeyData currentBlock = nextBlock; + nextBlock = null; + return currentBlock; + } + if(hasNext()) { + return nextBlock(); + } + throw new NoSuchElementException("Block Iterator reached end for " + + "ContainerID " + containerId); + } + + @Override + public boolean hasNext() throws IOException { + if (nextBlock != null) { + return true; + } + if (blockIterator.hasNext()) { + KeyValue block = blockIterator.next(); + if (blockFilter.filterKey(null, block.getKey(), null)) { + nextBlock = KeyUtils.getKeyData(block.getValue()); + LOG.trace("Block matching with filter found: blockID is : {} for " + + "containerID {}", nextBlock.getLocalID(), containerId); + return true; + } + hasNext(); + } + return false; + } + + @Override + public void seekToFirst() { + nextBlock = null; + blockIterator.seekToFirst(); + } + + @Override + public void seekToLast() { + nextBlock = null; + blockIterator.seekToLast(); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java new file mode 100644 index 00000000000..b893a389f14 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java @@ -0,0 +1,575 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
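For completeness, the block iterator introduced above is used roughly as follows: construct it with a container ID and the container base path (optionally a KeyPrefixFilter), then walk the blocks with hasNext()/nextBlock(). A hedged usage sketch; the ID and path are illustrative:

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.ozone.container.common.helpers.KeyData;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueBlockIterator;

public final class BlockIteratorSketch {
  public static void main(String[] args) throws IOException {
    long containerId = 123L;   // placeholder container ID
    File containerBase =
        new File("/data/disk1/hdds/scm-uuid/current/containerDir0/123");

    // Default filter skips deleted/deleting keys and returns normal blocks.
    KeyValueBlockIterator blocks =
        new KeyValueBlockIterator(containerId, containerBase);
    while (blocks.hasNext()) {
      KeyData block = blocks.nextBlock();
      System.out.println("block " + block.getLocalID());
    }
  }
}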
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.Map; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerLifeCycleState; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerType; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.io.nativeio.NativeIO; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerPacker; +import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyUtils; +import org.apache.hadoop.ozone.container.keyvalue.helpers + .KeyValueContainerLocationUtil; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; +import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; +import org.apache.hadoop.utils.MetadataStore; + +import com.google.common.base.Preconditions; +import org.apache.commons.io.FileUtils; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CONTAINER_ALREADY_EXISTS; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CONTAINER_FILES_CREATE_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CONTAINER_INTERNAL_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.DISK_OUT_OF_SPACE; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.ERROR_IN_COMPACT_DB; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.INVALID_CONTAINER_STATE; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.UNSUPPORTED_REQUEST; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class 
to perform KeyValue Container operations. + */ +public class KeyValueContainer implements Container { + + private static final Logger LOG = LoggerFactory.getLogger(Container.class); + + // Use a non-fair RW lock for better throughput, we may revisit this decision + // if this causes fairness issues. + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + private final KeyValueContainerData containerData; + private Configuration config; + + public KeyValueContainer(KeyValueContainerData containerData, Configuration + ozoneConfig) { + Preconditions.checkNotNull(containerData, "KeyValueContainerData cannot " + + "be null"); + Preconditions.checkNotNull(ozoneConfig, "Ozone configuration cannot " + + "be null"); + this.config = ozoneConfig; + this.containerData = containerData; + } + + @Override + public void create(VolumeSet volumeSet, VolumeChoosingPolicy + volumeChoosingPolicy, String scmId) throws StorageContainerException { + Preconditions.checkNotNull(volumeChoosingPolicy, "VolumeChoosingPolicy " + + "cannot be null"); + Preconditions.checkNotNull(volumeSet, "VolumeSet cannot be null"); + Preconditions.checkNotNull(scmId, "scmId cannot be null"); + + File containerMetaDataPath = null; + //acquiring volumeset lock and container lock + volumeSet.acquireLock(); + long maxSize = containerData.getMaxSize(); + try { + HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume(volumeSet + .getVolumesList(), maxSize); + String hddsVolumeDir = containerVolume.getHddsRootDir().toString(); + + long containerID = containerData.getContainerID(); + + containerMetaDataPath = KeyValueContainerLocationUtil + .getContainerMetaDataPath(hddsVolumeDir, scmId, containerID); + containerData.setMetadataPath(containerMetaDataPath.getPath()); + + File chunksPath = KeyValueContainerLocationUtil.getChunksLocationPath( + hddsVolumeDir, scmId, containerID); + + // Check if it is new Container. + ContainerUtils.verifyIsNewContainer(containerMetaDataPath); + + //Create Metadata path chunks path and metadata db + File dbFile = getContainerDBFile(); + KeyValueContainerUtil.createContainerMetaData(containerMetaDataPath, + chunksPath, dbFile, config); + + String impl = config.getTrimmed(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL, + OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_DEFAULT); + + //Set containerData for the KeyValueContainer. 
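+      // Record the chunks path, DB type, DB file handle and chosen volume on
+      // the in-memory container data before the .container file is written
+      // below (createContainerFile goes through a temp file and an atomic
+      // rename, see writeToContainerFile).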
+ containerData.setChunksPath(chunksPath.getPath()); + containerData.setContainerDBType(impl); + containerData.setDbFile(dbFile); + containerData.setVolume(containerVolume); + + // Create .container file + File containerFile = getContainerFile(); + createContainerFile(containerFile); + + } catch (StorageContainerException ex) { + if (containerMetaDataPath != null && containerMetaDataPath.getParentFile() + .exists()) { + FileUtil.fullyDelete(containerMetaDataPath.getParentFile()); + } + throw ex; + } catch (DiskOutOfSpaceException ex) { + throw new StorageContainerException("Container creation failed, due to " + + "disk out of space", ex, DISK_OUT_OF_SPACE); + } catch (FileAlreadyExistsException ex) { + throw new StorageContainerException("Container creation failed because " + + "ContainerFile already exists", ex, CONTAINER_ALREADY_EXISTS); + } catch (IOException ex) { + if (containerMetaDataPath != null && containerMetaDataPath.getParentFile() + .exists()) { + FileUtil.fullyDelete(containerMetaDataPath.getParentFile()); + } + throw new StorageContainerException("Container creation failed.", ex, + CONTAINER_INTERNAL_ERROR); + } finally { + volumeSet.releaseLock(); + } + } + + /** + * Set all of the path realted container data fields based on the name + * conventions. + * + * @param scmId + * @param containerVolume + * @param hddsVolumeDir + */ + public void populatePathFields(String scmId, + HddsVolume containerVolume, String hddsVolumeDir) { + + long containerId = containerData.getContainerID(); + + File containerMetaDataPath = KeyValueContainerLocationUtil + .getContainerMetaDataPath(hddsVolumeDir, scmId, containerId); + + File chunksPath = KeyValueContainerLocationUtil.getChunksLocationPath( + hddsVolumeDir, scmId, containerId); + File dbFile = KeyValueContainerLocationUtil.getContainerDBFile( + containerMetaDataPath, containerId); + + //Set containerData for the KeyValueContainer. + containerData.setMetadataPath(containerMetaDataPath.getPath()); + containerData.setChunksPath(chunksPath.getPath()); + containerData.setDbFile(dbFile); + containerData.setVolume(containerVolume); + } + + /** + * Writes to .container file. + * + * @param containerFile container file name + * @param isCreate True if creating a new file. False is updating an + * existing container file. + * @throws StorageContainerException + */ + private void writeToContainerFile(File containerFile, boolean isCreate) + throws StorageContainerException { + File tempContainerFile = null; + long containerId = containerData.getContainerID(); + try { + tempContainerFile = createTempFile(containerFile); + ContainerDataYaml.createContainerFile( + ContainerType.KeyValueContainer, containerData, tempContainerFile); + + // NativeIO.renameTo is an atomic function. But it might fail if the + // container file already exists. Hence, we handle the two cases + // separately. + if (isCreate) { + NativeIO.renameTo(tempContainerFile, containerFile); + } else { + Files.move(tempContainerFile.toPath(), containerFile.toPath(), + StandardCopyOption.REPLACE_EXISTING); + } + + } catch (IOException ex) { + throw new StorageContainerException("Error while creating/ updating " + + ".container file. 
ContainerID: " + containerId, ex, + CONTAINER_FILES_CREATE_ERROR); + } finally { + if (tempContainerFile != null && tempContainerFile.exists()) { + if (!tempContainerFile.delete()) { + LOG.warn("Unable to delete container temporary file: {}.", + tempContainerFile.getAbsolutePath()); + } + } + } + } + + private void createContainerFile(File containerFile) + throws StorageContainerException { + writeToContainerFile(containerFile, true); + } + + private void updateContainerFile(File containerFile) + throws StorageContainerException { + writeToContainerFile(containerFile, false); + } + + + @Override + public void delete(boolean forceDelete) + throws StorageContainerException { + long containerId = containerData.getContainerID(); + try { + KeyValueContainerUtil.removeContainer(containerData, config, forceDelete); + } catch (StorageContainerException ex) { + throw ex; + } catch (IOException ex) { + // TODO : An I/O error during delete can leave partial artifacts on the + // disk. We will need the cleaner thread to cleanup this information. + String errMsg = String.format("Failed to cleanup container. ID: %d", + containerId); + LOG.error(errMsg, ex); + throw new StorageContainerException(errMsg, ex, CONTAINER_INTERNAL_ERROR); + } + } + + @Override + public void close() throws StorageContainerException { + + //TODO: writing .container file and compaction can be done + // asynchronously, otherwise rpc call for this will take a lot of time to + // complete this action + try { + writeLock(); + + containerData.closeContainer(); + File containerFile = getContainerFile(); + // update the new container data to .container File + updateContainerFile(containerFile); + + } catch (StorageContainerException ex) { + // Failed to update .container file. Reset the state to CLOSING + containerData.setState(ContainerLifeCycleState.CLOSING); + throw ex; + } finally { + writeUnlock(); + } + + // It is ok if this operation takes a bit of time. + // Close container is not expected to be instantaneous. + try { + MetadataStore db = KeyUtils.getDB(containerData, config); + db.compactDB(); + } catch (StorageContainerException ex) { + throw ex; + } catch (IOException ex) { + LOG.error("Error in DB compaction while closing container", ex); + throw new StorageContainerException(ex, ERROR_IN_COMPACT_DB); + } + } + + @Override + public KeyValueContainerData getContainerData() { + return containerData; + } + + @Override + public ContainerLifeCycleState getContainerState() { + return containerData.getState(); + } + + @Override + public ContainerType getContainerType() { + return ContainerType.KeyValueContainer; + } + + @Override + public void update(Map metadata, boolean forceUpdate) + throws StorageContainerException { + + // TODO: Now, when writing the updated data to .container file, we are + // holding lock and writing data to disk. We can have async implementation + // to flush the update container data to disk. + long containerId = containerData.getContainerID(); + if(!containerData.isValid()) { + LOG.debug("Invalid container data. ContainerID: {}", containerId); + throw new StorageContainerException("Invalid container data. " + + "ContainerID: " + containerId, INVALID_CONTAINER_STATE); + } + if (!forceUpdate && !containerData.isOpen()) { + throw new StorageContainerException( + "Updating a closed container without force option is not allowed. 
" + + "ContainerID: " + containerId, UNSUPPORTED_REQUEST); + } + + Map oldMetadata = containerData.getMetadata(); + try { + writeLock(); + for (Map.Entry entry : metadata.entrySet()) { + containerData.addMetadata(entry.getKey(), entry.getValue()); + } + + File containerFile = getContainerFile(); + // update the new container data to .container File + updateContainerFile(containerFile); + } catch (StorageContainerException ex) { + containerData.setMetadata(oldMetadata); + throw ex; + } finally { + writeUnlock(); + } + } + + @Override + public void updateDeleteTransactionId(long deleteTransactionId) { + containerData.updateDeleteTransactionId(deleteTransactionId); + } + + @Override + public KeyValueBlockIterator blockIterator() throws IOException{ + return new KeyValueBlockIterator(containerData.getContainerID(), new File( + containerData.getContainerPath())); + } + + @Override + public void importContainerData(InputStream input, + ContainerPacker packer) throws IOException { + writeLock(); + try { + if (getContainerFile().exists()) { + String errorMessage = String.format( + "Can't import container (cid=%d) data to a specific location" + + " as the container descriptor (%s) has already been exist.", + getContainerData().getContainerID(), + getContainerFile().getAbsolutePath()); + throw new IOException(errorMessage); + } + //copy the values from the input stream to the final destination + // directory. + byte[] descriptorContent = packer.unpackContainerData(this, input); + + Preconditions.checkNotNull(descriptorContent, + "Container descriptor is missing from the container archive: " + + getContainerData().getContainerID()); + + //now, we have extracted the container descriptor from the previous + //datanode. We can load it and upload it with the current data + // (original metadata + current filepath fields) + KeyValueContainerData originalContainerData = + (KeyValueContainerData) ContainerDataYaml + .readContainer(descriptorContent); + + + containerData.setState(originalContainerData.getState()); + containerData + .setContainerDBType(originalContainerData.getContainerDBType()); + containerData.setBytesUsed(originalContainerData.getBytesUsed()); + + //rewriting the yaml file with new checksum calculation. + update(originalContainerData.getMetadata(), true); + + //fill in memory stat counter (keycount, byte usage) + KeyValueContainerUtil.parseKVContainerData(containerData, config); + + } catch (Exception ex) { + //delete all the temporary data in case of any exception. + try { + FileUtils.deleteDirectory(new File(containerData.getMetadataPath())); + FileUtils.deleteDirectory(new File(containerData.getChunksPath())); + FileUtils.deleteDirectory(getContainerFile()); + } catch (Exception deleteex) { + LOG.error( + "Can not cleanup destination directories after a container import" + + " error (cid" + + containerData.getContainerID() + ")", deleteex); + } + throw ex; + } finally { + writeUnlock(); + } + } + + @Override + public void exportContainerData(OutputStream destination, + ContainerPacker packer) throws IOException { + if (getContainerData().getState() != ContainerLifeCycleState.CLOSED) { + throw new IllegalStateException( + "Only closed containers could be exported: ContainerId=" + + getContainerData().getContainerID()); + } + packer.pack(this, destination); + } + + /** + * Acquire read lock. + */ + public void readLock() { + this.lock.readLock().lock(); + + } + + /** + * Release read lock. 
+ */ + public void readUnlock() { + this.lock.readLock().unlock(); + } + + /** + * Check if the current thread holds read lock. + */ + public boolean hasReadLock() { + return this.lock.readLock().tryLock(); + } + + /** + * Acquire write lock. + */ + public void writeLock() { + this.lock.writeLock().lock(); + } + + /** + * Release write lock. + */ + public void writeUnlock() { + this.lock.writeLock().unlock(); + + } + + /** + * Check if the current thread holds write lock. + */ + public boolean hasWriteLock() { + return this.lock.writeLock().isHeldByCurrentThread(); + } + + /** + * Acquire read lock, unless interrupted while waiting. + * @throws InterruptedException + */ + @Override + public void readLockInterruptibly() throws InterruptedException { + this.lock.readLock().lockInterruptibly(); + } + + /** + * Acquire write lock, unless interrupted while waiting. + * @throws InterruptedException + */ + @Override + public void writeLockInterruptibly() throws InterruptedException { + this.lock.writeLock().lockInterruptibly(); + + } + + /** + * Returns containerFile. + * @return .container File name + */ + @Override + public File getContainerFile() { + return new File(containerData.getMetadataPath(), containerData + .getContainerID() + OzoneConsts.CONTAINER_EXTENSION); + } + + /** + * Returns KeyValueContainerReport for the KeyValueContainer. + */ + @Override + public StorageContainerDatanodeProtocolProtos.ContainerInfo + getContainerReport() throws StorageContainerException{ + StorageContainerDatanodeProtocolProtos.ContainerInfo.Builder ciBuilder = + StorageContainerDatanodeProtocolProtos.ContainerInfo.newBuilder(); + ciBuilder.setContainerID(containerData.getContainerID()) + .setReadCount(containerData.getReadCount()) + .setWriteCount(containerData.getWriteCount()) + .setReadBytes(containerData.getReadBytes()) + .setWriteBytes(containerData.getWriteBytes()) + .setUsed(containerData.getBytesUsed()) + .setState(getHddsState()) + .setDeleteTransactionId(containerData.getDeleteTransactionId()); + return ciBuilder.build(); + } + + /** + * Returns LifeCycle State of the container. + * @return LifeCycle State of the container in HddsProtos format + * @throws StorageContainerException + */ + private HddsProtos.LifeCycleState getHddsState() + throws StorageContainerException { + HddsProtos.LifeCycleState state; + switch (containerData.getState()) { + case OPEN: + state = HddsProtos.LifeCycleState.OPEN; + break; + case CLOSING: + state = HddsProtos.LifeCycleState.CLOSING; + break; + case CLOSED: + state = HddsProtos.LifeCycleState.CLOSED; + break; + default: + throw new StorageContainerException("Invalid Container state found: " + + containerData.getContainerID(), INVALID_CONTAINER_STATE); + } + return state; + } + + /** + * Returns container DB file. + * @return + */ + public File getContainerDBFile() { + return new File(containerData.getMetadataPath(), containerData + .getContainerID() + OzoneConsts.DN_CONTAINER_DB); + } + + /** + * Creates a temporary file. 
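+ * The temporary file is created in the parent directory of the target file,
+ * so the later rename in writeToContainerFile stays on the same file system.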
+ * @param file + * @return + * @throws IOException + */ + private File createTempFile(File file) throws IOException{ + return File.createTempFile("tmp_" + System.currentTimeMillis() + "_", + file.getName(), file.getParentFile()); + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java new file mode 100644 index 00000000000..7ffdbf558ea --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import java.util.Collections; + +import org.apache.hadoop.conf.StorageSize; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.yaml.snakeyaml.nodes.Tag; + + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +import static java.lang.Math.max; +import static org.apache.hadoop.ozone.OzoneConsts.CHUNKS_PATH; +import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_DB_TYPE; +import static org.apache.hadoop.ozone.OzoneConsts.METADATA_PATH; + +/** + * This class represents the KeyValueContainer metadata, which is the + * in-memory representation of container metadata and is represented on disk + * by the .container file. + */ +public class KeyValueContainerData extends ContainerData { + + // Yaml Tag used for KeyValueContainerData. + public static final Tag KEYVALUE_YAML_TAG = new Tag("KeyValueContainerData"); + + // Fields need to be stored in .container file. + private static final List KV_YAML_FIELDS; + + // Path to Container metadata Level DB/RocksDB Store and .container file. + private String metadataPath; + + // Path to Physical file system where chunks are stored. + private String chunksPath; + + //Type of DB used to store key to chunks mapping + private String containerDBType; + + private File dbFile = null; + + /** + * Number of pending deletion blocks in KeyValueContainer. 
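+ * Kept as an AtomicInteger so incrPendingDeletionBlocks and
+ * decrPendingDeletionBlocks can adjust it without extra synchronization.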
+ */ + private final AtomicInteger numPendingDeletionBlocks; + + private long deleteTransactionId; + + static { + // Initialize YAML fields + KV_YAML_FIELDS = Lists.newArrayList(); + KV_YAML_FIELDS.addAll(YAML_FIELDS); + KV_YAML_FIELDS.add(METADATA_PATH); + KV_YAML_FIELDS.add(CHUNKS_PATH); + KV_YAML_FIELDS.add(CONTAINER_DB_TYPE); + } + + /** + * Constructs KeyValueContainerData object. + * @param id - ContainerId + * @param size - maximum size of the container in bytes + */ + public KeyValueContainerData(long id, long size) { + super(ContainerProtos.ContainerType.KeyValueContainer, id, size); + this.numPendingDeletionBlocks = new AtomicInteger(0); + this.deleteTransactionId = 0; + } + + /** + * Constructs KeyValueContainerData object. + * @param id - ContainerId + * @param layOutVersion + * @param size - maximum size of the container in bytes + */ + public KeyValueContainerData(long id, int layOutVersion, long size) { + super(ContainerProtos.ContainerType.KeyValueContainer, id, layOutVersion, + size); + this.numPendingDeletionBlocks = new AtomicInteger(0); + this.deleteTransactionId = 0; + } + + + /** + * Sets Container dbFile. This should be called only during creation of + * KeyValue container. + * @param containerDbFile + */ + public void setDbFile(File containerDbFile) { + dbFile = containerDbFile; + } + + /** + * Returns container DB file. + * @return dbFile + */ + public File getDbFile() { + return dbFile; + } + + /** + * Returns container metadata path. + * @return - Physical path where container file and checksum is stored. + */ + public String getMetadataPath() { + return metadataPath; + } + + /** + * Sets container metadata path. + * + * @param path - String. + */ + public void setMetadataPath(String path) { + this.metadataPath = path; + } + + /** + * Returns the path to base dir of the container. + * @return Path to base dir + */ + public String getContainerPath() { + if (metadataPath == null) { + return null; + } + return new File(metadataPath).getParent(); + } + + /** + * Get chunks path. + * @return - Path where chunks are stored + */ + public String getChunksPath() { + return chunksPath; + } + + /** + * Set chunks Path. + * @param chunkPath - File path. + */ + public void setChunksPath(String chunkPath) { + this.chunksPath = chunkPath; + } + + /** + * Returns the DBType used for the container. + * @return containerDBType + */ + public String getContainerDBType() { + return containerDBType; + } + + /** + * Sets the DBType used for the container. + * @param containerDBType + */ + public void setContainerDBType(String containerDBType) { + this.containerDBType = containerDBType; + } + + /** + * Increase the count of pending deletion blocks. + * + * @param numBlocks increment number + */ + public void incrPendingDeletionBlocks(int numBlocks) { + this.numPendingDeletionBlocks.addAndGet(numBlocks); + } + + /** + * Decrease the count of pending deletion blocks. + * + * @param numBlocks decrement number + */ + public void decrPendingDeletionBlocks(int numBlocks) { + this.numPendingDeletionBlocks.addAndGet(-1 * numBlocks); + } + + /** + * Get the number of pending deletion blocks. + */ + public int getNumPendingDeletionBlocks() { + return this.numPendingDeletionBlocks.get(); + } + + /** + * Sets deleteTransactionId to latest delete transactionId for the container. + * + * @param transactionId latest transactionId of the container. 
+ */ + public void updateDeleteTransactionId(long transactionId) { + deleteTransactionId = max(transactionId, deleteTransactionId); + } + + /** + * Return the latest deleteTransactionId of the container. + */ + public long getDeleteTransactionId() { + return deleteTransactionId; + } + + /** + * Returns a ProtoBuf Message from ContainerData. + * + * @return Protocol Buffer Message + */ + public ContainerProtos.ContainerData getProtoBufMessage() { + ContainerProtos.ContainerData.Builder builder = ContainerProtos + .ContainerData.newBuilder(); + builder.setContainerID(this.getContainerID()); + builder.setContainerPath(this.getMetadataPath()); + builder.setState(this.getState()); + + for (Map.Entry entry : getMetadata().entrySet()) { + ContainerProtos.KeyValue.Builder keyValBuilder = + ContainerProtos.KeyValue.newBuilder(); + builder.addMetadata(keyValBuilder.setKey(entry.getKey()) + .setValue(entry.getValue()).build()); + } + + if (this.getBytesUsed() >= 0) { + builder.setBytesUsed(this.getBytesUsed()); + } + + if(this.getContainerType() != null) { + builder.setContainerType(ContainerProtos.ContainerType.KeyValueContainer); + } + + return builder.build(); + } + + public static List getYamlFields() { + return Collections.unmodifiableList(KV_YAML_FIELDS); + } + + /** + * Constructs a KeyValueContainerData object from ProtoBuf classes. + * + * @param protoData - ProtoBuf Message + * @throws IOException + */ + @VisibleForTesting + public static KeyValueContainerData getFromProtoBuf( + ContainerProtos.ContainerData protoData) throws IOException { + // TODO: Add containerMaxSize to ContainerProtos.ContainerData + StorageSize storageSize = StorageSize.parse( + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT); + KeyValueContainerData data = new KeyValueContainerData( + protoData.getContainerID(), + (long)storageSize.getUnit().toBytes(storageSize.getValue())); + for (int x = 0; x < protoData.getMetadataCount(); x++) { + data.addMetadata(protoData.getMetadata(x).getKey(), + protoData.getMetadata(x).getValue()); + } + + if (protoData.hasContainerPath()) { + String metadataPath = protoData.getContainerPath()+ File.separator + + OzoneConsts.CONTAINER_META_PATH; + data.setMetadataPath(metadataPath); + } + + if (protoData.hasState()) { + data.setState(protoData.getState()); + } + + if (protoData.hasBytesUsed()) { + data.setBytesUsed(protoData.getBytesUsed()); + } + + return data; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java new file mode 100644 index 00000000000..5acecb4f04b --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -0,0 +1,850 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import java.io.FileInputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerLifeCycleState; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerType; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .GetSmallFileRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.KeyValue; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .PutSmallFileRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.impl.OpenContainerBlockMap; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.Handler; +import org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume + .RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.helpers.ChunkUtils; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyUtils; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; +import org.apache.hadoop.ozone.container.keyvalue.helpers.SmallFileUtils; +import org.apache.hadoop.ozone.container.keyvalue.impl.ChunkManagerImpl; +import org.apache.hadoop.ozone.container.keyvalue.impl.KeyManagerImpl; +import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager; +import org.apache.hadoop.ozone.container.keyvalue.interfaces.KeyManager; +import org.apache.hadoop.ozone.container.keyvalue.statemachine.background + .BlockDeletingService; +import org.apache.hadoop.util.AutoCloseableLock; +import org.apache.hadoop.util.ReflectionUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_DATANODE_VOLUME_CHOOSING_POLICY; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.BLOCK_NOT_COMMITTED; +import 
static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CLOSED_CONTAINER_IO; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CONTAINER_INTERNAL_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.DELETE_ON_OPEN_CONTAINER; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.GET_SMALL_FILE_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.INVALID_CONTAINER_STATE; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.IO_EXCEPTION; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.PUT_SMALL_FILE_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Stage; +import static org.apache.hadoop.ozone.OzoneConfigKeys + .OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConfigKeys + .OZONE_BLOCK_DELETING_SERVICE_INTERVAL_DEFAULT; +import static org.apache.hadoop.ozone.OzoneConfigKeys + .OZONE_BLOCK_DELETING_SERVICE_TIMEOUT; +import static org.apache.hadoop.ozone.OzoneConfigKeys + .OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Handler for KeyValue Container type. + */ +public class KeyValueHandler extends Handler { + + private static final Logger LOG = LoggerFactory.getLogger( + KeyValueHandler.class); + + private final ContainerType containerType; + private final KeyManager keyManager; + private final ChunkManager chunkManager; + private final BlockDeletingService blockDeletingService; + private final VolumeChoosingPolicy volumeChoosingPolicy; + private final long maxContainerSize; + private final AutoCloseableLock handlerLock; + private final OpenContainerBlockMap openContainerBlockMap; + + public KeyValueHandler(Configuration config, ContainerSet contSet, + VolumeSet volSet, ContainerMetrics metrics) { + super(config, contSet, volSet, metrics); + containerType = ContainerType.KeyValueContainer; + keyManager = new KeyManagerImpl(config); + chunkManager = new ChunkManagerImpl(); + long svcInterval = config + .getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, + OZONE_BLOCK_DELETING_SERVICE_INTERVAL_DEFAULT, + TimeUnit.MILLISECONDS); + long serviceTimeout = config + .getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_TIMEOUT, + OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT, + TimeUnit.MILLISECONDS); + this.blockDeletingService = + new BlockDeletingService(containerSet, svcInterval, serviceTimeout, + TimeUnit.MILLISECONDS, config); + blockDeletingService.start(); + volumeChoosingPolicy = ReflectionUtils.newInstance(conf.getClass( + HDDS_DATANODE_VOLUME_CHOOSING_POLICY, RoundRobinVolumeChoosingPolicy + .class, VolumeChoosingPolicy.class), conf); + maxContainerSize = (long)config.getStorageSize( + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); + // this handler lock is used for synchronizing createContainer Requests, + // so using a fair lock here. + handlerLock = new AutoCloseableLock(new ReentrantLock(true)); + openContainerBlockMap = new OpenContainerBlockMap(); + } + + @VisibleForTesting + public VolumeChoosingPolicy getVolumeChoosingPolicyForTesting() { + return volumeChoosingPolicy; + } + /** + * Returns OpenContainerBlockMap instance. 
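+ * The map tracks chunks written to blocks of open containers that are not
+ * yet committed: entries are added during the COMMIT_DATA/COMBINED stage of
+ * WriteChunk and removed once the key is put or the container is closed or
+ * deleted.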
+ * + * @return OpenContainerBlockMap + */ + public OpenContainerBlockMap getOpenContainerBlockMap() { + return openContainerBlockMap; + } + + @Override + public ContainerCommandResponseProto handle( + ContainerCommandRequestProto request, Container container) { + + Type cmdType = request.getCmdType(); + KeyValueContainer kvContainer = (KeyValueContainer) container; + switch(cmdType) { + case CreateContainer: + return handleCreateContainer(request, kvContainer); + case ReadContainer: + return handleReadContainer(request, kvContainer); + case UpdateContainer: + return handleUpdateContainer(request, kvContainer); + case DeleteContainer: + return handleDeleteContainer(request, kvContainer); + case ListContainer: + return handleUnsupportedOp(request); + case CloseContainer: + return handleCloseContainer(request, kvContainer); + case PutKey: + return handlePutKey(request, kvContainer); + case GetKey: + return handleGetKey(request, kvContainer); + case DeleteKey: + return handleDeleteKey(request, kvContainer); + case ListKey: + return handleUnsupportedOp(request); + case ReadChunk: + return handleReadChunk(request, kvContainer); + case DeleteChunk: + return handleDeleteChunk(request, kvContainer); + case WriteChunk: + return handleWriteChunk(request, kvContainer); + case ListChunk: + return handleUnsupportedOp(request); + case CompactChunk: + return handleUnsupportedOp(request); + case PutSmallFile: + return handlePutSmallFile(request, kvContainer); + case GetSmallFile: + return handleGetSmallFile(request, kvContainer); + case GetCommittedBlockLength: + return handleGetCommittedBlockLength(request, kvContainer); + default: + return null; + } + } + + @VisibleForTesting + public ChunkManager getChunkManager() { + return this.chunkManager; + } + + @VisibleForTesting + public KeyManager getKeyManager() { + return this.keyManager; + } + + /** + * Handles Create Container Request. If successful, adds the container to + * ContainerSet. + */ + ContainerCommandResponseProto handleCreateContainer( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + if (!request.hasCreateContainer()) { + LOG.debug("Malformed Create Container request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + // Create Container request should be passed a null container as the + // container would be created here. + Preconditions.checkArgument(kvContainer == null); + + long containerID = request.getContainerID(); + + KeyValueContainerData newContainerData = new KeyValueContainerData( + containerID, maxContainerSize); + // TODO: Add support to add metadataList to ContainerData. Add metadata + // to container during creation. 
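+    // The container object is constructed outside the handler lock; the lock
+    // below only guards the check-then-add on the ContainerSet so concurrent
+    // create requests for the same containerID cannot both succeed.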
+ KeyValueContainer newContainer = new KeyValueContainer( + newContainerData, conf); + + try { + handlerLock.acquire(); + if (containerSet.getContainer(containerID) == null) { + newContainer.create(volumeSet, volumeChoosingPolicy, scmID); + containerSet.addContainer(newContainer); + } else { + throw new StorageContainerException("Container already exists with " + + "container Id " + containerID, ContainerProtos.Result + .CONTAINER_EXISTS); + } + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } finally { + handlerLock.release(); + } + + return ContainerUtils.getSuccessResponse(request); + } + + public void populateContainerPathFields(KeyValueContainer container, + long maxSize) throws IOException { + volumeSet.acquireLock(); + try { + HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume(volumeSet + .getVolumesList(), maxSize); + String hddsVolumeDir = containerVolume.getHddsRootDir().toString(); + container.populatePathFields(scmID, containerVolume, hddsVolumeDir); + } finally { + volumeSet.releaseLock(); + } + } + + /** + * Handles Read Container Request. Returns the ContainerData as response. + */ + ContainerCommandResponseProto handleReadContainer( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + if (!request.hasReadContainer()) { + LOG.debug("Malformed Read Container request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + KeyValueContainerData containerData = kvContainer.getContainerData(); + return KeyValueContainerUtil.getReadContainerResponse( + request, containerData); + } + + + /** + * Handles Update Container Request. If successful, the container metadata + * is updated. + */ + ContainerCommandResponseProto handleUpdateContainer( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasUpdateContainer()) { + LOG.debug("Malformed Update Container request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + boolean forceUpdate = request.getUpdateContainer().getForceUpdate(); + List keyValueList = + request.getUpdateContainer().getMetadataList(); + Map metadata = new HashMap<>(); + for (KeyValue keyValue : keyValueList) { + metadata.put(keyValue.getKey(), keyValue.getValue()); + } + + try { + if (!metadata.isEmpty()) { + kvContainer.update(metadata, forceUpdate); + } + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } + return ContainerUtils.getSuccessResponse(request); + } + + /** + * Handles Delete Container Request. + * Open containers cannot be deleted. + * Holds writeLock on ContainerSet till the container is removed from + * containerMap. On disk deletion of container files will happen + * asynchronously without the lock. + */ + ContainerCommandResponseProto handleDeleteContainer( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasDeleteContainer()) { + LOG.debug("Malformed Delete container request. 
trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + boolean forceDelete = request.getDeleteContainer().getForceDelete(); + kvContainer.writeLock(); + try { + // Check if container is open + if (kvContainer.getContainerData().isOpen()) { + kvContainer.writeUnlock(); + throw new StorageContainerException( + "Deletion of Open Container is not allowed.", + DELETE_ON_OPEN_CONTAINER); + } else if (!forceDelete && kvContainer.getContainerData().getKeyCount() + > 0) { + // If the container is not empty and cannot be deleted forcibly, + // then throw a SCE to stop deleting. + kvContainer.writeUnlock(); + throw new StorageContainerException( + "Container cannot be deleted because it is not empty.", + ContainerProtos.Result.ERROR_CONTAINER_NOT_EMPTY); + } else { + long containerId = kvContainer.getContainerData().getContainerID(); + containerSet.removeContainer(containerId); + openContainerBlockMap.removeContainer(containerId); + // Release the lock first. + // Avoid holding write locks for disk operations + kvContainer.writeUnlock(); + + kvContainer.delete(forceDelete); + } + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } finally { + if (kvContainer.hasWriteLock()) { + kvContainer.writeUnlock(); + } + } + return ContainerUtils.getSuccessResponse(request); + } + + /** + * Handles Close Container Request. An open container is closed. + */ + ContainerCommandResponseProto handleCloseContainer( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasCloseContainer()) { + LOG.debug("Malformed Update Container request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + long containerID = kvContainer.getContainerData().getContainerID(); + ContainerLifeCycleState containerState = kvContainer.getContainerState(); + + try { + if (containerState == ContainerLifeCycleState.CLOSED) { + LOG.debug("Container {} is already closed.", containerID); + return ContainerUtils.getSuccessResponse(request); + } else if (containerState == ContainerLifeCycleState.INVALID) { + LOG.debug("Invalid container data. ContainerID: {}", containerID); + throw new StorageContainerException("Invalid container data. " + + "ContainerID: " + containerID, INVALID_CONTAINER_STATE); + } + + KeyValueContainerData kvData = kvContainer.getContainerData(); + + // remove the container from open block map once, all the blocks + // have been committed and the container is closed + kvData.setState(ContainerProtos.ContainerLifeCycleState.CLOSING); + commitPendingKeys(kvContainer); + kvContainer.close(); + // make sure the the container open keys from BlockMap gets removed + openContainerBlockMap.removeContainer(kvData.getContainerID()); + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Close Container failed", ex, + IO_EXCEPTION), request); + } + + return ContainerUtils.getSuccessResponse(request); + } + + /** + * Handle Put Key operation. Calls KeyManager to process the request. + */ + ContainerCommandResponseProto handlePutKey( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + long blockLength; + if (!request.hasPutKey()) { + LOG.debug("Malformed Put Key request. 
trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + try { + checkContainerOpen(kvContainer); + + KeyData keyData = KeyData.getFromProtoBuf( + request.getPutKey().getKeyData()); + long numBytes = keyData.getProtoBufMessage().toByteArray().length; + blockLength = commitKey(keyData, kvContainer); + metrics.incContainerBytesStats(Type.PutKey, numBytes); + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Put Key failed", ex, IO_EXCEPTION), + request); + } + + return KeyUtils.putKeyResponseSuccess(request, blockLength); + } + + private void commitPendingKeys(KeyValueContainer kvContainer) + throws IOException { + long containerId = kvContainer.getContainerData().getContainerID(); + List pendingKeys = + this.openContainerBlockMap.getOpenKeys(containerId); + for(KeyData keyData : pendingKeys) { + commitKey(keyData, kvContainer); + } + } + + private long commitKey(KeyData keyData, KeyValueContainer kvContainer) + throws IOException { + Preconditions.checkNotNull(keyData); + long length = keyManager.putKey(kvContainer, keyData); + //update the open key Map in containerManager + this.openContainerBlockMap.removeFromKeyMap(keyData.getBlockID()); + return length; + } + /** + * Handle Get Key operation. Calls KeyManager to process the request. + */ + ContainerCommandResponseProto handleGetKey( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasGetKey()) { + LOG.debug("Malformed Get Key request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + KeyData responseData; + try { + BlockID blockID = BlockID.getFromProtobuf( + request.getGetKey().getBlockID()); + responseData = keyManager.getKey(kvContainer, blockID); + long numBytes = responseData.getProtoBufMessage().toByteArray().length; + metrics.incContainerBytesStats(Type.GetKey, numBytes); + + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Get Key failed", ex, IO_EXCEPTION), + request); + } + + return KeyUtils.getKeyDataResponse(request, responseData); + } + + /** + * Handles GetCommittedBlockLength operation. + * Calls KeyManager to process the request. + */ + ContainerCommandResponseProto handleGetCommittedBlockLength( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + if (!request.hasGetCommittedBlockLength()) { + LOG.debug("Malformed Get Key request. 
trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + long blockLength; + try { + BlockID blockID = BlockID + .getFromProtobuf(request.getGetCommittedBlockLength().getBlockID()); + // Check if it really exists in the openContainerBlockMap + if (openContainerBlockMap.checkIfBlockExists(blockID)) { + String msg = "Block " + blockID + " is not committed yet."; + throw new StorageContainerException(msg, BLOCK_NOT_COMMITTED); + } + blockLength = keyManager.getCommittedBlockLength(kvContainer, blockID); + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("GetCommittedBlockLength failed", ex, + IO_EXCEPTION), request); + } + + return KeyUtils.getBlockLengthResponse(request, blockLength); + } + + /** + * Handle Delete Key operation. Calls KeyManager to process the request. + */ + ContainerCommandResponseProto handleDeleteKey( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasDeleteKey()) { + LOG.debug("Malformed Delete Key request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + try { + checkContainerOpen(kvContainer); + + BlockID blockID = BlockID.getFromProtobuf( + request.getDeleteKey().getBlockID()); + + keyManager.deleteKey(kvContainer, blockID); + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Delete Key failed", ex, IO_EXCEPTION), + request); + } + + return KeyUtils.getKeyResponseSuccess(request); + } + + /** + * Handle Read Chunk operation. Calls ChunkManager to process the request. + */ + ContainerCommandResponseProto handleReadChunk( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasReadChunk()) { + LOG.debug("Malformed Read Chunk request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + ChunkInfo chunkInfo; + byte[] data; + try { + BlockID blockID = BlockID.getFromProtobuf( + request.getReadChunk().getBlockID()); + chunkInfo = ChunkInfo.getFromProtoBuf(request.getReadChunk() + .getChunkData()); + Preconditions.checkNotNull(chunkInfo); + + data = chunkManager.readChunk(kvContainer, blockID, chunkInfo); + metrics.incContainerBytesStats(Type.ReadChunk, data.length); + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Read Chunk failed", ex, IO_EXCEPTION), + request); + } + + return ChunkUtils.getReadChunkResponse(request, data, chunkInfo); + } + + /** + * Handle Delete Chunk operation. Calls ChunkManager to process the request. + */ + ContainerCommandResponseProto handleDeleteChunk( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasDeleteChunk()) { + LOG.debug("Malformed Delete Chunk request. 
trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + try { + checkContainerOpen(kvContainer); + + BlockID blockID = BlockID.getFromProtobuf( + request.getDeleteChunk().getBlockID()); + ContainerProtos.ChunkInfo chunkInfoProto = request.getDeleteChunk() + .getChunkData(); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + Preconditions.checkNotNull(chunkInfo); + + chunkManager.deleteChunk(kvContainer, blockID, chunkInfo); + openContainerBlockMap.removeChunk(blockID, chunkInfoProto); + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Delete Chunk failed", ex, + IO_EXCEPTION), request); + } + + return ChunkUtils.getChunkResponseSuccess(request); + } + + /** + * Handle Write Chunk operation. Calls ChunkManager to process the request. + */ + ContainerCommandResponseProto handleWriteChunk( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasWriteChunk()) { + LOG.debug("Malformed Write Chunk request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + try { + checkContainerOpen(kvContainer); + + BlockID blockID = BlockID.getFromProtobuf( + request.getWriteChunk().getBlockID()); + ContainerProtos.ChunkInfo chunkInfoProto = + request.getWriteChunk().getChunkData(); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + Preconditions.checkNotNull(chunkInfo); + + byte[] data = null; + if (request.getWriteChunk().getStage() == Stage.WRITE_DATA || + request.getWriteChunk().getStage() == Stage.COMBINED) { + data = request.getWriteChunk().getData().toByteArray(); + } + + chunkManager.writeChunk(kvContainer, blockID, chunkInfo, data, + request.getWriteChunk().getStage()); + + // We should increment stats after writeChunk + if (request.getWriteChunk().getStage() == Stage.WRITE_DATA || + request.getWriteChunk().getStage() == Stage.COMBINED) { + metrics.incContainerBytesStats(Type.WriteChunk, request.getWriteChunk() + .getChunkData().getLen()); + } + + if (request.getWriteChunk().getStage() == Stage.COMMIT_DATA + || request.getWriteChunk().getStage() == Stage.COMBINED) { + // the openContainerBlockMap should be updated only during + // COMMIT_STAGE of handling write chunk request. + openContainerBlockMap.addChunk(blockID, chunkInfoProto); + } + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Write Chunk failed", ex, IO_EXCEPTION), + request); + } + + return ChunkUtils.getChunkResponseSuccess(request); + } + + /** + * Handle Put Small File operation. Writes the chunk and associated key + * using a single RPC. Calls KeyManager and ChunkManager to process the + * request. + */ + ContainerCommandResponseProto handlePutSmallFile( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasPutSmallFile()) { + LOG.debug("Malformed Put Small File request. 
trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + PutSmallFileRequestProto putSmallFileReq = + request.getPutSmallFile(); + + try { + checkContainerOpen(kvContainer); + + BlockID blockID = BlockID.getFromProtobuf(putSmallFileReq.getKey() + .getKeyData().getBlockID()); + KeyData keyData = KeyData.getFromProtoBuf( + putSmallFileReq.getKey().getKeyData()); + Preconditions.checkNotNull(keyData); + + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf( + putSmallFileReq.getChunkInfo()); + Preconditions.checkNotNull(chunkInfo); + byte[] data = putSmallFileReq.getData().toByteArray(); + // chunks will be committed as a part of handling putSmallFile + // here. There is no need to maintain this info in openContainerBlockMap. + chunkManager.writeChunk( + kvContainer, blockID, chunkInfo, data, Stage.COMBINED); + + List chunks = new LinkedList<>(); + chunks.add(chunkInfo.getProtoBufMessage()); + keyData.setChunks(chunks); + keyManager.putKey(kvContainer, keyData); + metrics.incContainerBytesStats(Type.PutSmallFile, data.length); + + } catch (StorageContainerException ex) { + return ContainerUtils.logAndReturnError(LOG, ex, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Read Chunk failed", ex, + PUT_SMALL_FILE_ERROR), request); + } + + return SmallFileUtils.getPutFileResponseSuccess(request); + } + + /** + * Handle Get Small File operation. Gets a data stream using a key. This + * helps in reducing the RPC overhead for small files. Calls KeyManager and + * ChunkManager to process the request. + */ + ContainerCommandResponseProto handleGetSmallFile( + ContainerCommandRequestProto request, KeyValueContainer kvContainer) { + + if (!request.hasGetSmallFile()) { + LOG.debug("Malformed Get Small File request. trace ID: {}", + request.getTraceID()); + return ContainerUtils.malformedRequest(request); + } + + GetSmallFileRequestProto getSmallFileReq = request.getGetSmallFile(); + + try { + BlockID blockID = BlockID.getFromProtobuf(getSmallFileReq.getKey() + .getBlockID()); + KeyData responseData = keyManager.getKey(kvContainer, blockID); + + ContainerProtos.ChunkInfo chunkInfo = null; + ByteString dataBuf = ByteString.EMPTY; + for (ContainerProtos.ChunkInfo chunk : responseData.getChunks()) { + byte[] data = chunkManager.readChunk(kvContainer, blockID, + ChunkInfo.getFromProtoBuf(chunk)); + ByteString current = ByteString.copyFrom(data); + dataBuf = dataBuf.concat(current); + chunkInfo = chunk; + } + metrics.incContainerBytesStats(Type.GetSmallFile, dataBuf.size()); + return SmallFileUtils.getGetSmallFileResponseSuccess(request, dataBuf + .toByteArray(), ChunkInfo.getFromProtoBuf(chunkInfo)); + } catch (StorageContainerException e) { + return ContainerUtils.logAndReturnError(LOG, e, request); + } catch (IOException ex) { + return ContainerUtils.logAndReturnError(LOG, + new StorageContainerException("Write Chunk failed", ex, + GET_SMALL_FILE_ERROR), request); + } + } + + /** + * Handle unsupported operation. + */ + ContainerCommandResponseProto handleUnsupportedOp( + ContainerCommandRequestProto request) { + // TODO : remove all unsupported operations or handle them. + return ContainerUtils.unsupportedRequest(request); + } + + /** + * Check if container is open. Throw exception otherwise. 
+ * @param kvContainer + * @throws StorageContainerException + */ + private void checkContainerOpen(KeyValueContainer kvContainer) + throws StorageContainerException { + + ContainerLifeCycleState containerState = kvContainer.getContainerState(); + + if (containerState == ContainerLifeCycleState.OPEN) { + return; + } else { + String msg = "Requested operation not allowed as ContainerState is " + + containerState; + ContainerProtos.Result result = null; + switch (containerState) { + case CLOSING: + case CLOSED: + result = CLOSED_CONTAINER_IO; + break; + case INVALID: + result = INVALID_CONTAINER_STATE; + break; + default: + result = CONTAINER_INTERNAL_ERROR; + } + + throw new StorageContainerException(msg, result); + } + } + + public Container importContainer(long containerID, long maxSize, + FileInputStream rawContainerStream, + TarContainerPacker packer) + throws IOException { + + KeyValueContainerData containerData = + new KeyValueContainerData(containerID, + maxSize); + + KeyValueContainer container = new KeyValueContainer(containerData, + conf); + + populateContainerPathFields(container, maxSize); + container.importContainerData(rawContainerStream, packer); + return container; + + } +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java new file mode 100644 index 00000000000..13689a705ce --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/TarContainerPacker.java @@ -0,0 +1,249 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.container.keyvalue; + +import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Collectors; + +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerPacker; + +import com.google.common.base.Preconditions; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.io.IOUtils; + +/** + * Compress/uncompress KeyValueContainer data to a tar.gz archive. + */ +public class TarContainerPacker + implements ContainerPacker { + + private static final String CHUNKS_DIR_NAME = OzoneConsts.STORAGE_DIR_CHUNKS; + + private static final String DB_DIR_NAME = "db"; + + private static final String CONTAINER_FILE_NAME = "container.yaml"; + + + + /** + * Given an input stream (tar file) extract the data to the specified + * directories. + * + * @param container container which defines the destination structure. + * @param inputStream the input stream. + * @throws IOException + */ + @Override + public byte[] unpackContainerData(Container container, + InputStream inputStream) + throws IOException { + byte[] descriptorFileContent = null; + try { + KeyValueContainerData containerData = container.getContainerData(); + CompressorInputStream compressorInputStream = + new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.GZIP, + inputStream); + + TarArchiveInputStream tarInput = + new TarArchiveInputStream(compressorInputStream); + + TarArchiveEntry entry = tarInput.getNextTarEntry(); + while (entry != null) { + String name = entry.getName(); + if (name.startsWith(DB_DIR_NAME + "/")) { + Path destinationPath = containerData.getDbFile().toPath() + .resolve(name.substring(DB_DIR_NAME.length() + 1)); + extractEntry(tarInput, entry.getSize(), destinationPath); + } else if (name.startsWith(CHUNKS_DIR_NAME + "/")) { + Path destinationPath = Paths.get(containerData.getChunksPath()) + .resolve(name.substring(CHUNKS_DIR_NAME.length() + 1)); + extractEntry(tarInput, entry.getSize(), destinationPath); + } else if (name.equals(CONTAINER_FILE_NAME)) { + //Don't do anything. Container file should be unpacked in a + //separated step by unpackContainerDescriptor call. 
+ descriptorFileContent = readEntry(tarInput, entry); + } else { + throw new IllegalArgumentException( + "Unknown entry in the tar file: " + "" + name); + } + entry = tarInput.getNextTarEntry(); + } + return descriptorFileContent; + + } catch (CompressorException e) { + throw new IOException( + "Can't uncompress the given container: " + container + .getContainerData().getContainerID(), + e); + } + } + + private void extractEntry(TarArchiveInputStream tarInput, long size, + Path path) throws IOException { + Preconditions.checkNotNull(path, "Path element should not be null"); + Path parent = Preconditions.checkNotNull(path.getParent(), + "Path element should have a parent directory"); + Files.createDirectories(parent); + try (BufferedOutputStream bos = new BufferedOutputStream( + new FileOutputStream(path.toAbsolutePath().toString()))) { + int bufferSize = 1024; + byte[] buffer = new byte[bufferSize + 1]; + long remaining = size; + while (remaining > 0) { + int read = + tarInput.read(buffer, 0, (int) Math.min(remaining, bufferSize)); + if (read >= 0) { + remaining -= read; + bos.write(buffer, 0, read); + } else { + remaining = 0; + } + } + } + + } + + /** + * Given a containerData include all the required container data/metadata + * in a tar file. + * + * @param container Container to archive (data + metadata). + * @param destination Destination tar file/stream. + * @throws IOException + */ + @Override + public void pack(Container container, + OutputStream destination) + throws IOException { + + KeyValueContainerData containerData = container.getContainerData(); + + try (CompressorOutputStream gzippedOut = new CompressorStreamFactory() + .createCompressorOutputStream(CompressorStreamFactory.GZIP, + destination)) { + + try (ArchiveOutputStream archiveOutputStream = new TarArchiveOutputStream( + gzippedOut)) { + + includePath(containerData.getDbFile().toString(), DB_DIR_NAME, + archiveOutputStream); + + includePath(containerData.getChunksPath(), CHUNKS_DIR_NAME, + archiveOutputStream); + + includeFile(container.getContainerFile(), + CONTAINER_FILE_NAME, + archiveOutputStream); + } + } catch (CompressorException e) { + throw new IOException( + "Can't compress the container: " + containerData.getContainerID(), + e); + } + + } + + @Override + public byte[] unpackContainerDescriptor(InputStream inputStream) + throws IOException { + try { + CompressorInputStream compressorInputStream = + new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.GZIP, + inputStream); + + TarArchiveInputStream tarInput = + new TarArchiveInputStream(compressorInputStream); + + TarArchiveEntry entry = tarInput.getNextTarEntry(); + while (entry != null) { + String name = entry.getName(); + if (name.equals(CONTAINER_FILE_NAME)) { + return readEntry(tarInput, entry); + } + entry = tarInput.getNextTarEntry(); + } + + } catch (CompressorException e) { + throw new IOException( + "Can't read the container descriptor from the container archive", + e); + } + throw new IOException( + "Container descriptor is missing from the container archive."); + } + + private byte[] readEntry(TarArchiveInputStream tarInput, + TarArchiveEntry entry) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int bufferSize = 1024; + byte[] buffer = new byte[bufferSize + 1]; + long remaining = entry.getSize(); + while (remaining > 0) { + int read = + tarInput.read(buffer, 0, (int) Math.min(remaining, bufferSize)); + remaining -= read; + bos.write(buffer, 0, read); + } + return bos.toByteArray(); 
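
// Illustrative sketch, not from the patch itself: extractEntry and readEntry
// above both copy a tar entry in bounded slices; extractEntry guards against a
// truncated stream (read < 0) while readEntry does not. A reusable variant of
// that loop with an explicit end-of-stream check could look like this (the
// helper name is hypothetical).
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

final class BoundedEntryCopySketch {
  private BoundedEntryCopySketch() { }

  /** Reads exactly {@code size} bytes from {@code in}, failing fast on EOF. */
  static byte[] readExactly(InputStream in, long size) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    byte[] buffer = new byte[1024];
    long remaining = size;
    while (remaining > 0) {
      int read = in.read(buffer, 0, (int) Math.min(remaining, buffer.length));
      if (read < 0) {
        throw new EOFException("Archive entry ended after "
            + (size - remaining) + " of " + size + " bytes");
      }
      bos.write(buffer, 0, read);
      remaining -= read;
    }
    return bos.toByteArray();
  }
}
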
+ } + + private void includePath(String containerPath, String subdir, + ArchiveOutputStream archiveOutputStream) throws IOException { + + for (Path path : Files.list(Paths.get(containerPath)) + .collect(Collectors.toList())) { + + includeFile(path.toFile(), subdir + "/" + path.getFileName(), + archiveOutputStream); + } + } + + private void includeFile(File file, String entryName, + ArchiveOutputStream archiveOutputStream) throws IOException { + ArchiveEntry archiveEntry = + archiveOutputStream.createArchiveEntry(file, entryName); + archiveOutputStream.putArchiveEntry(archiveEntry); + try (FileInputStream fis = new FileInputStream(file)) { + IOUtils.copy(fis, archiveOutputStream); + } + archiveOutputStream.closeArchiveEntry(); + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ChunkUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java similarity index 73% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ChunkUtils.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java index eba85947cfe..62e328eac95 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ChunkUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java @@ -15,18 +15,29 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.ozone.container.common.helpers; + +package org.apache.hadoop.ozone.container.keyvalue.helpers; import com.google.common.base.Preconditions; -import org.apache.ratis.shaded.com.google.protobuf.ByteString; import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ReadChunkResponseProto; import org.apache.hadoop.hdds.scm.container.common.helpers .StorageContainerException; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.common.impl.ChunkManagerImpl; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.impl.ChunkManagerImpl; +import org.apache.ratis.shaded.com.google.protobuf.ByteString; +import org.apache.hadoop.ozone.container.common.volume.VolumeIOStats; +import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,121 +51,15 @@ import java.security.NoSuchAlgorithmException; import java.util.concurrent.ExecutionException; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CHECKSUM_MISMATCH; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CONTAINER_INTERNAL_ERROR; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.CONTAINER_NOT_FOUND; 
-import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.INVALID_WRITE_SIZE; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.IO_EXCEPTION; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.OVERWRITE_FLAG_REQUIRED; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.UNABLE_TO_FIND_CHUNK; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .Result.UNABLE_TO_FIND_DATA_DIR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.*; /** - * Set of utility functions used by the chunk Manager. + * Utility methods for chunk operations for KeyValue container. */ public final class ChunkUtils { - /* Never constructed. */ + /** Never constructed. **/ private ChunkUtils() { - } - - /** - * Checks if we are getting a request to overwrite an existing range of - * chunk. - * - * @param chunkFile - File - * @param chunkInfo - Buffer to write - * @return bool - */ - public static boolean isOverWriteRequested(File chunkFile, ChunkInfo - chunkInfo) { - - if (!chunkFile.exists()) { - return false; - } - - long offset = chunkInfo.getOffset(); - return offset < chunkFile.length(); - } - - /** - * Overwrite is permitted if an only if the user explicitly asks for it. We - * permit this iff the key/value pair contains a flag called - * [OverWriteRequested, true]. - * - * @param chunkInfo - Chunk info - * @return true if the user asks for it. - */ - public static boolean isOverWritePermitted(ChunkInfo chunkInfo) { - String overWrite = chunkInfo.getMetadata().get(OzoneConsts.CHUNK_OVERWRITE); - return (overWrite != null) && - (!overWrite.isEmpty()) && - (Boolean.valueOf(overWrite)); - } - - /** - * Validates chunk data and returns a file object to Chunk File that we are - * expected to write data to. - * - * @param data - container data. - * @param info - chunk info. - * @return File - * @throws StorageContainerException - */ - public static File validateChunk(ContainerData data, - ChunkInfo info) throws StorageContainerException { - - Logger log = LoggerFactory.getLogger(ChunkManagerImpl.class); - - File chunkFile = getChunkFile(data, info); - if (ChunkUtils.isOverWriteRequested(chunkFile, info)) { - if (!ChunkUtils.isOverWritePermitted(info)) { - log.error("Rejecting write chunk request. Chunk overwrite " + - "without explicit request. {}", info.toString()); - throw new StorageContainerException("Rejecting write chunk request. " + - "OverWrite flag required." + info.toString(), - OVERWRITE_FLAG_REQUIRED); - } - } - return chunkFile; - } - - /** - * Validates that Path to chunk file exists. - * - * @param data - Container Data - * @param info - Chunk info - * @return - File. 
- * @throws StorageContainerException - */ - public static File getChunkFile(ContainerData data, - ChunkInfo info) throws StorageContainerException { - - Preconditions.checkNotNull(data, "Container data can't be null"); - Logger log = LoggerFactory.getLogger(ChunkManagerImpl.class); - if (data.getContainerID() < 0) { - log.error("Invalid container id: {}", data.getContainerID()); - throw new StorageContainerException("Unable to find the container id:" + - " " + - data.getContainerID(), CONTAINER_NOT_FOUND); - } - - File dataDir = ContainerUtils.getDataDirectory(data).toFile(); - if (!dataDir.exists()) { - log.error("Unable to find the data directory: {}", dataDir); - throw new StorageContainerException("Unable to find the data directory:" + - " " + dataDir, UNABLE_TO_FIND_DATA_DIR); - } - - return dataDir.toPath().resolve(info.getChunkName()).toFile(); } @@ -164,10 +69,11 @@ public static File getChunkFile(ContainerData data, * @param chunkFile - File to write data to. * @param chunkInfo - Data stream to write. * @param data - The data buffer. + * @param volumeIOStats * @throws StorageContainerException */ public static void writeData(File chunkFile, ChunkInfo chunkInfo, - byte[] data) throws + byte[] data, VolumeIOStats volumeIOStats) throws StorageContainerException, ExecutionException, InterruptedException, NoSuchAlgorithmException { @@ -184,6 +90,12 @@ public static void writeData(File chunkFile, ChunkInfo chunkInfo, FileLock lock = null; try { + if (chunkInfo.getChecksum() != null && + !chunkInfo.getChecksum().isEmpty()) { + verifyChecksum(chunkInfo, data, log); + } + + long writeTimeStart = Time.monotonicNow(); file = AsynchronousFileChannel.open(chunkFile.toPath(), StandardOpenOption.CREATE, @@ -191,18 +103,20 @@ public static void writeData(File chunkFile, ChunkInfo chunkInfo, StandardOpenOption.SPARSE, StandardOpenOption.SYNC); lock = file.lock().get(); - if (chunkInfo.getChecksum() != null && - !chunkInfo.getChecksum().isEmpty()) { - verifyChecksum(chunkInfo, data, log); - } int size = file.write(ByteBuffer.wrap(data), chunkInfo.getOffset()).get(); + // Increment volumeIO stats here. + volumeIOStats.incWriteTime(Time.monotonicNow() - writeTimeStart); + volumeIOStats.incWriteOpCount(); + volumeIOStats.incWriteBytes(size); if (size != data.length) { log.error("Invalid write size found. Size:{} Expected: {} ", size, data.length); throw new StorageContainerException("Invalid write size found. " + "Size: " + size + " Expected: " + data.length, INVALID_WRITE_SIZE); } - } catch (IOException e) { + } catch (StorageContainerException ex) { + throw ex; + } catch(IOException e) { throw new StorageContainerException(e, IO_EXCEPTION); } finally { @@ -226,40 +140,20 @@ public static void writeData(File chunkFile, ChunkInfo chunkInfo, } } - /** - * Verifies the checksum of a chunk against the data buffer. - * - * @param chunkInfo - Chunk Info. - * @param data - data buffer - * @param log - log - * @throws NoSuchAlgorithmException - * @throws StorageContainerException - */ - private static void verifyChecksum(ChunkInfo chunkInfo, byte[] data, Logger - log) throws NoSuchAlgorithmException, StorageContainerException { - MessageDigest sha = MessageDigest.getInstance(OzoneConsts.FILE_HASH); - sha.update(data); - if (!Hex.encodeHexString(sha.digest()).equals( - chunkInfo.getChecksum())) { - log.error("Checksum mismatch. Provided: {} , computed: {}", - chunkInfo.getChecksum(), DigestUtils.sha256Hex(sha.digest())); - throw new StorageContainerException("Checksum mismatch. 
Provided: " + - chunkInfo.getChecksum() + " , computed: " + - DigestUtils.sha256Hex(sha.digest()), CHECKSUM_MISMATCH); - } - } - /** * Reads data from an existing chunk file. * * @param chunkFile - file where data lives. * @param data - chunk definition. + * @param volumeIOStats * @return ByteBuffer * @throws StorageContainerException * @throws ExecutionException * @throws InterruptedException */ - public static ByteBuffer readData(File chunkFile, ChunkInfo data) throws + public static ByteBuffer readData(File chunkFile, ChunkInfo data, + VolumeIOStats volumeIOStats) + throws StorageContainerException, ExecutionException, InterruptedException, NoSuchAlgorithmException { Logger log = LoggerFactory.getLogger(ChunkManagerImpl.class); @@ -275,6 +169,7 @@ public static ByteBuffer readData(File chunkFile, ChunkInfo data) throws AsynchronousFileChannel file = null; FileLock lock = null; try { + long readStartTime = Time.monotonicNow(); file = AsynchronousFileChannel.open(chunkFile.toPath(), StandardOpenOption.READ); @@ -283,10 +178,13 @@ public static ByteBuffer readData(File chunkFile, ChunkInfo data) throws ByteBuffer buf = ByteBuffer.allocate((int) data.getLen()); file.read(buf, data.getOffset()).get(); + // Increment volumeIO stats here. + volumeIOStats.incReadTime(Time.monotonicNow() - readStartTime); + volumeIOStats.incReadOpCount(); + volumeIOStats.incReadBytes(data.getLen()); if (data.getChecksum() != null && !data.getChecksum().isEmpty()) { verifyChecksum(data, buf.array(), log); } - return buf; } catch (IOException e) { throw new StorageContainerException(e, IO_EXCEPTION); @@ -304,6 +202,121 @@ public static ByteBuffer readData(File chunkFile, ChunkInfo data) throws } } + /** + * Verifies the checksum of a chunk against the data buffer. + * + * @param chunkInfo - Chunk Info. + * @param data - data buffer + * @param log - log + * @throws NoSuchAlgorithmException + * @throws StorageContainerException + */ + private static void verifyChecksum(ChunkInfo chunkInfo, byte[] data, Logger + log) throws NoSuchAlgorithmException, StorageContainerException { + MessageDigest sha = MessageDigest.getInstance(OzoneConsts.FILE_HASH); + sha.update(data); + if (!Hex.encodeHexString(sha.digest()).equals( + chunkInfo.getChecksum())) { + log.error("Checksum mismatch. Provided: {} , computed: {}", + chunkInfo.getChecksum(), DigestUtils.sha256Hex(sha.digest())); + throw new StorageContainerException("Checksum mismatch. Provided: " + + chunkInfo.getChecksum() + " , computed: " + + DigestUtils.sha256Hex(sha.digest()), CHECKSUM_MISMATCH); + } + } + + /** + * Validates chunk data and returns a file object to Chunk File that we are + * expected to write data to. + * + * @param chunkFile - chunkFile to write data into. + * @param info - chunk info. + * @return boolean isOverwrite + * @throws StorageContainerException + */ + public static boolean validateChunkForOverwrite(File chunkFile, + ChunkInfo info) throws StorageContainerException { + + Logger log = LoggerFactory.getLogger(ChunkManagerImpl.class); + + if (isOverWriteRequested(chunkFile, info)) { + if (!isOverWritePermitted(info)) { + log.error("Rejecting write chunk request. Chunk overwrite " + + "without explicit request. {}", info.toString()); + throw new StorageContainerException("Rejecting write chunk request. " + + "OverWrite flag required." + info.toString(), + OVERWRITE_FLAG_REQUIRED); + } + return true; + } + return false; + } + + /** + * Validates that Path to chunk file exists. 
+ * + * @param containerData - Container Data + * @param info - Chunk info + * @return - File. + * @throws StorageContainerException + */ + public static File getChunkFile(KeyValueContainerData containerData, + ChunkInfo info) throws + StorageContainerException { + + Preconditions.checkNotNull(containerData, "Container data can't be null"); + Logger log = LoggerFactory.getLogger(ChunkManagerImpl.class); + + String chunksPath = containerData.getChunksPath(); + if (chunksPath == null) { + log.error("Chunks path is null in the container data"); + throw new StorageContainerException("Unable to get Chunks directory.", + UNABLE_TO_FIND_DATA_DIR); + } + File chunksLoc = new File(chunksPath); + if (!chunksLoc.exists()) { + log.error("Chunks path does not exist"); + throw new StorageContainerException("Unable to get Chunks directory.", + UNABLE_TO_FIND_DATA_DIR); + } + + return chunksLoc.toPath().resolve(info.getChunkName()).toFile(); + } + + /** + * Checks if we are getting a request to overwrite an existing range of + * chunk. + * + * @param chunkFile - File + * @param chunkInfo - Buffer to write + * @return bool + */ + public static boolean isOverWriteRequested(File chunkFile, ChunkInfo + chunkInfo) { + + if (!chunkFile.exists()) { + return false; + } + + long offset = chunkInfo.getOffset(); + return offset < chunkFile.length(); + } + + /** + * Overwrite is permitted if an only if the user explicitly asks for it. We + * permit this iff the key/value pair contains a flag called + * [OverWriteRequested, true]. + * + * @param chunkInfo - Chunk info + * @return true if the user asks for it. + */ + public static boolean isOverWritePermitted(ChunkInfo chunkInfo) { + String overWrite = chunkInfo.getMetadata().get(OzoneConsts.CHUNK_OVERWRITE); + return (overWrite != null) && + (!overWrite.isEmpty()) && + (Boolean.valueOf(overWrite)); + } + /** * Returns a CreateContainer Response. This call is used by create and delete * containers which have null success responses. @@ -311,9 +324,9 @@ public static ByteBuffer readData(File chunkFile, ChunkInfo data) throws * @param msg Request * @return Response. */ - public static ContainerProtos.ContainerCommandResponseProto - getChunkResponse(ContainerProtos.ContainerCommandRequestProto msg) { - return ContainerUtils.getContainerResponse(msg); + public static ContainerCommandResponseProto getChunkResponseSuccess( + ContainerCommandRequestProto msg) { + return ContainerUtils.getSuccessResponse(msg); } /** @@ -324,20 +337,20 @@ public static ByteBuffer readData(File chunkFile, ChunkInfo data) throws * @param info - Info * @return Response. 
*/ - public static ContainerProtos.ContainerCommandResponseProto - getReadChunkResponse(ContainerProtos.ContainerCommandRequestProto msg, - byte[] data, ChunkInfo info) { + public static ContainerCommandResponseProto getReadChunkResponse( + ContainerCommandRequestProto msg, byte[] data, ChunkInfo info) { Preconditions.checkNotNull(msg); + Preconditions.checkNotNull(data, "Chunk data is null"); + Preconditions.checkNotNull(info, "Chunk Info is null"); - ContainerProtos.ReadChunkResponseProto.Builder response = - ContainerProtos.ReadChunkResponseProto.newBuilder(); + ReadChunkResponseProto.Builder response = + ReadChunkResponseProto.newBuilder(); response.setChunkData(info.getProtoBufMessage()); response.setData(ByteString.copyFrom(data)); response.setBlockID(msg.getReadChunk().getBlockID()); - ContainerProtos.ContainerCommandResponseProto.Builder builder = - ContainerUtils.getContainerResponse(msg, ContainerProtos.Result - .SUCCESS, ""); + ContainerCommandResponseProto.Builder builder = + ContainerUtils.getSuccessResponseBuilder(msg); builder.setReadChunk(response); return builder.build(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyUtils.java new file mode 100644 index 00000000000..a83d298d779 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyUtils.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue.helpers; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .GetKeyResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos. + GetCommittedBlockLengthResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos. 
+ PutKeyResponseProto; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.common.utils.ContainerCache; +import org.apache.hadoop.utils.MetadataStore; + +import java.io.IOException; + +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.NO_SUCH_KEY; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.UNABLE_TO_READ_METADATA_DB; + +/** + * Utils functions to help key functions. + */ +public final class KeyUtils { + + /** Never constructed. **/ + private KeyUtils() { + + } + /** + * Get a DB handler for a given container. + * If the handler doesn't exist in cache yet, first create one and + * add into cache. This function is called with containerManager + * ReadLock held. + * + * @param containerData containerData. + * @param conf configuration. + * @return MetadataStore handle. + * @throws StorageContainerException + */ + public static MetadataStore getDB(KeyValueContainerData containerData, + Configuration conf) throws + StorageContainerException { + Preconditions.checkNotNull(containerData); + ContainerCache cache = ContainerCache.getInstance(conf); + Preconditions.checkNotNull(cache); + Preconditions.checkNotNull(containerData.getDbFile()); + try { + return cache.getDB(containerData.getContainerID(), containerData + .getContainerDBType(), containerData.getDbFile().getAbsolutePath()); + } catch (IOException ex) { + String message = String.format("Error opening DB. Container:%s " + + "ContainerPath:%s", containerData.getContainerID(), containerData + .getDbFile().getPath()); + throw new StorageContainerException(message, UNABLE_TO_READ_METADATA_DB); + } + } + /** + * Remove a DB handler from cache. + * + * @param container - Container data. + * @param conf - Configuration. + */ + public static void removeDB(KeyValueContainerData container, Configuration + conf) { + Preconditions.checkNotNull(container); + ContainerCache cache = ContainerCache.getInstance(conf); + Preconditions.checkNotNull(cache); + cache.removeDB(container.getContainerID()); + } + + /** + * Shutdown all DB Handles. + * + * @param cache - Cache for DB Handles. + */ + @SuppressWarnings("unchecked") + public static void shutdownCache(ContainerCache cache) { + cache.shutdownCache(); + } + + /** + * Parses the {@link KeyData} from a bytes array. + * + * @param bytes key data in bytes. + * @return key data. + * @throws IOException if the bytes array is malformed or invalid. + */ + public static KeyData getKeyData(byte[] bytes) throws IOException { + try { + ContainerProtos.KeyData keyData = ContainerProtos.KeyData.parseFrom( + bytes); + KeyData data = KeyData.getFromProtoBuf(keyData); + return data; + } catch (IOException e) { + throw new StorageContainerException("Failed to parse key data from the" + + " bytes array.", NO_SUCH_KEY); + } + } + + /** + * Returns putKey response success. + * @param msg - Request. + * @return Response. 
+ */ + public static ContainerCommandResponseProto putKeyResponseSuccess( + ContainerCommandRequestProto msg, long blockLength) { + GetCommittedBlockLengthResponseProto.Builder + committedBlockLengthResponseBuilder = + getCommittedBlockLengthResponseBuilder(blockLength, + msg.getPutKey().getKeyData().getBlockID()); + PutKeyResponseProto.Builder putKeyResponse = + PutKeyResponseProto.newBuilder(); + putKeyResponse + .setCommittedBlockLength(committedBlockLengthResponseBuilder); + ContainerProtos.ContainerCommandResponseProto.Builder builder = + ContainerUtils.getSuccessResponseBuilder(msg); + builder.setPutKey(putKeyResponse); + return builder.build(); + } + /** + * Returns successful keyResponse. + * @param msg - Request. + * @return Response. + */ + public static ContainerCommandResponseProto getKeyResponseSuccess( + ContainerCommandRequestProto msg) { + return ContainerUtils.getSuccessResponse(msg); + } + + + public static ContainerCommandResponseProto getKeyDataResponse( + ContainerCommandRequestProto msg, KeyData data) { + GetKeyResponseProto.Builder getKey = ContainerProtos + .GetKeyResponseProto + .newBuilder(); + getKey.setKeyData(data.getProtoBufMessage()); + ContainerProtos.ContainerCommandResponseProto.Builder builder = + ContainerUtils.getSuccessResponseBuilder(msg); + builder.setGetKey(getKey); + return builder.build(); + } + + /** + * Returns successful getCommittedBlockLength Response. + * @param msg - Request. + * @return Response. + */ + public static ContainerCommandResponseProto getBlockLengthResponse( + ContainerCommandRequestProto msg, long blockLength) { + GetCommittedBlockLengthResponseProto.Builder + committedBlockLengthResponseBuilder = + getCommittedBlockLengthResponseBuilder(blockLength, + msg.getGetCommittedBlockLength().getBlockID()); + ContainerProtos.ContainerCommandResponseProto.Builder builder = + ContainerUtils.getSuccessResponseBuilder(msg); + builder.setGetCommittedBlockLength(committedBlockLengthResponseBuilder); + return builder.build(); + } + + private static GetCommittedBlockLengthResponseProto.Builder + getCommittedBlockLengthResponseBuilder( + long blockLength, ContainerProtos.DatanodeBlockID blockID) { + ContainerProtos.GetCommittedBlockLengthResponseProto.Builder + getCommittedBlockLengthResponseBuilder = ContainerProtos. + GetCommittedBlockLengthResponseProto.newBuilder(); + getCommittedBlockLengthResponseBuilder.setBlockLength(blockLength); + getCommittedBlockLengthResponseBuilder.setBlockID(blockID); + return getCommittedBlockLengthResponseBuilder; + } +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java new file mode 100644 index 00000000000..0a81ed8449e --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerLocationUtil.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.keyvalue.helpers; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.common.Storage; + +import java.io.File; + +/** + * Class which provides utility methods for container locations. + */ +public final class KeyValueContainerLocationUtil { + + /* Never constructed. */ + private KeyValueContainerLocationUtil() { + + } + /** + * Returns Container Metadata Location. + * @param hddsVolumeDir base dir of the hdds volume where scm directories + * are stored + * @param scmId + * @param containerId + * @return containerMetadata Path to container metadata location where + * .container file will be stored. + */ + public static File getContainerMetaDataPath(String hddsVolumeDir, String scmId, + long containerId) { + String containerMetaDataPath = getBaseContainerLocation(hddsVolumeDir, scmId, + containerId); + containerMetaDataPath = containerMetaDataPath + File.separator + + OzoneConsts.CONTAINER_META_PATH; + return new File(containerMetaDataPath); + } + + + /** + * Returns Container Chunks Location. + * @param baseDir + * @param scmId + * @param containerId + * @return chunksPath + */ + public static File getChunksLocationPath(String baseDir, String scmId, + long containerId) { + String chunksPath = getBaseContainerLocation(baseDir, scmId, containerId) + + File.separator + OzoneConsts.STORAGE_DIR_CHUNKS; + return new File(chunksPath); + } + + /** + * Returns base directory for specified container. + * @param hddsVolumeDir + * @param scmId + * @param containerId + * @return base directory for container. + */ + private static String getBaseContainerLocation(String hddsVolumeDir, String scmId, + long containerId) { + Preconditions.checkNotNull(hddsVolumeDir, "Base Directory cannot be null"); + Preconditions.checkNotNull(scmId, "scmUuid cannot be null"); + Preconditions.checkState(containerId >= 0, + "Container Id cannot be negative."); + + String containerSubDirectory = getContainerSubDirectory(containerId); + + String containerMetaDataPath = hddsVolumeDir + File.separator + scmId + + File.separator + Storage.STORAGE_DIR_CURRENT + File.separator + + containerSubDirectory + File.separator + containerId; + + return containerMetaDataPath; + } + + /** + * Returns subdirectory, where this container needs to be placed. + * @param containerId + * @return container sub directory + */ + private static String getContainerSubDirectory(long containerId){ + int directory = (int) ((containerId >> 9) & 0xFF); + return Storage.CONTAINER_DIR + directory; + } + + /** + * Return containerDB File. 
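
// Illustrative sketch, not from the patch itself: the shard directory chosen
// by getContainerSubDirectory above is taken from bits 9..16 of the container
// id, so ids 0..511 land in shard 0, 512..1023 in shard 1, and so on, giving
// at most 256 shard directories. A worked example (class name is
// hypothetical):
final class ContainerShardSketch {
  private ContainerShardSketch() { }

  /** Same arithmetic as getContainerSubDirectory, without the prefix. */
  static int shardOf(long containerId) {
    return (int) ((containerId >> 9) & 0xFF);
  }

  public static void main(String[] args) {
    System.out.println(shardOf(511));    // 0
    System.out.println(shardOf(512));    // 1
    System.out.println(shardOf(12345));  // 24
  }
}
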
+ */ + public static File getContainerDBFile(File containerMetaDataPath, + long containerID) { + return new File(containerMetaDataPath, containerID + OzoneConsts + .DN_CONTAINER_DB); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java new file mode 100644 index 00000000000..ed4536f6248 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.keyvalue.helpers; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.utils.MetadataKeyFilters; +import org.apache.hadoop.utils.MetadataStore; +import org.apache.hadoop.utils.MetadataStoreBuilder; + +import com.google.common.base.Preconditions; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class which defines utility methods for KeyValueContainer. + */ + +public final class KeyValueContainerUtil { + + /* Never constructed. */ + private KeyValueContainerUtil() { + + } + + private static final Logger LOG = LoggerFactory.getLogger( + KeyValueContainerUtil.class); + + /** + * creates metadata path, chunks path and metadata DB for the specified + * container. + * + * @param containerMetaDataPath + * @throws IOException + */ + public static void createContainerMetaData(File containerMetaDataPath, File + chunksPath, File dbFile, Configuration conf) throws IOException { + Preconditions.checkNotNull(containerMetaDataPath); + Preconditions.checkNotNull(conf); + + if (!containerMetaDataPath.mkdirs()) { + LOG.error("Unable to create directory for metadata storage. Path: {}", + containerMetaDataPath); + throw new IOException("Unable to create directory for metadata storage." 
+ + " Path: " + containerMetaDataPath); + } + MetadataStore store = MetadataStoreBuilder.newBuilder().setConf(conf) + .setCreateIfMissing(true).setDbFile(dbFile).build(); + + // we close since the SCM pre-creates containers. + // we will open and put Db handle into a cache when keys are being created + // in a container. + + store.close(); + + if (!chunksPath.mkdirs()) { + LOG.error("Unable to create chunks directory Container {}", + chunksPath); + //clean up container metadata path and metadata db + FileUtils.deleteDirectory(containerMetaDataPath); + FileUtils.deleteDirectory(containerMetaDataPath.getParentFile()); + throw new IOException("Unable to create directory for data storage." + + " Path: " + chunksPath); + } + } + + /** + * remove Container if it is empty. + *
+   * There are three things we need to delete.
+   *
+ * 1. Container file and metadata file. 2. The Level DB file 3. The path that + * we created on the data location. + * + * @param containerData - Data of the container to remove. + * @param conf - configuration of the cluster. + * @param forceDelete - whether this container should be deleted forcibly. + * @throws IOException + */ + public static void removeContainer(KeyValueContainerData containerData, + Configuration conf, boolean forceDelete) + throws IOException { + Preconditions.checkNotNull(containerData); + File containerMetaDataPath = new File(containerData + .getMetadataPath()); + File chunksPath = new File(containerData.getChunksPath()); + + // Close the DB connection and remove the DB handler from cache + KeyUtils.removeDB(containerData, conf); + + // Delete the Container MetaData path. + FileUtils.deleteDirectory(containerMetaDataPath); + + //Delete the Container Chunks Path. + FileUtils.deleteDirectory(chunksPath); + + //Delete Container directory + FileUtils.deleteDirectory(containerMetaDataPath.getParentFile()); + } + + /** + * Returns a ReadContainer Response. + * + * @param request Request + * @param containerData - data + * @return Response. + */ + public static ContainerCommandResponseProto getReadContainerResponse( + ContainerCommandRequestProto request, + KeyValueContainerData containerData) { + Preconditions.checkNotNull(containerData); + + ContainerProtos.ReadContainerResponseProto.Builder response = + ContainerProtos.ReadContainerResponseProto.newBuilder(); + response.setContainerData(containerData.getProtoBufMessage()); + + ContainerCommandResponseProto.Builder builder = + ContainerUtils.getSuccessResponseBuilder(request); + builder.setReadContainer(response); + return builder.build(); + } + + /** + * Parse KeyValueContainerData and verify checksum. + * @param kvContainerData + * @param config + * @throws IOException + */ + public static void parseKVContainerData(KeyValueContainerData kvContainerData, + Configuration config) throws IOException { + + long containerID = kvContainerData.getContainerID(); + File metadataPath = new File(kvContainerData.getMetadataPath()); + + // Verify Checksum + ContainerUtils.verifyChecksum(kvContainerData); + + File dbFile = KeyValueContainerLocationUtil.getContainerDBFile( + metadataPath, containerID); + if (!dbFile.exists()) { + LOG.error("Container DB file is missing for ContainerID {}. " + + "Skipping loading of this container.", containerID); + // Don't further process this container, as it is missing db file. + return; + } + kvContainerData.setDbFile(dbFile); + + MetadataStore metadata = KeyUtils.getDB(kvContainerData, config); + long bytesUsed = 0; + List> liveKeys = metadata + .getRangeKVs(null, Integer.MAX_VALUE, + MetadataKeyFilters.getNormalKeyFilter()); + bytesUsed = liveKeys.parallelStream().mapToLong(e-> { + KeyData keyData; + try { + keyData = KeyUtils.getKeyData(e.getValue()); + return keyData.getSize(); + } catch (IOException ex) { + return 0L; + } + }).sum(); + kvContainerData.setBytesUsed(bytesUsed); + kvContainerData.setKeyCount(liveKeys.size()); + } + + /** + * Returns the path where data or chunks live for a given container. 
+ * + * @param kvContainerData - KeyValueContainerData + * @return - Path to the chunks directory + */ + public static Path getDataDirectory(KeyValueContainerData kvContainerData) { + + String chunksPath = kvContainerData.getChunksPath(); + Preconditions.checkNotNull(chunksPath); + + return Paths.get(chunksPath); + } + + /** + * Container metadata directory -- here is where the level DB and + * .container file lives. + * + * @param kvContainerData - KeyValueContainerData + * @return Path to the metadata directory + */ + public static Path getMetadataDirectory( + KeyValueContainerData kvContainerData) { + + String metadataPath = kvContainerData.getMetadataPath(); + Preconditions.checkNotNull(metadataPath); + + return Paths.get(metadataPath); + + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/FileUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/SmallFileUtils.java similarity index 71% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/FileUtils.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/SmallFileUtils.java index a2875befc63..df60c605bdd 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/FileUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/SmallFileUtils.java @@ -15,21 +15,27 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.ozone.container.common.helpers; +package org.apache.hadoop.ozone.container.keyvalue.helpers; import com.google.common.base.Preconditions; -import org.apache.ratis.shaded.com.google.protobuf.ByteString; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.ratis.shaded.com.google.protobuf.ByteString; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandResponseProto; /** * File Utils are helper routines used by putSmallFile and getSmallFile * RPCs. */ -public final class FileUtils { +public final class SmallFileUtils { /** * Never Constructed. 
*/ - private FileUtils() { + private SmallFileUtils() { } /** @@ -37,13 +43,12 @@ private FileUtils() { * @param msg - ContainerCommandRequestProto * @return - ContainerCommandResponseProto */ - public static ContainerProtos.ContainerCommandResponseProto - getPutFileResponse(ContainerProtos.ContainerCommandRequestProto msg) { + public static ContainerCommandResponseProto getPutFileResponseSuccess( + ContainerCommandRequestProto msg) { ContainerProtos.PutSmallFileResponseProto.Builder getResponse = ContainerProtos.PutSmallFileResponseProto.newBuilder(); - ContainerProtos.ContainerCommandResponseProto.Builder builder = - ContainerUtils.getContainerResponse(msg, ContainerProtos.Result - .SUCCESS, ""); + ContainerCommandResponseProto.Builder builder = + ContainerUtils.getSuccessResponseBuilder(msg); builder.setCmdType(ContainerProtos.Type.PutSmallFile); builder.setPutSmallFile(getResponse); return builder.build(); @@ -56,24 +61,21 @@ private FileUtils() { * @param info - Info * @return Response. */ - public static ContainerProtos.ContainerCommandResponseProto - getGetSmallFileResponse(ContainerProtos.ContainerCommandRequestProto msg, - byte[] data, ChunkInfo info) { + public static ContainerCommandResponseProto getGetSmallFileResponseSuccess( + ContainerCommandRequestProto msg, byte[] data, ChunkInfo info) { Preconditions.checkNotNull(msg); ContainerProtos.ReadChunkResponseProto.Builder readChunkresponse = ContainerProtos.ReadChunkResponseProto.newBuilder(); readChunkresponse.setChunkData(info.getProtoBufMessage()); readChunkresponse.setData(ByteString.copyFrom(data)); - readChunkresponse.setBlockID(msg.getGetSmallFile().getKey(). - getKeyData().getBlockID()); + readChunkresponse.setBlockID(msg.getGetSmallFile().getKey().getBlockID()); ContainerProtos.GetSmallFileResponseProto.Builder getSmallFile = ContainerProtos.GetSmallFileResponseProto.newBuilder(); getSmallFile.setData(readChunkresponse.build()); - ContainerProtos.ContainerCommandResponseProto.Builder builder = - ContainerUtils.getContainerResponse(msg, ContainerProtos.Result - .SUCCESS, ""); + ContainerCommandResponseProto.Builder builder = + ContainerUtils.getSuccessResponseBuilder(msg); builder.setCmdType(ContainerProtos.Type.GetSmallFile); builder.setGetSmallFile(getSmallFile); return builder.build(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/package-info.java new file mode 100644 index 00000000000..041f485deae --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.container.keyvalue.helpers; +/** + This package contains utility classes for KeyValue container type. + **/ \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java new file mode 100644 index 00000000000..ce317bd3edc --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue.impl; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeIOStats; +import org.apache.hadoop.ozone.container.keyvalue.helpers.ChunkUtils; +import org.apache.hadoop.ozone.container.common.impl.ChunkLayOutVersion; +import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.security.NoSuchAlgorithmException; +import java.util.concurrent.ExecutionException; + +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.CONTAINER_INTERNAL_ERROR; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .Result.NO_SUCH_ALGORITHM; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNSUPPORTED_REQUEST; + +/** + * This class is for performing chunk related operations. + */ +public class ChunkManagerImpl implements ChunkManager { + static final Logger LOG = LoggerFactory.getLogger(ChunkManagerImpl.class); + + /** + * writes a given chunk. 
+ * + * @param container - Container for the chunk + * @param blockID - ID of the block + * @param info - ChunkInfo + * @param data - data of the chunk + * @param stage - Stage of the Chunk operation + * @throws StorageContainerException + */ + public void writeChunk(Container container, BlockID blockID, ChunkInfo info, + byte[] data, ContainerProtos.Stage stage) + throws StorageContainerException { + + try { + + KeyValueContainerData containerData = (KeyValueContainerData) container + .getContainerData(); + HddsVolume volume = containerData.getVolume(); + VolumeIOStats volumeIOStats = volume.getVolumeIOStats(); + + File chunkFile = ChunkUtils.getChunkFile(containerData, info); + + boolean isOverwrite = ChunkUtils.validateChunkForOverwrite( + chunkFile, info); + File tmpChunkFile = getTmpChunkFile(chunkFile, info); + + LOG.debug("writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file", + info.getChunkName(), stage, chunkFile, tmpChunkFile); + + switch (stage) { + case WRITE_DATA: + // Initially writes to temporary chunk file. + ChunkUtils.writeData(tmpChunkFile, info, data, volumeIOStats); + // No need to increment container stats here, as still data is not + // committed here. + break; + case COMMIT_DATA: + // commit the data, means move chunk data from temporary chunk file + // to actual chunk file. + commitChunk(tmpChunkFile, chunkFile); + // Increment container stats here, as we commit the data. + containerData.incrBytesUsed(info.getLen()); + containerData.incrWriteCount(); + containerData.incrWriteBytes(info.getLen()); + break; + case COMBINED: + // directly write to the chunk file + ChunkUtils.writeData(chunkFile, info, data, volumeIOStats); + if (!isOverwrite) { + containerData.incrBytesUsed(info.getLen()); + } + containerData.incrWriteCount(); + containerData.incrWriteBytes(info.getLen()); + break; + default: + throw new IOException("Can not identify write operation."); + } + } catch (StorageContainerException ex) { + throw ex; + } catch (NoSuchAlgorithmException ex) { + LOG.error("write data failed. error: {}", ex); + throw new StorageContainerException("Internal error: ", ex, + NO_SUCH_ALGORITHM); + } catch (ExecutionException | IOException ex) { + LOG.error("write data failed. error: {}", ex); + throw new StorageContainerException("Internal error: ", ex, + CONTAINER_INTERNAL_ERROR); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("write data failed. error: {}", e); + throw new StorageContainerException("Internal error: ", e, + CONTAINER_INTERNAL_ERROR); + } + } + + /** + * reads the data defined by a chunk. + * + * @param container - Container for the chunk + * @param blockID - ID of the block. + * @param info - ChunkInfo. + * @return byte array + * @throws StorageContainerException + * TODO: Right now we do not support partial reads and writes of chunks. + * TODO: Explore if we need to do that for ozone. + */ + public byte[] readChunk(Container container, BlockID blockID, ChunkInfo info) + throws StorageContainerException { + try { + KeyValueContainerData containerData = (KeyValueContainerData) container + .getContainerData(); + ByteBuffer data; + HddsVolume volume = containerData.getVolume(); + VolumeIOStats volumeIOStats = volume.getVolumeIOStats(); + + // Checking here, which layout version the container is, and reading + // the chunk file in that format. + // In version1, we verify checksum if it is available and return data + // of the chunk file. 
+ if (containerData.getLayOutVersion() == ChunkLayOutVersion + .getLatestVersion().getVersion()) { + File chunkFile = ChunkUtils.getChunkFile(containerData, info); + data = ChunkUtils.readData(chunkFile, info, volumeIOStats); + containerData.incrReadCount(); + long length = chunkFile.length(); + containerData.incrReadBytes(length); + return data.array(); + } + } catch(NoSuchAlgorithmException ex) { + LOG.error("read data failed. error: {}", ex); + throw new StorageContainerException("Internal error: ", + ex, NO_SUCH_ALGORITHM); + } catch (ExecutionException ex) { + LOG.error("read data failed. error: {}", ex); + throw new StorageContainerException("Internal error: ", + ex, CONTAINER_INTERNAL_ERROR); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("read data failed. error: {}", e); + throw new StorageContainerException("Internal error: ", + e, CONTAINER_INTERNAL_ERROR); + } + return null; + } + + /** + * Deletes a given chunk. + * + * @param container - Container for the chunk + * @param blockID - ID of the block + * @param info - Chunk Info + * @throws StorageContainerException + */ + public void deleteChunk(Container container, BlockID blockID, ChunkInfo info) + throws StorageContainerException { + Preconditions.checkNotNull(blockID, "Block ID cannot be null."); + KeyValueContainerData containerData = (KeyValueContainerData) container + .getContainerData(); + // Checking here, which layout version the container is, and performing + // deleting chunk operation. + // In version1, we have only chunk file. + if (containerData.getLayOutVersion() == ChunkLayOutVersion + .getLatestVersion().getVersion()) { + File chunkFile = ChunkUtils.getChunkFile(containerData, info); + if ((info.getOffset() == 0) && (info.getLen() == chunkFile.length())) { + FileUtil.fullyDelete(chunkFile); + containerData.decrBytesUsed(chunkFile.length()); + } else { + LOG.error("Not Supported Operation. Trying to delete a " + + "chunk that is in shared file. chunk info : " + info.toString()); + throw new StorageContainerException("Not Supported Operation. " + + "Trying to delete a chunk that is in shared file. chunk info : " + + info.toString(), UNSUPPORTED_REQUEST); + } + } + } + + /** + * Shutdown the chunkManager. + * + * In the chunkManager we haven't acquired any resources, so nothing to do + * here. + */ + + public void shutdown() { + //TODO: need to revisit this during integration of container IO. + } + + /** + * Returns the temporary chunkFile path. + * @param chunkFile + * @param info + * @return temporary chunkFile path + * @throws StorageContainerException + */ + private File getTmpChunkFile(File chunkFile, ChunkInfo info) + throws StorageContainerException { + return new File(chunkFile.getParent(), + chunkFile.getName() + + OzoneConsts.CONTAINER_CHUNK_NAME_DELIMITER + + OzoneConsts.CONTAINER_TEMPORARY_CHUNK_PREFIX); + } + + /** + * Commit the chunk by renaming the temporary chunk file to chunk file. 
+ * @param tmpChunkFile + * @param chunkFile + * @throws IOException + */ + private void commitChunk(File tmpChunkFile, File chunkFile) throws + IOException { + Files.move(tmpChunkFile.toPath(), chunkFile.toPath(), + StandardCopyOption.REPLACE_EXISTING); + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/KeyManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/KeyManagerImpl.java new file mode 100644 index 00000000000..6370f8eca47 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/KeyManagerImpl.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue.impl; + +import com.google.common.base.Preconditions; +import com.google.common.primitives.Longs; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; + +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyUtils; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.keyvalue.interfaces.KeyManager; +import org.apache.hadoop.ozone.container.common.utils.ContainerCache; +import org.apache.hadoop.utils.MetadataStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.NO_SUCH_KEY; + +/** + * This class is for performing key related operations on the KeyValue + * Container. + */ +public class KeyManagerImpl implements KeyManager { + + static final Logger LOG = LoggerFactory.getLogger(KeyManagerImpl.class); + + private Configuration config; + + /** + * Constructs a key Manager. + * + * @param conf - Ozone configuration + */ + public KeyManagerImpl(Configuration conf) { + Preconditions.checkNotNull(conf, "Config cannot be null"); + this.config = conf; + } + + /** + * Puts or overwrites a key. + * + * @param container - Container for which key need to be added. + * @param data - Key Data. + * @return length of the key. 
+ * @throws IOException + */ + public long putKey(Container container, KeyData data) throws IOException { + Preconditions.checkNotNull(data, "KeyData cannot be null for put " + + "operation."); + Preconditions.checkState(data.getContainerID() >= 0, "Container Id " + + "cannot be negative"); + // We are not locking the key manager since LevelDb serializes all actions + // against a single DB. We rely on DB level locking to avoid conflicts. + MetadataStore db = KeyUtils.getDB((KeyValueContainerData) container + .getContainerData(), config); + + // This is a post condition that acts as a hint to the user. + // Should never fail. + Preconditions.checkNotNull(db, "DB cannot be null here"); + db.put(Longs.toByteArray(data.getLocalID()), data.getProtoBufMessage() + .toByteArray()); + + // Increment keycount here + container.getContainerData().incrKeyCount(); + return data.getSize(); + } + + /** + * Gets an existing key. + * + * @param container - Container from which key need to be get. + * @param blockID - BlockID of the key. + * @return Key Data. + * @throws IOException + */ + public KeyData getKey(Container container, BlockID blockID) + throws IOException { + Preconditions.checkNotNull(blockID, + "BlockID cannot be null in GetKet request"); + Preconditions.checkNotNull(blockID.getContainerID(), + "Container name cannot be null"); + + KeyValueContainerData containerData = (KeyValueContainerData) container + .getContainerData(); + MetadataStore db = KeyUtils.getDB(containerData, config); + // This is a post condition that acts as a hint to the user. + // Should never fail. + Preconditions.checkNotNull(db, "DB cannot be null here"); + byte[] kData = db.get(Longs.toByteArray(blockID.getLocalID())); + if (kData == null) { + throw new StorageContainerException("Unable to find the key.", + NO_SUCH_KEY); + } + ContainerProtos.KeyData keyData = ContainerProtos.KeyData.parseFrom(kData); + return KeyData.getFromProtoBuf(keyData); + } + + /** + * Returns the length of the committed block. + * + * @param container - Container from which key need to be get. + * @param blockID - BlockID of the key. + * @return length of the block. + * @throws IOException in case, the block key does not exist in db. + */ + @Override + public long getCommittedBlockLength(Container container, BlockID blockID) + throws IOException { + KeyValueContainerData containerData = (KeyValueContainerData) container + .getContainerData(); + MetadataStore db = KeyUtils.getDB(containerData, config); + // This is a post condition that acts as a hint to the user. + // Should never fail. + Preconditions.checkNotNull(db, "DB cannot be null here"); + byte[] kData = db.get(Longs.toByteArray(blockID.getLocalID())); + if (kData == null) { + throw new StorageContainerException("Unable to find the key.", + NO_SUCH_KEY); + } + ContainerProtos.KeyData keyData = ContainerProtos.KeyData.parseFrom(kData); + return keyData.getSize(); + } + + /** + * Deletes an existing Key. + * + * @param container - Container from which key need to be deleted. + * @param blockID - ID of the block. 
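A short sketch, assuming container refers to an open KeyValueContainer whose metadata DB already exists, of how a key round-trips through the MetadataStore used above: putKey serializes the KeyData protobuf under Longs.toByteArray(localID), and getKey reads the same entry back.

import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.container.common.helpers.KeyData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.keyvalue.impl.KeyManagerImpl;

public final class KeyRoundTripSketch {
  public static KeyData putAndGet(Container container) throws Exception {
    OzoneConfiguration conf = new OzoneConfiguration();
    KeyManagerImpl keyManager = new KeyManagerImpl(conf);
    BlockID blockID = new BlockID(1L, 100L);   // assumed container and local IDs
    keyManager.putKey(container, new KeyData(blockID));
    // Reads the serialized KeyData back from the container's LevelDB instance.
    return keyManager.getKey(container, blockID);
  }
}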
+ * @throws StorageContainerException + */ + public void deleteKey(Container container, BlockID blockID) throws + IOException { + Preconditions.checkNotNull(blockID, "block ID cannot be null."); + Preconditions.checkState(blockID.getContainerID() >= 0, + "Container ID cannot be negative."); + Preconditions.checkState(blockID.getLocalID() >= 0, + "Local ID cannot be negative."); + + KeyValueContainerData cData = (KeyValueContainerData) container + .getContainerData(); + MetadataStore db = KeyUtils.getDB(cData, config); + // This is a post condition that acts as a hint to the user. + // Should never fail. + Preconditions.checkNotNull(db, "DB cannot be null here"); + // Note : There is a race condition here, since get and delete + // are not atomic. Leaving it here since the impact is refusing + // to delete a key which might have just gotten inserted after + // the get check. + byte[] kKey = Longs.toByteArray(blockID.getLocalID()); + byte[] kData = db.get(kKey); + if (kData == null) { + throw new StorageContainerException("Unable to find the key.", + NO_SUCH_KEY); + } + db.delete(kKey); + + // Decrement keycount here + container.getContainerData().decrKeyCount(); + } + + /** + * List keys in a container. + * + * @param container - Container from which keys need to be listed. + * @param startLocalID - Key to start from, 0 to begin. + * @param count - Number of keys to return. + * @return List of Keys that match the criteria. + */ + @Override + public List listKey(Container container, long startLocalID, int + count) throws IOException { + Preconditions.checkNotNull(container, "container cannot be null"); + Preconditions.checkState(startLocalID >= 0, "startLocal ID cannot be " + + "negative"); + Preconditions.checkArgument(count > 0, + "Count must be a positive number."); + container.readLock(); + List result = null; + KeyValueContainerData cData = (KeyValueContainerData) container + .getContainerData(); + MetadataStore db = KeyUtils.getDB(cData, config); + result = new ArrayList<>(); + byte[] startKeyInBytes = Longs.toByteArray(startLocalID); + List> range = db.getSequentialRangeKVs( + startKeyInBytes, count, null); + for (Map.Entry entry : range) { + KeyData value = KeyUtils.getKeyData(entry.getValue()); + KeyData data = new KeyData(value.getBlockID()); + result.add(data); + } + return result; + } + + /** + * Shutdown KeyValueContainerManager. + */ + public void shutdown() { + KeyUtils.shutdownCache(ContainerCache.getInstance(config)); + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/package-info.java new file mode 100644 index 00000000000..525d51b9134 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.keyvalue.impl; +/** + This package contains chunk manager and key manager implementation for + keyvalue container type. + **/ \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ChunkManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java similarity index 74% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ChunkManager.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java index c58fb9dbbcf..7134be1d34d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/ChunkManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/ChunkManager.java @@ -1,3 +1,5 @@ +package org.apache.hadoop.ozone.container.keyvalue.interfaces; + /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -16,33 +18,36 @@ * limitations under the License. */ -package org.apache.hadoop.ozone.container.common.interfaces; - -import org.apache.hadoop.hdds.scm.container.common.helpers - .StorageContainerException; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.interfaces.Container; /** * Chunk Manager allows read, write, delete and listing of chunks in * a container. */ + public interface ChunkManager { /** * writes a given chunk. + * + * @param container - Container for the chunk * @param blockID - ID of the block. * @param info - ChunkInfo. * @param stage - Chunk Stage write. * @throws StorageContainerException */ - void writeChunk(BlockID blockID, - ChunkInfo info, byte[] data, ContainerProtos.Stage stage) + void writeChunk(Container container, BlockID blockID, ChunkInfo info, + byte[] data, ContainerProtos.Stage stage) throws StorageContainerException; /** * reads the data defined by a chunk. + * + * @param container - Container for the chunk * @param blockID - ID of the block. * @param info - ChunkInfo. * @return byte array @@ -51,16 +56,18 @@ void writeChunk(BlockID blockID, * TODO: Right now we do not support partial reads and writes of chunks. * TODO: Explore if we need to do that for ozone. */ - byte[] readChunk(BlockID blockID, ChunkInfo info) throws + byte[] readChunk(Container container, BlockID blockID, ChunkInfo info) throws StorageContainerException; /** * Deletes a given chunk. + * + * @param container - Container for the chunk * @param blockID - ID of the block. 
* @param info - Chunk Info * @throws StorageContainerException */ - void deleteChunk(BlockID blockID, ChunkInfo info) throws + void deleteChunk(Container container, BlockID blockID, ChunkInfo info) throws StorageContainerException; // TODO : Support list operations. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/KeyManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/KeyManager.java new file mode 100644 index 00000000000..84f771ae4e1 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/KeyManager.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue.interfaces; + +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.common.interfaces.Container; + +import java.io.IOException; +import java.util.List; + +/** + * KeyManager is for performing key related operations on the container. + */ +public interface KeyManager { + + /** + * Puts or overwrites a key. + * + * @param container - Container for which key need to be added. + * @param data - Key Data. + * @return length of the Key. + * @throws IOException + */ + long putKey(Container container, KeyData data) throws IOException; + + /** + * Gets an existing key. + * + * @param container - Container from which key need to be get. + * @param blockID - BlockID of the Key. + * @return Key Data. + * @throws IOException + */ + KeyData getKey(Container container, BlockID blockID) throws IOException; + + /** + * Deletes an existing Key. + * + * @param container - Container from which key need to be deleted. + * @param blockID - ID of the block. + * @throws StorageContainerException + */ + void deleteKey(Container container, BlockID blockID) throws IOException; + + /** + * List keys in a container. + * + * @param container - Container from which keys need to be listed. + * @param startLocalID - Key to start from, 0 to begin. + * @param count - Number of keys to return. + * @return List of Keys that match the criteria. + */ + List listKey(Container container, long startLocalID, int count) + throws IOException; + + /** + * Returns the last committed block length for the block. + * @param blockID blockId + */ + long getCommittedBlockLength(Container container, BlockID blockID) + throws IOException; + + /** + * Shutdown ContainerManager. + */ + void shutdown(); +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/package-info.java new file mode 100644 index 00000000000..53c9f1e0f97 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.container.keyvalue; +/** + This package contains classes for KeyValue container type. + **/ \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/background/BlockDeletingService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java similarity index 80% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/background/BlockDeletingService.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java index bff591367cd..51eed7ff9e1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/background/BlockDeletingService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java @@ -16,9 +16,17 @@ * the License. */ -package org.apache.hadoop.ozone.container.common.statemachine.background; +package org.apache.hadoop.ozone.container.keyvalue.statemachine.background; import com.google.common.collect.Lists; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.impl.TopNOrderedContainerDeletionChoosingPolicy; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDeletionChoosingPolicy; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyUtils; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.ratis.shaded.com.google.protobuf .InvalidProtocolBufferException; import org.apache.commons.io.FileUtils; @@ -28,10 +36,6 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; -import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.helpers.KeyUtils; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; import org.apache.hadoop.util.Time; import org.apache.hadoop.utils.BackgroundService; import org.apache.hadoop.utils.BackgroundTask; @@ -62,12 +66,14 @@ * A per-datanode container block deleting service takes in charge * of deleting staled ozone blocks. 
*/ +// TODO: Fix BlockDeletingService to work with new StorageLayer public class BlockDeletingService extends BackgroundService{ private static final Logger LOG = LoggerFactory.getLogger(BlockDeletingService.class); - private final ContainerManager containerManager; + ContainerSet containerSet; + private ContainerDeletionChoosingPolicy containerDeletionPolicy; private final Configuration conf; // Throttle number of blocks to delete per task, @@ -82,12 +88,15 @@ // Core pool size for container tasks private final static int BLOCK_DELETING_SERVICE_CORE_POOL_SIZE = 10; - public BlockDeletingService(ContainerManager containerManager, - long serviceInterval, long serviceTimeout, TimeUnit unit, - Configuration conf) { - super("BlockDeletingService", serviceInterval, unit, + public BlockDeletingService(ContainerSet containerSet, long serviceInterval, + long serviceTimeout, TimeUnit timeUnit, Configuration conf) { + super("BlockDeletingService", serviceInterval, timeUnit, BLOCK_DELETING_SERVICE_CORE_POOL_SIZE, serviceTimeout); - this.containerManager = containerManager; + this.containerSet = containerSet; + containerDeletionPolicy = ReflectionUtils.newInstance(conf.getClass( + ScmConfigKeys.OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY, + TopNOrderedContainerDeletionChoosingPolicy.class, + ContainerDeletionChoosingPolicy.class), conf); this.conf = conf; this.blockLimitPerTask = conf.getInt( OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER, @@ -97,6 +106,7 @@ public BlockDeletingService(ContainerManager containerManager, OZONE_BLOCK_DELETING_CONTAINER_LIMIT_PER_INTERVAL_DEFAULT); } + @Override public BackgroundTaskQueue getTasks() { BackgroundTaskQueue queue = new BackgroundTaskQueue(); @@ -107,11 +117,13 @@ public BackgroundTaskQueue getTasks() { // We must ensure there is no empty container in this result. // The chosen result depends on what container deletion policy is // configured. 
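A sketch of wiring the service against a ContainerSet; the interval and timeout values here are illustrative stand-ins for the ozone block-deleting configuration keys, and the container-choosing policy defaults to TopNOrderedContainerDeletionChoosingPolicy unless OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY points at another implementation.

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
import org.apache.hadoop.ozone.container.keyvalue.statemachine.background.BlockDeletingService;

public final class BlockDeleterSketch {
  public static BlockDeletingService startDeleter(ContainerSet containerSet) {
    OzoneConfiguration conf = new OzoneConfiguration();
    // Poll every 60s, allow each run up to 300s; real values come from conf.
    BlockDeletingService service = new BlockDeletingService(
        containerSet, 60_000L, 300_000L, TimeUnit.MILLISECONDS, conf);
    service.start();   // getTasks() is then invoked once per interval
    return service;
  }
}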
- containers = containerManager.chooseContainerForBlockDeletion( - containerLimitPerInterval); - LOG.info("Plan to choose {} containers for block deletion, " - + "actually returns {} valid containers.", - containerLimitPerInterval, containers.size()); + containers = containerSet.chooseContainerForBlockDeletion( + containerLimitPerInterval, containerDeletionPolicy); + if (containers.size() > 0) { + LOG.info("Plan to choose {} containers for block deletion, " + + "actually returns {} valid containers.", + containerLimitPerInterval, containers.size()); + } for(ContainerData container : containers) { BlockDeletingTask containerTask = @@ -161,11 +173,11 @@ public int getSize() { implements BackgroundTask { private final int priority; - private final ContainerData containerData; + private final KeyValueContainerData containerData; BlockDeletingTask(ContainerData containerName, int priority) { this.priority = priority; - this.containerData = containerName; + this.containerData = (KeyValueContainerData) containerName; } @Override @@ -173,7 +185,8 @@ public BackgroundTaskResult call() throws Exception { ContainerBackgroundTaskResult crr = new ContainerBackgroundTaskResult(); long startTime = Time.monotonicNow(); // Scan container's db and get list of under deletion blocks - MetadataStore meta = KeyUtils.getDB(containerData, conf); + MetadataStore meta = KeyUtils.getDB( + (KeyValueContainerData) containerData, conf); // # of blocks to delete is throttled KeyPrefixFilter filter = new KeyPrefixFilter().addFilter(OzoneConsts.DELETING_KEY_PREFIX); @@ -187,10 +200,10 @@ public BackgroundTaskResult call() throws Exception { List succeedBlocks = new LinkedList<>(); LOG.debug("Container : {}, To-Delete blocks : {}", containerData.getContainerID(), toDeleteBlocks.size()); - File dataDir = ContainerUtils.getDataDirectory(containerData).toFile(); + File dataDir = new File(containerData.getChunksPath()); if (!dataDir.exists() || !dataDir.isDirectory()) { LOG.error("Invalid container data dir {} : " - + "not exist or not a directory", dataDir.getAbsolutePath()); + + "does not exist or not a directory", dataDir.getAbsolutePath()); return crr; } @@ -226,8 +239,7 @@ public BackgroundTaskResult call() throws Exception { }); meta.writeBatch(batch); // update count of pending deletion blocks in in-memory container status - containerManager.decrPendingDeletionBlocks(succeedBlocks.size(), - containerData.getContainerID()); + containerData.decrPendingDeletionBlocks(succeedBlocks.size()); if (!succeedBlocks.isEmpty()) { LOG.info("Container: {}, deleted blocks: {}, task elapsed time: {}ms", diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/package-info.java new file mode 100644 index 00000000000..69d80425ab7 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/package-info.java @@ -0,0 +1,18 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.keyvalue.statemachine.background; \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java new file mode 100644 index 00000000000..7c986f04b1d --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.ozoneimpl; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.ozone.common.Storage; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; + +/** + * Class used to read .container files from Volume and build container map. + * + * Layout of the container directory on disk is as follows: + * + * ../hdds/VERSION + * ../hdds/<>/current/<>/</metadata/<>.container + * ../hdds/<>/current/<>/</<> + * + * Some ContainerTypes will have extra metadata other than the .container + * file. For example, KeyValueContainer will have a .db file. This .db file + * will also be stored in the metadata folder along with the .container file. + * + * ../hdds/<>/current/<>/</metadata/<>.db + * + * Note that the <> is dependent on the ContainerType. 
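The angle-bracket placeholders in the layout above stand for the SCM directory, the container grouping directory and the container ID; below is a sketch of resolving the .container descriptor under that layout, where the grouping rule shown is purely illustrative.

import java.nio.file.Path;
import java.nio.file.Paths;

public final class ContainerPathSketch {
  public static Path containerFile(Path hddsRoot, String scmId, long containerId) {
    long group = containerId >> 4;             // illustrative grouping only
    return hddsRoot.resolve(scmId)
        .resolve("current")
        .resolve(String.valueOf(group))
        .resolve(String.valueOf(containerId))
        .resolve("metadata")
        .resolve(containerId + ".container");
  }

  public static void main(String[] args) {
    // e.g. /data/hdds/scm-uuid/current/0/12/metadata/12.container
    System.out.println(containerFile(Paths.get("/data/hdds"), "scm-uuid", 12L));
  }
}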
+ * For KeyValueContainers, the data is stored in a "chunks" folder. As such, + * the <> layout for KeyValueContainers is + * + * ../hdds/<>/current/<>/</chunks/<> + * + */ +public class ContainerReader implements Runnable { + + private static final Logger LOG = LoggerFactory.getLogger( + ContainerReader.class); + private HddsVolume hddsVolume; + private final ContainerSet containerSet; + private final OzoneConfiguration config; + private final File hddsVolumeDir; + private final VolumeSet volumeSet; + + ContainerReader(VolumeSet volSet, HddsVolume volume, ContainerSet cset, + OzoneConfiguration conf) { + Preconditions.checkNotNull(volume); + this.hddsVolume = volume; + this.hddsVolumeDir = hddsVolume.getHddsRootDir(); + this.containerSet = cset; + this.config = conf; + this.volumeSet = volSet; + } + + @Override + public void run() { + try { + readVolume(hddsVolumeDir); + } catch (RuntimeException ex) { + LOG.info("Caught an Run time exception during reading container files" + + " from Volume {}", hddsVolumeDir); + } + } + + public void readVolume(File hddsVolumeRootDir) { + Preconditions.checkNotNull(hddsVolumeRootDir, "hddsVolumeRootDir" + + "cannot be null"); + + //filtering scm directory + File[] scmDir = hddsVolumeRootDir.listFiles(new FileFilter() { + @Override + public boolean accept(File pathname) { + return pathname.isDirectory(); + } + }); + + if (scmDir == null) { + LOG.error("IO error for the volume {}, skipped loading", + hddsVolumeRootDir); + volumeSet.failVolume(hddsVolumeRootDir.getPath()); + return; + } + + if (scmDir.length > 1) { + LOG.error("Volume {} is in Inconsistent state", hddsVolumeRootDir); + volumeSet.failVolume(hddsVolumeRootDir.getPath()); + return; + } + + for (File scmLoc : scmDir) { + File currentDir = new File(scmLoc, Storage.STORAGE_DIR_CURRENT); + File[] containerTopDirs = currentDir.listFiles(); + if (containerTopDirs != null) { + for (File containerTopDir : containerTopDirs) { + if (containerTopDir.isDirectory()) { + File[] containerDirs = containerTopDir.listFiles(); + if (containerDirs != null) { + for (File containerDir : containerDirs) { + File containerFile = ContainerUtils.getContainerFile( + containerDir); + long containerID = ContainerUtils.getContainerID(containerDir); + if (containerFile.exists()) { + verifyContainerFile(containerID, containerFile); + } else { + LOG.error("Missing .container file for ContainerID: {}", + containerDir.getName()); + } + } + } + } + } + } + } + } + + private void verifyContainerFile(long containerID, File containerFile) { + try { + ContainerData containerData = ContainerDataYaml.readContainerFile( + containerFile); + if (containerID != containerData.getContainerID()) { + LOG.error("Invalid ContainerID in file {}. 
" + + "Skipping loading of this container.", containerFile); + return; + } + verifyContainerData(containerData); + } catch (IOException ex) { + LOG.error("Failed to parse ContainerFile for ContainerID: {}", + containerID, ex); + } + } + + public void verifyContainerData(ContainerData containerData) + throws IOException { + switch (containerData.getContainerType()) { + case KeyValueContainer: + if (containerData instanceof KeyValueContainerData) { + KeyValueContainerData kvContainerData = (KeyValueContainerData) + containerData; + containerData.setVolume(hddsVolume); + + KeyValueContainerUtil.parseKVContainerData(kvContainerData, config); + KeyValueContainer kvContainer = new KeyValueContainer( + kvContainerData, config); + containerSet.addContainer(kvContainer); + } else { + throw new StorageContainerException("Container File is corrupted. " + + "ContainerType is KeyValueContainer but cast to " + + "KeyValueContainerData failed. ", + ContainerProtos.Result.CONTAINER_METADATA_ERROR); + } + break; + default: + throw new StorageContainerException("Unrecognized ContainerType " + + containerData.getContainerType(), + ContainerProtos.Result.UNKNOWN_CONTAINER_TYPE); + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 7931f6f7dd4..72a5804a9ef 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -1,72 +1,51 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.apache.hadoop.ozone.container.ozoneimpl; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdfs.server.datanode.StorageLocation; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.NodeReportProto; -import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.container.common.helpers.ContainerData; -import org.apache.hadoop.ozone.container.common.impl.ChunkManagerImpl; -import org.apache.hadoop.ozone.container.common.impl.ContainerManagerImpl; -import org.apache.hadoop.ozone.container.common.impl.Dispatcher; -import org.apache.hadoop.ozone.container.common.impl.KeyManagerImpl; -import org.apache.hadoop.ozone.container.common.interfaces.ChunkManager; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; -import org.apache.hadoop.ozone.container.common.interfaces.KeyManager; -import org.apache.hadoop.ozone.container.common.statemachine.background - .BlockDeletingService; -import org.apache.hadoop.ozone.container.common.transport.server.XceiverServer; -import org.apache.hadoop.ozone.container.common.transport.server - .XceiverServerGrpc; -import org.apache.hadoop.ozone.container.common.transport.server - .XceiverServerSpi; -import org.apache.hadoop.ozone.container.common.transport.server.ratis - .XceiverServerRatis; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.common.transport.server.XceiverServerGrpc; +import 
org.apache.hadoop.ozone.container.common.transport.server.XceiverServerSpi; +import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; + +import org.apache.hadoop.ozone.container.replication.GrpcReplicationService; +import org.apache.hadoop.ozone.container.replication + .OnDemandContainerReplicationSource; +import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Paths; -import java.util.LinkedList; -import java.util.List; -import java.util.concurrent.TimeUnit; +import java.io.*; +import java.util.ArrayList; +import java.util.Iterator; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; -import static org.apache.hadoop.ozone.OzoneConfigKeys - .OZONE_BLOCK_DELETING_SERVICE_INTERVAL; -import static org.apache.hadoop.ozone.OzoneConfigKeys - .OZONE_BLOCK_DELETING_SERVICE_INTERVAL_DEFAULT; -import static org.apache.hadoop.ozone.OzoneConfigKeys - .OZONE_BLOCK_DELETING_SERVICE_TIMEOUT; -import static org.apache.hadoop.ozone.OzoneConfigKeys - .OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT; -import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_ROOT_PREFIX; import static org.apache.hadoop.ozone.OzoneConsts.INVALID_PORT; /** @@ -74,69 +53,75 @@ * layer. */ public class OzoneContainer { - public static final Logger LOG = - LoggerFactory.getLogger(OzoneContainer.class); - private final Configuration ozoneConfig; - private final ContainerDispatcher dispatcher; - private final ContainerManager manager; + public static final Logger LOG = LoggerFactory.getLogger( + OzoneContainer.class); + + private final HddsDispatcher hddsDispatcher; + private final DatanodeDetails dnDetails; + private final OzoneConfiguration config; + private final VolumeSet volumeSet; + private final ContainerSet containerSet; private final XceiverServerSpi[] server; - private final ChunkManager chunkManager; - private final KeyManager keyManager; - private final BlockDeletingService blockDeletingService; /** - * Creates a network endpoint and enables Ozone container. - * - * @param ozoneConfig - Config + * Construct OzoneContainer object. 
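A condensed sketch of the datanode-side bootstrap this constructor performs, assuming the DatanodeDetails and StateContext are handed in by the surrounding DatanodeStateMachine: the constructor scans every HddsVolume, rebuilds the ContainerSet from the persisted .container files, and wires the HddsDispatcher into the gRPC and Ratis servers, which start() then brings up.

import java.io.IOException;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;

public final class OzoneContainerBootstrapSketch {
  public static OzoneContainer boot(DatanodeDetails dn, StateContext context)
      throws IOException {
    OzoneConfiguration conf = new OzoneConfiguration();
    OzoneContainer ozoneContainer = new OzoneContainer(dn, conf, context);
    ozoneContainer.start();   // opens the container transport ports
    return ozoneContainer;
  }
}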
+ * @param datanodeDetails + * @param conf + * @throws DiskOutOfSpaceException * @throws IOException */ - public OzoneContainer( - DatanodeDetails datanodeDetails, Configuration ozoneConfig) - throws IOException { - this.ozoneConfig = ozoneConfig; - List locations = new LinkedList<>(); - String[] paths = ozoneConfig.getStrings( - OzoneConfigKeys.OZONE_METADATA_DIRS); - if (paths != null && paths.length > 0) { - for (String p : paths) { - locations.add(StorageLocation.parse( - Paths.get(p).resolve(CONTAINER_ROOT_PREFIX).toString())); - } - } else { - getDataDir(locations); - } + public OzoneContainer(DatanodeDetails datanodeDetails, OzoneConfiguration + conf, StateContext context) throws IOException { + this.dnDetails = datanodeDetails; + this.config = conf; + this.volumeSet = new VolumeSet(datanodeDetails.getUuidString(), conf); + this.containerSet = new ContainerSet(); + buildContainerSet(); + hddsDispatcher = new HddsDispatcher(config, containerSet, volumeSet, + context); + server = new XceiverServerSpi[]{ + new XceiverServerGrpc(datanodeDetails, this.config, this + .hddsDispatcher, createReplicationService()), + XceiverServerRatis.newXceiverServerRatis(datanodeDetails, this + .config, hddsDispatcher, context) + }; - manager = new ContainerManagerImpl(); - manager.init(this.ozoneConfig, locations, datanodeDetails); - this.chunkManager = new ChunkManagerImpl(manager); - manager.setChunkManager(this.chunkManager); - this.keyManager = new KeyManagerImpl(manager, ozoneConfig); - manager.setKeyManager(this.keyManager); + } - long svcInterval = - ozoneConfig.getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, - OZONE_BLOCK_DELETING_SERVICE_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); - long serviceTimeout = ozoneConfig.getTimeDuration( - OZONE_BLOCK_DELETING_SERVICE_TIMEOUT, - OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); - this.blockDeletingService = new BlockDeletingService(manager, - svcInterval, serviceTimeout, TimeUnit.MILLISECONDS, ozoneConfig); + private GrpcReplicationService createReplicationService() { + return new GrpcReplicationService( + new OnDemandContainerReplicationSource(containerSet)); + } - this.dispatcher = new Dispatcher(manager, this.ozoneConfig); + /** + * Build's container map. + */ + public void buildContainerSet() { + Iterator volumeSetIterator = volumeSet.getVolumesList() + .iterator(); + ArrayList volumeThreads = new ArrayList(); + + //TODO: diskchecker should be run before this, to see how disks are. + // And also handle disk failure tolerance need to be added + while (volumeSetIterator.hasNext()) { + HddsVolume volume = volumeSetIterator.next(); + File hddsVolumeRootDir = volume.getHddsRootDir(); + Thread thread = new Thread(new ContainerReader(volumeSet, volume, + containerSet, config)); + thread.start(); + volumeThreads.add(thread); + } + + try { + for (int i = 0; i < volumeThreads.size(); i++) { + volumeThreads.get(i).join(); + } + } catch (InterruptedException ex) { + LOG.info("Volume Threads Interrupted exception", ex); + } - boolean useGrpc = this.ozoneConfig.getBoolean( - ScmConfigKeys.DFS_CONTAINER_GRPC_ENABLED_KEY, - ScmConfigKeys.DFS_CONTAINER_GRPC_ENABLED_DEFAULT); - server = new XceiverServerSpi[]{ - useGrpc ? 
new XceiverServerGrpc(datanodeDetails, - this.ozoneConfig, this.dispatcher) : - new XceiverServer(datanodeDetails, - this.ozoneConfig, this.dispatcher), - XceiverServerRatis - .newXceiverServerRatis(datanodeDetails, this.ozoneConfig, dispatcher) - }; } /** @@ -145,170 +130,63 @@ public OzoneContainer( * @throws IOException */ public void start() throws IOException { + LOG.info("Attempting to start container services."); for (XceiverServerSpi serverinstance : server) { serverinstance.start(); } - blockDeletingService.start(); - dispatcher.init(); + hddsDispatcher.init(); } /** - * Stops the ozone container. - *

- * Shutdown logic is not very obvious from the following code. if you need to - * modify the logic, please keep these comments in mind. Here is the shutdown - * sequence. - *

- * 1. We shutdown the network ports. - *

- * 2. Now we need to wait for all requests in-flight to finish. - *

- * 3. The container manager lock is a read-write lock with "Fairness" - * enabled. - *

- * 4. This means that the waiting threads are served in a "first-come-first - * -served" manner. Please note that this applies to waiting threads only. - *

- * 5. Since write locks are exclusive, if we are waiting to get a lock it - * implies that we are waiting for in-flight operations to complete. - *

- * 6. if there are other write operations waiting on the reader-writer lock, - * fairness guarantees that they will proceed before the shutdown lock - * request. - *

- * 7. Since all operations either take a reader or writer lock of container - * manager, we are guaranteed that we are the last operation since we have - * closed the network port, and we wait until close is successful. - *

- * 8. We take the writer lock and call shutdown on each of the managers in - * reverse order. That is chunkManager, keyManager and containerManager is - * shutdown. + * Stop Container Service on the datanode. */ public void stop() { + //TODO: at end of container IO integration work. LOG.info("Attempting to stop container services."); for(XceiverServerSpi serverinstance: server) { serverinstance.stop(); } - dispatcher.shutdown(); - - try { - this.manager.writeLock(); - this.chunkManager.shutdown(); - this.keyManager.shutdown(); - this.manager.shutdown(); - this.blockDeletingService.shutdown(); - LOG.info("container services shutdown complete."); - } catch (IOException ex) { - LOG.warn("container service shutdown error:", ex); - } finally { - this.manager.writeUnlock(); - } - } - - /** - * Returns a paths to data dirs. - * - * @param pathList - List of paths. - * @throws IOException - */ - private void getDataDir(List pathList) throws IOException { - for (String dir : ozoneConfig.getStrings(DFS_DATANODE_DATA_DIR_KEY)) { - StorageLocation location = StorageLocation.parse(dir); - pathList.add(location); - } - } - - /** - * Returns node report of container storage usage. - */ - public NodeReportProto getNodeReport() throws IOException { - return this.manager.getNodeReport(); - } - - private int getPortbyType(HddsProtos.ReplicationType replicationType) { - for (XceiverServerSpi serverinstance : server) { - if (serverinstance.getServerType() == replicationType) { - return serverinstance.getIPCPort(); - } - } - return INVALID_PORT; + hddsDispatcher.shutdown(); } - /** - * Returns the container server IPC port. - * - * @return Container server IPC port. - */ - public int getContainerServerPort() { - return getPortbyType(HddsProtos.ReplicationType.STAND_ALONE); - } - /** - * Returns the Ratis container Server IPC port. - * - * @return Ratis port. - */ - public int getRatisContainerServerPort() { - return getPortbyType(HddsProtos.ReplicationType.RATIS); + @VisibleForTesting + public ContainerSet getContainerSet() { + return containerSet; } - /** * Returns container report. * @return - container report. * @throws IOException */ - public ContainerReportsProto getContainerReport() throws IOException { - return this.manager.getContainerReport(); + public StorageContainerDatanodeProtocolProtos.ContainerReportsProto + getContainerReport() throws IOException { + return this.containerSet.getContainerReport(); } -// TODO: remove getContainerReports /** - * Returns the list of closed containers. - * @return - List of closed containers. + * Submit ContainerRequest. 
+ * @param request + * @param replicationType + * @param pipelineID * @throws IOException */ - public List getClosedContainerReports() throws IOException { - return this.manager.getClosedContainerReports(); - } - - private XceiverServerSpi getRatisSerer() { - for (XceiverServerSpi serverInstance : server) { - if (serverInstance instanceof XceiverServerRatis) { - return serverInstance; - } - } - return null; - } - - private XceiverServerSpi getStandaAloneSerer() { - for (XceiverServerSpi serverInstance : server) { - if (!(serverInstance instanceof XceiverServerRatis)) { - return serverInstance; - } - } - return null; - } - - @VisibleForTesting - public ContainerManager getContainerManager() { - return this.manager; - } - public void submitContainerRequest( ContainerProtos.ContainerCommandRequestProto request, - HddsProtos.ReplicationType replicationType) throws IOException { + HddsProtos.ReplicationType replicationType, + HddsProtos.PipelineID pipelineID) throws IOException { XceiverServerSpi serverInstance; long containerId = getContainerIdForCmd(request); if (replicationType == HddsProtos.ReplicationType.RATIS) { serverInstance = getRatisSerer(); Preconditions.checkNotNull(serverInstance); - serverInstance.submitRequest(request); + serverInstance.submitRequest(request, pipelineID); LOG.info("submitting {} request over RATIS server for container {}", request.getCmdType(), containerId); } else { serverInstance = getStandaAloneSerer(); Preconditions.checkNotNull(serverInstance); - getStandaAloneSerer().submitRequest(request); + getStandaAloneSerer().submitRequest(request, pipelineID); LOG.info( "submitting {} request over STAND_ALONE server for container {}", request.getCmdType(), containerId); @@ -322,7 +200,7 @@ private long getContainerIdForCmd( ContainerProtos.Type type = request.getCmdType(); switch (type) { case CloseContainer: - return request.getCloseContainer().getContainerID(); + return request.getContainerID(); // Right now, we handle only closeContainer via queuing it over the // over the XceiVerServer. For all other commands we throw Illegal // argument exception here. Will need to extend the switch cases @@ -332,4 +210,66 @@ private long getContainerIdForCmd( + " not supported over HearBeat Response"); } } -} \ No newline at end of file + + private XceiverServerSpi getRatisSerer() { + for (XceiverServerSpi serverInstance : server) { + if (serverInstance instanceof XceiverServerRatis) { + return serverInstance; + } + } + return null; + } + + private XceiverServerSpi getStandaAloneSerer() { + for (XceiverServerSpi serverInstance : server) { + if (!(serverInstance instanceof XceiverServerRatis)) { + return serverInstance; + } + } + return null; + } + + private int getPortbyType(HddsProtos.ReplicationType replicationType) { + for (XceiverServerSpi serverinstance : server) { + if (serverinstance.getServerType() == replicationType) { + return serverinstance.getIPCPort(); + } + } + return INVALID_PORT; + } + + /** + * Returns the container server IPC port. + * + * @return Container server IPC port. + */ + public int getContainerServerPort() { + return getPortbyType(HddsProtos.ReplicationType.STAND_ALONE); + } + + /** + * Returns the Ratis container Server IPC port. + * + * @return Ratis port. + */ + public int getRatisContainerServerPort() { + return getPortbyType(HddsProtos.ReplicationType.RATIS); + } + + /** + * Returns node report of container storage usage. 
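A small sketch of replaying a CloseContainer command through the method above; the request proto and pipeline id are assumed to arrive with the SCM heartbeat response, and the replication type decides whether the Ratis or the stand-alone Xceiver server queues it.

import java.io.IOException;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;

public final class CloseContainerDispatchSketch {
  public static void dispatch(OzoneContainer ozoneContainer,
      ContainerProtos.ContainerCommandRequestProto closeRequest,
      HddsProtos.PipelineID pipelineID) throws IOException {
    // Only CloseContainer is accepted here; other command types throw.
    ozoneContainer.submitContainerRequest(
        closeRequest, HddsProtos.ReplicationType.RATIS, pipelineID);
  }
}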
+ */ + public StorageContainerDatanodeProtocolProtos.NodeReportProto getNodeReport() + throws IOException { + return volumeSet.getNodeReport(); + } + + @VisibleForTesting + public ContainerDispatcher getDispatcher() { + return this.hddsDispatcher; + } + + public VolumeSet getVolumeSet() { + return volumeSet; + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerDownloader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerDownloader.java new file mode 100644 index 00000000000..9511241fb5f --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerDownloader.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.replication; + +import java.io.Closeable; +import java.nio.file.Path; +import java.util.List; +import java.util.concurrent.CompletableFuture; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; + +/** + * Service to download container data from other datanodes. + *

+ * The implementation of this interface should copy the raw container data in + * compressed form to the working directory. + *

+ * A smart implementation would use multiple sources to do parallel download. + */ +public interface ContainerDownloader extends Closeable { + + CompletableFuture<Path> getContainerDataFromReplicas(long containerId, + List<DatanodeDetails> sources); + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerReplicationSource.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerReplicationSource.java new file mode 100644 index 00000000000..69582f799f8 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerReplicationSource.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.replication; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Contract to prepare and provide the container in binary form. + *
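A sketch of consuming the downloader contract, assuming a concrete implementation and the replica list are supplied by the replication machinery; the future completes with the path of the compressed container data in the working directory.

import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.ozone.container.replication.ContainerDownloader;

public final class ReplicaDownloadSketch {
  public static Path fetch(ContainerDownloader downloader, long containerId,
      List<DatanodeDetails> sources) {
    CompletableFuture<Path> pending =
        downloader.getContainerDataFromReplicas(containerId, sources);
    return pending.join();   // blocks until a source succeeds or all fail
  }
}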

+ * Prepare will be called when the container is closed. An implementation could + * precache any binary representation of a container and store the pre-packaged + * images. + */ +public interface ContainerReplicationSource { + + /** + * Prepare for the replication. + * + * @param containerId The ID of the container to package. + */ + void prepare(long containerId); + + /** + * Copy the container data to an output stream. + * + * @param containerId Container to replicate + * @param destination The destination stream to copy all the container data. + * @throws IOException + */ + void copyData(long containerId, OutputStream destination) + throws IOException; + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerStreamingOutput.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerStreamingOutput.java new file mode 100644 index 00000000000..f7fd8a4957d --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ContainerStreamingOutput.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.replication; + +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.StreamingOutput; +import java.io.IOException; +import java.io.OutputStream; + +/** + * JAX-RS streaming output to return the binary container data. + */ +public class ContainerStreamingOutput implements StreamingOutput { + + private long containerId; + + private ContainerReplicationSource containerReplicationSource; + + public ContainerStreamingOutput(long containerId, + ContainerReplicationSource containerReplicationSource) { + this.containerId = containerId; + this.containerReplicationSource = containerReplicationSource; + } + + @Override + public void write(OutputStream outputStream) + throws IOException, WebApplicationException { + containerReplicationSource.copyData(containerId, outputStream); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java new file mode 100644 index 00000000000..91d098f0b05 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.replication; + +import java.io.BufferedOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.CompletableFuture; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .CopyContainerRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .CopyContainerResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto + .IntraDatanodeProtocolServiceGrpc; +import org.apache.hadoop.hdds.protocol.datanode.proto + .IntraDatanodeProtocolServiceGrpc.IntraDatanodeProtocolServiceStub; +import org.apache.hadoop.ozone.OzoneConfigKeys; + +import com.google.common.base.Preconditions; +import org.apache.ratis.shaded.io.grpc.ManagedChannel; +import org.apache.ratis.shaded.io.grpc.netty.NettyChannelBuilder; +import org.apache.ratis.shaded.io.grpc.stub.StreamObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Client to read container data from Grpc. + */ +public class GrpcReplicationClient { + + private static final Logger LOG = + LoggerFactory.getLogger(GrpcReplicationClient.class); + + private final ManagedChannel channel; + + private final IntraDatanodeProtocolServiceStub client; + + private final Path workingDirectory; + + public GrpcReplicationClient(String host, + int port, Path workingDir) { + + channel = NettyChannelBuilder.forAddress(host, port) + .usePlaintext() + .maxInboundMessageSize(OzoneConfigKeys.DFS_CONTAINER_CHUNK_MAX_SIZE) + .build(); + client = IntraDatanodeProtocolServiceGrpc.newStub(channel); + this.workingDirectory = workingDir; + + } + + public CompletableFuture<Path> download(long containerId) { + CopyContainerRequestProto request = + CopyContainerRequestProto.newBuilder() + .setContainerID(containerId) + .setLen(-1) + .setReadOffset(0) + .build(); + + CompletableFuture<Path> response = new CompletableFuture<>(); + + Path destinationPath = + getWorkingDirectory().resolve("container-" + containerId + ".tar.gz"); + + client.download(request, + new StreamDownloader(containerId, response, destinationPath)); + return response; + } + + private Path getWorkingDirectory() { + return workingDirectory; + } + + public void shutdown() { + channel.shutdown(); + } + + /** + * Grpc stream observer to CompletableFuture adapter.
+ */ + public static class StreamDownloader + implements StreamObserver<CopyContainerResponseProto> { + + private final CompletableFuture<Path> response; + + private final long containerId; + + private BufferedOutputStream stream; + + private Path outputPath; + + public StreamDownloader(long containerId, CompletableFuture<Path> response, + Path outputPath) { + this.response = response; + this.containerId = containerId; + this.outputPath = outputPath; + try { + outputPath = Preconditions.checkNotNull(outputPath); + Path parentPath = Preconditions.checkNotNull(outputPath.getParent()); + Files.createDirectories(parentPath); + stream = + new BufferedOutputStream(new FileOutputStream(outputPath.toFile())); + } catch (IOException e) { + throw new RuntimeException("OutputPath can't be used: " + outputPath, + e); + } + + } + + @Override + public void onNext(CopyContainerResponseProto chunk) { + try { + stream.write(chunk.getData().toByteArray()); + } catch (IOException e) { + response.completeExceptionally(e); + } + } + + @Override + public void onError(Throwable throwable) { + try { + stream.close(); + LOG.error("Container download was unsuccessful", throwable); + try { + Files.delete(outputPath); + } catch (IOException ex) { + LOG.error( + "Error happened during the download but can't delete the " + + "temporary destination.", ex); + } + response.completeExceptionally(throwable); + } catch (IOException e) { + response.completeExceptionally(e); + } + } + + @Override + public void onCompleted() { + try { + stream.close(); + response.complete(outputPath); + LOG.info("Container is downloaded to {}", outputPath); + } catch (IOException e) { + response.completeExceptionally(e); + } + + } + } + +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationService.java new file mode 100644 index 00000000000..d8f696f47d9 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationService.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.replication; + +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .CopyContainerRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .CopyContainerResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto + .IntraDatanodeProtocolServiceGrpc; + +import org.apache.ratis.shaded.com.google.protobuf.ByteString; +import org.apache.ratis.shaded.io.grpc.stub.StreamObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Service to make containers available for replication. + */ +public class GrpcReplicationService extends + IntraDatanodeProtocolServiceGrpc.IntraDatanodeProtocolServiceImplBase { + + private static final Logger LOG = + LoggerFactory.getLogger(GrpcReplicationService.class); + + private final ContainerReplicationSource containerReplicationSource; + + public GrpcReplicationService( + ContainerReplicationSource containerReplicationSource) { + this.containerReplicationSource = containerReplicationSource; + } + + @Override + public void download(CopyContainerRequestProto request, + StreamObserver responseObserver) { + LOG.info("Streaming container data ({}) to other datanode", + request.getContainerID()); + try { + GrpcOutputStream outputStream = + new GrpcOutputStream(responseObserver, request.getContainerID()); + containerReplicationSource + .copyData(request.getContainerID(), outputStream); + + } catch (IOException e) { + LOG.error("Can't stream the container data", e); + responseObserver.onError(e); + } + } + + private static class GrpcOutputStream extends OutputStream + implements Closeable { + + private static final int BUFFER_SIZE_IN_BYTES = 1024 * 1024; + + private final StreamObserver responseObserver; + + private final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + + private long containerId; + + private int readOffset = 0; + + private int writtenBytes; + + GrpcOutputStream( + StreamObserver responseObserver, + long containerId) { + this.responseObserver = responseObserver; + this.containerId = containerId; + } + + @Override + public void write(int b) throws IOException { + try { + buffer.write(b); + if (buffer.size() > BUFFER_SIZE_IN_BYTES) { + flushBuffer(false); + } + } catch (Exception ex) { + responseObserver.onError(ex); + } + } + + private void flushBuffer(boolean eof) { + if (buffer.size() > 0) { + CopyContainerResponseProto response = + CopyContainerResponseProto.newBuilder() + .setContainerID(containerId) + .setData(ByteString.copyFrom(buffer.toByteArray())) + .setEof(eof) + .setReadOffset(readOffset) + .setLen(buffer.size()) + .build(); + responseObserver.onNext(response); + readOffset += buffer.size(); + writtenBytes += buffer.size(); + buffer.reset(); + } + } + + @Override + public void close() throws IOException { + flushBuffer(true); + LOG.info("{} bytes written to the rpc stream from container {}", + writtenBytes, containerId); + responseObserver.onCompleted(); + } + } +} diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/OnDemandContainerReplicationSource.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/OnDemandContainerReplicationSource.java new file mode 100644 index 00000000000..d557b548b41 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/OnDemandContainerReplicationSource.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.replication; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerPacker; +import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A naive implementation of the replication source which creates a tar file + * on-demand, without pre-creating the compressed archives. + */ +public class OnDemandContainerReplicationSource + implements ContainerReplicationSource { + + private static final Logger LOG = + LoggerFactory.getLogger(ContainerReplicationSource.class); + + private ContainerSet containerSet; + + private ContainerPacker packer = new TarContainerPacker(); + + public OnDemandContainerReplicationSource( + ContainerSet containerSet) { + this.containerSet = containerSet; + } + + @Override + public void prepare(long containerId) { + + } + + @Override + public void copyData(long containerId, OutputStream destination) + throws IOException { + + Container container = containerSet.getContainer(containerId); + + Preconditions + .checkNotNull(container, "Container is not found " + containerId); + + switch (container.getContainerType()) { + case KeyValueContainer: + packer.pack(container, + destination); + break; + default: + LOG.warn("Container type " + container.getContainerType() + + " is not replicable as there is no compression algorithm for it."); + } + + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java new file mode 100644 index 00000000000..a461a98f236 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.ozone.container.replication; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name; +import org.apache.hadoop.ozone.OzoneConfigKeys; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Simple ContainerDownloader implementation to download the missing container + * from the first available datanode. + *

+ * This is not the most effective implementation as it uses only one source + * for the container download. + */ +public class SimpleContainerDownloader implements ContainerDownloader { + + private static final Logger LOG = + LoggerFactory.getLogger(SimpleContainerDownloader.class); + + private final Path workingDirectory; + + private ExecutorService executor; + + public SimpleContainerDownloader(Configuration conf) { + + String workDirString = + conf.get(OzoneConfigKeys.OZONE_CONTAINER_COPY_WORKDIR); + + if (workDirString == null) { + workingDirectory = Paths.get(System.getProperty("java.io.tmpdir")) + .resolve("container-copy"); + } else { + workingDirectory = Paths.get(workDirString); + } + + ThreadFactory build = new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("Container downloader thread - %d").build(); + executor = Executors.newSingleThreadExecutor(build); + LOG.info("Starting container downloader service to copy " + + "containers to replicate."); + } + + @Override + public CompletableFuture<Path> getContainerDataFromReplicas(long containerId, + List<DatanodeDetails> sourceDatanodes) { + + CompletableFuture<Path> result = null; + for (DatanodeDetails datanode : sourceDatanodes) { + try { + + if (result == null) { + GrpcReplicationClient grpcReplicationClient = + new GrpcReplicationClient(datanode.getIpAddress(), + datanode.getPort(Name.STANDALONE).getValue(), + workingDirectory); + result = grpcReplicationClient.download(containerId); + } else { + result = result.thenApply(CompletableFuture::completedFuture) + .exceptionally(t -> { + LOG.error("Error on replicating container: " + containerId, t); + GrpcReplicationClient grpcReplicationClient = + new GrpcReplicationClient(datanode.getIpAddress(), + datanode.getPort(Name.STANDALONE).getValue(), + workingDirectory); + return grpcReplicationClient.download(containerId); + }).thenCompose(Function.identity()); + } + } catch (Exception ex) { + LOG.error(String.format( + "Container %s download from datanode %s was unsuccessful. " + + "Trying the next datanode", containerId, datanode), ex); + } + + } + return result; + + } + + @Override + public void close() throws IOException { + try { + executor.awaitTermination(10, TimeUnit.SECONDS); + } catch (InterruptedException e) { + LOG.error("Can't stop container downloader gracefully", e); + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/package-info.java index 7f335e37c12..38a853c72a0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/package-info.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/package-info.java @@ -15,9 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.ozone.container.replication; - /** - * Ozone Container replicaton related classes. - */ \ No newline at end of file + Classes to replicate container data between datanodes.
+**/ \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/VersionResponse.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/VersionResponse.java index 83acf5bd6e3..4d328d3d1e6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/VersionResponse.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/VersionResponse.java @@ -105,6 +105,10 @@ public SCMVersionResponseProto getProtobufMessage() { .addAllKeys(list).build(); } + public String getValue(String key) { + return this.values.get(key); + } + /** * Builder class. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java index c7d8df57150..aaa5f112a3d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CloseContainerCommand.java @@ -1,19 +1,18 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at *

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. */ package org.apache.hadoop.ozone.protocol.commands; @@ -23,7 +22,7 @@ .StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.CloseContainerCommandProto; - +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; /** * Asks datanode to close a container. @@ -33,11 +32,25 @@ private long containerID; private HddsProtos.ReplicationType replicationType; + private PipelineID pipelineID; public CloseContainerCommand(long containerID, - HddsProtos.ReplicationType replicationType) { + HddsProtos.ReplicationType replicationType, + PipelineID pipelineID) { + super(); + this.containerID = containerID; + this.replicationType = replicationType; + this.pipelineID = pipelineID; + } + + // Should be called only for protobuf conversion + private CloseContainerCommand(long containerID, + HddsProtos.ReplicationType replicationType, + PipelineID pipelineID, long id) { + super(id); this.containerID = containerID; this.replicationType = replicationType; + this.pipelineID = pipelineID; } /** @@ -63,15 +76,19 @@ public CloseContainerCommand(long containerID, public CloseContainerCommandProto getProto() { return CloseContainerCommandProto.newBuilder() .setContainerID(containerID) - .setReplicationType(replicationType).build(); + .setCmdId(getId()) + .setReplicationType(replicationType) + .setPipelineID(pipelineID.getProtobuf()) + .build(); } public static CloseContainerCommand getFromProtobuf( CloseContainerCommandProto closeContainerProto) { Preconditions.checkNotNull(closeContainerProto); return new CloseContainerCommand(closeContainerProto.getContainerID(), - closeContainerProto.getReplicationType()); - + closeContainerProto.getReplicationType(), + PipelineID.getFromProtobuf(closeContainerProto.getPipelineID()), + closeContainerProto.getCmdId()); } public long getContainerID() { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CommandStatus.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CommandStatus.java new file mode 100644 index 00000000000..32cf7c222f5 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CommandStatus.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.protocol.commands; + +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatus.Status; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.SCMCommandProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; + +/** + * A class that is used to communicate status of datanode commands. + */ +public class CommandStatus { + + private SCMCommandProto.Type type; + private Long cmdId; + private Status status; + private String msg; + + public Type getType() { + return type; + } + + public Long getCmdId() { + return cmdId; + } + + public Status getStatus() { + return status; + } + + public String getMsg() { + return msg; + } + + /** + * To allow change of status once commandStatus is initialized. + * + * @param status + */ + public void setStatus(Status status) { + this.status = status; + } + + /** + * Returns a CommandStatus from the protocol buffers. + * + * @param cmdStatusProto - protoBuf Message + * @return CommandStatus + */ + public CommandStatus getFromProtoBuf( + StorageContainerDatanodeProtocolProtos.CommandStatus cmdStatusProto) { + return CommandStatusBuilder.newBuilder() + .setCmdId(cmdStatusProto.getCmdId()) + .setStatus(cmdStatusProto.getStatus()) + .setType(cmdStatusProto.getType()) + .setMsg(cmdStatusProto.getMsg()).build(); + } + /** + * Returns a CommandStatus from the protocol buffers. + * + * @return StorageContainerDatanodeProtocolProtos.CommandStatus + */ + public StorageContainerDatanodeProtocolProtos.CommandStatus + getProtoBufMessage() { + StorageContainerDatanodeProtocolProtos.CommandStatus.Builder builder = + StorageContainerDatanodeProtocolProtos.CommandStatus.newBuilder() + .setCmdId(this.getCmdId()) + .setStatus(this.getStatus()) + .setType(this.getType()); + if (this.getMsg() != null) { + builder.setMsg(this.getMsg()); + } + return builder.build(); + } + + /** + * Builder class for CommandStatus. 
+ */ + public static final class CommandStatusBuilder { + + private SCMCommandProto.Type type; + private Long cmdId; + private StorageContainerDatanodeProtocolProtos.CommandStatus.Status status; + private String msg; + + private CommandStatusBuilder() { + } + + public static CommandStatusBuilder newBuilder() { + return new CommandStatusBuilder(); + } + + public CommandStatusBuilder setType(Type commandType) { + this.type = commandType; + return this; + } + + public CommandStatusBuilder setCmdId(Long commandId) { + this.cmdId = commandId; + return this; + } + + public CommandStatusBuilder setStatus(Status commandStatus) { + this.status = commandStatus; + return this; + } + + public CommandStatusBuilder setMsg(String message) { + this.msg = message; + return this; + } + + public CommandStatus build() { + CommandStatus commandStatus = new CommandStatus(); + commandStatus.type = this.type; + commandStatus.msg = this.msg; + commandStatus.status = this.status; + commandStatus.cmdId = this.cmdId; + return commandStatus; + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteBlocksCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteBlocksCommand.java index 4fa33f68b00..07feeff6c1b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteBlocksCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DeleteBlocksCommand.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -36,6 +36,14 @@ public DeleteBlocksCommand(List blocks) { + super(); + this.blocksTobeDeleted = blocks; + } + + // Should be called only for protobuf conversion + private DeleteBlocksCommand(List blocks, + long id) { + super(id); this.blocksTobeDeleted = blocks; } @@ -56,11 +64,12 @@ public DeleteBlocksCommand(List blocks) { public static DeleteBlocksCommand getFromProtobuf( DeleteBlocksCommandProto deleteBlocksProto) { return new DeleteBlocksCommand(deleteBlocksProto - .getDeletedBlocksTransactionsList()); + .getDeletedBlocksTransactionsList(), deleteBlocksProto.getCmdId()); } public DeleteBlocksCommandProto getProto() { return DeleteBlocksCommandProto.newBuilder() + .setCmdId(getId()) .addAllDeletedBlocksTransactions(blocksTobeDeleted).build(); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java index 834318b1451..85302853952 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReplicateContainerCommand.java @@ -30,7 +30,6 @@ .StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; -import org.apache.hadoop.hdds.scm.container.ContainerID; import com.google.common.base.Preconditions; @@ -41,11 +40,19 @@ extends SCMCommand { private 
final long containerID; - private final List sourceDatanodes; public ReplicateContainerCommand(long containerID, List sourceDatanodes) { + super(); + this.containerID = containerID; + this.sourceDatanodes = sourceDatanodes; + } + + // Should be called only for protobuf conversion + public ReplicateContainerCommand(long containerID, + List sourceDatanodes, long id) { + super(id); this.containerID = containerID; this.sourceDatanodes = sourceDatanodes; } @@ -62,6 +69,7 @@ public Type getType() { public ReplicateContainerCommandProto getProto() { Builder builder = ReplicateContainerCommandProto.newBuilder() + .setCmdId(getId()) .setContainerID(containerID); for (DatanodeDetails dd : sourceDatanodes) { builder.addSources(dd.getProtoBufMessage()); @@ -75,12 +83,12 @@ public static ReplicateContainerCommand getFromProtobuf( List datanodeDetails = protoMessage.getSourcesList() - .stream() - .map(DatanodeDetails::getFromProtoBuf) - .collect(Collectors.toList()); + .stream() + .map(DatanodeDetails::getFromProtoBuf) + .collect(Collectors.toList()); return new ReplicateContainerCommand(protoMessage.getContainerID(), - datanodeDetails); + datanodeDetails, protoMessage.getCmdId()); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReregisterCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReregisterCommand.java index 953e31a02ef..09f361d7b6a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReregisterCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/ReregisterCommand.java @@ -49,6 +49,16 @@ return getProto().toByteArray(); } + /** + * Not implemented for ReregisterCommand. + * + * @return cmdId. + */ + @Override + public long getId() { + return 0; + } + public ReregisterCommandProto getProto() { return ReregisterCommandProto .newBuilder() diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java index 35ca802bee1..5773bf1825b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SCMCommand.java @@ -18,15 +18,27 @@ package org.apache.hadoop.ozone.protocol.commands; import com.google.protobuf.GeneratedMessage; +import org.apache.hadoop.hdds.HddsIdFactory; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMCommandProto; +import org.apache.hadoop.hdds.server.events.IdentifiableEventPayload; /** * A class that acts as the base class to convert between Java and SCM * commands in protobuf format. * @param */ -public abstract class SCMCommand { +public abstract class SCMCommand implements + IdentifiableEventPayload { + private long id; + + SCMCommand() { + this.id = HddsIdFactory.getLongId(); + } + + SCMCommand(long id) { + this.id = id; + } /** * Returns the type of this command. * @return Type @@ -38,4 +50,13 @@ * @return A protobuf message. */ public abstract byte[] getProtoBufMessage(); + + /** + * Gets the commandId of this object. + * @return uuid. 
+ */ + public long getId() { + return id; + } + } diff --git a/hadoop-hdds/container-service/src/main/proto/StorageContainerDatanodeProtocol.proto b/hadoop-hdds/container-service/src/main/proto/StorageContainerDatanodeProtocol.proto index 54230c1e9fc..0a6934342e4 100644 --- a/hadoop-hdds/container-service/src/main/proto/StorageContainerDatanodeProtocol.proto +++ b/hadoop-hdds/container-service/src/main/proto/StorageContainerDatanodeProtocol.proto @@ -79,7 +79,9 @@ message SCMHeartbeatRequestProto { required DatanodeDetailsProto datanodeDetails = 1; optional NodeReportProto nodeReport = 2; optional ContainerReportsProto containerReport = 3; - optional ContainerActionsProto containerActions = 4; + optional CommandStatusReportsProto commandStatusReport = 4; + optional ContainerActionsProto containerActions = 5; + optional PipelineActionsProto pipelineActions = 6; } /* @@ -127,6 +129,22 @@ message ContainerReportsProto { repeated ContainerInfo reports = 1; } +message CommandStatusReportsProto { + repeated CommandStatus cmdStatus = 1; +} + +message CommandStatus { + enum Status { + PENDING = 1; + EXECUTED = 2; + FAILED = 3; + } + required int64 cmdId = 1; + required Status status = 2 [default = PENDING]; + required SCMCommandProto.Type type = 3; + optional string msg = 4; +} + message ContainerActionsProto { repeated ContainerAction containerActions = 1; } @@ -140,11 +158,36 @@ message ContainerAction { CONTAINER_FULL = 1; } - required ContainerInfo container = 1; + required int64 containerID = 1; required Action action = 2; optional Reason reason = 3; } +message PipelineActionsProto { + repeated PipelineAction pipelineActions = 1; +} + +message ClosePipelineInfo { + enum Reason { + PIPELINE_FAILED = 1; + } + required PipelineID pipelineID = 1; + optional Reason reason = 3; + optional string detailedReason = 4; +} + +message PipelineAction { + enum Action { + CLOSE = 1; + } + + /** + * Action will be used to identify the correct pipeline action. + */ + required Action action = 1; + optional ClosePipelineInfo closePipeline = 2; +} + /** A container report contains the following information. */ @@ -193,6 +236,7 @@ message ReregisterCommandProto {} // HB response from SCM, contains a list of block deletion transactions. message DeleteBlocksCommandProto { repeated DeletedBlocksTransaction deletedBlocksTransactions = 1; + required int64 cmdId = 3; } // The deleted blocks which are stored in deletedBlock.db of scm. @@ -211,9 +255,11 @@ message DeletedBlocksTransaction { message ContainerBlocksDeletionACKProto { message DeleteBlockTransactionResult { required int64 txID = 1; - required bool success = 2; + required int64 containerID = 2; + required bool success = 3; } repeated DeleteBlockTransactionResult results = 1; + required string dnId = 2; } // SendACK response returned by datanode to SCM, currently empty. @@ -226,6 +272,8 @@ This command asks the datanode to close a specific container. message CloseContainerCommandProto { required int64 containerID = 1; required hadoop.hdds.ReplicationType replicationType = 2; + required int64 cmdId = 3; + required PipelineID pipelineID = 4; } /** @@ -233,6 +281,7 @@ This command asks the datanode to delete a specific container. */ message DeleteContainerCommandProto { required int64 containerID = 1; + required int64 cmdId = 2; } /** @@ -241,6 +290,7 @@ This command asks the datanode to replicate a container from specific sources. 
message ReplicateContainerCommandProto { required int64 containerID = 1; repeated DatanodeDetailsProto sources = 2; + required int64 cmdId = 3; } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/SCMTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/SCMTestUtils.java index b63c5fbe9bf..a24f096ddb1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/SCMTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/SCMTestUtils.java @@ -25,16 +25,20 @@ .StorageContainerDatanodeProtocolService; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.protocol.StorageContainerDatanodeProtocol; import org.apache.hadoop.ozone.protocolPB.StorageContainerDatanodeProtocolPB; import org.apache.hadoop.ozone.protocolPB .StorageContainerDatanodeProtocolServerSideTranslatorPB; +import org.apache.hadoop.test.GenericTestUtils; import java.io.IOException; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.ServerSocket; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; + /** * Test Endpoint class. */ @@ -109,8 +113,13 @@ public static InetSocketAddress getReuseableAddress() throws IOException { } } - public static Configuration getConf() { - return new Configuration(); + public static OzoneConfiguration getConf() { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(HDDS_DATANODE_DIR_KEY, GenericTestUtils + .getRandomizedTempPath()); + conf.set(OzoneConfigKeys.OZONE_METADATA_DIRS, GenericTestUtils + .getRandomizedTempPath()); + return conf; } public static OzoneConfiguration getOzoneConf() { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java index 14da9601e45..751775f627f 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ScmTestMock.java @@ -18,6 +18,8 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto. 
+ StorageContainerDatanodeProtocolProtos.CommandStatus; import org.apache.hadoop.hdds.scm.VersionInfo; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto; @@ -38,6 +40,7 @@ .StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.protocol.StorageContainerDatanodeProtocol; import org.apache.hadoop.ozone.protocol.VersionResponse; @@ -53,11 +56,21 @@ private AtomicInteger heartbeatCount = new AtomicInteger(0); private AtomicInteger rpcCount = new AtomicInteger(0); private AtomicInteger containerReportsCount = new AtomicInteger(0); + private String clusterId; + private String scmId; + + public ScmTestMock() { + clusterId = UUID.randomUUID().toString(); + scmId = UUID.randomUUID().toString(); + } // Map of datanode to containers private Map> nodeContainers = new HashMap(); private Map nodeReports = new HashMap<>(); + private AtomicInteger commandStatusReport = new AtomicInteger(0); + private List cmdStatusList = new LinkedList<>(); + private List scmCommandRequests = new LinkedList<>(); /** * Returns the number of heartbeats made to this class. * @@ -151,7 +164,10 @@ public long getBytesUsed() { return VersionResponse.newBuilder() .setVersion(versionInfo.getVersion()) .addValue(VersionInfo.DESCRIPTION_KEY, versionInfo.getDescription()) + .addValue(OzoneConsts.SCM_ID, scmId) + .addValue(OzoneConsts.CLUSTER_ID, clusterId) .build().getProtobufMessage(); + } private void sleepIfNeeded() { @@ -176,10 +192,14 @@ private void sleepIfNeeded() { sendHeartbeat(SCMHeartbeatRequestProto heartbeat) throws IOException { rpcCount.incrementAndGet(); heartbeatCount.incrementAndGet(); + if(heartbeat.hasCommandStatusReport()){ + cmdStatusList.addAll(heartbeat.getCommandStatusReport() + .getCmdStatusList()); + commandStatusReport.incrementAndGet(); + } sleepIfNeeded(); - List - cmdResponses = new LinkedList<>(); - return SCMHeartbeatResponseProto.newBuilder().addAllCommands(cmdResponses) + return SCMHeartbeatResponseProto.newBuilder().addAllCommands( + scmCommandRequests) .setDatanodeUUID(heartbeat.getDatanodeDetails().getUuid()) .build(); } @@ -298,4 +318,40 @@ public void reset() { nodeContainers.clear(); } + + public int getCommandStatusReportCount() { + return commandStatusReport.get(); + } + + public List getCmdStatusList() { + return cmdStatusList; + } + + public List getScmCommandRequests() { + return scmCommandRequests; + } + + public void clearScmCommandRequests() { + scmCommandRequests.clear(); + } + + public void addScmCommandRequest(SCMCommandProto scmCmd) { + scmCommandRequests.add(scmCmd); + } + + /** + * Set scmId. + * @param id + */ + public void setScmId(String id) { + this.scmId = id; + } + + /** + * Set scmId. + * @return scmId + */ + public String getScmId() { + return scmId; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestChunkLayOutVersion.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestChunkLayOutVersion.java new file mode 100644 index 00000000000..a4e0028e108 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestChunkLayOutVersion.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common; + +import org.apache.hadoop.ozone.container.common.impl.ChunkLayOutVersion; +import org.junit.Assert; +import org.junit.Test; + +/** + * This class tests ChunkLayOutVersion. + */ +public class TestChunkLayOutVersion { + + @Test + public void testChunkLayOutVersion() { + + // Check Latest Version and description + Assert.assertEquals(1, ChunkLayOutVersion.getLatestVersion().getVersion()); + Assert.assertEquals("Data without checksums.", ChunkLayOutVersion + .getLatestVersion().getDescription()); + + Assert.assertEquals(1, ChunkLayOutVersion.getAllVersions().length); + + } + +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeLayOutVersion.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeLayOutVersion.java new file mode 100644 index 00000000000..5cabef295f3 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeLayOutVersion.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.container.common; + +import org.junit.Assert; +import org.junit.Test; + +/** + * This class tests DatanodeLayOutVersion. + */ +public class TestDatanodeLayOutVersion { + + @Test + public void testDatanodeLayOutVersion() { + // Check Latest Version and description + Assert.assertEquals(1, DataNodeLayoutVersion.getLatestVersion() + .getVersion()); + Assert.assertEquals("HDDS Datanode LayOut Version 1", DataNodeLayoutVersion + .getLatestVersion().getDescription()); + Assert.assertEquals(DataNodeLayoutVersion.getAllVersions().length, + DataNodeLayoutVersion.getAllVersions().length); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java index ece75459b96..3fc0dd04e5c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestDatanodeStateMachine.java @@ -19,6 +19,7 @@ import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.ipc.RPC; @@ -57,9 +58,9 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_HEARTBEAT_RPC_TIMEOUT; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; import static org.junit.Assert.assertTrue; /** @@ -68,7 +69,9 @@ public class TestDatanodeStateMachine { private static final Logger LOG = LoggerFactory.getLogger(TestDatanodeStateMachine.class); - private final int scmServerCount = 3; + // Changing it to 1, as current code checks for multiple scm directories, + // and fail if exists + private final int scmServerCount = 1; private List serverAddresses; private List scmServers; private List mockServers; @@ -90,7 +93,6 @@ public void setUp() throws Exception { String address = "127.0.0.1"; serverAddresses.add(address + ":" + port); ScmTestMock mock = new ScmTestMock(); - scmServers.add(SCMTestUtils.startScmRpcServer(conf, mock, new InetSocketAddress(address, port), 10)); mockServers.add(mock); @@ -107,7 +109,7 @@ public void setUp() throws Exception { } File dataDir = new File(testRoot, "data"); - conf.set(DFS_DATANODE_DATA_DIR_KEY, dataDir.getAbsolutePath()); + conf.set(HDDS_DATANODE_DIR_KEY, dataDir.getAbsolutePath()); if (!dataDir.mkdirs()) { LOG.info("Data dir create failed."); } @@ -145,7 +147,7 @@ public void tearDown() throws Exception { } catch (Exception e) { //ignore all execption from the shutdown } finally { - testRoot.delete(); + FileUtil.fullyDelete(testRoot); } } @@ -162,7 +164,7 @@ public void testStartStopDatanodeStateMachine() throws IOException, stateMachine.startDaemon(); 
SCMConnectionManager connectionManager = stateMachine.getConnectionManager(); - GenericTestUtils.waitFor(() -> connectionManager.getValues().size() == 3, + GenericTestUtils.waitFor(() -> connectionManager.getValues().size() == 1, 1000, 30000); stateMachine.stopDaemon(); @@ -309,6 +311,48 @@ public void testDatanodeStateContext() throws IOException, } } + @Test + public void testDatanodeStateMachineWithIdWriteFail() throws Exception { + + File idPath = new File( + conf.get(ScmConfigKeys.OZONE_SCM_DATANODE_ID)); + idPath.delete(); + DatanodeDetails datanodeDetails = getNewDatanodeDetails(); + DatanodeDetails.Port port = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.STANDALONE, + OzoneConfigKeys.DFS_CONTAINER_IPC_PORT_DEFAULT); + datanodeDetails.setPort(port); + + try (DatanodeStateMachine stateMachine = + new DatanodeStateMachine(datanodeDetails, conf)) { + DatanodeStateMachine.DatanodeStates currentState = + stateMachine.getContext().getState(); + Assert.assertEquals(DatanodeStateMachine.DatanodeStates.INIT, + currentState); + + DatanodeState task = + stateMachine.getContext().getTask(); + Assert.assertEquals(InitDatanodeState.class, task.getClass()); + + //Set the idPath to read only, state machine will fail to write + // datanodeId file and set the state to shutdown. + idPath.getParentFile().mkdirs(); + idPath.getParentFile().setReadOnly(); + + task.execute(executorService); + DatanodeStateMachine.DatanodeStates newState = + task.await(2, TimeUnit.SECONDS); + + //As, we have changed the permission of idPath to readable, writing + // will fail and it will set the state to shutdown. + Assert.assertEquals(DatanodeStateMachine.DatanodeStates.SHUTDOWN, + newState); + + //Setting back to writable. + idPath.getParentFile().setWritable(true); + } + } + /** * Test state transition with a list of invalid scm configurations, * and verify the state transits to SHUTDOWN each time. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java new file mode 100644 index 00000000000..f991520cc46 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.common; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * This class is used to test the KeyValueContainerData. + */ +public class TestKeyValueContainerData { + + private static final long MAXSIZE = (long) StorageUnit.GB.toBytes(5); + @Test + public void testKeyValueData() { + long containerId = 1L; + ContainerProtos.ContainerType containerType = ContainerProtos + .ContainerType.KeyValueContainer; + String path = "/tmp"; + String containerDBType = "RocksDB"; + ContainerProtos.ContainerLifeCycleState state = ContainerProtos + .ContainerLifeCycleState.CLOSED; + AtomicLong val = new AtomicLong(0); + + KeyValueContainerData kvData = new KeyValueContainerData(containerId, + MAXSIZE); + + assertEquals(containerType, kvData.getContainerType()); + assertEquals(containerId, kvData.getContainerID()); + assertEquals(ContainerProtos.ContainerLifeCycleState.OPEN, kvData + .getState()); + assertEquals(0, kvData.getMetadata().size()); + assertEquals(0, kvData.getNumPendingDeletionBlocks()); + assertEquals(val.get(), kvData.getReadBytes()); + assertEquals(val.get(), kvData.getWriteBytes()); + assertEquals(val.get(), kvData.getReadCount()); + assertEquals(val.get(), kvData.getWriteCount()); + assertEquals(val.get(), kvData.getKeyCount()); + assertEquals(val.get(), kvData.getNumPendingDeletionBlocks()); + assertEquals(MAXSIZE, kvData.getMaxSize()); + + kvData.setState(state); + kvData.setContainerDBType(containerDBType); + kvData.setChunksPath(path); + kvData.setMetadataPath(path); + kvData.incrReadBytes(10); + kvData.incrWriteBytes(10); + kvData.incrReadCount(); + kvData.incrWriteCount(); + kvData.incrKeyCount(); + kvData.incrPendingDeletionBlocks(1); + + assertEquals(state, kvData.getState()); + assertEquals(containerDBType, kvData.getContainerDBType()); + assertEquals(path, kvData.getChunksPath()); + assertEquals(path, kvData.getMetadataPath()); + + assertEquals(10, kvData.getReadBytes()); + assertEquals(10, kvData.getWriteBytes()); + assertEquals(1, kvData.getReadCount()); + assertEquals(1, kvData.getWriteCount()); + assertEquals(1, kvData.getKeyCount()); + assertEquals(1, kvData.getNumPendingDeletionBlocks()); + } + +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestDatanodeVersionFile.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestDatanodeVersionFile.java new file mode 100644 index 00000000000..58892227a65 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestDatanodeVersionFile.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.container.common.helpers; + +import org.apache.hadoop.ozone.common.InconsistentStorageStateException; +import org.apache.hadoop.ozone.container.common.DataNodeLayoutVersion; +import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.Time; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.util.Properties; +import java.util.UUID; + +import static org.junit.Assert.*; + +/** + * This class tests {@link DatanodeVersionFile}. + */ +public class TestDatanodeVersionFile { + + private File versionFile; + private DatanodeVersionFile dnVersionFile; + private Properties properties; + + private String storageID; + private String clusterID; + private String datanodeUUID; + private long cTime; + private int lv; + + @Rule + public TemporaryFolder folder= new TemporaryFolder(); + + @Before + public void setup() throws IOException { + versionFile = folder.newFile("Version"); + storageID = UUID.randomUUID().toString(); + clusterID = UUID.randomUUID().toString(); + datanodeUUID = UUID.randomUUID().toString(); + cTime = Time.now(); + lv = DataNodeLayoutVersion.getLatestVersion().getVersion(); + + dnVersionFile = new DatanodeVersionFile( + storageID, clusterID, datanodeUUID, cTime, lv); + + dnVersionFile.createVersionFile(versionFile); + + properties = dnVersionFile.readFrom(versionFile); + } + + @Test + public void testCreateAndReadVersionFile() throws IOException{ + + //Check VersionFile exists + assertTrue(versionFile.exists()); + + assertEquals(storageID, HddsVolumeUtil.getStorageID( + properties, versionFile)); + assertEquals(clusterID, HddsVolumeUtil.getClusterID( + properties, versionFile, clusterID)); + assertEquals(datanodeUUID, HddsVolumeUtil.getDatanodeUUID( + properties, versionFile, datanodeUUID)); + assertEquals(cTime, HddsVolumeUtil.getCreationTime( + properties, versionFile)); + assertEquals(lv, HddsVolumeUtil.getLayOutVersion( + properties, versionFile)); + } + + @Test + public void testIncorrectClusterId() throws IOException{ + try { + String randomClusterID = UUID.randomUUID().toString(); + HddsVolumeUtil.getClusterID(properties, versionFile, + randomClusterID); + fail("Test failure in testIncorrectClusterId"); + } catch (InconsistentStorageStateException ex) { + GenericTestUtils.assertExceptionContains("Mismatched ClusterIDs", ex); + } + } + + @Test + public void testVerifyCTime() throws IOException{ + long invalidCTime = -10; + dnVersionFile = new DatanodeVersionFile( + storageID, clusterID, datanodeUUID, invalidCTime, lv); + dnVersionFile.createVersionFile(versionFile); + properties = dnVersionFile.readFrom(versionFile); + + try { + HddsVolumeUtil.getCreationTime(properties, versionFile); + fail("Test failure in testVerifyCTime"); + } catch (InconsistentStorageStateException ex) { + GenericTestUtils.assertExceptionContains("Invalid Creation time in " + + "Version File : " + versionFile, ex); + } + } + + @Test + public void testVerifyLayOut() throws IOException{ + int invalidLayOutVersion = 100; + dnVersionFile = 
new DatanodeVersionFile( + storageID, clusterID, datanodeUUID, cTime, invalidLayOutVersion); + dnVersionFile.createVersionFile(versionFile); + Properties props = dnVersionFile.readFrom(versionFile); + + try { + HddsVolumeUtil.getLayOutVersion(props, versionFile); + fail("Test failure in testVerifyLayOut"); + } catch (InconsistentStorageStateException ex) { + GenericTestUtils.assertExceptionContains("Invalid layOutVersion.", ex); + } + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerDataYaml.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerDataYaml.java new file mode 100644 index 00000000000..c7b9e0aff80 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerDataYaml.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.impl; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * This class tests create/read .container files. + */ +public class TestContainerDataYaml { + + private static long testContainerID = 1234; + + private static String testRoot = new FileSystemTestHelper().getTestRootDir(); + + private static final long MAXSIZE = (long) StorageUnit.GB.toBytes(5); + + /** + * Creates a .container file. cleanup() should be called at the end of the + * test when container file is created. + */ + private File createContainerFile(long containerID) throws IOException { + new File(testRoot).mkdirs(); + + String containerPath = containerID + ".container"; + + KeyValueContainerData keyValueContainerData = new KeyValueContainerData( + containerID, MAXSIZE); + keyValueContainerData.setContainerDBType("RocksDB"); + keyValueContainerData.setMetadataPath(testRoot); + keyValueContainerData.setChunksPath(testRoot); + + File containerFile = new File(testRoot, containerPath); + + // Create .container file with ContainerData + ContainerDataYaml.createContainerFile(ContainerProtos.ContainerType + .KeyValueContainer, keyValueContainerData, containerFile); + + //Check .container file exists or not. 
+ assertTrue(containerFile.exists()); + + return containerFile; + } + + private void cleanup() { + FileUtil.fullyDelete(new File(testRoot)); + } + + @Test + public void testCreateContainerFile() throws IOException { + long containerID = testContainerID++; + + File containerFile = createContainerFile(containerID); + + // Read from .container file, and verify data. + KeyValueContainerData kvData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(containerFile); + assertEquals(containerID, kvData.getContainerID()); + assertEquals(ContainerProtos.ContainerType.KeyValueContainer, kvData + .getContainerType()); + assertEquals("RocksDB", kvData.getContainerDBType()); + assertEquals(containerFile.getParent(), kvData.getMetadataPath()); + assertEquals(containerFile.getParent(), kvData.getChunksPath()); + assertEquals(ContainerProtos.ContainerLifeCycleState.OPEN, kvData + .getState()); + assertEquals(1, kvData.getLayOutVersion()); + assertEquals(0, kvData.getMetadata().size()); + assertEquals(MAXSIZE, kvData.getMaxSize()); + + // Update ContainerData. + kvData.addMetadata("VOLUME", "hdfs"); + kvData.addMetadata("OWNER", "ozone"); + kvData.setState(ContainerProtos.ContainerLifeCycleState.CLOSED); + + + ContainerDataYaml.createContainerFile(ContainerProtos.ContainerType + .KeyValueContainer, kvData, containerFile); + + // Reading newly updated data from .container file + kvData = (KeyValueContainerData) ContainerDataYaml.readContainerFile( + containerFile); + + // verify data. + assertEquals(containerID, kvData.getContainerID()); + assertEquals(ContainerProtos.ContainerType.KeyValueContainer, kvData + .getContainerType()); + assertEquals("RocksDB", kvData.getContainerDBType()); + assertEquals(containerFile.getParent(), kvData.getMetadataPath()); + assertEquals(containerFile.getParent(), kvData.getChunksPath()); + assertEquals(ContainerProtos.ContainerLifeCycleState.CLOSED, kvData + .getState()); + assertEquals(1, kvData.getLayOutVersion()); + assertEquals(2, kvData.getMetadata().size()); + assertEquals("hdfs", kvData.getMetadata().get("VOLUME")); + assertEquals("ozone", kvData.getMetadata().get("OWNER")); + assertEquals(MAXSIZE, kvData.getMaxSize()); + } + + @Test + public void testIncorrectContainerFile() throws IOException{ + try { + String containerFile = "incorrect.container"; + //Get file from resources folder + ClassLoader classLoader = getClass().getClassLoader(); + File file = new File(classLoader.getResource(containerFile).getFile()); + KeyValueContainerData kvData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(file); + fail("testIncorrectContainerFile failed"); + } catch (IllegalStateException ex) { + GenericTestUtils.assertExceptionContains("Unexpected " + + "ContainerLifeCycleState", ex); + } + } + + + @Test + public void testCheckBackWardCompatabilityOfContainerFile() throws + IOException { + // This test is for if we upgrade, and then .container files added by new + // server will have new fields added to .container file, after a while we + // decided to rollback. Then older ozone can read .container files + // created or not. 
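+ // The additionalfields.container fixture used below is a test resource that stands in for a file written by a newer version; the current parser is expected to read it, verify its checksum, and ignore any fields it does not recognize.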
+ + try { + String containerFile = "additionalfields.container"; + //Get file from resources folder + ClassLoader classLoader = getClass().getClassLoader(); + File file = new File(classLoader.getResource(containerFile).getFile()); + KeyValueContainerData kvData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(file); + ContainerUtils.verifyChecksum(kvData); + + //Checking the Container file data is consistent or not + assertEquals(ContainerProtos.ContainerLifeCycleState.CLOSED, kvData + .getState()); + assertEquals("RocksDB", kvData.getContainerDBType()); + assertEquals(ContainerProtos.ContainerType.KeyValueContainer, kvData + .getContainerType()); + assertEquals(9223372036854775807L, kvData.getContainerID()); + assertEquals("/hdds/current/aed-fg4-hji-jkl/containerDir0/1", kvData + .getChunksPath()); + assertEquals("/hdds/current/aed-fg4-hji-jkl/containerDir0/1", kvData + .getMetadataPath()); + assertEquals(1, kvData.getLayOutVersion()); + assertEquals(2, kvData.getMetadata().size()); + + } catch (Exception ex) { + ex.printStackTrace(); + fail("testCheckBackWardCompatabilityOfContainerFile failed"); + } + } + + /** + * Test to verify {@link ContainerUtils#verifyChecksum(ContainerData)}. + */ + @Test + public void testChecksumInContainerFile() throws IOException { + long containerID = testContainerID++; + + File containerFile = createContainerFile(containerID); + + // Read from .container file, and verify data. + KeyValueContainerData kvData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(containerFile); + ContainerUtils.verifyChecksum(kvData); + + cleanup(); + } + + /** + * Test to verify incorrect checksum is detected. + */ + @Test + public void testIncorrectChecksum() { + try { + String containerFile = "incorrect.checksum.container"; + //Get file from resources folder + ClassLoader classLoader = getClass().getClassLoader(); + File file = new File(classLoader.getResource(containerFile).getFile()); + KeyValueContainerData kvData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(file); + ContainerUtils.verifyChecksum(kvData); + fail("testIncorrectChecksum failed"); + } catch (Exception ex) { + GenericTestUtils.assertExceptionContains("Container checksum error for " + + "ContainerID:", ex); + } + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java new file mode 100644 index 00000000000..af322eaecc5 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.container.common.impl; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.container.common.interfaces.Container; + +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Class used to test ContainerSet operations. + */ +public class TestContainerSet { + + @Test + public void testAddGetRemoveContainer() throws StorageContainerException { + ContainerSet containerSet = new ContainerSet(); + long containerId = 100L; + ContainerProtos.ContainerLifeCycleState state = ContainerProtos + .ContainerLifeCycleState.CLOSED; + + KeyValueContainerData kvData = new KeyValueContainerData(containerId, + (long) StorageUnit.GB.toBytes(5)); + kvData.setState(state); + KeyValueContainer keyValueContainer = new KeyValueContainer(kvData, new + OzoneConfiguration()); + + //addContainer + boolean result = containerSet.addContainer(keyValueContainer); + assertTrue(result); + try { + result = containerSet.addContainer(keyValueContainer); + fail("Adding same container ID twice should fail."); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("Container already exists with" + + " container Id " + containerId, ex); + } + + //getContainer + KeyValueContainer container = (KeyValueContainer) containerSet + .getContainer(containerId); + KeyValueContainerData keyValueContainerData = (KeyValueContainerData) + container.getContainerData(); + assertEquals(containerId, keyValueContainerData.getContainerID()); + assertEquals(state, keyValueContainerData.getState()); + assertNull(containerSet.getContainer(1000L)); + + //removeContainer + assertTrue(containerSet.removeContainer(containerId)); + assertFalse(containerSet.removeContainer(1000L)); + } + + @Test + public void testIteratorsAndCount() throws StorageContainerException { + + ContainerSet containerSet = createContainerSet(); + + assertEquals(10, containerSet.containerCount()); + + // Using containerIterator. 
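+ // createContainerSet() below seeds ten containers, closing the even-numbered IDs and leaving the odd ones open, so both iteration loops can assert the expected state purely from the container ID.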
+ Iterator containerIterator = containerSet.getContainerIterator(); + + int count = 0; + while(containerIterator.hasNext()) { + Container kv = containerIterator.next(); + ContainerData containerData = kv.getContainerData(); + long containerId = containerData.getContainerID(); + if (containerId%2 == 0) { + assertEquals(ContainerProtos.ContainerLifeCycleState.CLOSED, + containerData.getState()); + } else { + assertEquals(ContainerProtos.ContainerLifeCycleState.OPEN, + containerData.getState()); + } + count++; + } + assertEquals(10, count); + + //Using containerMapIterator. + Iterator> containerMapIterator = containerSet + .getContainerMapIterator(); + + count = 0; + while (containerMapIterator.hasNext()) { + Container kv = containerMapIterator.next().getValue(); + ContainerData containerData = kv.getContainerData(); + long containerId = containerData.getContainerID(); + if (containerId%2 == 0) { + assertEquals(ContainerProtos.ContainerLifeCycleState.CLOSED, + containerData.getState()); + } else { + assertEquals(ContainerProtos.ContainerLifeCycleState.OPEN, + containerData.getState()); + } + count++; + } + assertEquals(10, count); + + } + + + @Test + public void testGetContainerReport() throws IOException { + + ContainerSet containerSet = createContainerSet(); + + ContainerReportsProto containerReportsRequestProto = containerSet + .getContainerReport(); + + assertEquals(10, containerReportsRequestProto.getReportsList().size()); + } + + + + @Test + public void testListContainer() throws StorageContainerException { + ContainerSet containerSet = createContainerSet(); + + List result = new ArrayList<>(); + containerSet.listContainer(2, 5, result); + + assertEquals(5, result.size()); + + for(ContainerData containerData : result) { + assertTrue(containerData.getContainerID() >=2 && containerData + .getContainerID()<=6); + } + } + + private ContainerSet createContainerSet() throws StorageContainerException { + ContainerSet containerSet = new ContainerSet(); + for (int i=0; i<10; i++) { + KeyValueContainerData kvData = new KeyValueContainerData(i, + (long) StorageUnit.GB.toBytes(5)); + if (i%2 == 0) { + kvData.setState(ContainerProtos.ContainerLifeCycleState.CLOSED); + } else { + kvData.setState(ContainerProtos.ContainerLifeCycleState.OPEN); + } + KeyValueContainer kv = new KeyValueContainer(kvData, new + OzoneConfiguration()); + containerSet.addContainer(kv); + } + return containerSet; + } + +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestHddsDispatcher.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestHddsDispatcher.java new file mode 100644 index 00000000000..fc622b2d195 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestHddsDispatcher.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.impl; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto + .ContainerProtos.ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .WriteChunkRequestProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerAction; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.ratis.shaded.com.google.protobuf.ByteString; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.File; +import java.io.IOException; +import java.util.UUID; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +/** + * Test-cases to verify the functionality of HddsDispatcher. 
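+ * The close-action case writes a chunk into an empty 1 GB container (no action expected) and then bumps the used bytes to roughly 950 MB before the next write, after which exactly one close ContainerAction should be queued on the StateContext.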
+ */ +public class TestHddsDispatcher { + + @Test + public void testContainerCloseActionWhenFull() throws IOException { + String testDir = GenericTestUtils.getTempPath( + TestHddsDispatcher.class.getSimpleName()); + try { + UUID scmId = UUID.randomUUID(); + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(HDDS_DATANODE_DIR_KEY, testDir); + DatanodeDetails dd = randomDatanodeDetails(); + ContainerSet containerSet = new ContainerSet(); + VolumeSet volumeSet = new VolumeSet(dd.getUuidString(), conf); + StateContext context = Mockito.mock(StateContext.class); + KeyValueContainerData containerData = new KeyValueContainerData(1L, + (long) StorageUnit.GB.toBytes(1)); + Container container = new KeyValueContainer(containerData, conf); + container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(), + scmId.toString()); + containerSet.addContainer(container); + HddsDispatcher hddsDispatcher = new HddsDispatcher( + conf, containerSet, volumeSet, context); + hddsDispatcher.setScmId(scmId.toString()); + ContainerCommandResponseProto responseOne = hddsDispatcher.dispatch( + getWriteChunkRequest(dd.getUuidString(), 1L, 1L)); + Assert.assertEquals(ContainerProtos.Result.SUCCESS, + responseOne.getResult()); + verify(context, times(0)) + .addContainerActionIfAbsent(Mockito.any(ContainerAction.class)); + containerData.setBytesUsed(Double.valueOf( + StorageUnit.MB.toBytes(950)).longValue()); + ContainerCommandResponseProto responseTwo = hddsDispatcher.dispatch( + getWriteChunkRequest(dd.getUuidString(), 1L, 2L)); + Assert.assertEquals(ContainerProtos.Result.SUCCESS, + responseTwo.getResult()); + verify(context, times(1)) + .addContainerActionIfAbsent(Mockito.any(ContainerAction.class)); + + } finally { + FileUtils.deleteDirectory(new File(testDir)); + } + + } + + // This method has to be removed once we move scm/TestUtils.java + // from server-scm project to container-service or to common project. 
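+ // It only fabricates a DatanodeDetails with a random UUID, localhost addresses, and zero-valued STANDALONE/RATIS/REST ports, which is all this dispatcher test needs.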
+ private static DatanodeDetails randomDatanodeDetails() { + DatanodeDetails.Port containerPort = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.STANDALONE, 0); + DatanodeDetails.Port ratisPort = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.RATIS, 0); + DatanodeDetails.Port restPort = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.REST, 0); + DatanodeDetails.Builder builder = DatanodeDetails.newBuilder(); + builder.setUuid(UUID.randomUUID().toString()) + .setHostName("localhost") + .setIpAddress("127.0.0.1") + .addPort(containerPort) + .addPort(ratisPort) + .addPort(restPort); + return builder.build(); + } + + private ContainerCommandRequestProto getWriteChunkRequest( + String datanodeId, Long containerId, Long localId) { + + ByteString data = ByteString.copyFrom( + UUID.randomUUID().toString().getBytes()); + ContainerProtos.ChunkInfo chunk = ContainerProtos.ChunkInfo + .newBuilder() + .setChunkName( + DigestUtils.md5Hex("dummy-key") + "_stream_" + + containerId + "_chunk_" + localId) + .setOffset(0) + .setLen(data.size()) + .build(); + + WriteChunkRequestProto.Builder writeChunkRequest = WriteChunkRequestProto + .newBuilder() + .setBlockID(new BlockID(containerId, localId) + .getDatanodeBlockIDProtobuf()) + .setChunkData(chunk) + .setData(data); + + return ContainerCommandRequestProto + .newBuilder() + .setContainerID(containerId) + .setCmdType(ContainerProtos.Type.WriteChunk) + .setTraceID(UUID.randomUUID().toString()) + .setDatanodeUuid(datanodeId) + .setWriteChunk(writeChunkRequest) + .build(); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/package-info.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/package-info.java new file mode 100644 index 00000000000..07c78c04989 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +/** + * Datanode container related test-cases. + */ +package org.apache.hadoop.ozone.container.common.impl; \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/interfaces/TestHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/interfaces/TestHandler.java new file mode 100644 index 00000000000..b6582952bbd --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/interfaces/TestHandler.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.interfaces; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.rules.Timeout; +import org.mockito.Mockito; + +/** + * Tests Handler interface. + */ +public class TestHandler { + @Rule + public TestRule timeout = new Timeout(300000); + + private Configuration conf; + private HddsDispatcher dispatcher; + private ContainerSet containerSet; + private VolumeSet volumeSet; + private Handler handler; + + @Before + public void setup() throws Exception { + this.conf = new Configuration(); + this.containerSet = Mockito.mock(ContainerSet.class); + this.volumeSet = Mockito.mock(VolumeSet.class); + + this.dispatcher = new HddsDispatcher(conf, containerSet, volumeSet, null); + } + + @Test + public void testGetKeyValueHandler() throws Exception { + Handler kvHandler = dispatcher.getHandler( + ContainerProtos.ContainerType.KeyValueContainer); + + Assert.assertTrue("getHandlerForContainerType returned incorrect handler", + (kvHandler instanceof KeyValueHandler)); + } + + @Test + public void testGetHandlerForInvalidContainerType() { + // When new ContainerProtos.ContainerType are added, increment the code + // for invalid enum. + ContainerProtos.ContainerType invalidContainerType = + ContainerProtos.ContainerType.forNumber(2); + + Assert.assertEquals("New ContainerType detected. 
Not an invalid " + + "containerType", invalidContainerType, null); + + Handler handler = dispatcher.getHandler(invalidContainerType); + Assert.assertEquals("Get Handler for Invalid ContainerType should " + + "return null.", handler, null); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java index 5fd9cf60479..811599f01a8 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java @@ -18,20 +18,22 @@ package org.apache.hadoop.ozone.container.common.report; import com.google.common.util.concurrent.ThreadFactoryBuilder; -import com.google.protobuf.Descriptors; import com.google.protobuf.GeneratedMessage; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsIdFactory; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.NodeReportProto; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.CommandStatus.Status; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.protocol.commands.CommandStatus; import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import org.mockito.Mockito; @@ -42,12 +44,20 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; /** * Test cases to test {@link ReportPublisher}. */ public class TestReportPublisher { + private static Configuration config; + + @BeforeClass + public static void setup() { + config = new OzoneConfiguration(); + } + /** * Dummy report publisher for testing. 
*/ @@ -93,9 +103,9 @@ public void testScheduledReport() throws InterruptedException { .setNameFormat("Unit test ReportManager Thread - %d").build()); publisher.init(dummyContext, executorService); Thread.sleep(150); - Assert.assertEquals(1, ((DummyReportPublisher)publisher).getReportCount); - Thread.sleep(150); - Assert.assertEquals(2, ((DummyReportPublisher)publisher).getReportCount); + Assert.assertEquals(1, ((DummyReportPublisher) publisher).getReportCount); + Thread.sleep(100); + Assert.assertEquals(2, ((DummyReportPublisher) publisher).getReportCount); executorService.shutdown(); } @@ -110,30 +120,55 @@ public void testPublishReport() throws InterruptedException { publisher.init(dummyContext, executorService); Thread.sleep(150); executorService.shutdown(); - Assert.assertEquals(1, ((DummyReportPublisher)publisher).getReportCount); + Assert.assertEquals(1, ((DummyReportPublisher) publisher).getReportCount); verify(dummyContext, times(1)).addReport(null); } @Test - public void testAddingReportToHeartbeat() { - Configuration conf = new OzoneConfiguration(); - ReportPublisherFactory factory = new ReportPublisherFactory(conf); - ReportPublisher nodeReportPublisher = factory.getPublisherFor( - NodeReportProto.class); - ReportPublisher containerReportPubliser = factory.getPublisherFor( - ContainerReportsProto.class); - GeneratedMessage nodeReport = nodeReportPublisher.getReport(); - GeneratedMessage containerReport = containerReportPubliser.getReport(); - SCMHeartbeatRequestProto.Builder heartbeatBuilder = - SCMHeartbeatRequestProto.newBuilder(); - heartbeatBuilder.setDatanodeDetails( - getDatanodeDetails().getProtoBufMessage()); - addReport(heartbeatBuilder, nodeReport); - addReport(heartbeatBuilder, containerReport); - SCMHeartbeatRequestProto heartbeat = heartbeatBuilder.build(); - Assert.assertTrue(heartbeat.hasNodeReport()); - Assert.assertTrue(heartbeat.hasContainerReport()); + public void testCommandStatusPublisher() throws InterruptedException { + StateContext dummyContext = Mockito.mock(StateContext.class); + ReportPublisher publisher = new CommandStatusReportPublisher(); + final Map cmdStatusMap = new ConcurrentHashMap<>(); + when(dummyContext.getCommandStatusMap()).thenReturn(cmdStatusMap); + publisher.setConf(config); + + ScheduledExecutorService executorService = HadoopExecutors + .newScheduledThreadPool(1, + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("Unit test ReportManager Thread - %d").build()); + publisher.init(dummyContext, executorService); + Assert.assertEquals(0, + ((CommandStatusReportPublisher) publisher).getReport() + .getCmdStatusCount()); + + // Insert to status object to state context map and then get the report. 
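+ // One PENDING and one EXECUTED status are added; the first report should carry both, while the follow-up report is expected to retain only the PENDING entry once the completed status has been published.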
+ CommandStatus obj1 = CommandStatus.CommandStatusBuilder.newBuilder() + .setCmdId(HddsIdFactory.getLongId()) + .setType(Type.deleteBlocksCommand) + .setStatus(Status.PENDING) + .build(); + CommandStatus obj2 = CommandStatus.CommandStatusBuilder.newBuilder() + .setCmdId(HddsIdFactory.getLongId()) + .setType(Type.closeContainerCommand) + .setStatus(Status.EXECUTED) + .build(); + cmdStatusMap.put(obj1.getCmdId(), obj1); + cmdStatusMap.put(obj2.getCmdId(), obj2); + Assert.assertEquals("Should publish report with 2 status objects", 2, + ((CommandStatusReportPublisher) publisher).getReport() + .getCmdStatusCount()); + Assert.assertEquals( + "Next report should have 1 status objects as command status o" + + "bjects are still in Pending state", + 1, ((CommandStatusReportPublisher) publisher).getReport() + .getCmdStatusCount()); + Assert.assertTrue( + "Next report should have 1 status objects as command status " + + "objects are still in Pending state", + ((CommandStatusReportPublisher) publisher).getReport() + .getCmdStatusList().get(0).getStatus().equals(Status.PENDING)); + executorService.shutdown(); } /** @@ -164,22 +199,4 @@ private static DatanodeDetails getDatanodeDetails() { return builder.build(); } - /** - * Adds the report to heartbeat. - * - * @param requestBuilder builder to which the report has to be added. - * @param report the report to be added. - */ - private static void addReport(SCMHeartbeatRequestProto.Builder requestBuilder, - GeneratedMessage report) { - String reportName = report.getDescriptorForType().getFullName(); - for (Descriptors.FieldDescriptor descriptor : - SCMHeartbeatRequestProto.getDescriptor().getFields()) { - String heartbeatFieldName = descriptor.getMessageType().getFullName(); - if (heartbeatFieldName.equals(reportName)) { - requestBuilder.setField(descriptor, report); - } - } - } - } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReplicateContainerCommandHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReplicateContainerCommandHandler.java new file mode 100644 index 00000000000..6529922fa59 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReplicateContainerCommandHandler.java @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.container.common.statemachine.commandhandler; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeoutException; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.replication.ContainerDownloader; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.TestGenericTestUtils; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +/** + * Test replication command handler. + */ +public class TestReplicateContainerCommandHandler { + + private static final String EXCEPTION_MESSAGE = "Oh my god"; + private ReplicateContainerCommandHandler handler; + private StubDownloader downloader; + private ReplicateContainerCommand command; + private List importedContainerIds; + + @Before + public void init() { + importedContainerIds = new ArrayList<>(); + + OzoneConfiguration conf = new OzoneConfiguration(); + ContainerSet containerSet = Mockito.mock(ContainerSet.class); + ContainerDispatcher containerDispatcher = + Mockito.mock(ContainerDispatcher.class); + + downloader = new StubDownloader(); + + handler = new ReplicateContainerCommandHandler(conf, containerSet, + containerDispatcher, downloader) { + @Override + protected void importContainer(long containerID, Path tarFilePath) { + importedContainerIds.add(containerID); + } + }; + + //the command + ArrayList datanodeDetails = new ArrayList<>(); + datanodeDetails.add(Mockito.mock(DatanodeDetails.class)); + datanodeDetails.add(Mockito.mock(DatanodeDetails.class)); + + command = new ReplicateContainerCommand(1L, datanodeDetails); + } + + @Test + public void handle() throws TimeoutException, InterruptedException { + //GIVEN + + //WHEN + handler.handle(command, null, Mockito.mock(StateContext.class), null); + + TestGenericTestUtils + .waitFor(() -> downloader.futureByContainers.size() == 1, 100, 2000); + + Assert.assertNotNull(downloader.futureByContainers.get(1L)); + downloader.futureByContainers.get(1L).complete(Paths.get("/tmp/test")); + + TestGenericTestUtils + .waitFor(() -> importedContainerIds.size() == 1, 100, 2000); + } + + @Test + public void handleWithErrors() throws TimeoutException, InterruptedException { + //GIVEN + GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer + .captureLogs(ReplicateContainerCommandHandler.LOG); + + //WHEN + handler.handle(command, null, Mockito.mock(StateContext.class), null); + + //THEN + TestGenericTestUtils + .waitFor(() -> downloader.futureByContainers.size() == 1, 100, 2000); + + Assert.assertNotNull(downloader.futureByContainers.get(1L)); + 
downloader.futureByContainers.get(1L) + .completeExceptionally(new IllegalArgumentException( + EXCEPTION_MESSAGE)); + + TestGenericTestUtils + .waitFor(() -> { + String output = logCapturer.getOutput(); + return output.contains("unsuccessful") && output + .contains(EXCEPTION_MESSAGE); }, + 100, + 2000); + } + + /** + * Can't handle a command if there are no source replicas. + */ + @Test(expected = IllegalArgumentException.class) + public void handleWithoutReplicas() + throws TimeoutException, InterruptedException { + //GIVEN + ReplicateContainerCommand commandWithoutReplicas = + new ReplicateContainerCommand(1L, new ArrayList<>()); + + //WHEN + handler + .handle(commandWithoutReplicas, + null, + Mockito.mock(StateContext.class), + null); + + } + private static class StubDownloader implements ContainerDownloader { + + private Map> futureByContainers = + new HashMap<>(); + + @Override + public void close() { + + } + + @Override + public CompletableFuture getContainerDataFromReplicas( + long containerId, List sources) { + CompletableFuture future = new CompletableFuture<>(); + futureByContainers.put(containerId, future); + return future; + } + } + +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/package-info.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/package-info.java new file mode 100644 index 00000000000..05ac76d1439 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for command handlers. + */ +package org.apache.hadoop.ozone.container.common.statemachine.commandhandler; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java new file mode 100644 index 00000000000..69a6a339bfc --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/TestHeartbeatEndpointTask.java @@ -0,0 +1,295 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.states.endpoint; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerAction; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.NodeReportProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.SCMHeartbeatResponseProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; +import org.apache.hadoop.ozone.container.common.statemachine + .DatanodeStateMachine; +import org.apache.hadoop.ozone.container.common.statemachine + .DatanodeStateMachine.DatanodeStates; +import org.apache.hadoop.ozone.container.common.statemachine + .EndpointStateMachine; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.protocolPB + .StorageContainerDatanodeProtocolClientSideTranslatorPB; + +import org.junit.Assert; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; + +import java.util.UUID; + +/** + * This class tests the functionality of HeartbeatEndpointTask. 
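+ * Each case mocks the SCM protocol translator, captures the outgoing SCMHeartbeatRequestProto with an ArgumentCaptor, and asserts which of the optional report fields were populated from the StateContext.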
+ */ +public class TestHeartbeatEndpointTask { + + + @Test + public void testheartbeatWithoutReports() throws Exception { + StorageContainerDatanodeProtocolClientSideTranslatorPB scm = + Mockito.mock( + StorageContainerDatanodeProtocolClientSideTranslatorPB.class); + ArgumentCaptor argument = ArgumentCaptor + .forClass(SCMHeartbeatRequestProto.class); + Mockito.when(scm.sendHeartbeat(argument.capture())) + .thenAnswer(invocation -> + SCMHeartbeatResponseProto.newBuilder() + .setDatanodeUUID( + ((SCMHeartbeatRequestProto)invocation.getArgument(0)) + .getDatanodeDetails().getUuid()) + .build()); + + HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask(scm); + endpointTask.call(); + SCMHeartbeatRequestProto heartbeat = argument.getValue(); + Assert.assertTrue(heartbeat.hasDatanodeDetails()); + Assert.assertFalse(heartbeat.hasNodeReport()); + Assert.assertFalse(heartbeat.hasContainerReport()); + Assert.assertFalse(heartbeat.hasCommandStatusReport()); + Assert.assertFalse(heartbeat.hasContainerActions()); + } + + @Test + public void testheartbeatWithNodeReports() throws Exception { + Configuration conf = new OzoneConfiguration(); + StateContext context = new StateContext(conf, DatanodeStates.RUNNING, + Mockito.mock(DatanodeStateMachine.class)); + + StorageContainerDatanodeProtocolClientSideTranslatorPB scm = + Mockito.mock( + StorageContainerDatanodeProtocolClientSideTranslatorPB.class); + ArgumentCaptor argument = ArgumentCaptor + .forClass(SCMHeartbeatRequestProto.class); + Mockito.when(scm.sendHeartbeat(argument.capture())) + .thenAnswer(invocation -> + SCMHeartbeatResponseProto.newBuilder() + .setDatanodeUUID( + ((SCMHeartbeatRequestProto)invocation.getArgument(0)) + .getDatanodeDetails().getUuid()) + .build()); + + HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask( + conf, context, scm); + context.addReport(NodeReportProto.getDefaultInstance()); + endpointTask.call(); + SCMHeartbeatRequestProto heartbeat = argument.getValue(); + Assert.assertTrue(heartbeat.hasDatanodeDetails()); + Assert.assertTrue(heartbeat.hasNodeReport()); + Assert.assertFalse(heartbeat.hasContainerReport()); + Assert.assertFalse(heartbeat.hasCommandStatusReport()); + Assert.assertFalse(heartbeat.hasContainerActions()); + } + + @Test + public void testheartbeatWithContainerReports() throws Exception { + Configuration conf = new OzoneConfiguration(); + StateContext context = new StateContext(conf, DatanodeStates.RUNNING, + Mockito.mock(DatanodeStateMachine.class)); + + StorageContainerDatanodeProtocolClientSideTranslatorPB scm = + Mockito.mock( + StorageContainerDatanodeProtocolClientSideTranslatorPB.class); + ArgumentCaptor argument = ArgumentCaptor + .forClass(SCMHeartbeatRequestProto.class); + Mockito.when(scm.sendHeartbeat(argument.capture())) + .thenAnswer(invocation -> + SCMHeartbeatResponseProto.newBuilder() + .setDatanodeUUID( + ((SCMHeartbeatRequestProto)invocation.getArgument(0)) + .getDatanodeDetails().getUuid()) + .build()); + + HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask( + conf, context, scm); + context.addReport(ContainerReportsProto.getDefaultInstance()); + endpointTask.call(); + SCMHeartbeatRequestProto heartbeat = argument.getValue(); + Assert.assertTrue(heartbeat.hasDatanodeDetails()); + Assert.assertFalse(heartbeat.hasNodeReport()); + Assert.assertTrue(heartbeat.hasContainerReport()); + Assert.assertFalse(heartbeat.hasCommandStatusReport()); + Assert.assertFalse(heartbeat.hasContainerActions()); + } + + @Test + public void testheartbeatWithCommandStatusReports() 
throws Exception { + Configuration conf = new OzoneConfiguration(); + StateContext context = new StateContext(conf, DatanodeStates.RUNNING, + Mockito.mock(DatanodeStateMachine.class)); + + StorageContainerDatanodeProtocolClientSideTranslatorPB scm = + Mockito.mock( + StorageContainerDatanodeProtocolClientSideTranslatorPB.class); + ArgumentCaptor argument = ArgumentCaptor + .forClass(SCMHeartbeatRequestProto.class); + Mockito.when(scm.sendHeartbeat(argument.capture())) + .thenAnswer(invocation -> + SCMHeartbeatResponseProto.newBuilder() + .setDatanodeUUID( + ((SCMHeartbeatRequestProto)invocation.getArgument(0)) + .getDatanodeDetails().getUuid()) + .build()); + + HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask( + conf, context, scm); + context.addReport(CommandStatusReportsProto.getDefaultInstance()); + endpointTask.call(); + SCMHeartbeatRequestProto heartbeat = argument.getValue(); + Assert.assertTrue(heartbeat.hasDatanodeDetails()); + Assert.assertFalse(heartbeat.hasNodeReport()); + Assert.assertFalse(heartbeat.hasContainerReport()); + Assert.assertTrue(heartbeat.hasCommandStatusReport()); + Assert.assertFalse(heartbeat.hasContainerActions()); + } + + @Test + public void testheartbeatWithContainerActions() throws Exception { + Configuration conf = new OzoneConfiguration(); + StateContext context = new StateContext(conf, DatanodeStates.RUNNING, + Mockito.mock(DatanodeStateMachine.class)); + + StorageContainerDatanodeProtocolClientSideTranslatorPB scm = + Mockito.mock( + StorageContainerDatanodeProtocolClientSideTranslatorPB.class); + ArgumentCaptor argument = ArgumentCaptor + .forClass(SCMHeartbeatRequestProto.class); + Mockito.when(scm.sendHeartbeat(argument.capture())) + .thenAnswer(invocation -> + SCMHeartbeatResponseProto.newBuilder() + .setDatanodeUUID( + ((SCMHeartbeatRequestProto)invocation.getArgument(0)) + .getDatanodeDetails().getUuid()) + .build()); + + HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask( + conf, context, scm); + context.addContainerAction(getContainerAction()); + endpointTask.call(); + SCMHeartbeatRequestProto heartbeat = argument.getValue(); + Assert.assertTrue(heartbeat.hasDatanodeDetails()); + Assert.assertFalse(heartbeat.hasNodeReport()); + Assert.assertFalse(heartbeat.hasContainerReport()); + Assert.assertFalse(heartbeat.hasCommandStatusReport()); + Assert.assertTrue(heartbeat.hasContainerActions()); + } + + @Test + public void testheartbeatWithAllReports() throws Exception { + Configuration conf = new OzoneConfiguration(); + StateContext context = new StateContext(conf, DatanodeStates.RUNNING, + Mockito.mock(DatanodeStateMachine.class)); + + StorageContainerDatanodeProtocolClientSideTranslatorPB scm = + Mockito.mock( + StorageContainerDatanodeProtocolClientSideTranslatorPB.class); + ArgumentCaptor argument = ArgumentCaptor + .forClass(SCMHeartbeatRequestProto.class); + Mockito.when(scm.sendHeartbeat(argument.capture())) + .thenAnswer(invocation -> + SCMHeartbeatResponseProto.newBuilder() + .setDatanodeUUID( + ((SCMHeartbeatRequestProto)invocation.getArgument(0)) + .getDatanodeDetails().getUuid()) + .build()); + + HeartbeatEndpointTask endpointTask = getHeartbeatEndpointTask( + conf, context, scm); + context.addReport(NodeReportProto.getDefaultInstance()); + context.addReport(ContainerReportsProto.getDefaultInstance()); + context.addReport(CommandStatusReportsProto.getDefaultInstance()); + context.addContainerAction(getContainerAction()); + endpointTask.call(); + SCMHeartbeatRequestProto heartbeat = argument.getValue(); + 
Assert.assertTrue(heartbeat.hasDatanodeDetails()); + Assert.assertTrue(heartbeat.hasNodeReport()); + Assert.assertTrue(heartbeat.hasContainerReport()); + Assert.assertTrue(heartbeat.hasCommandStatusReport()); + Assert.assertTrue(heartbeat.hasContainerActions()); + } + + /** + * Creates HeartbeatEndpointTask for the given StorageContainerManager proxy. + * + * @param proxy StorageContainerDatanodeProtocolClientSideTranslatorPB + * + * @return HeartbeatEndpointTask + */ + private HeartbeatEndpointTask getHeartbeatEndpointTask( + StorageContainerDatanodeProtocolClientSideTranslatorPB proxy) { + Configuration conf = new OzoneConfiguration(); + StateContext context = new StateContext(conf, DatanodeStates.RUNNING, + Mockito.mock(DatanodeStateMachine.class)); + return getHeartbeatEndpointTask(conf, context, proxy); + + } + + /** + * Creates HeartbeatEndpointTask with the given conf, context and + * StorageContainerManager client side proxy. + * + * @param conf Configuration + * @param context StateContext + * @param proxy StorageContainerDatanodeProtocolClientSideTranslatorPB + * + * @return HeartbeatEndpointTask + */ + private HeartbeatEndpointTask getHeartbeatEndpointTask( + Configuration conf, + StateContext context, + StorageContainerDatanodeProtocolClientSideTranslatorPB proxy) { + DatanodeDetails datanodeDetails = DatanodeDetails.newBuilder() + .setUuid(UUID.randomUUID().toString()) + .setHostName("localhost") + .setIpAddress("127.0.0.1") + .build(); + EndpointStateMachine endpointStateMachine = Mockito + .mock(EndpointStateMachine.class); + Mockito.when(endpointStateMachine.getEndPoint()).thenReturn(proxy); + return HeartbeatEndpointTask.newBuilder() + .setConfig(conf) + .setDatanodeDetails(datanodeDetails) + .setContext(context) + .setEndpointStateMachine(endpointStateMachine) + .build(); + } + + private ContainerAction getContainerAction() { + ContainerAction.Builder builder = ContainerAction.newBuilder(); + builder.setContainerID(1L) + .setAction(ContainerAction.Action.CLOSE) + .setReason(ContainerAction.Reason.CONTAINER_FULL); + return builder.build(); + } +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/background/package-info.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/package-info.java similarity index 91% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/background/package-info.java rename to hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/package-info.java index a9e202e35e1..d120a5cd4b7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/background/package-info.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/states/endpoint/package-info.java @@ -15,4 +15,4 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.ozone.container.common.statemachine.background; \ No newline at end of file +package org.apache.hadoop.ozone.container.common.states.endpoint; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java new file mode 100644 index 00000000000..7755345d42a --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestHddsVolume.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.GetSpaceUsed; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.ozone.container.common.helpers.DatanodeVersionFile; +import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; +import static org.junit.Assert.*; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; + +import java.io.File; +import java.util.Properties; +import java.util.UUID; + +/** + * Unit tests for {@link HddsVolume}. + */ +public class TestHddsVolume { + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private static final String DATANODE_UUID = UUID.randomUUID().toString(); + private static final String CLUSTER_ID = UUID.randomUUID().toString(); + private static final Configuration CONF = new Configuration(); + private static final String DU_CACHE_FILE = "scmUsed"; + + private File rootDir; + private HddsVolume volume; + private File versionFile; + + @Before + public void setup() throws Exception { + rootDir = new File(folder.getRoot(), HddsVolume.HDDS_VOLUME_DIR); + volume = new HddsVolume.Builder(folder.getRoot().getPath()) + .datanodeUuid(DATANODE_UUID) + .conf(CONF) + .build(); + versionFile = HddsVolumeUtil.getVersionFile(rootDir); + } + + @Test + public void testHddsVolumeInitialization() throws Exception { + + // The initial state of HddsVolume should be "NOT_FORMATTED" when + // clusterID is not specified and the version file should not be written + // to disk. + assertTrue(volume.getClusterID() == null); + assertEquals(volume.getStorageType(), StorageType.DEFAULT); + assertEquals(volume.getStorageState(), + HddsVolume.VolumeState.NOT_FORMATTED); + assertFalse("Version file should not be created when clusterID is not " + + "known.", versionFile.exists()); + + + // Format the volume with clusterID. 
+ volume.format(CLUSTER_ID); + + // The state of HddsVolume after formatting with clusterID should be + // NORMAL and the version file should exist. + assertTrue("Volume format should create Version file", + versionFile.exists()); + assertEquals(volume.getClusterID(), CLUSTER_ID); + assertEquals(volume.getStorageState(), HddsVolume.VolumeState.NORMAL); + } + + @Test + public void testReadPropertiesFromVersionFile() throws Exception { + volume.format(CLUSTER_ID); + + Properties properties = DatanodeVersionFile.readFrom(versionFile); + + String storageID = HddsVolumeUtil.getStorageID(properties, versionFile); + String clusterID = HddsVolumeUtil.getClusterID( + properties, versionFile, CLUSTER_ID); + String datanodeUuid = HddsVolumeUtil.getDatanodeUUID( + properties, versionFile, DATANODE_UUID); + long cTime = HddsVolumeUtil.getCreationTime( + properties, versionFile); + int layoutVersion = HddsVolumeUtil.getLayOutVersion( + properties, versionFile); + + assertEquals(volume.getStorageID(), storageID); + assertEquals(volume.getClusterID(), clusterID); + assertEquals(volume.getDatanodeUuid(), datanodeUuid); + assertEquals(volume.getCTime(), cTime); + assertEquals(volume.getLayoutVersion(), layoutVersion); + } + + @Test + public void testShutdown() throws Exception{ + // Return dummy value > 0 for scmUsage so that scm cache file is written + // during shutdown. + GetSpaceUsed scmUsageMock = Mockito.mock(GetSpaceUsed.class); + volume.setScmUsageForTesting(scmUsageMock); + Mockito.when(scmUsageMock.getUsed()).thenReturn(Long.valueOf(100)); + + assertTrue("Available volume should be positive", + volume.getAvailable() > 0); + + // Shutdown the volume. + volume.shutdown(); + + // Volume state should be "NON_EXISTENT" when volume is shutdown. + assertEquals(volume.getStorageState(), + HddsVolume.VolumeState.NON_EXISTENT); + + // Volume should save scmUsed cache file once volume is shutdown + File scmUsedFile = new File(folder.getRoot(), DU_CACHE_FILE); + System.out.println("scmUsedFile: " + scmUsedFile); + assertTrue("scmUsed cache file should be saved on shutdown", + scmUsedFile.exists()); + + try { + // Volume.getAvailable() should fail with NullPointerException as usage + // is shutdown. + volume.getAvailable(); + fail("HddsVolume#shutdown test failed"); + } catch (Exception ex){ + assertTrue(ex instanceof NullPointerException); + } + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestRoundRobinVolumeChoosingPolicy.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestRoundRobinVolumeChoosingPolicy.java new file mode 100644 index 00000000000..80594d35245 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestRoundRobinVolumeChoosingPolicy.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; + +import org.apache.hadoop.fs.GetSpaceUsed; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; +import org.apache.hadoop.util.ReflectionUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.List; +import java.util.UUID; + +/** + * Tests {@link RoundRobinVolumeChoosingPolicy}. + */ +public class TestRoundRobinVolumeChoosingPolicy { + + private RoundRobinVolumeChoosingPolicy policy; + private List volumes; + + private final String baseDir = MiniDFSCluster.getBaseDirectory(); + private final String volume1 = baseDir + "disk1"; + private final String volume2 = baseDir + "disk2"; + private static final String DUMMY_IP_ADDR = "0.0.0.0"; + + @Before + public void setup() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + String dataDirKey = volume1 + "," + volume2; + conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDirKey); + policy = ReflectionUtils.newInstance( + RoundRobinVolumeChoosingPolicy.class, null); + VolumeSet volumeSet = new VolumeSet(UUID.randomUUID().toString(), conf); + volumes = volumeSet.getVolumesList(); + } + + @Test + public void testRRVolumeChoosingPolicy() throws Exception { + HddsVolume hddsVolume1 = volumes.get(0); + HddsVolume hddsVolume2 = volumes.get(1); + + // Set available space in volume1 to 100L + setAvailableSpace(hddsVolume1, 100L); + + // Set available space in volume1 to 200L + setAvailableSpace(hddsVolume2, 200L); + + Assert.assertEquals(100L, hddsVolume1.getAvailable()); + Assert.assertEquals(200L, hddsVolume2.getAvailable()); + + // Test two rounds of round-robin choosing + Assert.assertEquals(hddsVolume1, policy.chooseVolume(volumes, 0)); + Assert.assertEquals(hddsVolume2, policy.chooseVolume(volumes, 0)); + Assert.assertEquals(hddsVolume1, policy.chooseVolume(volumes, 0)); + Assert.assertEquals(hddsVolume2, policy.chooseVolume(volumes, 0)); + + // The first volume has only 100L space, so the policy should + // choose the second one in case we ask for more. + Assert.assertEquals(hddsVolume2, + policy.chooseVolume(volumes, 150)); + + // Fail if no volume has enough space available + try { + policy.chooseVolume(volumes, Long.MAX_VALUE); + Assert.fail(); + } catch (IOException e) { + // Passed. 
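The choosing policy implementation itself is not part of this hunk. Purely as a sketch of the behaviour the assertions above pin down (rotate through the volumes from the last used index, skip any volume whose getAvailable() is smaller than the requested container size, and fail with DiskOutOfSpaceException after a full pass), it behaves roughly like the code below; this is not the actual RoundRobinVolumeChoosingPolicy source, and the exception message is only an example.

    // Sketch only; assumes the policy keeps a nextVolumeIndex field between calls.
    private int nextVolumeIndex = 0;

    HddsVolume chooseVolumeSketch(List<HddsVolume> volumes, long containerSize)
        throws IOException {
      for (int attempt = 0; attempt < volumes.size(); attempt++) {
        HddsVolume candidate = volumes.get(nextVolumeIndex);
        nextVolumeIndex = (nextVolumeIndex + 1) % volumes.size();
        if (candidate.getAvailable() >= containerSize) {
          return candidate;   // round-robin over volumes with enough space
        }
      }
      throw new DiskOutOfSpaceException(
          "no volume has " + containerSize + " B available");
    }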
+ } + } + + @Test + public void testRRPolicyExceptionMessage() throws Exception { + HddsVolume hddsVolume1 = volumes.get(0); + HddsVolume hddsVolume2 = volumes.get(1); + + // Set available space in volume1 to 100L + setAvailableSpace(hddsVolume1, 100L); + + // Set available space in volume1 to 200L + setAvailableSpace(hddsVolume2, 200L); + + int blockSize = 300; + try { + policy.chooseVolume(volumes, blockSize); + Assert.fail("expected to throw DiskOutOfSpaceException"); + } catch(DiskOutOfSpaceException e) { + Assert.assertEquals("Not returnig the expected message", + "Out of space: The volume with the most available space (=" + 200 + + " B) is less than the container size (=" + blockSize + " B).", + e.getMessage()); + } + } + + private void setAvailableSpace(HddsVolume hddsVolume, long availableSpace) + throws IOException { + GetSpaceUsed scmUsageMock = Mockito.mock(GetSpaceUsed.class); + hddsVolume.setScmUsageForTesting(scmUsageMock); + // Set used space to capacity -requiredAvailableSpace so that + // getAvailable() returns us the specified availableSpace. + Mockito.when(scmUsageMock.getUsed()).thenReturn( + (hddsVolume.getCapacity() - availableSpace)); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeSet.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeSet.java new file mode 100644 index 00000000000..fca68b19acc --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestVolumeSet.java @@ -0,0 +1,254 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.common.volume; + +import java.io.IOException; +import org.apache.commons.io.FileUtils; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.ozone.container.common.utils.HddsVolumeUtil; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.LogCapturer; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; +import static org.apache.hadoop.ozone.container.common.volume.HddsVolume + .HDDS_VOLUME_DIR; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +/** + * Tests {@link VolumeSet} operations. + */ +public class TestVolumeSet { + + private OzoneConfiguration conf; + private VolumeSet volumeSet; + private final String baseDir = MiniDFSCluster.getBaseDirectory(); + private final String volume1 = baseDir + "disk1"; + private final String volume2 = baseDir + "disk2"; + private final List volumes = new ArrayList<>(); + + private static final String DUMMY_IP_ADDR = "0.0.0.0"; + + private void initializeVolumeSet() throws Exception { + volumeSet = new VolumeSet(UUID.randomUUID().toString(), conf); + } + + @Rule + public Timeout testTimeout = new Timeout(300_000); + + @Before + public void setup() throws Exception { + conf = new OzoneConfiguration(); + String dataDirKey = volume1 + "," + volume2; + volumes.add(volume1); + volumes.add(volume2); + conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDirKey); + initializeVolumeSet(); + } + + @After + public void shutdown() throws IOException { + // Delete the hdds volume root dir + List hddsVolumes = new ArrayList<>(); + hddsVolumes.addAll(volumeSet.getVolumesList()); + hddsVolumes.addAll(volumeSet.getFailedVolumesList()); + + for (HddsVolume volume : hddsVolumes) { + FileUtils.deleteDirectory(volume.getHddsRootDir()); + } + volumeSet.shutdown(); + + FileUtil.fullyDelete(new File(baseDir)); + } + + private boolean checkVolumeExistsInVolumeSet(String volume) { + for (HddsVolume hddsVolume : volumeSet.getVolumesList()) { + if (hddsVolume.getHddsRootDir().getPath().equals( + HddsVolumeUtil.getHddsRoot(volume))) { + return true; + } + } + return false; + } + + @Test + public void testVolumeSetInitialization() throws Exception { + + List volumesList = volumeSet.getVolumesList(); + + // VolumeSet initialization should add volume1 and volume2 to VolumeSet + assertEquals("VolumeSet intialization is incorrect", + volumesList.size(), volumes.size()); + assertTrue("VolumeSet not initailized correctly", + checkVolumeExistsInVolumeSet(volume1)); + assertTrue("VolumeSet not initailized correctly", + checkVolumeExistsInVolumeSet(volume2)); + } + + @Test + public void testAddVolume() { + + assertEquals(2, volumeSet.getVolumesList().size()); + + // Add a volume to VolumeSet + String volume3 = baseDir + "disk3"; + boolean success = volumeSet.addVolume(volume3); + + assertTrue(success); + assertEquals(3, 
volumeSet.getVolumesList().size()); + assertTrue("AddVolume did not add requested volume to VolumeSet", + checkVolumeExistsInVolumeSet(volume3)); + } + + @Test + public void testFailVolume() throws Exception { + + //Fail a volume + volumeSet.failVolume(volume1); + + // Failed volume should not show up in the volumeList + assertEquals(1, volumeSet.getVolumesList().size()); + + // Failed volume should be added to FailedVolumeList + assertEquals("Failed volume not present in FailedVolumeMap", + 1, volumeSet.getFailedVolumesList().size()); + assertEquals("Failed Volume list did not match", + HddsVolumeUtil.getHddsRoot(volume1), + volumeSet.getFailedVolumesList().get(0).getHddsRootDir().getPath()); + assertTrue(volumeSet.getFailedVolumesList().get(0).isFailed()); + + // Failed volume should not exist in VolumeMap + Path volume1Path = new Path(volume1); + assertFalse(volumeSet.getVolumeMap().containsKey(volume1Path)); + } + + @Test + public void testRemoveVolume() throws Exception { + + assertEquals(2, volumeSet.getVolumesList().size()); + + // Remove a volume from VolumeSet + volumeSet.removeVolume(volume1); + assertEquals(1, volumeSet.getVolumesList().size()); + + // Attempting to remove a volume which does not exist in VolumeSet should + // log a warning. + LogCapturer logs = LogCapturer.captureLogs( + LogFactory.getLog(VolumeSet.class)); + volumeSet.removeVolume(volume1); + assertEquals(1, volumeSet.getVolumesList().size()); + String expectedLogMessage = "Volume : " + + HddsVolumeUtil.getHddsRoot(volume1) + " does not exist in VolumeSet"; + assertTrue("Log output does not contain expected log message: " + + expectedLogMessage, logs.getOutput().contains(expectedLogMessage)); + } + + @Test + public void testVolumeInInconsistentState() throws Exception { + assertEquals(2, volumeSet.getVolumesList().size()); + + // Add a volume to VolumeSet + String volume3 = baseDir + "disk3"; + + // Create the root volume dir and create a sub-directory within it. + File newVolume = new File(volume3, HDDS_VOLUME_DIR); + System.out.println("new volume root: " + newVolume); + newVolume.mkdirs(); + assertTrue("Failed to create new volume root", newVolume.exists()); + File dataDir = new File(newVolume, "chunks"); + dataDir.mkdirs(); + assertTrue(dataDir.exists()); + + // The new volume is in an inconsistent state as the root dir is + // non-empty but the version file does not exist. Add Volume should + // return false. + boolean success = volumeSet.addVolume(volume3); + + assertFalse(success); + assertEquals(2, volumeSet.getVolumesList().size()); + assertTrue("AddVolume should fail for an inconsistent volume", + !checkVolumeExistsInVolumeSet(volume3)); + + // Delete volume3 + File volume = new File(volume3); + FileUtils.deleteDirectory(volume); + } + + @Test + public void testShutdown() throws Exception { + List volumesList = volumeSet.getVolumesList(); + + volumeSet.shutdown(); + + // Verify that the volumes are shutdown and the volumeUsage is set to null. + for (HddsVolume volume : volumesList) { + Assert.assertNull(volume.getVolumeInfo().getUsageForTesting()); + try { + // getAvailable() should throw null pointer exception as usage is null. + volume.getAvailable(); + fail("Volume shutdown failed."); + } catch (NullPointerException ex) { + // Do Nothing. Exception is expected. 
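testRemoveVolume above asserts on a warning rather than on a return value, using GenericTestUtils.LogCapturer. The capture idiom on its own, with the expected substring shortened for illustration:

    // Attach a capturer to the class's logger, trigger the code path, then
    // assert on the captured output.
    LogCapturer logs = LogCapturer.captureLogs(LogFactory.getLog(VolumeSet.class));
    volumeSet.removeVolume(volume1);   // volume1 was already removed above
    assertTrue("expected a 'does not exist in VolumeSet' warning",
        logs.getOutput().contains("does not exist in VolumeSet"));
    logs.stopCapturing();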
+ } + } + } + + @Test + public void testFailVolumes() throws Exception{ + VolumeSet volSet = null; + File readOnlyVolumePath = new File(baseDir); + //Set to readonly, so that this volume will be failed + readOnlyVolumePath.setReadOnly(); + File volumePath = GenericTestUtils.getRandomizedTestDir(); + OzoneConfiguration ozoneConfig = new OzoneConfiguration(); + ozoneConfig.set(HDDS_DATANODE_DIR_KEY, readOnlyVolumePath.getAbsolutePath() + + "," + volumePath.getAbsolutePath()); + volSet = new VolumeSet(UUID.randomUUID().toString(), ozoneConfig); + assertTrue(volSet.getFailedVolumesList().size() == 1); + assertEquals(readOnlyVolumePath, volSet.getFailedVolumesList().get(0) + .getHddsRootDir()); + + //Set back to writable + try { + readOnlyVolumePath.setWritable(true); + } finally { + FileUtil.fullyDelete(volumePath); + } + + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestChunkManagerImpl.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestChunkManagerImpl.java new file mode 100644 index 00000000000..9664052d07e --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestChunkManagerImpl.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.keyvalue.helpers.ChunkUtils; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.VolumeIOStats; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.impl.ChunkManagerImpl; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; + +import java.io.File; +import java.util.Arrays; +import java.util.UUID; + +import static org.junit.Assert.*; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; + +/** + * This class is used to test ChunkManager operations. 
+ */ +public class TestChunkManagerImpl { + + private OzoneConfiguration config; + private String scmId = UUID.randomUUID().toString(); + private VolumeSet volumeSet; + private RoundRobinVolumeChoosingPolicy volumeChoosingPolicy; + private HddsVolume hddsVolume; + private KeyValueContainerData keyValueContainerData; + private KeyValueContainer keyValueContainer; + private BlockID blockID; + private ChunkManagerImpl chunkManager; + private ChunkInfo chunkInfo; + private byte[] data; + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + @Before + public void setUp() throws Exception { + config = new OzoneConfiguration(); + hddsVolume = new HddsVolume.Builder(folder.getRoot() + .getAbsolutePath()).conf(config).datanodeUuid(UUID.randomUUID() + .toString()).build(); + + volumeSet = mock(VolumeSet.class); + + volumeChoosingPolicy = mock(RoundRobinVolumeChoosingPolicy.class); + Mockito.when(volumeChoosingPolicy.chooseVolume(anyList(), anyLong())) + .thenReturn(hddsVolume); + + keyValueContainerData = new KeyValueContainerData(1L, + (long) StorageUnit.GB.toBytes(5)); + + keyValueContainer = new KeyValueContainer(keyValueContainerData, config); + + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + + data = "testing write chunks".getBytes(); + // Creating KeyData + blockID = new BlockID(1L, 1L); + chunkInfo = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), 0), 0, data.length); + + // Create a ChunkManager object. + chunkManager = new ChunkManagerImpl(); + + } + + @Test + public void testWriteChunkStageWriteAndCommit() throws Exception { + //As in Setup, we try to create container, these paths should exist. + assertTrue(keyValueContainerData.getChunksPath() != null); + File chunksPath = new File(keyValueContainerData.getChunksPath()); + assertTrue(chunksPath.exists()); + // Initially chunks folder should be empty. + assertTrue(chunksPath.listFiles().length == 0); + + // As no chunks are written to the volume writeBytes should be 0 + checkWriteIOStats(0, 0); + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.WRITE_DATA); + // Now a chunk file is being written with Stage WRITE_DATA, so it should + // create a temporary chunk file. + assertTrue(chunksPath.listFiles().length == 1); + + File chunkFile = ChunkUtils.getChunkFile(keyValueContainerData, chunkInfo); + File tempChunkFile = new File(chunkFile.getParent(), + chunkFile.getName() + + OzoneConsts.CONTAINER_CHUNK_NAME_DELIMITER + + OzoneConsts.CONTAINER_TEMPORARY_CHUNK_PREFIX); + + // As chunk write stage is WRITE_DATA, temp chunk file will be created. + assertTrue(tempChunkFile.exists()); + + checkWriteIOStats(data.length, 1); + + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.COMMIT_DATA); + + checkWriteIOStats(data.length, 1); + + // Old temp file should have been renamed to chunk file. + assertTrue(chunksPath.listFiles().length == 1); + + // As commit happened, chunk file should exist. 
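From a caller's point of view, testWriteChunkStageWriteAndCommit above exercises a two-step protocol. The condensed sketch below only restates that sequence using calls that appear in this test class; the stage names and signatures come from the test, nothing else is implied.

    // WRITE_DATA lands the bytes in a temporary chunk file; COMMIT_DATA renames
    // it to the final chunk file; COMBINED (used by the later tests) does both.
    chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data,
        ContainerProtos.Stage.WRITE_DATA);    // temporary file only
    chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data,
        ContainerProtos.Stage.COMMIT_DATA);   // rename to the final chunk file
    byte[] readBack = chunkManager.readChunk(keyValueContainer, blockID, chunkInfo);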
+ assertTrue(chunkFile.exists()); + assertFalse(tempChunkFile.exists()); + + } + + @Test + public void testWriteChunkIncorrectLength() throws Exception { + try { + long randomLength = 200L; + chunkInfo = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), 0), 0, randomLength); + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.WRITE_DATA); + fail("testWriteChunkIncorrectLength failed"); + } catch (StorageContainerException ex) { + // As we got an exception, writeBytes should be 0. + checkWriteIOStats(0, 0); + GenericTestUtils.assertExceptionContains("data array does not match " + + "the length ", ex); + assertEquals(ContainerProtos.Result.INVALID_WRITE_SIZE, ex.getResult()); + } + } + + @Test + public void testWriteChunkStageCombinedData() throws Exception { + //As in Setup, we try to create container, these paths should exist. + assertTrue(keyValueContainerData.getChunksPath() != null); + File chunksPath = new File(keyValueContainerData.getChunksPath()); + assertTrue(chunksPath.exists()); + // Initially chunks folder should be empty. + assertTrue(chunksPath.listFiles().length == 0); + checkWriteIOStats(0, 0); + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.COMBINED); + // Now a chunk file is being written with Stage COMBINED_DATA, so it should + // create a chunk file. + assertTrue(chunksPath.listFiles().length == 1); + File chunkFile = ChunkUtils.getChunkFile(keyValueContainerData, chunkInfo); + assertTrue(chunkFile.exists()); + checkWriteIOStats(data.length, 1); + } + + @Test + public void testReadChunk() throws Exception { + checkWriteIOStats(0, 0); + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.COMBINED); + checkWriteIOStats(data.length, 1); + checkReadIOStats(0, 0); + byte[] expectedData = chunkManager.readChunk(keyValueContainer, blockID, + chunkInfo); + assertEquals(expectedData.length, data.length); + assertTrue(Arrays.equals(expectedData, data)); + checkReadIOStats(data.length, 1); + } + + @Test + public void testDeleteChunk() throws Exception { + File chunksPath = new File(keyValueContainerData.getChunksPath()); + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.COMBINED); + assertTrue(chunksPath.listFiles().length == 1); + chunkManager.deleteChunk(keyValueContainer, blockID, chunkInfo); + assertTrue(chunksPath.listFiles().length == 0); + } + + @Test + public void testDeleteChunkUnsupportedRequest() throws Exception { + try { + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.COMBINED); + long randomLength = 200L; + chunkInfo = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), 0), 0, randomLength); + chunkManager.deleteChunk(keyValueContainer, blockID, chunkInfo); + fail("testDeleteChunkUnsupportedRequest"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("Not Supported Operation.", ex); + assertEquals(ContainerProtos.Result.UNSUPPORTED_REQUEST, ex.getResult()); + } + } + + @Test + public void testWriteChunkChecksumMismatch() throws Exception { + try { + chunkInfo = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), 0), 0, data.length); + //Setting checksum to some value. 
+ chunkInfo.setChecksum("some garbage"); + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.COMBINED); + fail("testWriteChunkChecksumMismatch failed"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("Checksum mismatch.", ex); + assertEquals(ContainerProtos.Result.CHECKSUM_MISMATCH, ex.getResult()); + } + } + + @Test + public void testReadChunkFileNotExists() throws Exception { + try { + // trying to read a chunk, where chunk file does not exist + byte[] expectedData = chunkManager.readChunk(keyValueContainer, blockID, + chunkInfo); + fail("testReadChunkFileNotExists failed"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("Unable to find the chunk " + + "file.", ex); + assertEquals(ContainerProtos.Result.UNABLE_TO_FIND_CHUNK, ex.getResult()); + } + } + + @Test + public void testWriteAndReadChunkMultipleTimes() throws Exception { + for (int i=0; i<100; i++) { + chunkInfo = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), i), 0, data.length); + chunkManager.writeChunk(keyValueContainer, blockID, chunkInfo, data, + ContainerProtos.Stage.COMBINED); + } + checkWriteIOStats(data.length*100, 100); + assertTrue(hddsVolume.getVolumeIOStats().getWriteTime() > 0); + + for (int i=0; i<100; i++) { + chunkInfo = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), i), 0, data.length); + chunkManager.readChunk(keyValueContainer, blockID, chunkInfo); + } + checkReadIOStats(data.length*100, 100); + assertTrue(hddsVolume.getVolumeIOStats().getReadTime() > 0); + } + + + /** + * Check WriteIO stats. + * @param length + * @param opCount + */ + private void checkWriteIOStats(long length, long opCount) { + VolumeIOStats volumeIOStats = hddsVolume.getVolumeIOStats(); + assertEquals(length, volumeIOStats.getWriteBytes()); + assertEquals(opCount, volumeIOStats.getWriteOpCount()); + } + + /** + * Check ReadIO stats. + * @param length + * @param opCount + */ + private void checkReadIOStats(long length, long opCount) { + VolumeIOStats volumeIOStats = hddsVolume.getVolumeIOStats(); + assertEquals(length, volumeIOStats.getReadBytes()); + assertEquals(opCount, volumeIOStats.getReadOpCount()); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyManagerImpl.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyManagerImpl.java new file mode 100644 index 00000000000..b05dbca9821 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyManagerImpl.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.common.volume + .RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.keyvalue.impl.KeyManagerImpl; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; + +import static org.junit.Assert.*; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; + +/** + * This class is used to test key related operations on the container. + */ +public class TestKeyManagerImpl { + + private OzoneConfiguration config; + private String scmId = UUID.randomUUID().toString(); + private VolumeSet volumeSet; + private RoundRobinVolumeChoosingPolicy volumeChoosingPolicy; + private KeyValueContainerData keyValueContainerData; + private KeyValueContainer keyValueContainer; + private KeyData keyData; + private KeyManagerImpl keyManager; + private BlockID blockID; + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + + @Before + public void setUp() throws Exception { + config = new OzoneConfiguration(); + + HddsVolume hddsVolume = new HddsVolume.Builder(folder.getRoot() + .getAbsolutePath()).conf(config).datanodeUuid(UUID.randomUUID() + .toString()).build(); + + volumeSet = mock(VolumeSet.class); + + volumeChoosingPolicy = mock(RoundRobinVolumeChoosingPolicy.class); + Mockito.when(volumeChoosingPolicy.chooseVolume(anyList(), anyLong())) + .thenReturn(hddsVolume); + + keyValueContainerData = new KeyValueContainerData(1L, + (long) StorageUnit.GB.toBytes(5)); + + keyValueContainer = new KeyValueContainer( + keyValueContainerData, config); + + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + + // Creating KeyData + blockID = new BlockID(1L, 1L); + keyData = new KeyData(blockID); + keyData.addMetadata("VOLUME", "ozone"); + keyData.addMetadata("OWNER", "hdfs"); + List chunkList = new LinkedList<>(); + ChunkInfo info = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), 0), 0, 1024); + chunkList.add(info.getProtoBufMessage()); + keyData.setChunks(chunkList); + + // Create KeyValueContainerManager + keyManager = new KeyManagerImpl(config); + + } + + @Test + public void testPutAndGetKey() throws Exception { + assertEquals(0, keyValueContainer.getContainerData().getKeyCount()); + //Put Key + keyManager.putKey(keyValueContainer, keyData); + + assertEquals(1, keyValueContainer.getContainerData().getKeyCount()); + //Get Key + KeyData fromGetKeyData = keyManager.getKey(keyValueContainer, + keyData.getBlockID()); + + assertEquals(keyData.getContainerID(), fromGetKeyData.getContainerID()); + assertEquals(keyData.getLocalID(), fromGetKeyData.getLocalID()); + 
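The KeyData-with-one-chunk boilerplate from setUp is repeated again in testListKey below. A sketch of it factored into a helper; the helper name is illustrative, and the generic type on the chunk list (stripped in the hunk above) is restored here as an assumption.

    private KeyData buildKeyData(BlockID blockID) throws IOException {
      // One 1 KB chunk plus the VOLUME/OWNER metadata the tests expect.
      KeyData keyData = new KeyData(blockID);
      keyData.addMetadata("VOLUME", "ozone");
      keyData.addMetadata("OWNER", "hdfs");
      List<ContainerProtos.ChunkInfo> chunkList = new LinkedList<>();
      ChunkInfo info = new ChunkInfo(
          String.format("%d.data.%d", blockID.getLocalID(), 0), 0, 1024);
      chunkList.add(info.getProtoBufMessage());
      keyData.setChunks(chunkList);
      return keyData;
    }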
assertEquals(keyData.getChunks().size(), fromGetKeyData.getChunks().size()); + assertEquals(keyData.getMetadata().size(), fromGetKeyData.getMetadata() + .size()); + + } + + + @Test + public void testDeleteKey() throws Exception { + try { + assertEquals(0, keyValueContainer.getContainerData().getKeyCount()); + //Put Key + keyManager.putKey(keyValueContainer, keyData); + assertEquals(1, keyValueContainer.getContainerData().getKeyCount()); + //Delete Key + keyManager.deleteKey(keyValueContainer, blockID); + assertEquals(0, keyValueContainer.getContainerData().getKeyCount()); + try { + keyManager.getKey(keyValueContainer, blockID); + fail("testDeleteKey"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("Unable to find the key", ex); + } + } catch (IOException ex) { + fail("testDeleteKey failed"); + } + } + + @Test + public void testListKey() throws Exception { + try { + keyManager.putKey(keyValueContainer, keyData); + List listKeyData = keyManager.listKey( + keyValueContainer, 1, 10); + assertNotNull(listKeyData); + assertTrue(listKeyData.size() == 1); + + for (long i = 2; i <= 10; i++) { + blockID = new BlockID(1L, i); + keyData = new KeyData(blockID); + keyData.addMetadata("VOLUME", "ozone"); + keyData.addMetadata("OWNER", "hdfs"); + List chunkList = new LinkedList<>(); + ChunkInfo info = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), 0), 0, 1024); + chunkList.add(info.getProtoBufMessage()); + keyData.setChunks(chunkList); + keyManager.putKey(keyValueContainer, keyData); + } + + listKeyData = keyManager.listKey( + keyValueContainer, 1, 10); + assertNotNull(listKeyData); + assertTrue(listKeyData.size() == 10); + + } catch (IOException ex) { + fail("testListKey failed"); + } + } + + @Test + public void testGetNoSuchKey() throws Exception { + try { + keyData = new KeyData(new BlockID(1L, 2L)); + keyManager.getKey(keyValueContainer, new BlockID(1L, 2L)); + fail("testGetNoSuchKey failed"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("Unable to find the key.", ex); + assertEquals(ContainerProtos.Result.NO_SUCH_KEY, ex.getResult()); + } + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueBlockIterator.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueBlockIterator.java new file mode 100644 index 00000000000..f1fe88ecd11 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueBlockIterator.java @@ -0,0 +1,277 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import com.google.common.primitives.Longs; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.KeyData; +import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyUtils; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.utils.MetadataKeyFilters; +import org.apache.hadoop.utils.MetadataStore; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.Arrays; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.UUID; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_IMPL; +import static org.apache.hadoop.ozone.OzoneConfigKeys + .OZONE_METADATA_STORE_IMPL_LEVELDB; +import static org.apache.hadoop.ozone.OzoneConfigKeys + .OZONE_METADATA_STORE_IMPL_ROCKSDB; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * This class is used to test KeyValue container block iterator. 
+ */ +@RunWith(Parameterized.class) +public class TestKeyValueBlockIterator { + + private KeyValueContainer container; + private KeyValueContainerData containerData; + private VolumeSet volumeSet; + private Configuration conf; + private File testRoot; + + private final String storeImpl; + + public TestKeyValueBlockIterator(String metadataImpl) { + this.storeImpl = metadataImpl; + } + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + {OZONE_METADATA_STORE_IMPL_LEVELDB}, + {OZONE_METADATA_STORE_IMPL_ROCKSDB}}); + } + + @Before + public void setUp() throws Exception { + testRoot = GenericTestUtils.getRandomizedTestDir(); + conf = new OzoneConfiguration(); + conf.set(HDDS_DATANODE_DIR_KEY, testRoot.getAbsolutePath()); + conf.set(OZONE_METADATA_STORE_IMPL, storeImpl); + volumeSet = new VolumeSet(UUID.randomUUID().toString(), conf); + } + + + @After + public void tearDown() { + volumeSet.shutdown(); + FileUtil.fullyDelete(testRoot); + } + + @Test + public void testKeyValueBlockIteratorWithMixedBlocks() throws Exception { + + long containerID = 100L; + int deletedBlocks = 5; + int normalBlocks = 5; + createContainerWithBlocks(containerID, normalBlocks, deletedBlocks); + String containerPath = new File(containerData.getMetadataPath()) + .getParent(); + KeyValueBlockIterator keyValueBlockIterator = new KeyValueBlockIterator( + containerID, new File(containerPath)); + + int counter = 0; + while(keyValueBlockIterator.hasNext()) { + KeyData keyData = keyValueBlockIterator.nextBlock(); + assertEquals(keyData.getLocalID(), counter++); + } + + assertFalse(keyValueBlockIterator.hasNext()); + + keyValueBlockIterator.seekToFirst(); + counter = 0; + while(keyValueBlockIterator.hasNext()) { + KeyData keyData = keyValueBlockIterator.nextBlock(); + assertEquals(keyData.getLocalID(), counter++); + } + assertFalse(keyValueBlockIterator.hasNext()); + + try { + keyValueBlockIterator.nextBlock(); + } catch (NoSuchElementException ex) { + GenericTestUtils.assertExceptionContains("Block Iterator reached end " + + "for ContainerID " + containerID, ex); + } + } + + @Test + public void testKeyValueBlockIteratorWithNextBlock() throws Exception { + long containerID = 101L; + createContainerWithBlocks(containerID, 2, 0); + String containerPath = new File(containerData.getMetadataPath()) + .getParent(); + KeyValueBlockIterator keyValueBlockIterator = new KeyValueBlockIterator( + containerID, new File(containerPath)); + long blockID = 0L; + assertEquals(blockID++, keyValueBlockIterator.nextBlock().getLocalID()); + assertEquals(blockID, keyValueBlockIterator.nextBlock().getLocalID()); + + try { + keyValueBlockIterator.nextBlock(); + } catch (NoSuchElementException ex) { + GenericTestUtils.assertExceptionContains("Block Iterator reached end " + + "for ContainerID " + containerID, ex); + } + } + + @Test + public void testKeyValueBlockIteratorWithHasNext() throws Exception { + long containerID = 102L; + createContainerWithBlocks(containerID, 2, 0); + String containerPath = new File(containerData.getMetadataPath()) + .getParent(); + KeyValueBlockIterator keyValueBlockIterator = new KeyValueBlockIterator( + containerID, new File(containerPath)); + long blockID = 0L; + + // Even calling multiple times hasNext() should not move entry forward. 
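Taken together, these tests pin down a small iterator contract: hasNext() never advances the cursor, nextBlock() does, seekToFirst()/seekToLast() reposition it, and an optional filter restricts which keys are visible. A usage sketch built only from constructors and calls that appear in this file, with containerID and containerPath assumed to be in scope:

    // Walk every block that passes the default (normal-key) filter.
    KeyValueBlockIterator iter = new KeyValueBlockIterator(
        containerID, new File(containerPath));
    while (iter.hasNext()) {              // hasNext() may be called repeatedly
      KeyData block = iter.nextBlock();   // nextBlock() advances the cursor
      System.out.println("block localID=" + block.getLocalID());
    }
    iter.seekToFirst();                   // rewind to iterate again

    // Restrict the walk to blocks marked for deletion, as the filter test below does.
    KeyValueBlockIterator deleting = new KeyValueBlockIterator(
        containerID, new File(containerPath),
        MetadataKeyFilters.getDeletingKeyFilter());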
+ assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertEquals(blockID++, keyValueBlockIterator.nextBlock().getLocalID()); + + assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertTrue(keyValueBlockIterator.hasNext()); + assertEquals(blockID, keyValueBlockIterator.nextBlock().getLocalID()); + + keyValueBlockIterator.seekToLast(); + assertTrue(keyValueBlockIterator.hasNext()); + assertEquals(blockID, keyValueBlockIterator.nextBlock().getLocalID()); + + keyValueBlockIterator.seekToFirst(); + blockID = 0L; + assertEquals(blockID++, keyValueBlockIterator.nextBlock().getLocalID()); + assertEquals(blockID, keyValueBlockIterator.nextBlock().getLocalID()); + + try { + keyValueBlockIterator.nextBlock(); + } catch (NoSuchElementException ex) { + GenericTestUtils.assertExceptionContains("Block Iterator reached end " + + "for ContainerID " + containerID, ex); + } + + + } + + @Test + public void testKeyValueBlockIteratorWithFilter() throws Exception { + long containerId = 103L; + int deletedBlocks = 5; + int normalBlocks = 5; + createContainerWithBlocks(containerId, normalBlocks, deletedBlocks); + String containerPath = new File(containerData.getMetadataPath()) + .getParent(); + KeyValueBlockIterator keyValueBlockIterator = new KeyValueBlockIterator( + containerId, new File(containerPath), MetadataKeyFilters + .getDeletingKeyFilter()); + + int counter = 5; + while(keyValueBlockIterator.hasNext()) { + KeyData keyData = keyValueBlockIterator.nextBlock(); + assertEquals(keyData.getLocalID(), counter++); + } + } + + @Test + public void testKeyValueBlockIteratorWithOnlyDeletedBlocks() throws + Exception { + long containerId = 104L; + createContainerWithBlocks(containerId, 0, 5); + String containerPath = new File(containerData.getMetadataPath()) + .getParent(); + KeyValueBlockIterator keyValueBlockIterator = new KeyValueBlockIterator( + containerId, new File(containerPath)); + //As all blocks are deleted blocks, blocks does not match with normal key + // filter. + assertFalse(keyValueBlockIterator.hasNext()); + } + + /** + * Creates a container with specified number of normal blocks and deleted + * blocks. First it will insert normal blocks, and then it will insert + * deleted blocks. 
+ * @param containerId + * @param normalBlocks + * @param deletedBlocks + * @throws Exception + */ + private void createContainerWithBlocks(long containerId, int + normalBlocks, int deletedBlocks) throws + Exception { + containerData = new KeyValueContainerData(containerId, + (long) StorageUnit.GB.toBytes(1)); + container = new KeyValueContainer(containerData, conf); + container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(), UUID + .randomUUID().toString()); + MetadataStore metadataStore = KeyUtils.getDB(containerData, conf); + + List chunkList = new LinkedList<>(); + ChunkInfo info = new ChunkInfo("chunkfile", 0, 1024); + chunkList.add(info.getProtoBufMessage()); + + for (int i=0; i chunkList = new LinkedList<>(); + ChunkInfo info = new ChunkInfo(String.format("%d.data.%d", blockID + .getLocalID(), 0), 0, 1024); + chunkList.add(info.getProtoBufMessage()); + keyData.setChunks(chunkList); + metadataStore.put(Longs.toByteArray(blockID.getLocalID()), keyData + .getProtoBufMessage().toByteArray()); + } + + } + + @SuppressWarnings("RedundantCast") + @Test + public void testCreateContainer() throws Exception { + + // Create Container. + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + + keyValueContainerData = keyValueContainer + .getContainerData(); + + String containerMetaDataPath = keyValueContainerData + .getMetadataPath(); + String chunksPath = keyValueContainerData.getChunksPath(); + + // Check whether containerMetaDataPath and chunksPath exists or not. + assertTrue(containerMetaDataPath != null); + assertTrue(chunksPath != null); + File containerMetaDataLoc = new File(containerMetaDataPath); + + //Check whether container file and container db file exists or not. + assertTrue(keyValueContainer.getContainerFile().exists(), + ".Container File does not exist"); + assertTrue(keyValueContainer.getContainerDBFile().exists(), "Container " + + "DB does not exist"); + } + + @Test + public void testContainerImportExport() throws Exception { + + long containerId = keyValueContainer.getContainerData().getContainerID(); + // Create Container. 
+ keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + + + keyValueContainerData = keyValueContainer + .getContainerData(); + + keyValueContainerData.setState(ContainerLifeCycleState.CLOSED); + + int numberOfKeysToWrite = 12; + //write one few keys to check the key count after import + MetadataStore metadataStore = KeyUtils.getDB(keyValueContainerData, conf); + for (int i = 0; i < numberOfKeysToWrite; i++) { + metadataStore.put(("test" + i).getBytes(), "test".getBytes()); + } + metadataStore.close(); + + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + keyValueContainer.update(metadata, true); + + //destination path + File folderToExport = folder.newFile("exported.tar.gz"); + + TarContainerPacker packer = new TarContainerPacker(); + + //export the container + try (FileOutputStream fos = new FileOutputStream(folderToExport)) { + keyValueContainer + .exportContainerData(fos, packer); + } + + //delete the original one + keyValueContainer.delete(true); + + //create a new one + KeyValueContainerData containerData = + new KeyValueContainerData(containerId, 1, + keyValueContainerData.getMaxSize()); + KeyValueContainer container = new KeyValueContainer(containerData, conf); + + HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume(volumeSet + .getVolumesList(), 1); + String hddsVolumeDir = containerVolume.getHddsRootDir().toString(); + + container.populatePathFields(scmId, containerVolume, hddsVolumeDir); + try (FileInputStream fis = new FileInputStream(folderToExport)) { + container.importContainerData(fis, packer); + } + + Assert.assertEquals("value1", containerData.getMetadata().get("key1")); + Assert.assertEquals(keyValueContainerData.getContainerDBType(), + containerData.getContainerDBType()); + Assert.assertEquals(keyValueContainerData.getState(), + containerData.getState()); + Assert.assertEquals(numberOfKeysToWrite, + containerData.getKeyCount()); + Assert.assertEquals(keyValueContainerData.getLayOutVersion(), + containerData.getLayOutVersion()); + Assert.assertEquals(keyValueContainerData.getMaxSize(), + containerData.getMaxSize()); + Assert.assertEquals(keyValueContainerData.getBytesUsed(), + containerData.getBytesUsed()); + + //Can't overwrite existing container + try { + try (FileInputStream fis = new FileInputStream(folderToExport)) { + container.importContainerData(fis, packer); + } + fail("Container is imported twice. Previous files are overwritten"); + } catch (Exception ex) { + //all good + } + + } + + @Test + public void testDuplicateContainer() throws Exception { + try { + // Create Container. 
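testContainerImportExport above boils down to a short export/import call sequence. The sketch below keeps only the happy path, assumes the source container is already CLOSED, and reuses variable names from the test (the exported archive file, target volume and path) rather than redefining them.

    // Stream a closed container into a tar.gz archive ...
    TarContainerPacker packer = new TarContainerPacker();
    try (FileOutputStream fos = new FileOutputStream(folderToExport)) {
      keyValueContainer.exportContainerData(fos, packer);
    }

    // ... and rebuild it as a fresh KeyValueContainer on a chosen volume.
    KeyValueContainer imported = new KeyValueContainer(containerData, conf);
    imported.populatePathFields(scmId, containerVolume, hddsVolumeDir);
    try (FileInputStream fis = new FileInputStream(folderToExport)) {
      imported.importContainerData(fis, packer);
    }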
+ keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + fail("testDuplicateContainer failed"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("ContainerFile already " + + "exists", ex); + assertEquals(ContainerProtos.Result.CONTAINER_ALREADY_EXISTS, ex + .getResult()); + } + } + + @Test + public void testDiskFullExceptionCreateContainer() throws Exception { + + Mockito.when(volumeChoosingPolicy.chooseVolume(anyList(), anyLong())) + .thenThrow(DiskChecker.DiskOutOfSpaceException.class); + try { + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + fail("testDiskFullExceptionCreateContainer failed"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("disk out of space", + ex); + assertEquals(ContainerProtos.Result.DISK_OUT_OF_SPACE, ex.getResult()); + } + } + + @Test + public void testDeleteContainer() throws Exception { + keyValueContainerData.setState(ContainerProtos.ContainerLifeCycleState + .CLOSED); + keyValueContainer = new KeyValueContainer( + keyValueContainerData, conf); + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + keyValueContainer.delete(true); + + String containerMetaDataPath = keyValueContainerData + .getMetadataPath(); + File containerMetaDataLoc = new File(containerMetaDataPath); + + assertFalse("Container directory still exists", containerMetaDataLoc + .getParentFile().exists()); + + assertFalse("Container File still exists", + keyValueContainer.getContainerFile().exists()); + assertFalse("Container DB file still exists", + keyValueContainer.getContainerDBFile().exists()); + } + + + @Test + public void testCloseContainer() throws Exception { + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + keyValueContainer.close(); + + keyValueContainerData = keyValueContainer + .getContainerData(); + + assertEquals(ContainerProtos.ContainerLifeCycleState.CLOSED, + keyValueContainerData.getState()); + + //Check state in the .container file + String containerMetaDataPath = keyValueContainerData + .getMetadataPath(); + File containerFile = keyValueContainer.getContainerFile(); + + keyValueContainerData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(containerFile); + assertEquals(ContainerProtos.ContainerLifeCycleState.CLOSED, + keyValueContainerData.getState()); + } + + @Test + public void testUpdateContainer() throws IOException { + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + Map metadata = new HashMap<>(); + metadata.put("VOLUME", "ozone"); + metadata.put("OWNER", "hdfs"); + keyValueContainer.update(metadata, true); + + keyValueContainerData = keyValueContainer + .getContainerData(); + + assertEquals(2, keyValueContainerData.getMetadata().size()); + + //Check metadata in the .container file + File containerFile = keyValueContainer.getContainerFile(); + + keyValueContainerData = (KeyValueContainerData) ContainerDataYaml + .readContainerFile(containerFile); + assertEquals(2, keyValueContainerData.getMetadata().size()); + + } + + @Test + public void testUpdateContainerUnsupportedRequest() throws Exception { + try { + keyValueContainerData.setState(ContainerProtos.ContainerLifeCycleState + .CLOSED); + keyValueContainer = new KeyValueContainer(keyValueContainerData, conf); + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + Map metadata = new HashMap<>(); + metadata.put("VOLUME", "ozone"); + 
keyValueContainer.update(metadata, false); + fail("testUpdateContainerUnsupportedRequest failed"); + } catch (StorageContainerException ex) { + GenericTestUtils.assertExceptionContains("Updating a closed container " + + "without force option is not allowed", ex); + assertEquals(ContainerProtos.Result.UNSUPPORTED_REQUEST, ex + .getResult()); + } + } + + +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java new file mode 100644 index 00000000000..d91bbf74781 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.keyvalue; + + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerCommandRequestProto; +import org.apache.hadoop.hdds.scm.container.common.helpers + .StorageContainerException; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.rules.Timeout; + +import org.mockito.Mockito; + +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_DATANODE_VOLUME_CHOOSING_POLICY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.times; + + +import java.io.File; +import java.util.UUID; + +/** + * Unit tests for {@link KeyValueHandler}. 
+ */ +public class TestKeyValueHandler { + + @Rule + public TestRule timeout = new Timeout(300000); + + private static HddsDispatcher dispatcher; + private static KeyValueHandler handler; + + private final static String DATANODE_UUID = UUID.randomUUID().toString(); + + private final String baseDir = MiniDFSCluster.getBaseDirectory(); + private final String volume = baseDir + "disk1"; + + private static final long DUMMY_CONTAINER_ID = 9999; + + @BeforeClass + public static void setup() throws StorageContainerException { + // Create mock HddsDispatcher and KeyValueHandler. + handler = Mockito.mock(KeyValueHandler.class); + dispatcher = Mockito.mock(HddsDispatcher.class); + Mockito.when(dispatcher.getHandler(any())).thenReturn(handler); + Mockito.when(dispatcher.dispatch(any())).thenCallRealMethod(); + Mockito.when(dispatcher.getContainer(anyLong())).thenReturn( + Mockito.mock(KeyValueContainer.class)); + Mockito.when(handler.handle(any(), any())).thenCallRealMethod(); + doCallRealMethod().when(dispatcher).setMetricsForTesting(any()); + dispatcher.setMetricsForTesting(Mockito.mock(ContainerMetrics.class)); + } + + @Test + /** + * Test that Handler handles different command types correctly. + */ + public void testHandlerCommandHandling() throws Exception { + + // Test Create Container Request handling + ContainerCommandRequestProto createContainerRequest = + ContainerProtos.ContainerCommandRequestProto.newBuilder() + .setCmdType(ContainerProtos.Type.CreateContainer) + .setContainerID(DUMMY_CONTAINER_ID) + .setDatanodeUuid(DATANODE_UUID) + .setCreateContainer(ContainerProtos.CreateContainerRequestProto + .getDefaultInstance()) + .build(); + dispatcher.dispatch(createContainerRequest); + Mockito.verify(handler, times(1)).handleCreateContainer( + any(ContainerCommandRequestProto.class), any()); + + // Test Read Container Request handling + ContainerCommandRequestProto readContainerRequest = + getDummyCommandRequestProto(ContainerProtos.Type.ReadContainer); + dispatcher.dispatch(readContainerRequest); + Mockito.verify(handler, times(1)).handleReadContainer( + any(ContainerCommandRequestProto.class), any()); + + // Test Update Container Request handling + ContainerCommandRequestProto updateContainerRequest = + getDummyCommandRequestProto(ContainerProtos.Type.UpdateContainer); + dispatcher.dispatch(updateContainerRequest); + Mockito.verify(handler, times(1)).handleUpdateContainer( + any(ContainerCommandRequestProto.class), any()); + + // Test Delete Container Request handling + ContainerCommandRequestProto deleteContainerRequest = + getDummyCommandRequestProto(ContainerProtos.Type.DeleteContainer); + dispatcher.dispatch(deleteContainerRequest); + Mockito.verify(handler, times(1)).handleDeleteContainer( + any(ContainerCommandRequestProto.class), any()); + + // Test List Container Request handling + ContainerCommandRequestProto listContainerRequest = + getDummyCommandRequestProto(ContainerProtos.Type.ListContainer); + dispatcher.dispatch(listContainerRequest); + Mockito.verify(handler, times(1)).handleUnsupportedOp( + any(ContainerCommandRequestProto.class)); + + // Test Close Container Request handling + ContainerCommandRequestProto closeContainerRequest = + getDummyCommandRequestProto(ContainerProtos.Type.CloseContainer); + dispatcher.dispatch(closeContainerRequest); + Mockito.verify(handler, times(1)).handleCloseContainer( + any(ContainerCommandRequestProto.class), any()); + + // Test Put Key Request handling + ContainerCommandRequestProto putKeyRequest = + 
getDummyCommandRequestProto(ContainerProtos.Type.PutKey); + dispatcher.dispatch(putKeyRequest); + Mockito.verify(handler, times(1)).handlePutKey( + any(ContainerCommandRequestProto.class), any()); + + // Test Get Key Request handling + ContainerCommandRequestProto getKeyRequest = + getDummyCommandRequestProto(ContainerProtos.Type.GetKey); + dispatcher.dispatch(getKeyRequest); + Mockito.verify(handler, times(1)).handleGetKey( + any(ContainerCommandRequestProto.class), any()); + + // Test Delete Key Request handling + ContainerCommandRequestProto deleteKeyRequest = + getDummyCommandRequestProto(ContainerProtos.Type.DeleteKey); + dispatcher.dispatch(deleteKeyRequest); + Mockito.verify(handler, times(1)).handleDeleteKey( + any(ContainerCommandRequestProto.class), any()); + + // Test List Key Request handling + ContainerCommandRequestProto listKeyRequest = + getDummyCommandRequestProto(ContainerProtos.Type.ListKey); + dispatcher.dispatch(listKeyRequest); + Mockito.verify(handler, times(2)).handleUnsupportedOp( + any(ContainerCommandRequestProto.class)); + + // Test Read Chunk Request handling + ContainerCommandRequestProto readChunkRequest = + getDummyCommandRequestProto(ContainerProtos.Type.ReadChunk); + dispatcher.dispatch(readChunkRequest); + Mockito.verify(handler, times(1)).handleReadChunk( + any(ContainerCommandRequestProto.class), any()); + + // Test Delete Chunk Request handling + ContainerCommandRequestProto deleteChunkRequest = + getDummyCommandRequestProto(ContainerProtos.Type.DeleteChunk); + dispatcher.dispatch(deleteChunkRequest); + Mockito.verify(handler, times(1)).handleDeleteChunk( + any(ContainerCommandRequestProto.class), any()); + + // Test Write Chunk Request handling + ContainerCommandRequestProto writeChunkRequest = + getDummyCommandRequestProto(ContainerProtos.Type.WriteChunk); + dispatcher.dispatch(writeChunkRequest); + Mockito.verify(handler, times(1)).handleWriteChunk( + any(ContainerCommandRequestProto.class), any()); + + // Test List Chunk Request handling + ContainerCommandRequestProto listChunkRequest = + getDummyCommandRequestProto(ContainerProtos.Type.ListChunk); + dispatcher.dispatch(listChunkRequest); + Mockito.verify(handler, times(3)).handleUnsupportedOp( + any(ContainerCommandRequestProto.class)); + + // Test Put Small File Request handling + ContainerCommandRequestProto putSmallFileRequest = + getDummyCommandRequestProto(ContainerProtos.Type.PutSmallFile); + dispatcher.dispatch(putSmallFileRequest); + Mockito.verify(handler, times(1)).handlePutSmallFile( + any(ContainerCommandRequestProto.class), any()); + + // Test Get Small File Request handling + ContainerCommandRequestProto getSmallFileRequest = + getDummyCommandRequestProto(ContainerProtos.Type.GetSmallFile); + dispatcher.dispatch(getSmallFileRequest); + Mockito.verify(handler, times(1)).handleGetSmallFile( + any(ContainerCommandRequestProto.class), any()); + } + + @Test + public void testVolumeSetInKeyValueHandler() throws Exception{ + File path = GenericTestUtils.getRandomizedTestDir(); + try { + Configuration conf = new OzoneConfiguration(); + conf.set(HDDS_DATANODE_DIR_KEY, path.getAbsolutePath()); + ContainerSet cset = new ContainerSet(); + int[] interval = new int[1]; + interval[0] = 2; + ContainerMetrics metrics = new ContainerMetrics(interval); + VolumeSet volumeSet = new VolumeSet(UUID.randomUUID().toString(), conf); + KeyValueHandler keyValueHandler = new KeyValueHandler(conf, cset, + volumeSet, metrics); + assertEquals(keyValueHandler.getVolumeChoosingPolicyForTesting() + 
.getClass().getName(), "org.apache.hadoop.ozone.container.common" + + ".volume.RoundRobinVolumeChoosingPolicy"); + + //Set a class which is not of sub class of VolumeChoosingPolicy + conf.set(HDDS_DATANODE_VOLUME_CHOOSING_POLICY, + "org.apache.hadoop.ozone.container.common.impl.HddsDispatcher"); + try { + new KeyValueHandler(conf, cset, volumeSet, metrics); + } catch (RuntimeException ex) { + GenericTestUtils.assertExceptionContains("class org.apache.hadoop" + + ".ozone.container.common.impl.HddsDispatcher not org.apache" + + ".hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy", + ex); + } + } finally { + FileUtil.fullyDelete(path); + } + } + + private ContainerCommandRequestProto getDummyCommandRequestProto( + ContainerProtos.Type cmdType) { + ContainerCommandRequestProto request = + ContainerProtos.ContainerCommandRequestProto.newBuilder() + .setCmdType(cmdType) + .setContainerID(DUMMY_CONTAINER_ID) + .setDatanodeUuid(DATANODE_UUID) + .build(); + + return request; + } + + + @Test + public void testCloseInvalidContainer() { + long containerID = 1234L; + Configuration conf = new Configuration(); + KeyValueContainerData kvData = new KeyValueContainerData(containerID, + (long) StorageUnit.GB.toBytes(1)); + KeyValueContainer container = new KeyValueContainer(kvData, conf); + kvData.setState(ContainerProtos.ContainerLifeCycleState.INVALID); + + // Create Close container request + ContainerCommandRequestProto closeContainerRequest = + ContainerProtos.ContainerCommandRequestProto.newBuilder() + .setCmdType(ContainerProtos.Type.CloseContainer) + .setContainerID(DUMMY_CONTAINER_ID) + .setDatanodeUuid(DATANODE_UUID) + .setCloseContainer(ContainerProtos.CloseContainerRequestProto + .getDefaultInstance()) + .build(); + dispatcher.dispatch(closeContainerRequest); + + Mockito.when(handler.handleCloseContainer(any(), any())) + .thenCallRealMethod(); + // Closing invalid container should return error response. + ContainerProtos.ContainerCommandResponseProto response = + handler.handleCloseContainer(closeContainerRequest, container); + + Assert.assertTrue("Close container should return Invalid container error", + response.getResult().equals( + ContainerProtos.Result.INVALID_CONTAINER_STATE)); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java new file mode 100644 index 00000000000..a599f721696 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.keyvalue; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.container.common.interfaces.ContainerPacker; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Test the tar/untar for a given container. + */ +public class TestTarContainerPacker { + + private static final String TEST_DB_FILE_NAME = "test1"; + + private static final String TEST_DB_FILE_CONTENT = "test1"; + + private static final String TEST_CHUNK_FILE_NAME = "chunk1"; + + private static final String TEST_CHUNK_FILE_CONTENT = "This is a chunk"; + + private static final String TEST_DESCRIPTOR_FILE_CONTENT = "descriptor"; + + private ContainerPacker packer = new TarContainerPacker(); + + private static final Path SOURCE_CONTAINER_ROOT = + Paths.get("target/test/data/packer-source-dir"); + + private static final Path DEST_CONTAINER_ROOT = + Paths.get("target/test/data/packer-dest-dir"); + + @BeforeClass + public static void init() throws IOException { + initDir(SOURCE_CONTAINER_ROOT); + initDir(DEST_CONTAINER_ROOT); + } + + private static void initDir(Path path) throws IOException { + if (path.toFile().exists()) { + FileUtils.deleteDirectory(path.toFile()); + } + path.toFile().mkdirs(); + } + + private KeyValueContainerData createContainer(long id, Path dir, + OzoneConfiguration conf) throws IOException { + + Path containerDir = dir.resolve("container" + id); + Path dbDir = containerDir.resolve("db"); + Path dataDir = containerDir.resolve("data"); + Files.createDirectories(dbDir); + Files.createDirectories(dataDir); + + KeyValueContainerData containerData = new KeyValueContainerData(id, -1); + containerData.setChunksPath(dataDir.toString()); + containerData.setMetadataPath(dbDir.getParent().toString()); + containerData.setDbFile(dbDir.toFile()); + + + return containerData; + } + + @Test + public void pack() throws IOException, CompressorException { + + //GIVEN + OzoneConfiguration conf = new OzoneConfiguration(); + + KeyValueContainerData sourceContainerData = + createContainer(1L, SOURCE_CONTAINER_ROOT, conf); + + KeyValueContainer sourceContainer = + new KeyValueContainer(sourceContainerData, conf); + + //sample db file in the metadata directory + try (FileWriter writer = new FileWriter( + sourceContainerData.getDbFile().toPath() + .resolve(TEST_DB_FILE_NAME) + .toFile())) { + IOUtils.write(TEST_DB_FILE_CONTENT, writer); + } + + //sample chunk file in the chunk directory + try (FileWriter writer = new FileWriter( + Paths.get(sourceContainerData.getChunksPath()) + .resolve(TEST_CHUNK_FILE_NAME) + .toFile())) { + IOUtils.write(TEST_CHUNK_FILE_CONTENT, writer); + } + + //sample container descriptor file + try (FileWriter 
writer = new FileWriter( + sourceContainer.getContainerFile())) { + IOUtils.write(TEST_DESCRIPTOR_FILE_CONTENT, writer); + } + + Path targetFile = + SOURCE_CONTAINER_ROOT.getParent().resolve("container.tar.gz"); + + //WHEN: pack it + try (FileOutputStream output = new FileOutputStream(targetFile.toFile())) { + packer.pack(sourceContainer, output); + } + + //THEN: check the result + try (FileInputStream input = new FileInputStream(targetFile.toFile())) { + CompressorInputStream uncompressed = new CompressorStreamFactory() + .createCompressorInputStream(CompressorStreamFactory.GZIP, input); + TarArchiveInputStream tarStream = new TarArchiveInputStream(uncompressed); + + TarArchiveEntry entry; + Map entries = new HashMap<>(); + while ((entry = tarStream.getNextTarEntry()) != null) { + entries.put(entry.getName(), entry); + } + + Assert.assertTrue( + entries.containsKey("container.yaml")); + + } + + //read the container descriptor only + try (FileInputStream input = new FileInputStream(targetFile.toFile())) { + String containerYaml = new String(packer.unpackContainerDescriptor(input), + Charset.forName(StandardCharsets.UTF_8.name())); + Assert.assertEquals(TEST_DESCRIPTOR_FILE_CONTENT, containerYaml); + } + + KeyValueContainerData destinationContainerData = + createContainer(2L, DEST_CONTAINER_ROOT, conf); + + KeyValueContainer destinationContainer = + new KeyValueContainer(destinationContainerData, conf); + + String descriptor = ""; + + //unpackContainerData + try (FileInputStream input = new FileInputStream(targetFile.toFile())) { + descriptor = + new String(packer.unpackContainerData(destinationContainer, input), + Charset.forName(StandardCharsets.UTF_8.name())); + } + + assertExampleMetadataDbIsGood( + destinationContainerData.getDbFile().toPath()); + assertExampleChunkFileIsGood( + Paths.get(destinationContainerData.getChunksPath())); + Assert.assertFalse( + "Descriptor file should not have been extracted by the " + + "unpackContainerData call", + destinationContainer.getContainerFile().exists()); + Assert.assertEquals(TEST_DESCRIPTOR_FILE_CONTENT, descriptor); + + } + + + private void assertExampleMetadataDbIsGood(Path dbPath) + throws IOException { + + Path dbFile = dbPath.resolve(TEST_DB_FILE_NAME); + + Assert.assertTrue( + "example DB file is missing after pack/unpackContainerData: " + dbFile, + Files.exists(dbFile)); + + try (FileInputStream testFile = new FileInputStream(dbFile.toFile())) { + List strings = IOUtils + .readLines(testFile, Charset.forName(StandardCharsets.UTF_8.name())); + Assert.assertEquals(1, strings.size()); + Assert.assertEquals(TEST_DB_FILE_CONTENT, strings.get(0)); + } + } + + private void assertExampleChunkFileIsGood(Path chunkDirPath) + throws IOException { + + Path chunkFile = chunkDirPath.resolve(TEST_CHUNK_FILE_NAME); + + Assert.assertTrue( + "example chunk file is missing after pack/unpackContainerData: " + + chunkFile, + Files.exists(chunkFile)); + + try (FileInputStream testFile = new FileInputStream(chunkFile.toFile())) { + List strings = IOUtils + .readLines(testFile, Charset.forName(StandardCharsets.UTF_8.name())); + Assert.assertEquals(1, strings.size()); + Assert.assertEquals(TEST_CHUNK_FILE_CONTENT, strings.get(0)); + } + } + +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java new file mode 100644 index 00000000000..fea126b5da8 ---
/dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.ozoneimpl; + + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import java.util.Random; +import java.util.UUID; + + +import static org.junit.Assert.assertEquals; + +/** + * This class is used to test OzoneContainer. + */ +public class TestOzoneContainer { + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + + private OzoneConfiguration conf; + private String scmId = UUID.randomUUID().toString(); + private VolumeSet volumeSet; + private RoundRobinVolumeChoosingPolicy volumeChoosingPolicy; + private KeyValueContainerData keyValueContainerData; + private KeyValueContainer keyValueContainer; + private final DatanodeDetails datanodeDetails = createDatanodeDetails(); + + @Before + public void setUp() throws Exception { + conf = new OzoneConfiguration(); + conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, folder.getRoot() + .getAbsolutePath()); + conf.set(OzoneConfigKeys.OZONE_METADATA_DIRS, + folder.newFolder().getAbsolutePath()); + } + + @Test + public void testBuildContainerMap() throws Exception { + volumeSet = new VolumeSet(datanodeDetails.getUuidString(), conf); + volumeChoosingPolicy = new RoundRobinVolumeChoosingPolicy(); + + // Format the volumes + for (HddsVolume volume : volumeSet.getVolumesList()) { + volume.format(UUID.randomUUID().toString()); + } + + // Add containers to disk + for (int i=0; i<10; i++) { + keyValueContainerData = new KeyValueContainerData(i, + (long) StorageUnit.GB.toBytes(1)); + keyValueContainer = new KeyValueContainer( + keyValueContainerData, conf); + keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId); + } + + // When OzoneContainer is started, the containers from disk should be + // loaded into the containerSet. 
+ OzoneContainer ozoneContainer = new + OzoneContainer(datanodeDetails, conf, null); + ContainerSet containerset = ozoneContainer.getContainerSet(); + assertEquals(10, containerset.containerCount()); + } + + + private DatanodeDetails createDatanodeDetails() { + Random random = new Random(); + String ipAddress = + random.nextInt(256) + "." + random.nextInt(256) + "." + random + .nextInt(256) + "." + random.nextInt(256); + + String uuid = UUID.randomUUID().toString(); + String hostName = uuid; + DatanodeDetails.Port containerPort = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.STANDALONE, 0); + DatanodeDetails.Port ratisPort = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.RATIS, 0); + DatanodeDetails.Port restPort = DatanodeDetails.newPort( + DatanodeDetails.Port.Name.REST, 0); + DatanodeDetails.Builder builder = DatanodeDetails.newBuilder(); + builder.setUuid(uuid) + .setHostName("localhost") + .setIpAddress(ipAddress) + .addPort(containerPort) + .addPort(ratisPort) + .addPort(restPort); + return builder.build(); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/testutils/BlockDeletingServiceTestImpl.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/testutils/BlockDeletingServiceTestImpl.java index 7c129457fd2..115b5e2cf84 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/testutils/BlockDeletingServiceTestImpl.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/testutils/BlockDeletingServiceTestImpl.java @@ -19,8 +19,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; -import org.apache.hadoop.ozone.container.common.statemachine.background +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.keyvalue.statemachine.background .BlockDeletingService; import java.util.concurrent.CountDownLatch; @@ -42,9 +42,9 @@ private Thread testingThread; private AtomicInteger numOfProcessed = new AtomicInteger(0); - public BlockDeletingServiceTestImpl(ContainerManager containerManager, + public BlockDeletingServiceTestImpl(ContainerSet containerSet, int serviceInterval, Configuration conf) { - super(containerManager, serviceInterval, SERVICE_TIMEOUT_IN_MILLISECONDS, + super(containerSet, serviceInterval, SERVICE_TIMEOUT_IN_MILLISECONDS, TimeUnit.MILLISECONDS, conf); } diff --git a/hadoop-hdds/container-service/src/test/resources/additionalfields.container b/hadoop-hdds/container-service/src/test/resources/additionalfields.container new file mode 100644 index 00000000000..fff5304bea2 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/resources/additionalfields.container @@ -0,0 +1,12 @@ +! 
+containerDBType: RocksDB +chunksPath: /hdds/current/aed-fg4-hji-jkl/containerDir0/1 +containerID: 9223372036854775807 +containerType: KeyValueContainer +metadataPath: /hdds/current/aed-fg4-hji-jkl/containerDir0/1 +layOutVersion: 1 +maxSize: 5368709120 +metadata: {OWNER: ozone, VOLUME: hdfs} +state: CLOSED +aclEnabled: true +checksum: c5b5373b8755c4e7199478dcaded9d996f9aca089704e08950259cdb0f290680 \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/resources/incorrect.checksum.container b/hadoop-hdds/container-service/src/test/resources/incorrect.checksum.container new file mode 100644 index 00000000000..d06ba571281 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/resources/incorrect.checksum.container @@ -0,0 +1,11 @@ +! +containerDBType: RocksDB +chunksPath: /hdds/current/aed-fg4-hji-jkl/containerdir0/1 +containerID: 9223372036854775807 +containerType: KeyValueContainer +metadataPath: /hdds/current/aed-fg4-hji-jkl/containerdir0/1 +layOutVersion: 1 +maxSize: 5368709120 +metadata: {OWNER: ozone, VOLUME: hdfs} +state: OPEN +checksum: 08bc9d390f9183aeed3cf33c789e2a07310bba60f3cf55941caccc939db8670f \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/resources/incorrect.container b/hadoop-hdds/container-service/src/test/resources/incorrect.container new file mode 100644 index 00000000000..0053ab29212 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/resources/incorrect.container @@ -0,0 +1,11 @@ +! +containerDBType: RocksDB +chunksPath: /hdds/current/aed-fg4-hji-jkl/containerDir0/1 +containerID: 9223372036854775807 +containerType: KeyValueContainer +metadataPath: /hdds/current/aed-fg4-hji-jkl/containerDir0/1 +layOutVersion: 1 +maxSize: 5368709120 +metadata: {OWNER: ozone, VOLUME: hdfs} +state: INVALID +checksum: 08bc9d390f9183aeed3cf33c789e2a07310bba60f3cf55941caccc939db8670f \ No newline at end of file diff --git a/hadoop-hdds/framework/pom.xml b/hadoop-hdds/framework/pom.xml index a497133f9dd..cb380e2606a 100644 --- a/hadoop-hdds/framework/pom.xml +++ b/hadoop-hdds/framework/pom.xml @@ -20,10 +20,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-hdds-server-framework - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Distributed Data Store Server Framework Apache Hadoop HDDS Server Framework jar @@ -37,7 +37,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds-common - provided + + + org.mockito + mockito-all + test diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java index a0e78dcc7fd..c6d85d8b4fc 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/ServerUtils.java @@ -136,4 +136,9 @@ public static File getOzoneMetaDirPath(Configuration conf) { return dirPath; } + public static void setOzoneMetaDirPath(OzoneConfiguration conf, + String path) { + conf.set(OzoneConfigKeys.OZONE_METADATA_DIRS, path); + } + } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java index 44d85f5ffc0..b2b0df2a2a7 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java +++ 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java @@ -18,7 +18,11 @@ package org.apache.hadoop.hdds.server.events; import com.google.common.annotations.VisibleForTesting; + +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; + +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,6 +46,8 @@ private static final Logger LOG = LoggerFactory.getLogger(EventQueue.class); + private static final String EXECUTOR_NAME_SEPARATOR = "For"; + private final Map>> executors = new HashMap<>(); @@ -51,38 +57,74 @@ public > void addHandler( EVENT_TYPE event, EventHandler handler) { - - this.addHandler(event, new SingleThreadExecutor<>( - event.getName()), handler); + this.addHandler(event, handler, generateHandlerName(handler)); } + /** + * Add new handler to the event queue. + *

+ * By default a separate single-threaded executor will be dedicated to + delivering the events to the registered event handler. + * + * @param event Triggering event. + * @param handler Handler of event (will be called from a separate + * thread) + * @param handlerName The name of the handler (should be unique together with + * the event name) + * @param The type of the event payload. + * @param The type of the event identifier. + */ public > void addHandler( - EVENT_TYPE event, - EventExecutor executor, - EventHandler handler) { + EVENT_TYPE event, EventHandler handler, String handlerName) { + validateEvent(event); + Preconditions.checkNotNull(handler, "Handler should not be null."); + String executorName = + StringUtils.camelize(event.getName()) + EXECUTOR_NAME_SEPARATOR + + handlerName; + this.addHandler(event, new SingleThreadExecutor<>(executorName), handler); + } - executors.putIfAbsent(event, new HashMap<>()); - executors.get(event).putIfAbsent(executor, new ArrayList<>()); + private > void validateEvent(EVENT_TYPE event) { + Preconditions + .checkArgument(!event.getName().contains(EXECUTOR_NAME_SEPARATOR), + "Event name should not contain " + EXECUTOR_NAME_SEPARATOR + + " string."); - executors.get(event) - .get(executor) - .add(handler); + } + + private String generateHandlerName(EventHandler handler) { + if (!"".equals(handler.getClass().getSimpleName())) { + return handler.getClass().getSimpleName(); + } else { + return handler.getClass().getName(); + } } /** - * Creates one executor with multiple event handlers. + * Add event handler with custom executor. + * + * @param event Triggering event. + * @param executor The executor implementation to deliver events from a + * separate thread. Please keep in mind that + * registering metrics is the responsibility of the + * caller. + * @param handler Handler of event (will be called from a separate + * thread) + * @param The type of the event payload. + * @param The type of the event identifier. */ - public void addHandlerGroup(String name, HandlerForEvent... - eventsAndHandlers) { - SingleThreadExecutor sharedExecutor = - new SingleThreadExecutor(name); - for (HandlerForEvent handlerForEvent : eventsAndHandlers) { - addHandler(handlerForEvent.event, sharedExecutor, - handlerForEvent.handler); - } + public > void addHandler( + EVENT_TYPE event, EventExecutor executor, + EventHandler handler) { + validateEvent(event); + executors.putIfAbsent(event, new HashMap<>()); + executors.get(event).putIfAbsent(executor, new ArrayList<>()); + executors.get(event).get(executor).add(handler); } + + + /** * Route an event with payload to the right listener(s). * @@ -105,7 +147,12 @@ public void addHandlerGroup(String name, HandlerForEvent... for (EventHandler handler : executorAndHandlers.getValue()) { queuedCount.incrementAndGet(); - + if (LOG.isDebugEnabled()) { + LOG.debug("Delivering event {} to executor/handler {}: {}", + event.getName(), + executorAndHandlers.getKey().getName(), + payload); + } executorAndHandlers.getKey() .onMessage(handler, payload, this); @@ -113,8 +160,7 @@ public void addHandlerGroup(String name, HandlerForEvent... } } else { - throw new IllegalArgumentException( - "No event handler registered for event " + event); + LOG.warn("No event handler registered for event " + event); } } @@ -183,31 +229,5 @@ public void close() { }); } - /** - * Event identifier together with the handler.
- * - * @param - */ - public static class HandlerForEvent { - - private final Event event; - - private final EventHandler handler; - - public HandlerForEvent( - Event event, - EventHandler handler) { - this.event = event; - this.handler = handler; - } - - public Event getEvent() { - return event; - } - - public EventHandler getHandler() { - return handler; - } - } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventWatcher.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventWatcher.java index 19fddde9b4d..ba5078b402c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventWatcher.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventWatcher.java @@ -21,17 +21,21 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Predicate; import java.util.stream.Collectors; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.ozone.lease.Lease; import org.apache.hadoop.ozone.lease.LeaseAlreadyExistException; import org.apache.hadoop.ozone.lease.LeaseExpiredException; import org.apache.hadoop.ozone.lease.LeaseManager; import org.apache.hadoop.ozone.lease.LeaseNotFoundException; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.commons.collections.map.HashedMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,20 +60,41 @@ private final Event completionEvent; - private final LeaseManager leaseManager; + private final LeaseManager leaseManager; - protected final Map trackedEventsByUUID = + private final EventWatcherMetrics metrics; + + private final String name; + + protected final Map trackedEventsByID = new ConcurrentHashMap<>(); protected final Set trackedEvents = new HashSet<>(); - public EventWatcher(Event startEvent, + private final Map startTrackingTimes = new HashedMap(); + + public EventWatcher(String name, Event startEvent, Event completionEvent, - LeaseManager leaseManager) { + LeaseManager leaseManager) { this.startEvent = startEvent; this.completionEvent = completionEvent; this.leaseManager = leaseManager; + this.metrics = new EventWatcherMetrics(); + Preconditions.checkNotNull(name); + if (name.equals("")) { + name = getClass().getSimpleName(); + } + if (name.equals("")) { + //for anonymous inner classes + name = getClass().getName(); + } + this.name = name; + } + public EventWatcher(Event startEvent, + Event completionEvent, + LeaseManager leaseManager) { + this("", startEvent, completionEvent, leaseManager); } public void start(EventQueue queue) { @@ -77,25 +102,30 @@ public void start(EventQueue queue) { queue.addHandler(startEvent, this::handleStartEvent); queue.addHandler(completionEvent, (completionPayload, publisher) -> { - UUID uuid = completionPayload.getUUID(); try { - handleCompletion(uuid, publisher); + handleCompletion(completionPayload, publisher); } catch (LeaseNotFoundException e) { //It's already done. Too late, we already retried it. //Not a real problem. - LOG.warn("Completion event without active lease. UUID={}", uuid); + LOG.warn("Completion event without active lease. 
Id={}", + completionPayload.getId()); } }); + MetricsSystem ms = DefaultMetricsSystem.instance(); + ms.register(name, "EventWatcher metrics", metrics); } private synchronized void handleStartEvent(TIMEOUT_PAYLOAD payload, EventPublisher publisher) { - UUID identifier = payload.getUUID(); - trackedEventsByUUID.put(identifier, payload); + metrics.incrementTrackedEvents(); + long identifier = payload.getId(); + startTrackingTimes.put(identifier, System.currentTimeMillis()); + + trackedEventsByID.put(identifier, payload); trackedEvents.add(payload); try { - Lease lease = leaseManager.acquire(identifier); + Lease lease = leaseManager.acquire(identifier); try { lease.registerCallBack(() -> { handleTimeout(publisher, identifier); @@ -110,18 +140,25 @@ private synchronized void handleStartEvent(TIMEOUT_PAYLOAD payload, } } - private synchronized void handleCompletion(UUID uuid, - EventPublisher publisher) throws LeaseNotFoundException { - leaseManager.release(uuid); - TIMEOUT_PAYLOAD payload = trackedEventsByUUID.remove(uuid); + protected synchronized void handleCompletion(COMPLETION_PAYLOAD + completionPayload, EventPublisher publisher) throws + LeaseNotFoundException { + metrics.incrementCompletedEvents(); + long id = completionPayload.getId(); + leaseManager.release(id); + TIMEOUT_PAYLOAD payload = trackedEventsByID.remove(id); trackedEvents.remove(payload); + long originalTime = startTrackingTimes.remove(id); + metrics.updateFinishingTime(System.currentTimeMillis() - originalTime); onFinished(publisher, payload); } private synchronized void handleTimeout(EventPublisher publisher, - UUID identifier) { - TIMEOUT_PAYLOAD payload = trackedEventsByUUID.remove(identifier); + long identifier) { + metrics.incrementTimedOutEvents(); + TIMEOUT_PAYLOAD payload = trackedEventsByID.remove(identifier); trackedEvents.remove(payload); + startTrackingTimes.remove(payload.getId()); onTimeout(publisher, payload); } @@ -135,23 +172,38 @@ public synchronized boolean contains(TIMEOUT_PAYLOAD payload) { public synchronized boolean remove(TIMEOUT_PAYLOAD payload) { try { - leaseManager.release(payload.getUUID()); + leaseManager.release(payload.getId()); } catch (LeaseNotFoundException e) { - LOG.warn("Completion event without active lease. UUID={}", - payload.getUUID()); + LOG.warn("Completion event without active lease. Id={}", + payload.getId()); } - trackedEventsByUUID.remove(payload.getUUID()); + trackedEventsByID.remove(payload.getId()); return trackedEvents.remove(payload); } - abstract void onTimeout(EventPublisher publisher, TIMEOUT_PAYLOAD payload); + protected abstract void onTimeout( + EventPublisher publisher, TIMEOUT_PAYLOAD payload); - abstract void onFinished(EventPublisher publisher, TIMEOUT_PAYLOAD payload); + protected abstract void onFinished( + EventPublisher publisher, TIMEOUT_PAYLOAD payload); public List getTimeoutEvents( Predicate predicate) { - return trackedEventsByUUID.values().stream().filter(predicate) + return trackedEventsByID.values().stream().filter(predicate) .collect(Collectors.toList()); } + + @VisibleForTesting + protected EventWatcherMetrics getMetrics() { + return metrics; + } + + /** + * Returns a tracked event to which the specified id is + * mapped, or {@code null} if there is no mapping for the id. 
+ */ + public TIMEOUT_PAYLOAD getTrackedEventbyId(long id) { + return trackedEventsByID.get(id); + } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventWatcherMetrics.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventWatcherMetrics.java new file mode 100644 index 00000000000..1db81a98890 --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventWatcherMetrics.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.server.events; + +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableRate; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Metrics for any event watcher. + */ +public class EventWatcherMetrics { + + @Metric() + private MutableCounterLong trackedEvents; + + @Metric() + private MutableCounterLong timedOutEvents; + + @Metric() + private MutableCounterLong completedEvents; + + @Metric() + private MutableRate completionTime; + + public void incrementTrackedEvents() { + trackedEvents.incr(); + } + + public void incrementTimedOutEvents() { + timedOutEvents.incr(); + } + + public void incrementCompletedEvents() { + completedEvents.incr(); + } + + @VisibleForTesting + public void updateFinishingTime(long duration) { + completionTime.add(duration); + } + + @VisibleForTesting + public MutableCounterLong getTrackedEvents() { + return trackedEvents; + } + + @VisibleForTesting + public MutableCounterLong getTimedOutEvents() { + return timedOutEvents; + } + + @VisibleForTesting + public MutableCounterLong getCompletedEvents() { + return completedEvents; + } + + @VisibleForTesting + public MutableRate getCompletionTime() { + return completionTime; + } +} diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/IdentifiableEventPayload.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/IdentifiableEventPayload.java index e73e30fcde9..3faa8e70d1b 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/IdentifiableEventPayload.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/IdentifiableEventPayload.java @@ -17,14 +17,12 @@ */ package org.apache.hadoop.hdds.server.events; -import java.util.UUID; - /** * Event with an additional unique identifier. * */ public interface IdentifiableEventPayload { - UUID getUUID(); + long getId(); } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java index a64e3d761dd..3253f2d5db2 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/SingleThreadExecutor.java @@ -23,13 +23,18 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; /** * Simple EventExecutor to call all the event handler one-by-one. 
* * @param */ +@Metrics(context = "EventQueue") public class SingleThreadExecutor implements EventExecutor { public static final String THREAD_NAME_PREFIX = "EventQueue"; @@ -41,14 +46,24 @@ private final ThreadPoolExecutor executor; - private final AtomicLong queuedCount = new AtomicLong(0); + @Metric + private MutableCounterLong queued; - private final AtomicLong successfulCount = new AtomicLong(0); + @Metric + private MutableCounterLong done; - private final AtomicLong failedCount = new AtomicLong(0); + @Metric + private MutableCounterLong failed; + /** + * Create SingleThreadExecutor. + * + * @param name Unique name used in monitoring and metrics. + */ public SingleThreadExecutor(String name) { this.name = name; + DefaultMetricsSystem.instance() + .register("EventQueue" + name, "Event Executor metrics ", this); LinkedBlockingQueue workQueue = new LinkedBlockingQueue<>(); executor = @@ -64,31 +79,31 @@ public SingleThreadExecutor(String name) { @Override public void onMessage(EventHandler handler, T message, EventPublisher publisher) { - queuedCount.incrementAndGet(); + queued.incr(); executor.execute(() -> { try { handler.onMessage(message, publisher); - successfulCount.incrementAndGet(); + done.incr(); } catch (Exception ex) { LOG.error("Error on execution message {}", message, ex); - failedCount.incrementAndGet(); + failed.incr(); } }); } @Override public long failedEvents() { - return failedCount.get(); + return failed.value(); } @Override public long successfulEvents() { - return successfulCount.get(); + return done.value(); } @Override public long queuedEvents() { - return queuedCount.get(); + return queued.value(); } @Override diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/TypedEvent.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/TypedEvent.java index c2159ad1557..27bba3ab6b4 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/TypedEvent.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/TypedEvent.java @@ -48,4 +48,11 @@ public String getName() { return name; } + @Override + public String toString() { + return "TypedEvent{" + + "payloadType=" + payloadType.getSimpleName() + + ", name='" + name + '\'' + + '}'; + } } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventQueue.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventQueue.java index 39444097fed..0c1200f6d14 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventQueue.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventQueue.java @@ -22,8 +22,7 @@ import org.junit.Before; import org.junit.Test; -import java.util.Set; -import java.util.stream.Collectors; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; /** * Testing the basic functionality of the event queue. 
@@ -44,11 +43,13 @@ @Before public void startEventQueue() { + DefaultMetricsSystem.initialize(getClass().getSimpleName()); queue = new EventQueue(); } @After public void stopEventQueue() { + DefaultMetricsSystem.shutdown(); queue.close(); } @@ -79,35 +80,4 @@ public void multipleSubscriber() { } - @Test - public void handlerGroup() { - final long[] result = new long[2]; - queue.addHandlerGroup( - "group", - new EventQueue.HandlerForEvent<>(EVENT3, (payload, publisher) -> - result[0] = payload), - new EventQueue.HandlerForEvent<>(EVENT4, (payload, publisher) -> - result[1] = payload) - ); - - queue.fireEvent(EVENT3, 23L); - queue.fireEvent(EVENT4, 42L); - - queue.processAll(1000); - - Assert.assertEquals(23, result[0]); - Assert.assertEquals(42, result[1]); - - Set eventQueueThreadNames = - Thread.getAllStackTraces().keySet() - .stream() - .filter(t -> t.getName().startsWith(SingleThreadExecutor - .THREAD_NAME_PREFIX)) - .map(Thread::getName) - .collect(Collectors.toSet()); - System.out.println(eventQueueThreadNames); - Assert.assertEquals(1, eventQueueThreadNames.size()); - - } - } \ No newline at end of file diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventWatcher.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventWatcher.java index 1731350cfe5..b72d2ae7680 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventWatcher.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/events/TestEventWatcher.java @@ -19,10 +19,9 @@ import java.util.List; import java.util.Objects; -import java.util.UUID; - +import org.apache.hadoop.hdds.HddsIdFactory; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.ozone.lease.LeaseManager; - import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -42,23 +41,24 @@ private static final TypedEvent REPLICATION_COMPLETED = new TypedEvent<>(ReplicationCompletedEvent.class); - LeaseManager leaseManager; + LeaseManager leaseManager; @Before public void startLeaseManager() { - leaseManager = new LeaseManager<>(2000l); + DefaultMetricsSystem.instance(); + leaseManager = new LeaseManager<>("Test", 2000L); leaseManager.start(); } @After public void stopLeaseManager() { leaseManager.shutdown(); + DefaultMetricsSystem.shutdown(); } @Test public void testEventHandling() throws InterruptedException { - EventQueue queue = new EventQueue(); EventWatcher @@ -71,21 +71,21 @@ public void testEventHandling() throws InterruptedException { replicationWatcher.start(queue); - UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); + long id1 = HddsIdFactory.getLongId(); + long id2 = HddsIdFactory.getLongId(); queue.fireEvent(WATCH_UNDER_REPLICATED, - new UnderreplicatedEvent(uuid1, "C1")); + new UnderreplicatedEvent(id1, "C1")); queue.fireEvent(WATCH_UNDER_REPLICATED, - new UnderreplicatedEvent(uuid2, "C2")); + new UnderreplicatedEvent(id2, "C2")); Assert.assertEquals(0, underReplicatedEvents.getReceivedEvents().size()); Thread.sleep(1000); queue.fireEvent(REPLICATION_COMPLETED, - new ReplicationCompletedEvent(uuid1, "C2", "D1")); + new ReplicationCompletedEvent(id1, "C2", "D1")); Assert.assertEquals(0, underReplicatedEvents.getReceivedEvents().size()); @@ -94,8 +94,8 @@ public void testEventHandling() throws InterruptedException { queue.processAll(1000L); Assert.assertEquals(1, underReplicatedEvents.getReceivedEvents().size()); - Assert.assertEquals(uuid2, - 
underReplicatedEvents.getReceivedEvents().get(0).UUID); + Assert.assertEquals(id2, + underReplicatedEvents.getReceivedEvents().get(0).id); } @@ -115,15 +115,15 @@ public void testInprogressFilter() throws InterruptedException { replicationWatcher.start(queue); UnderreplicatedEvent event1 = - new UnderreplicatedEvent(UUID.randomUUID(), "C1"); + new UnderreplicatedEvent(HddsIdFactory.getLongId(), "C1"); queue.fireEvent(WATCH_UNDER_REPLICATED, event1); queue.fireEvent(WATCH_UNDER_REPLICATED, - new UnderreplicatedEvent(UUID.randomUUID(), "C2")); + new UnderreplicatedEvent(HddsIdFactory.getLongId(), "C2")); queue.fireEvent(WATCH_UNDER_REPLICATED, - new UnderreplicatedEvent(UUID.randomUUID(), "C1")); + new UnderreplicatedEvent(HddsIdFactory.getLongId(), "C1")); queue.processAll(1000L); Thread.sleep(1000L); @@ -139,44 +139,116 @@ public void testInprogressFilter() throws InterruptedException { Assert.assertEquals(0, c1todo.size()); Assert.assertFalse(replicationWatcher.contains(event1)); + } + + @Test + public void testMetrics() throws InterruptedException { + + DefaultMetricsSystem.initialize("test"); + + EventQueue queue = new EventQueue(); + + EventWatcher + replicationWatcher = createEventWatcher(); + + EventHandlerStub underReplicatedEvents = + new EventHandlerStub<>(); + + queue.addHandler(UNDER_REPLICATED, underReplicatedEvents); + + replicationWatcher.start(queue); + + //send 3 events to track 3 in-progress activities + UnderreplicatedEvent event1 = + new UnderreplicatedEvent(HddsIdFactory.getLongId(), "C1"); + + UnderreplicatedEvent event2 = + new UnderreplicatedEvent(HddsIdFactory.getLongId(), "C2"); + + UnderreplicatedEvent event3 = + new UnderreplicatedEvent(HddsIdFactory.getLongId(), "C1"); + + queue.fireEvent(WATCH_UNDER_REPLICATED, event1); + + queue.fireEvent(WATCH_UNDER_REPLICATED, event2); + + queue.fireEvent(WATCH_UNDER_REPLICATED, event3); + + //1st event is completed, no need to track it any more + ReplicationCompletedEvent event1Completed = + new ReplicationCompletedEvent(event1.id, "C1", "D1"); + + queue.fireEvent(REPLICATION_COMPLETED, event1Completed); + + Thread.sleep(2200L); + //until now: 3 in-progress activities are tracked with three + // UnderreplicatedEvents. The first one is completed, the remaining two + // are timed out (as the timeout, defined in the LeaseManager, is 2000ms). + + EventWatcherMetrics metrics = replicationWatcher.getMetrics(); + + //3 events are received + Assert.assertEquals(3, metrics.getTrackedEvents().value()); + + //one is finished and doesn't need to be resent + Assert.assertEquals(1, metrics.getCompletedEvents().value()); + + //the other two are timed out and resent + Assert.assertEquals(2, metrics.getTimedOutEvents().value()); + + DefaultMetricsSystem.shutdown(); } private EventWatcher createEventWatcher() { - return new EventWatcher( - WATCH_UNDER_REPLICATED, REPLICATION_COMPLETED, leaseManager) { + return new CommandWatcherExample(WATCH_UNDER_REPLICATED, + REPLICATION_COMPLETED, leaseManager); + } - @Override - void onTimeout(EventPublisher publisher, UnderreplicatedEvent payload) { - publisher.fireEvent(UNDER_REPLICATED, payload); - } + private class CommandWatcherExample + extends EventWatcher { - @Override - void onFinished(EventPublisher publisher, UnderreplicatedEvent payload) { - //Good job. We did it.
- } - }; + public CommandWatcherExample(Event startEvent, + Event completionEvent, + LeaseManager leaseManager) { + super("TestCommandWatcher", startEvent, completionEvent, leaseManager); + } + + @Override + protected void onTimeout(EventPublisher publisher, UnderreplicatedEvent payload) { + publisher.fireEvent(UNDER_REPLICATED, payload); + } + + @Override + protected void onFinished(EventPublisher publisher, UnderreplicatedEvent payload) { + //Good job. We did it. + } + + @Override + public EventWatcherMetrics getMetrics() { + return super.getMetrics(); + } } private static class ReplicationCompletedEvent implements IdentifiableEventPayload { - private final UUID UUID; + private final long id; private final String containerId; private final String datanodeId; - public ReplicationCompletedEvent(UUID UUID, String containerId, + public ReplicationCompletedEvent(long id, String containerId, String datanodeId) { - this.UUID = UUID; + this.id = id; this.containerId = containerId; this.datanodeId = datanodeId; } - public UUID getUUID() { - return UUID; + public long getId() { + return id; } @Override @@ -203,18 +275,18 @@ public int hashCode() { implements IdentifiableEventPayload { - private final UUID UUID; + private final long id; private final String containerId; - public UnderreplicatedEvent(UUID UUID, String containerId) { + public UnderreplicatedEvent(long id, String containerId) { this.containerId = containerId; - this.UUID = UUID; + this.id = id; } - public UUID getUUID() { - return UUID; + public long getId() { + return id; } } -} \ No newline at end of file +} diff --git a/hadoop-hdds/pom.xml b/hadoop-hdds/pom.xml index fab45e23586..ce4f234bc22 100644 --- a/hadoop-hdds/pom.xml +++ b/hadoop-hdds/pom.xml @@ -25,7 +25,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> hadoop-hdds - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Distributed Data Store Project Apache Hadoop HDDS pom @@ -44,17 +44,14 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-common - provided org.apache.hadoop hadoop-hdfs - provided org.apache.hadoop hadoop-hdfs-client - provided org.apache.hadoop @@ -68,6 +65,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> test test-jar + + info.picocli + picocli + 3.5.2 + com.google.protobuf protobuf-java @@ -91,6 +93,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> apache-rat-plugin + **/hs_err*.log **/target/** .gitattributes .idea/** @@ -98,10 +101,14 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> src/main/resources/webapps/static/angular-nvd3-1.0.9.min.js src/main/resources/webapps/static/angular-route-1.6.4.min.js src/main/resources/webapps/static/d3-3.5.17.min.js - src/main/resources/webapps/static/nvd3-1.8.5.min.css src/main/resources/webapps/static/nvd3-1.8.5.min.css.map - src/main/resources/webapps/static/nvd3-1.8.5.min.js + src/main/resources/webapps/static/nvd3-1.8.5.min.css src/main/resources/webapps/static/nvd3-1.8.5.min.js.map + src/main/resources/webapps/static/nvd3-1.8.5.min.js + src/test/resources/additionalfields.container + src/test/resources/incorrect.checksum.container + src/test/resources/incorrect.container + src/test/resources/test.db.ini @@ -114,4 +121,53 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + + + parallel-tests + + + + org.apache.hadoop + hadoop-maven-plugins + + + parallel-tests-createdir + + parallel-tests-createdir + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + ${testsThreadCount} + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + + ${testsThreadCount} + 
${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + + + + + + ${test.build.data} + + + + + + fork-${surefire.forkNumber} + + + + + + + diff --git a/hadoop-hdds/server-scm/pom.xml b/hadoop-hdds/server-scm/pom.xml index 1330be8fe35..52f42e1c89d 100644 --- a/hadoop-hdds/server-scm/pom.xml +++ b/hadoop-hdds/server-scm/pom.xml @@ -20,10 +20,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-hdds-server-scm - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Distributed Data Store Storage Container Manager Server Apache Hadoop HDDS SCM Server jar @@ -37,25 +37,21 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds-common - provided org.apache.hadoop hadoop-hdds-container-service - provided org.apache.hadoop hadoop-hdds-client - provided org.apache.hadoop hadoop-hdds-server-framework - provided diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ScmUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ScmUtils.java new file mode 100644 index 00000000000..435f0a59322 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ScmUtils.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm; + +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.server.Precheck; + +/** + * SCM utility class. + */ +public final class ScmUtils { + + private ScmUtils() { + } + + /** + * Perform all prechecks for given scm operation. + * + * @param operation + * @param preChecks prechecks to be performed + */ + public static void preCheck(ScmOps operation, Precheck... 
preChecks) + throws SCMException { + for (Precheck preCheck : preChecks) { + preCheck.check(operation); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java index 953f71eaecd..d383c687a76 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java @@ -16,9 +16,11 @@ */ package org.apache.hadoop.hdds.scm.block; -import java.util.UUID; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.ScmUtils; import org.apache.hadoop.hdds.scm.container.Mapping; import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; @@ -28,8 +30,10 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.scm.server.ChillModePrecheck; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.metrics2.util.MBeans; -import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; @@ -44,8 +48,6 @@ import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes .CHILL_MODE_EXCEPTION; @@ -61,7 +63,8 @@ .OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT; /** Block Manager manages the block access for SCM. */ -public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean { +public class BlockManagerImpl implements EventHandler, + BlockManager, BlockmanagerMXBean { private static final Logger LOG = LoggerFactory.getLogger(BlockManagerImpl.class); // TODO : FIX ME : Hard coding the owner. @@ -71,7 +74,6 @@ private final NodeManager nodeManager; private final Mapping containerManager; - private final Lock lock; private final long containerSize; private final DeletedBlockLog deletedBlockLog; @@ -80,6 +82,7 @@ private final int containerProvisionBatchSize; private final Random rand; private ObjectName mxBean; + private ChillModePrecheck chillModePrecheck; /** * Constructor. @@ -87,29 +90,31 @@ * @param conf - configuration. * @param nodeManager - node manager. * @param containerManager - container manager. + * @param eventPublisher - event publisher. 
* @throws IOException */ public BlockManagerImpl(final Configuration conf, - final NodeManager nodeManager, final Mapping containerManager) + final NodeManager nodeManager, final Mapping containerManager, + EventPublisher eventPublisher) throws IOException { this.nodeManager = nodeManager; this.containerManager = containerManager; - this.containerSize = OzoneConsts.GB * conf.getInt( - ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_GB, - ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT); + this.containerSize = (long)conf.getStorageSize( + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, + StorageUnit.BYTES); this.containerProvisionBatchSize = conf.getInt( ScmConfigKeys.OZONE_SCM_CONTAINER_PROVISION_BATCH_SIZE, ScmConfigKeys.OZONE_SCM_CONTAINER_PROVISION_BATCH_SIZE_DEFAULT); rand = new Random(); - this.lock = new ReentrantLock(); mxBean = MBeans.register("BlockManager", "BlockManagerImpl", this); // SCM block deleting transaction log and deleting service. - deletedBlockLog = new DeletedBlockLogImpl(conf); + deletedBlockLog = new DeletedBlockLogImpl(conf, containerManager); long svcInterval = conf.getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, OZONE_BLOCK_DELETING_SERVICE_INTERVAL_DEFAULT, @@ -120,9 +125,9 @@ public BlockManagerImpl(final Configuration conf, OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); blockDeletingService = - new SCMBlockDeletingService( - deletedBlockLog, containerManager, nodeManager, svcInterval, - serviceTimeout, conf); + new SCMBlockDeletingService(deletedBlockLog, containerManager, + nodeManager, eventPublisher, svcInterval, serviceTimeout, conf); + chillModePrecheck = new ChillModePrecheck(); } /** @@ -152,29 +157,22 @@ public void stop() throws IOException { * @param factor - how many copies needed for this container. * @throws IOException */ - private void preAllocateContainers(int count, ReplicationType type, - ReplicationFactor factor, String owner) + private synchronized void preAllocateContainers(int count, + ReplicationType type, ReplicationFactor factor, String owner) throws IOException { - lock.lock(); - try { - for (int i = 0; i < count; i++) { - ContainerWithPipeline containerWithPipeline = null; - try { - // TODO: Fix this later when Ratis is made the Default. - containerWithPipeline = containerManager.allocateContainer(type, factor, - owner); - - if (containerWithPipeline == null) { - LOG.warn("Unable to allocate container."); - continue; - } - } catch (IOException ex) { - LOG.warn("Unable to allocate container: {}", ex); - continue; + for (int i = 0; i < count; i++) { + ContainerWithPipeline containerWithPipeline; + try { + // TODO: Fix this later when Ratis is made the Default. 
+ containerWithPipeline = containerManager.allocateContainer( + type, factor, owner); + + if (containerWithPipeline == null) { + LOG.warn("Unable to allocate container."); } + } catch (IOException ex) { + LOG.warn("Unable to allocate container: {}", ex); } - } finally { - lock.unlock(); } } @@ -192,80 +190,101 @@ public AllocatedBlock allocateBlock(final long size, ReplicationType type, ReplicationFactor factor, String owner) throws IOException { LOG.trace("Size;{} , type : {}, factor : {} ", size, type, factor); - + ScmUtils.preCheck(ScmOps.allocateBlock, chillModePrecheck); if (size < 0 || size > containerSize) { LOG.warn("Invalid block size requested : {}", size); throw new SCMException("Unsupported block size: " + size, INVALID_BLOCK_SIZE); } - if (!nodeManager.isOutOfChillMode()) { - LOG.warn("Not out of Chill mode."); - throw new SCMException("Unable to create block while in chill mode", - CHILL_MODE_EXCEPTION); + /* + Here is the high level logic. + + 1. First we check if there are containers in ALLOCATED state, that is + SCM has allocated them in the SCM namespace but the corresponding + container has not been created in the Datanode yet. If we have any in + that state, we will return that to the client, which allows client to + finish creating those containers. This is a sort of greedy algorithm, + our primary purpose is to get as many containers as possible. + + 2. If there are no allocated containers -- Then we find a Open container + that matches that pattern. + + 3. If both of them fail, the we will pre-allocate a bunch of containers + in SCM and try again. + + TODO : Support random picking of two containers from the list. So we can + use different kind of policies. + */ + + ContainerWithPipeline containerWithPipeline; + + // This is to optimize performance, if the below condition is evaluated + // to false, then we can be sure that there are no containers in + // ALLOCATED state. + // This can result in false positive, but it will never be false negative. + // How can this result in false positive? We check if there are any + // containers in ALLOCATED state, this check doesn't care about the + // USER of the containers. So there might be cases where a different + // USER has few containers in ALLOCATED state, which will result in + // false positive. + if (!containerManager.getStateManager().getContainerStateMap() + .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED) + .isEmpty()) { + // Since the above check can result in false positive, we have to do + // the actual check and find out if there are containers in ALLOCATED + // state matching our criteria. + synchronized (this) { + // Using containers from ALLOCATED state should be done within + // synchronized block (or) write lock. Since we already hold a + // read lock, we will end up in deadlock situation if we take + // write lock here. + containerWithPipeline = containerManager + .getMatchingContainerWithPipeline(size, owner, type, factor, + HddsProtos.LifeCycleState.ALLOCATED); + if (containerWithPipeline != null) { + containerManager.updateContainerState( + containerWithPipeline.getContainerInfo().getContainerID(), + HddsProtos.LifeCycleEvent.CREATE); + return newBlock(containerWithPipeline, + HddsProtos.LifeCycleState.ALLOCATED); + } + } } - lock.lock(); - try { - /* - Here is the high level logic. - - 1. First we check if there are containers in ALLOCATED state, - that is - SCM has allocated them in the SCM namespace but the - corresponding - container has not been created in the Datanode yet. 
If we - have any - in that state, we will return that to the client, which allows - client to finish creating those containers. This is a sort of - greedy - algorithm, our primary purpose is to get as many containers as - possible. - - 2. If there are no allocated containers -- Then we find a Open - container that matches that pattern. - - 3. If both of them fail, the we will pre-allocate a bunch of - conatainers in SCM and try again. - - TODO : Support random picking of two containers from the list. - So we - can use different kind of policies. - */ - - ContainerWithPipeline containerWithPipeline; + // Since we found no allocated containers that match our criteria, let us + // look for OPEN containers that match the criteria. + containerWithPipeline = containerManager + .getMatchingContainerWithPipeline(size, owner, type, factor, + HddsProtos.LifeCycleState.OPEN); + if (containerWithPipeline != null) { + return newBlock(containerWithPipeline, HddsProtos.LifeCycleState.OPEN); + } - // Look for ALLOCATED container that matches all other parameters. - containerWithPipeline = containerManager - .getMatchingContainerWithPipeline(size, owner, type, factor, - HddsProtos.LifeCycleState.ALLOCATED); - if (containerWithPipeline != null) { - containerManager.updateContainerState( - containerWithPipeline.getContainerInfo().getContainerID(), - HddsProtos.LifeCycleEvent.CREATE); - return newBlock(containerWithPipeline, - HddsProtos.LifeCycleState.ALLOCATED); + // We found neither ALLOCATED or OPEN Containers. This generally means + // that most of our containers are full or we have not allocated + // containers of the type and replication factor. So let us go and + // allocate some. + + // Even though we have already checked the containers in ALLOCATED + // state, we have to check again as we only hold a read lock. + // Some other thread might have pre-allocated container in meantime. + synchronized (this) { + if (!containerManager.getStateManager().getContainerStateMap() + .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED) + .isEmpty()) { + containerWithPipeline = containerManager + .getMatchingContainerWithPipeline(size, owner, type, factor, + HddsProtos.LifeCycleState.ALLOCATED); } - - // Since we found no allocated containers that match our criteria, let us - // look for OPEN containers that match the criteria. - containerWithPipeline = containerManager - .getMatchingContainerWithPipeline(size, owner, type, factor, - HddsProtos.LifeCycleState.OPEN); - if (containerWithPipeline != null) { - return newBlock(containerWithPipeline, HddsProtos.LifeCycleState.OPEN); + if (containerWithPipeline == null) { + preAllocateContainers(containerProvisionBatchSize, + type, factor, owner); + containerWithPipeline = containerManager + .getMatchingContainerWithPipeline(size, owner, type, factor, + HddsProtos.LifeCycleState.ALLOCATED); } - // We found neither ALLOCATED or OPEN Containers. This generally means - // that most of our containers are full or we have not allocated - // containers of the type and replication factor. So let us go and - // allocate some. 
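The rewritten allocateBlock above relies on a cheap, unsynchronized check for ALLOCATED containers (which can give false positives but never false negatives) followed by a re-check inside a synchronized block before any state transition. A minimal, self-contained sketch of that check/re-check pattern, using illustrative names rather than the real SCM types, could look like this:

    import java.util.Queue;
    import java.util.concurrent.ConcurrentLinkedQueue;

    // Toy stand-in for "containers in ALLOCATED state"; not SCM code.
    class DoubleCheckedPicker<T> {
      private final Queue<T> allocated = new ConcurrentLinkedQueue<>();

      // Cheap check outside any lock: may be a false positive (another thread
      // can drain the queue first) but never a false negative.
      T pickIfAny() {
        if (allocated.isEmpty()) {
          return null;              // fast path: definitely nothing to pick
        }
        synchronized (this) {       // re-check under the lock before taking one
          return allocated.poll();  // may still be null if we lost the race
        }
      }

      void add(T item) {
        allocated.add(item);
      }
    }
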
- preAllocateContainers(containerProvisionBatchSize, type, factor, owner); - - // Since we just allocated a set of containers this should work - containerWithPipeline = containerManager - .getMatchingContainerWithPipeline(size, owner, type, factor, - HddsProtos.LifeCycleState.ALLOCATED); if (containerWithPipeline != null) { containerManager.updateContainerState( containerWithPipeline.getContainerInfo().getContainerID(), @@ -273,30 +292,13 @@ public AllocatedBlock allocateBlock(final long size, return newBlock(containerWithPipeline, HddsProtos.LifeCycleState.ALLOCATED); } - - // we have tried all strategies we know and but somehow we are not able - // to get a container for this block. Log that info and return a null. - LOG.error( - "Unable to allocate a block for the size: {}, type: {}, " + - "factor: {}", - size, - type, - factor); - return null; - } finally { - lock.unlock(); - } - } - - private String getChannelName(ReplicationType type) { - switch (type) { - case RATIS: - return "RA" + UUID.randomUUID().toString().substring(3); - case STAND_ALONE: - return "SA" + UUID.randomUUID().toString().substring(3); - default: - return "RA" + UUID.randomUUID().toString().substring(3); } + // we have tried all strategies we know and but somehow we are not able + // to get a container for this block. Log that info and return a null. + LOG.error( + "Unable to allocate a block for the size: {}, type: {}, factor: {}", + size, type, factor); + return null; } /** @@ -315,9 +317,7 @@ private AllocatedBlock newBlock(ContainerWithPipeline containerWithPipeline, } // TODO : Revisit this local ID allocation when HA is added. - // TODO: this does not work well if multiple allocation kicks in a tight - // loop. - long localID = Time.getUtcTime(); + long localID = UniqueId.next(); long containerID = containerInfo.getContainerID(); boolean createContainer = (state == HddsProtos.LifeCycleState.ALLOCATED); @@ -350,42 +350,34 @@ public void deleteBlocks(List blockIDs) throws IOException { CHILL_MODE_EXCEPTION); } - lock.lock(); LOG.info("Deleting blocks {}", StringUtils.join(",", blockIDs)); Map> containerBlocks = new HashMap<>(); // TODO: track the block size info so that we can reclaim the container // TODO: used space when the block is deleted. - try { - for (BlockID block : blockIDs) { - // Merge blocks to a container to blocks mapping, - // prepare to persist this info to the deletedBlocksLog. - long containerID = block.getContainerID(); - if (containerBlocks.containsKey(containerID)) { - containerBlocks.get(containerID).add(block.getLocalID()); - } else { - List item = new ArrayList<>(); - item.add(block.getLocalID()); - containerBlocks.put(containerID, item); - } + for (BlockID block : blockIDs) { + // Merge blocks to a container to blocks mapping, + // prepare to persist this info to the deletedBlocksLog. + long containerID = block.getContainerID(); + if (containerBlocks.containsKey(containerID)) { + containerBlocks.get(containerID).add(block.getLocalID()); + } else { + List item = new ArrayList<>(); + item.add(block.getLocalID()); + containerBlocks.put(containerID, item); } + } - try { - Map deleteTransactionsMap = - deletedBlockLog.addTransactions(containerBlocks); - containerManager.updateDeleteTransactionId(deleteTransactionsMap); - } catch (IOException e) { - throw new IOException( - "Skip writing the deleted blocks info to" - + " the delLog because addTransaction fails. 
Batch skipped: " - + StringUtils.join(",", blockIDs), - e); - } - // TODO: Container report handling of the deleted blocks: - // Remove tombstone and update open container usage. - // We will revisit this when the closed container replication is done. - } finally { - lock.unlock(); + try { + deletedBlockLog.addTransactions(containerBlocks); + } catch (IOException e) { + throw new IOException( + "Skip writing the deleted blocks info to" + + " the delLog because addTransaction fails. Batch skipped: " + + StringUtils.join(",", blockIDs), e); } + // TODO: Container report handling of the deleted blocks: + // Remove tombstone and update open container usage. + // We will revisit this when the closed container replication is done. } @Override @@ -424,4 +416,70 @@ public int getOpenContainersNo() { public SCMBlockDeletingService getSCMBlockDeletingService() { return this.blockDeletingService; } + + @Override + public void onMessage(Boolean inChillMode, EventPublisher publisher) { + this.chillModePrecheck.setInChillMode(inChillMode); + } + + /** + * Returns status of scm chill mode determined by CHILL_MODE_STATUS event. + * */ + public boolean isScmInChillMode() { + return this.chillModePrecheck.isInChillMode(); + } + + /** + * Get class logger. + * */ + public static Logger getLogger() { + return LOG; + } + + /** + * This class uses system current time milliseconds to generate unique id. + */ + public static final class UniqueId { + /* + * When we represent time in milliseconds using 'long' data type, + * the LSB bits are used. Currently we are only using 44 bits (LSB), + * 20 bits (MSB) are not used. + * We will exhaust this 44 bits only when we are in year 2525, + * until then we can safely use this 20 bits (MSB) for offset to generate + * unique id within millisecond. + * + * Year : Mon Dec 31 18:49:04 IST 2525 + * TimeInMillis: 17545641544247 + * Binary Representation: + * MSB (20 bits): 0000 0000 0000 0000 0000 + * LSB (44 bits): 1111 1111 0101 0010 1001 1011 1011 0100 1010 0011 0111 + * + * We have 20 bits to run counter, we should exclude the first bit (MSB) + * as we don't want to deal with negative values. + * To be on safer side we will use 'short' data type which is of length + * 16 bits and will give us 65,536 values for offset. + * + */ + + private static volatile short offset = 0; + + /** + * Private constructor so that no one can instantiate this class. + */ + private UniqueId() {} + + /** + * Calculate and returns next unique id based on System#currentTimeMillis. + * + * @return unique long value + */ + public static synchronized long next() { + long utcTime = Time.getUtcTime(); + if ((utcTime & 0xFFFF000000000000L) == 0) { + return utcTime << Short.SIZE | (offset++ & 0x0000FFFF); + } + throw new RuntimeException("Got invalid UTC time," + + " cannot generate unique Id. 
UTC Time: " + utcTime); + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java index d71e7b05fe3..25420fe9278 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DatanodeDeletedBlockTransactions.java @@ -28,6 +28,8 @@ import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; + +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; /** @@ -53,47 +55,62 @@ this.nodeNum = nodeNum; } - public void addTransaction(DeletedBlocksTransaction tx) throws IOException { + public boolean addTransaction(DeletedBlocksTransaction tx, + Set dnsWithTransactionCommitted) { Pipeline pipeline = null; try { - pipeline = mappingService.getContainerWithPipeline(tx.getContainerID()) - .getPipeline(); + ContainerWithPipeline containerWithPipeline = + mappingService.getContainerWithPipeline(tx.getContainerID()); + if (containerWithPipeline.getContainerInfo().isContainerOpen()) { + return false; + } + pipeline = containerWithPipeline.getPipeline(); } catch (IOException e) { SCMBlockDeletingService.LOG.warn("Got container info error.", e); + return false; } if (pipeline == null) { SCMBlockDeletingService.LOG.warn( "Container {} not found, continue to process next", tx.getContainerID()); - return; + return false; } for (DatanodeDetails dd : pipeline.getMachines()) { UUID dnID = dd.getUuid(); - if (transactions.containsKey(dnID)) { - List txs = transactions.get(dnID); - if (txs != null && txs.size() < maximumAllowedTXNum) { - boolean hasContained = false; - for (DeletedBlocksTransaction t : txs) { - if (t.getContainerID() == tx.getContainerID()) { - hasContained = true; - break; - } - } + if (dnsWithTransactionCommitted == null || + !dnsWithTransactionCommitted.contains(dnID)) { + // Transaction need not be sent to dns which have already committed it + addTransactionToDN(dnID, tx); + } + } + return true; + } - if (!hasContained) { - txs.add(tx); - currentTXNum++; + private void addTransactionToDN(UUID dnID, DeletedBlocksTransaction tx) { + if (transactions.containsKey(dnID)) { + List txs = transactions.get(dnID); + if (txs != null && txs.size() < maximumAllowedTXNum) { + boolean hasContained = false; + for (DeletedBlocksTransaction t : txs) { + if (t.getContainerID() == tx.getContainerID()) { + hasContained = true; + break; } } - } else { - currentTXNum++; - transactions.put(dnID, tx); + + if (!hasContained) { + txs.add(tx); + currentTXNum++; + } } - SCMBlockDeletingService.LOG.debug("Transaction added: {} <- TX({})", dnID, - tx.getTxID()); + } else { + currentTXNum++; + transactions.put(dnID, tx); } + SCMBlockDeletingService.LOG + .debug("Transaction added: {} <- TX({})", dnID, tx.getTxID()); } Set getDatanodeIDs() { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java index 28103bef954..db6c1c5dda2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java @@ -17,6 +17,9 @@ */ package 
org.apache.hadoop.hdds.scm.block; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto + .DeleteBlockTransactionResult; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; @@ -24,6 +27,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.UUID; /** * The DeletedBlockLog is a persisted log in SCM to keep tracking @@ -33,26 +37,15 @@ */ public interface DeletedBlockLog extends Closeable { - /** - * A limit size list of transactions. Note count is the max number - * of TXs to return, we might not be able to always return this - * number. and the processCount of those transactions - * should be [0, MAX_RETRY). - * - * @param count - number of transactions. - * @return a list of BlockDeletionTransaction. - */ - List getTransactions(int count) - throws IOException; - /** * Scan entire log once and returns TXs to DatanodeDeletedBlockTransactions. * Once DatanodeDeletedBlockTransactions is full, the scan behavior will * stop. * @param transactions a list of TXs will be set into. + * @return Mapping from containerId to latest transactionId for the container. * @throws IOException */ - void getTransactions(DatanodeDeletedBlockTransactions transactions) + Map getTransactions(DatanodeDeletedBlockTransactions transactions) throws IOException; /** @@ -81,10 +74,11 @@ void incrementCount(List txIDs) * Commits a transaction means to delete all footprints of a transaction * from the log. This method doesn't guarantee all transactions can be * successfully deleted, it tolerate failures and tries best efforts to. - * - * @param txIDs - transaction IDs. + * @param transactionResults - delete block transaction results. + * @param dnID - ID of datanode which acknowledges the delete block command. */ - void commitTransactions(List txIDs) throws IOException; + void commitTransactions(List transactionResults, + UUID dnID); /** * Creates a block deletion transaction and adds that into the log. @@ -108,10 +102,9 @@ void addTransaction(long containerID, List blocks) * number of containers) together (on success) or non (on failure). * * @param containerBlocksMap a map of containerBlocks. - * @return Mapping from containerId to latest transactionId for the container. 
* @throws IOException */ - Map addTransactions(Map> containerBlocksMap) + void addTransactions(Map> containerBlocksMap) throws IOException; /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java index 48fa2eb1112..49af65c9ebf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java @@ -21,28 +21,39 @@ import com.google.common.collect.Lists; import com.google.common.primitives.Longs; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto + .DeleteBlockTransactionResult; +import org.apache.hadoop.hdds.scm.container.Mapping; +import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.utils.BatchOperation; -import org.apache.hadoop.utils.MetadataKeyFilters.MetadataKeyFilter; import org.apache.hadoop.utils.MetadataStore; import org.apache.hadoop.utils.MetadataStoreBuilder; +import org.eclipse.jetty.util.ConcurrentHashSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; +import static java.lang.Math.min; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_BLOCK_DELETION_MAX_RETRY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys @@ -74,12 +85,15 @@ private final int maxRetry; private final MetadataStore deletedStore; + private final Mapping containerManager; private final Lock lock; // The latest id of deleted blocks in the db. private long lastTxID; - private long lastReadTxID; + // Maps txId to set of DNs which are successful in committing the transaction + private Map> transactionToDNsCommitMap; - public DeletedBlockLogImpl(Configuration conf) throws IOException { + public DeletedBlockLogImpl(Configuration conf, Mapping containerManager) + throws IOException { maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT); @@ -95,11 +109,17 @@ public DeletedBlockLogImpl(Configuration conf) throws IOException { .setDbFile(deletedLogDbPath) .setCacheSize(cacheSize * OzoneConsts.MB) .build(); + this.containerManager = containerManager; this.lock = new ReentrantLock(); // start from the head of deleted store. - lastReadTxID = 0; lastTxID = findLatestTxIDInStore(); + + // transactionToDNsCommitMap is updated only when + // transaction is added to the log and when it is removed. + + // maps transaction to dns which have committed it. 
+ transactionToDNsCommitMap = new ConcurrentHashMap<>(); } @VisibleForTesting @@ -123,39 +143,6 @@ private long findLatestTxIDInStore() throws IOException { return txid; } - @Override - public List getTransactions( - int count) throws IOException { - List result = new ArrayList<>(); - MetadataKeyFilter getNextTxID = (preKey, currentKey, nextKey) - -> Longs.fromByteArray(currentKey) > lastReadTxID; - MetadataKeyFilter avoidInvalidTxid = (preKey, currentKey, nextKey) - -> !Arrays.equals(LATEST_TXID, currentKey); - lock.lock(); - try { - deletedStore.iterate(null, (key, value) -> { - if (getNextTxID.filterKey(null, key, null) && - avoidInvalidTxid.filterKey(null, key, null)) { - DeletedBlocksTransaction block = DeletedBlocksTransaction - .parseFrom(value); - if (block.getCount() > -1 && block.getCount() <= maxRetry) { - result.add(block); - } - } - return result.size() < count; - }); - // Scan the metadata from the beginning. - if (result.size() < count || result.size() < 1) { - lastReadTxID = 0; - } else { - lastReadTxID = result.get(result.size() - 1).getTxID(); - } - } finally { - lock.unlock(); - } - return result; - } - @Override public List getFailedTransactions() throws IOException { @@ -235,18 +222,56 @@ private DeletedBlocksTransaction constructNewTransaction(long txID, /** * {@inheritDoc} * - * @param txIDs - transaction IDs. + * @param transactionResults - transaction IDs. + * @param dnID - Id of Datanode which has acknowledged a delete block command. * @throws IOException */ @Override - public void commitTransactions(List txIDs) throws IOException { + public void commitTransactions( + List transactionResults, UUID dnID) { lock.lock(); try { - for (Long txID : txIDs) { + Set dnsWithCommittedTxn; + for (DeleteBlockTransactionResult transactionResult : + transactionResults) { + if (isTransactionFailed(transactionResult)) { + continue; + } try { - deletedStore.delete(Longs.toByteArray(txID)); - } catch (IOException ex) { - LOG.warn("Cannot commit txID " + txID, ex); + long txID = transactionResult.getTxID(); + // set of dns which have successfully committed transaction txId. + dnsWithCommittedTxn = transactionToDNsCommitMap.get(txID); + Long containerId = transactionResult.getContainerID(); + if (dnsWithCommittedTxn == null) { + LOG.warn("Transaction txId={} commit by dnId={} for containerID={} " + + "failed. Corresponding entry not found.", txID, dnID, + containerId); + return; + } + + dnsWithCommittedTxn.add(dnID); + Pipeline pipeline = + containerManager.getContainerWithPipeline(containerId) + .getPipeline(); + Collection containerDnsDetails = + pipeline.getDatanodes().values(); + // The delete entry can be safely removed from the log if all the + // corresponding nodes commit the txn. It is required to check that + // the nodes returned in the pipeline match the replication factor. 
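The commit path here tracks, per delete transaction, the set of datanodes that have acknowledged it and, as the check immediately below shows, purges the log entry only once every member of the container's pipeline has committed. A small self-contained sketch of that bookkeeping, with toy names instead of the real DeletedBlockLogImpl fields, might be:

    import java.util.Map;
    import java.util.Set;
    import java.util.UUID;
    import java.util.concurrent.ConcurrentHashMap;

    // Toy acknowledgement tracker; not the SCM implementation.
    class DeleteTxnAckTracker {
      // txId -> datanodes that have acknowledged the delete transaction.
      private final Map<Long, Set<UUID>> acksByTxn = new ConcurrentHashMap<>();

      // Called when a transaction is handed out to the datanodes of a pipeline.
      void track(long txId) {
        acksByTxn.putIfAbsent(txId, ConcurrentHashMap.newKeySet());
      }

      // Records one datanode ack; returns true once every pipeline member has
      // committed, i.e. the log entry can be purged.
      boolean ack(long txId, UUID datanode, Set<UUID> pipelineMembers) {
        Set<UUID> acks = acksByTxn.get(txId);
        if (acks == null) {
          return false;             // unknown or already purged transaction
        }
        acks.add(datanode);
        if (acks.containsAll(pipelineMembers)) {
          acksByTxn.remove(txId);   // all replicas committed: safe to purge
          return true;
        }
        return false;
      }
    }
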
+ if (min(containerDnsDetails.size(), dnsWithCommittedTxn.size()) + >= pipeline.getFactor().getNumber()) { + List containerDns = containerDnsDetails.stream() + .map(DatanodeDetails::getUuid) + .collect(Collectors.toList()); + if (dnsWithCommittedTxn.containsAll(containerDns)) { + transactionToDNsCommitMap.remove(txID); + LOG.debug("Purging txId={} from block deletion log", txID); + deletedStore.delete(Longs.toByteArray(txID)); + } + } + } catch (IOException e) { + LOG.warn("Could not commit delete block transaction: " + + transactionResult.getTxID(), e); } } } finally { @@ -254,6 +279,20 @@ public void commitTransactions(List txIDs) throws IOException { } } + private boolean isTransactionFailed(DeleteBlockTransactionResult result) { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Got block deletion ACK from datanode, TXIDs={}, " + "success={}", + result.getTxID(), result.getSuccess()); + } + if (!result.getSuccess()) { + LOG.warn("Got failed ACK for TXID={}, prepare to resend the " + + "TX in next interval", result.getTxID()); + return true; + } + return false; + } + /** * {@inheritDoc} * @@ -307,15 +346,13 @@ public int getNumOfValidTransactions() throws IOException { * {@inheritDoc} * * @param containerBlocksMap a map of containerBlocks. - * @return Mapping from containerId to latest transactionId for the container. * @throws IOException */ @Override - public Map addTransactions( + public void addTransactions( Map> containerBlocksMap) throws IOException { BatchOperation batch = new BatchOperation(); - Map deleteTransactionsMap = new HashMap<>(); lock.lock(); try { long currentLatestID = lastTxID; @@ -325,13 +362,11 @@ public int getNumOfValidTransactions() throws IOException { byte[] key = Longs.toByteArray(currentLatestID); DeletedBlocksTransaction tx = constructNewTransaction(currentLatestID, entry.getKey(), entry.getValue()); - deleteTransactionsMap.put(entry.getKey(), currentLatestID); batch.put(key, tx.toByteArray()); } lastTxID = currentLatestID; batch.put(LATEST_TXID, Longs.toByteArray(lastTxID)); deletedStore.writeBatch(batch); - return deleteTransactionsMap; } finally { lock.unlock(); } @@ -345,22 +380,29 @@ public void close() throws IOException { } @Override - public void getTransactions(DatanodeDeletedBlockTransactions transactions) - throws IOException { + public Map getTransactions( + DatanodeDeletedBlockTransactions transactions) throws IOException { lock.lock(); try { + Map deleteTransactionMap = new HashMap<>(); deletedStore.iterate(null, (key, value) -> { if (!Arrays.equals(LATEST_TXID, key)) { DeletedBlocksTransaction block = DeletedBlocksTransaction .parseFrom(value); if (block.getCount() > -1 && block.getCount() <= maxRetry) { - transactions.addTransaction(block); + if (transactions.addTransaction(block, + transactionToDNsCommitMap.get(block.getTxID()))) { + deleteTransactionMap.put(block.getContainerID(), block.getTxID()); + transactionToDNsCommitMap + .putIfAbsent(block.getTxID(), new ConcurrentHashSet<>()); + } } return !transactions.isFull(); } return true; }); + return deleteTransactionMap; } finally { lock.unlock(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/PendingDeleteHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/PendingDeleteHandler.java new file mode 100644 index 00000000000..736daac54c2 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/PendingDeleteHandler.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.block; + +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +public class PendingDeleteHandler implements + EventHandler { + + private SCMBlockDeletingService scmBlockDeletingService; + + public PendingDeleteHandler( + SCMBlockDeletingService scmBlockDeletingService) { + this.scmBlockDeletingService = scmBlockDeletingService; + } + + @Override + public void onMessage(PendingDeleteStatusList pendingDeleteStatusList, + EventPublisher publisher) { + scmBlockDeletingService.handlePendingDeletes(pendingDeleteStatusList); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/PendingDeleteStatusList.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/PendingDeleteStatusList.java new file mode 100644 index 00000000000..904762db596 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/PendingDeleteStatusList.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.block; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; + +import java.util.LinkedList; +import java.util.List; + +public class PendingDeleteStatusList { + + private List pendingDeleteStatuses; + private DatanodeDetails datanodeDetails; + + public PendingDeleteStatusList(DatanodeDetails datanodeDetails) { + this.datanodeDetails = datanodeDetails; + pendingDeleteStatuses = new LinkedList<>(); + } + + public void addPendingDeleteStatus(long dnDeleteTransactionId, + long scmDeleteTransactionId, long containerId) { + pendingDeleteStatuses.add( + new PendingDeleteStatus(dnDeleteTransactionId, scmDeleteTransactionId, + containerId)); + } + + public static class PendingDeleteStatus { + private long dnDeleteTransactionId; + private long scmDeleteTransactionId; + private long containerId; + + public PendingDeleteStatus(long dnDeleteTransactionId, + long scmDeleteTransactionId, long containerId) { + this.dnDeleteTransactionId = dnDeleteTransactionId; + this.scmDeleteTransactionId = scmDeleteTransactionId; + this.containerId = containerId; + } + + public long getDnDeleteTransactionId() { + return dnDeleteTransactionId; + } + + public long getScmDeleteTransactionId() { + return scmDeleteTransactionId; + } + + public long getContainerId() { + return containerId; + } + + } + + public List getPendingDeleteStatuses() { + return pendingDeleteStatuses; + } + + public int getNumPendingDeletes() { + return pendingDeleteStatuses.size(); + } + + public DatanodeDetails getDatanodeDetails() { + return datanodeDetails; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java index 2c555e04212..de3fe266826 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java @@ -20,11 +20,14 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.scm.container.Mapping; +import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.DeleteBlocksCommand; import org.apache.hadoop.util.Time; import org.apache.hadoop.utils.BackgroundService; @@ -36,6 +39,7 @@ import java.io.IOException; import java.util.List; +import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; @@ -53,7 +57,7 @@ */ public class SCMBlockDeletingService extends BackgroundService { - static final Logger LOG = + public static final Logger LOG = LoggerFactory.getLogger(SCMBlockDeletingService.class); // ThreadPoolSize=2, 1 for scheduler and 
the other for the scanner. @@ -61,6 +65,7 @@ private final DeletedBlockLog deletedBlockLog; private final Mapping mappingService; private final NodeManager nodeManager; + private final EventPublisher eventPublisher; // Block delete limit size is dynamically calculated based on container // delete limit size (ozone.block.deleting.container.limit.per.interval) @@ -76,13 +81,14 @@ private int blockDeleteLimitSize; public SCMBlockDeletingService(DeletedBlockLog deletedBlockLog, - Mapping mapper, NodeManager nodeManager, - long interval, long serviceTimeout, Configuration conf) { + Mapping mapper, NodeManager nodeManager, EventPublisher eventPublisher, + long interval, long serviceTimeout, Configuration conf) { super("SCMBlockDeletingService", interval, TimeUnit.MILLISECONDS, BLOCK_DELETING_SERVICE_CORE_POOL_SIZE, serviceTimeout); this.deletedBlockLog = deletedBlockLog; this.mappingService = mapper; this.nodeManager = nodeManager; + this.eventPublisher = eventPublisher; int containerLimit = conf.getInt( OZONE_BLOCK_DELETING_CONTAINER_LIMIT_PER_INTERVAL, @@ -101,6 +107,19 @@ public BackgroundTaskQueue getTasks() { return queue; } + public void handlePendingDeletes(PendingDeleteStatusList deletionStatusList) { + DatanodeDetails dnDetails = deletionStatusList.getDatanodeDetails(); + for (PendingDeleteStatusList.PendingDeleteStatus deletionStatus : + deletionStatusList.getPendingDeleteStatuses()) { + LOG.info( + "Block deletion txnID mismatch in datanode {} for containerID {}." + + " Datanode delete txnID: {}, SCM txnID: {}", + dnDetails.getUuid(), deletionStatus.getContainerId(), + deletionStatus.getDnDeleteTransactionId(), + deletionStatus.getScmDeleteTransactionId()); + } + } + private class DeletedBlockTransactionScanner implements BackgroundTask { @@ -118,11 +137,12 @@ public EmptyTaskResult call() throws Exception { LOG.debug("Running DeletedBlockTransactionScanner"); DatanodeDeletedBlockTransactions transactions = null; List datanodes = nodeManager.getNodes(NodeState.HEALTHY); + Map transactionMap = null; if (datanodes != null) { transactions = new DatanodeDeletedBlockTransactions(mappingService, blockDeleteLimitSize, datanodes.size()); try { - deletedBlockLog.getTransactions(transactions); + transactionMap = deletedBlockLog.getTransactions(transactions); } catch (IOException e) { // We may tolerant a number of failures for sometime // but if it continues to fail, at some point we need to raise @@ -145,8 +165,8 @@ public EmptyTaskResult call() throws Exception { // We should stop caching new commands if num of un-processed // command is bigger than a limit, e.g 50. In case datanode goes // offline for sometime, the cached commands be flooded. 
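The change just below replaces the direct nodeManager.addDatanodeCommand call with an event fired on the SCM event queue, pairing the command with its target datanode id so a single dispatcher decides how commands reach datanodes. As a rough, self-contained illustration of that producer/consumer shape (toy types, not the HDDS event API):

    import java.util.UUID;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    class CommandQueueSketch {
      // Pairs a command payload with the datanode it is meant for.
      static final class CommandForNode {
        final UUID datanodeId;
        final String command;
        CommandForNode(UUID datanodeId, String command) {
          this.datanodeId = datanodeId;
          this.command = command;
        }
      }

      private final BlockingQueue<CommandForNode> queue =
          new LinkedBlockingQueue<>();

      // Producer side: the rough analogue of firing a DATANODE_COMMAND event.
      void fire(UUID datanodeId, String command) {
        queue.add(new CommandForNode(datanodeId, command));
      }

      // Consumer side: a dispatcher drains the queue and forwards commands.
      CommandForNode take() throws InterruptedException {
        return queue.take();
      }
    }
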
- nodeManager.addDatanodeCommand(dnId, - new DeleteBlocksCommand(dnTXs)); + eventPublisher.fireEvent(SCMEvents.DATANODE_COMMAND, + new CommandForDatanode<>(dnId, new DeleteBlocksCommand(dnTXs))); LOG.debug( "Added delete block command for datanode {} in the queue," + " number of delete block transactions: {}, TxID list: {}", @@ -154,6 +174,7 @@ public EmptyTaskResult call() throws Exception { transactions.getTransactionIDList(dnId))); } } + mappingService.updateDeleteTransactionId(transactionMap); } if (dnTxCount > 0) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/command/CommandStatusReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/command/CommandStatusReportHandler.java new file mode 100644 index 00000000000..054665a1b5a --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/command/CommandStatusReportHandler.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.hdds.scm.command; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatus; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .CommandStatusReportFromDatanode; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.IdentifiableEventPayload; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +/** + * Handles CommandStatusReports from datanode. + */ +public class CommandStatusReportHandler implements + EventHandler { + + private static final Logger LOGGER = LoggerFactory + .getLogger(CommandStatusReportHandler.class); + + @Override + public void onMessage(CommandStatusReportFromDatanode report, + EventPublisher publisher) { + Preconditions.checkNotNull(report); + List cmdStatusList = report.getReport().getCmdStatusList(); + Preconditions.checkNotNull(cmdStatusList); + LOGGER.trace("Processing command status report for dn: {}", report + .getDatanodeDetails()); + + // Route command status to its watchers. + cmdStatusList.forEach(cmdStatus -> { + LOGGER.trace("Emitting command status for id:{} type: {}", cmdStatus + .getCmdId(), cmdStatus.getType()); + switch (cmdStatus.getType()) { + case replicateContainerCommand: + publisher.fireEvent(SCMEvents.REPLICATION_STATUS, new + ReplicationStatus(cmdStatus)); + break; + case closeContainerCommand: + publisher.fireEvent(SCMEvents.CLOSE_CONTAINER_STATUS, new + CloseContainerStatus(cmdStatus)); + break; + case deleteBlocksCommand: + publisher.fireEvent(SCMEvents.DELETE_BLOCK_STATUS, new + DeleteBlockCommandStatus(cmdStatus)); + break; + default: + LOGGER.debug("CommandStatus of type:{} not handled in " + + "CommandStatusReportHandler.", cmdStatus.getType()); + break; + } + }); + } + + /** + * Wrapper event for CommandStatus. + */ + public static class CommandStatusEvent implements IdentifiableEventPayload { + private CommandStatus cmdStatus; + + CommandStatusEvent(CommandStatus cmdStatus) { + this.cmdStatus = cmdStatus; + } + + public CommandStatus getCmdStatus() { + return cmdStatus; + } + + @Override + public String toString() { + return "CommandStatusEvent:" + cmdStatus.toString(); + } + + @Override + public long getId() { + return cmdStatus.getCmdId(); + } + } + + /** + * Wrapper event for Replicate Command. + */ + public static class ReplicationStatus extends CommandStatusEvent { + public ReplicationStatus(CommandStatus cmdStatus) { + super(cmdStatus); + } + } + + /** + * Wrapper event for CloseContainer Command. + */ + public static class CloseContainerStatus extends CommandStatusEvent { + public CloseContainerStatus(CommandStatus cmdStatus) { + super(cmdStatus); + } + } + + /** + * Wrapper event for DeleteBlock Command. 
+ */ + public static class DeleteBlockCommandStatus extends CommandStatusEvent { + public DeleteBlockCommandStatus(CommandStatus cmdStatus) { + super(cmdStatus); + } + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/command/package-info.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/command/package-info.java new file mode 100644 index 00000000000..ba17fb9eeaa --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/command/package-info.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + *
+ * This package contains HDDS protocol related classes. + */ + +/** + * This package contains HDDS protocol related classes. + */ +package org.apache.hadoop.hdds.scm.command; +/* + * Classes related to commands issued from SCM to DataNode. + * */ \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/CloseContainerEventHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/CloseContainerEventHandler.java index 7b24538a6bc..b94ce4fcb25 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/CloseContainerEventHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/CloseContainerEventHandler.java @@ -21,18 +21,21 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; -import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.server.events.EventHandler; import org.apache.hadoop.hdds.server.events.EventPublisher; -import org.apache.hadoop.hdds.server.events.TypedEvent; +import org.apache.hadoop.hdds.server.events.IdentifiableEventPayload; import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ; + /** * In case of a node failure, volume failure, volume out of spapce, node - * out of space etc, CLOSE_CONTAINER_EVENT will be triggered. - * CloseContainerEventHandler is the handler for CLOSE_CONTAINER_EVENT. + * out of space etc, CLOSE_CONTAINER will be triggered. + * CloseContainerEventHandler is the handler for CLOSE_CONTAINER. * When a close container event is fired, a close command for the container * should be sent to all the datanodes in the pipeline and containerStateManager * needs to update the container state to Closing. @@ -42,8 +45,6 @@ public static final Logger LOG = LoggerFactory.getLogger(CloseContainerEventHandler.class); - public static final TypedEvent CLOSE_CONTAINER_EVENT = - new TypedEvent<>(ContainerID.class); private final Mapping containerManager; @@ -59,33 +60,50 @@ public void onMessage(ContainerID containerID, EventPublisher publisher) { ContainerWithPipeline containerWithPipeline = null; ContainerInfo info; try { - containerWithPipeline = containerManager.getContainerWithPipeline(containerID.getId()); + containerWithPipeline = + containerManager.getContainerWithPipeline(containerID.getId()); info = containerWithPipeline.getContainerInfo(); if (info == null) { - LOG.info("Failed to update the container state. Container with id : {} " - + "does not exist", containerID.getId()); + LOG.error("Failed to update the container state. Container with id : {}" + + " does not exist", containerID.getId()); return; } } catch (IOException e) { - LOG.info("Failed to update the container state. Container with id : {} " - + "does not exist", containerID.getId()); + LOG.error("Failed to update the container state. 
Container with id : {} " + + "does not exist", containerID.getId(), e); return; } if (info.getState() == HddsProtos.LifeCycleState.OPEN) { - for (DatanodeDetails datanode : containerWithPipeline.getPipeline().getMachines()) { - containerManager.getNodeManager().addDatanodeCommand(datanode.getUuid(), + for (DatanodeDetails datanode : + containerWithPipeline.getPipeline().getMachines()) { + CommandForDatanode closeContainerCommand = new CommandForDatanode<>( + datanode.getUuid(), new CloseContainerCommand(containerID.getId(), - info.getReplicationType())); + info.getReplicationType(), info.getPipelineID())); + publisher.fireEvent(DATANODE_COMMAND, closeContainerCommand); + publisher.fireEvent(CLOSE_CONTAINER_RETRYABLE_REQ, new + CloseContainerRetryableReq(containerID)); } try { // Finalize event will make sure the state of the container transitions // from OPEN to CLOSING in containerStateManager. - containerManager.getStateManager() - .updateContainerState(info, HddsProtos.LifeCycleEvent.FINALIZE); - } catch (SCMException ex) { - LOG.error("Failed to update the container state for container : {}" - + containerID); + containerManager.updateContainerState(containerID.getId(), + HddsProtos.LifeCycleEvent.FINALIZE); + } catch (IOException ex) { + LOG.error("Failed to update the container state to FINALIZE for" + + "container : {}" + containerID, ex); + } + } else if (info.getState() == HddsProtos.LifeCycleState.ALLOCATED) { + try { + // Create event will make sure the state of the container transitions + // from OPEN to CREATING in containerStateManager, this will move + // the container out of active allocation path. + containerManager.updateContainerState(containerID.getId(), + HddsProtos.LifeCycleEvent.CREATE); + } catch (IOException ex) { + LOG.error("Failed to update the container state to CREATE for" + + "container:{}" + containerID, ex); } } else { LOG.info("container with id : {} is in {} state and need not be closed.", @@ -93,4 +111,26 @@ public void onMessage(ContainerID containerID, EventPublisher publisher) { } } + + /** + * Class to create retryable event. Prevents redundant requests for same + * container Id. + */ + public static class CloseContainerRetryableReq implements + IdentifiableEventPayload { + + private ContainerID containerID; + public CloseContainerRetryableReq(ContainerID containerID) { + this.containerID = containerID; + } + + public ContainerID getContainerID() { + return containerID; + } + + @Override + public long getId() { + return containerID.getId(); + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/CloseContainerWatcher.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/CloseContainerWatcher.java new file mode 100644 index 00000000000..8e277b9f369 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/CloseContainerWatcher.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ *
http://www.apache.org/licenses/LICENSE-2.0 + *
+ *
Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container; + +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatus.Status; +import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler + .CloseContainerStatus; + +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.Event; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventWatcher; +import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler + .CloseContainerRetryableReq; +import org.apache.hadoop.ozone.lease.LeaseManager; +import org.apache.hadoop.ozone.lease.LeaseNotFoundException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * This watcher will watch for CLOSE_CONTAINER_STATUS events fired from + * CommandStatusReport. If required it will re-trigger CloseContainer command + * for DataNodes to CloseContainerEventHandler. + */ +public class CloseContainerWatcher extends + EventWatcher { + + public static final Logger LOG = + LoggerFactory.getLogger(CloseContainerWatcher.class); + private final Mapping containerManager; + + public CloseContainerWatcher(Event startEvent, + Event completionEvent, + LeaseManager leaseManager, Mapping containerManager) { + super(startEvent, completionEvent, leaseManager); + this.containerManager = containerManager; + } + + @Override + protected void onTimeout(EventPublisher publisher, + CloseContainerRetryableReq payload) { + // Let CloseContainerEventHandler handle this message. + this.resendEventToHandler(payload.getId(), publisher); + } + + @Override + protected void onFinished(EventPublisher publisher, + CloseContainerRetryableReq payload) { + LOG.trace("CloseContainerCommand for containerId: {} executed ", payload + .getContainerID().getId()); + } + + @Override + protected synchronized void handleCompletion(CloseContainerStatus status, + EventPublisher publisher) throws LeaseNotFoundException { + // If status is PENDING then return without doing anything. + if(status.getCmdStatus().getStatus().equals(Status.PENDING)){ + return; + } + + CloseContainerRetryableReq closeCont = getTrackedEventbyId(status.getId()); + super.handleCompletion(status, publisher); + // If status is FAILED then send a msg to Handler to resend the command. 
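The completion handling around this point amounts to a small retry policy: PENDING acks are ignored, successful acks need no action beyond the base class bookkeeping, and FAILED acks re-trigger the close request while the container is still open. A toy, self-contained sketch of that policy (not the real EventWatcher API) could be:

    class CloseRetryPolicySketch {
      enum AckStatus { PENDING, EXECUTED, FAILED }

      interface Resender {
        void resend(long containerId);
      }

      // PENDING: datanode has not acted yet, keep waiting.
      // EXECUTED: normal completion, nothing to retry.
      // FAILED: re-issue the close command if the container is still open.
      void onAck(long containerId, AckStatus status, boolean containerStillOpen,
          Resender resender) {
        if (status != AckStatus.FAILED) {
          return;
        }
        if (containerStillOpen) {
          resender.resend(containerId);
        }
      }
    }
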
+ if (status.getCmdStatus().getStatus().equals(Status.FAILED) && closeCont + != null) { + this.resendEventToHandler(closeCont.getId(), publisher); + } + } + + private void resendEventToHandler(long containerID, EventPublisher + publisher) { + try { + // Check if container is still open + if (containerManager.getContainer(containerID).isContainerOpen()) { + publisher.fireEvent(SCMEvents.CLOSE_CONTAINER, + ContainerID.valueof(containerID)); + } + } catch (IOException e) { + LOG.warn("Error in CloseContainerWatcher while processing event " + + "for containerId {} ExceptionMsg: ", containerID, e.getMessage()); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerActionsHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerActionsHandler.java new file mode 100644 index 00000000000..ce399eb89b8 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerActionsHandler.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerAction; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .ContainerActionsFromDatanode; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Handles container reports from datanode. + */ +public class ContainerActionsHandler implements + EventHandler { + + private static final Logger LOG = LoggerFactory.getLogger( + ContainerActionsHandler.class); + + @Override + public void onMessage( + ContainerActionsFromDatanode containerReportFromDatanode, + EventPublisher publisher) { + DatanodeDetails dd = containerReportFromDatanode.getDatanodeDetails(); + for (ContainerAction action : containerReportFromDatanode.getReport() + .getContainerActionsList()) { + ContainerID containerId = ContainerID.valueof(action.getContainerID()); + switch (action.getAction()) { + case CLOSE: + LOG.debug("Closing container {} in datanode {} because the" + + " container is {}.", containerId, dd, action.getReason()); + publisher.fireEvent(SCMEvents.CLOSE_CONTAINER, containerId); + break; + default: + LOG.warn("Invalid action {} with reason {}, from datanode {}. 
", + action.getAction(), action.getReason(), dd); } + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerMapping.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerMapping.java index e25c5b47405..206e24be3c1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerMapping.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerMapping.java @@ -20,15 +20,20 @@ import com.google.common.base.Preconditions; import com.google.common.primitives.Longs; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.SCMContainerInfo; +import org.apache.hadoop.hdds.scm.block.PendingDeleteStatusList; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.closer.ContainerCloser; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -38,10 +43,12 @@ .StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.lease.Lease; import org.apache.hadoop.ozone.lease.LeaseException; import org.apache.hadoop.ozone.lease.LeaseManager; +import org.apache.hadoop.utils.BatchOperation; import org.apache.hadoop.utils.MetadataStore; import org.apache.hadoop.utils.MetadataStoreBuilder; import org.slf4j.Logger; @@ -53,6 +60,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -60,7 +68,7 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_CONTAINER_SIZE_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_CONTAINER_SIZE_GB; + .OZONE_SCM_CONTAINER_SIZE; import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes .FAILED_TO_CHANGE_CONTAINER_STATE; import static org.apache.hadoop.hdds.server.ServerUtils.getOzoneMetaDirPath; @@ -83,8 +91,7 @@ private final PipelineSelector pipelineSelector; private final ContainerStateManager containerStateManager; private final LeaseManager containerLeaseManager; - private final float containerCloseThreshold; - private final ContainerCloser closer; + private final EventPublisher eventPublisher; private final long size; /** @@ -104,10 +111,9 @@ @SuppressWarnings("unchecked") public ContainerMapping( final Configuration conf, final NodeManager nodeManager, final int - 
cacheSizeMB) throws IOException { + cacheSizeMB, EventPublisher eventPublisher) throws IOException { this.nodeManager = nodeManager; this.cacheSize = cacheSizeMB; - this.closer = new ContainerCloser(nodeManager, conf); File metaDir = getOzoneMetaDirPath(conf); @@ -122,25 +128,24 @@ public ContainerMapping( this.lock = new ReentrantLock(); - this.pipelineSelector = new PipelineSelector(nodeManager, conf); + size = (long)conf.getStorageSize(OZONE_SCM_CONTAINER_SIZE, + OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); - // To be replaced with code getStorageSize once it is committed. - size = conf.getLong(OZONE_SCM_CONTAINER_SIZE_GB, - OZONE_SCM_CONTAINER_SIZE_DEFAULT) * 1024 * 1024 * 1024; - this.containerStateManager = - new ContainerStateManager(conf, this); + this.pipelineSelector = new PipelineSelector(nodeManager, + conf, eventPublisher, cacheSizeMB); - this.containerCloseThreshold = conf.getFloat( - ScmConfigKeys.OZONE_SCM_CONTAINER_CLOSE_THRESHOLD, - ScmConfigKeys.OZONE_SCM_CONTAINER_CLOSE_THRESHOLD_DEFAULT); + this.containerStateManager = + new ContainerStateManager(conf, this, pipelineSelector); LOG.trace("Container State Manager created."); + this.eventPublisher = eventPublisher; + long containerCreationLeaseTimeout = conf.getTimeDuration( ScmConfigKeys.OZONE_SCM_CONTAINER_CREATION_LEASE_TIMEOUT, ScmConfigKeys.OZONE_SCM_CONTAINER_CREATION_LEASE_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); - LOG.trace("Starting Container Lease Manager."); - containerLeaseManager = new LeaseManager<>(containerCreationLeaseTimeout); + containerLeaseManager = new LeaseManager<>("ContainerCreation", + containerCreationLeaseTimeout); containerLeaseManager.start(); } @@ -193,14 +198,24 @@ public ContainerWithPipeline getContainerWithPipeline(long containerID) HddsProtos.SCMContainerInfo temp = HddsProtos.SCMContainerInfo.PARSER .parseFrom(containerBytes); contInfo = ContainerInfo.fromProtobuf(temp); - Pipeline pipeline = pipelineSelector - .getPipeline(contInfo.getPipelineName(), - contInfo.getReplicationType()); - - if(pipeline == null) { - pipeline = pipelineSelector - .getReplicationPipeline(contInfo.getReplicationType(), - contInfo.getReplicationFactor()); + + Pipeline pipeline; + if (contInfo.isContainerOpen()) { + // If pipeline with given pipeline Id already exist return it + pipeline = pipelineSelector.getPipeline(contInfo.getPipelineID()); + } else { + // For close containers create pipeline from datanodes with replicas + Set dnWithReplicas = containerStateManager + .getContainerReplicas(contInfo.containerID()); + if (dnWithReplicas.size() == 0) { + throw new SCMException("Can't create a pipeline for container with " + + "no replica.", ResultCodes.NO_REPLICA_FOUND); + } + pipeline = + new Pipeline(dnWithReplicas.iterator().next().getUuidString(), + contInfo.getState(), ReplicationType.STAND_ALONE, + contInfo.getReplicationFactor(), PipelineID.randomId()); + dnWithReplicas.forEach(pipeline::addMember); } return new ContainerWithPipeline(contInfo, pipeline); } finally { @@ -259,12 +274,6 @@ public ContainerWithPipeline allocateContainer( ContainerInfo containerInfo; ContainerWithPipeline containerWithPipeline; - if (!nodeManager.isOutOfChillMode()) { - throw new SCMException( - "Unable to create container while in chill mode", - SCMException.ResultCodes.CHILL_MODE_EXCEPTION); - } - lock.lock(); try { containerWithPipeline = containerStateManager.allocateContainer( @@ -372,6 +381,10 @@ public void deleteContainer(long containerID) throws IOException { // Like releasing the lease in case of 
BEGIN_CREATE. ContainerInfo updatedContainer = containerStateManager .updateContainerState(containerInfo, event); + if (!updatedContainer.isContainerOpen()) { + pipelineSelector.removeContainerFromPipeline( + containerInfo.getPipelineID(), containerID); + } containerStore.put(dbKey, updatedContainer.getProtobuf().toByteArray()); return updatedContainer.getState(); } catch (LeaseException e) { @@ -390,8 +403,13 @@ public void deleteContainer(long containerID) throws IOException { */ public void updateDeleteTransactionId(Map deleteTransactionMap) throws IOException { + if (deleteTransactionMap == null) { + return; + } + lock.lock(); try { + BatchOperation batch = new BatchOperation(); for (Map.Entry entry : deleteTransactionMap.entrySet()) { long containerID = entry.getKey(); byte[] dbKey = Longs.toByteArray(containerID); @@ -405,10 +423,11 @@ public void updateDeleteTransactionId(Map deleteTransactionMap) ContainerInfo containerInfo = ContainerInfo.fromProtobuf( HddsProtos.SCMContainerInfo.parseFrom(containerBytes)); containerInfo.updateDeleteTransactionId(entry.getValue()); - containerStore.put(dbKey, containerInfo.getProtobuf().toByteArray()); - containerStateManager - .updateDeleteTransactionId(containerID, entry.getValue()); + batch.put(dbKey, containerInfo.getProtobuf().toByteArray()); } + containerStore.writeBatch(batch); + containerStateManager + .updateDeleteTransactionId(deleteTransactionMap); } finally { lock.unlock(); } @@ -427,32 +446,44 @@ public ContainerStateManager getStateManager() { /** * Return a container matching the attributes specified. * - * @param size - Space needed in the Container. + * @param sizeRequired - Space needed in the Container. * @param owner - Owner of the container - A specific nameservice. * @param type - Replication Type {StandAlone, Ratis} * @param factor - Replication Factor {ONE, THREE} * @param state - State of the Container-- {Open, Allocated etc.} * @return ContainerInfo, null if there is no match found. */ - public ContainerWithPipeline getMatchingContainerWithPipeline(final long size, - String owner, ReplicationType type, ReplicationFactor factor, - LifeCycleState state) throws IOException { + public ContainerWithPipeline getMatchingContainerWithPipeline( + final long sizeRequired, String owner, ReplicationType type, + ReplicationFactor factor, LifeCycleState state) throws IOException { ContainerInfo containerInfo = getStateManager() - .getMatchingContainer(size, owner, type, factor, state); + .getMatchingContainer(sizeRequired, owner, type, factor, state); if (containerInfo == null) { return null; } Pipeline pipeline = pipelineSelector - .getPipeline(containerInfo.getPipelineName(), - containerInfo.getReplicationType()); - if (pipeline == null) { - pipelineSelector - .getReplicationPipeline(containerInfo.getReplicationType(), - containerInfo.getReplicationFactor()); - } + .getPipeline(containerInfo.getPipelineID()); return new ContainerWithPipeline(containerInfo, pipeline); } + public void handlePipelineClose(PipelineID pipelineID) { + try { + Pipeline pipeline = pipelineSelector.getPipeline(pipelineID); + if (pipeline != null) { + pipelineSelector.finalizePipeline(pipeline); + } else { + LOG.debug("pipeline:{} not found", pipelineID); + } + } catch (Exception e) { + LOG.info("failed to close pipeline:{}", pipelineID, e); + } + } + + public Set getPipelineOnDatanode( + DatanodeDetails datanodeDetails) { + return pipelineSelector.getPipelineId(datanodeDetails.getUuid()); + } + /** * Process container report from Datanode. *

@@ -472,14 +503,26 @@ public ContainerWithPipeline getMatchingContainerWithPipeline(final long size, */ @Override public void processContainerReports(DatanodeDetails datanodeDetails, - ContainerReportsProto reports) + ContainerReportsProto reports, boolean isRegisterCall) throws IOException { List containerInfos = reports.getReportsList(); - - for (StorageContainerDatanodeProtocolProtos.ContainerInfo datanodeState : + PendingDeleteStatusList pendingDeleteStatusList = + new PendingDeleteStatusList(datanodeDetails); + for (StorageContainerDatanodeProtocolProtos.ContainerInfo contInfo : containerInfos) { - byte[] dbKey = Longs.toByteArray(datanodeState.getContainerID()); + // Update replica info during registration process. + if (isRegisterCall) { + try { + getStateManager().addContainerReplica(ContainerID. + valueof(contInfo.getContainerID()), datanodeDetails); + } catch (Exception ex) { + // Continue to next one after logging the error. + LOG.error("Error while adding replica for containerId {}.", + contInfo.getContainerID(), ex); + } + } + byte[] dbKey = Longs.toByteArray(contInfo.getContainerID()); lock.lock(); try { byte[] containerBytes = containerStore.get(dbKey); @@ -487,8 +530,28 @@ public void processContainerReports(DatanodeDetails datanodeDetails, HddsProtos.SCMContainerInfo knownState = HddsProtos.SCMContainerInfo.PARSER.parseFrom(containerBytes); + if (knownState.getState() == LifeCycleState.CLOSING + && contInfo.getState() == LifeCycleState.CLOSED) { + + updateContainerState(contInfo.getContainerID(), + LifeCycleEvent.CLOSE); + + //reread the container + knownState = + HddsProtos.SCMContainerInfo.PARSER + .parseFrom(containerStore.get(dbKey)); + } + HddsProtos.SCMContainerInfo newState = - reconcileState(datanodeState, knownState, datanodeDetails); + reconcileState(contInfo, knownState, datanodeDetails); + + if (knownState.getDeleteTransactionId() > contInfo + .getDeleteTransactionId()) { + pendingDeleteStatusList + .addPendingDeleteStatus(contInfo.getDeleteTransactionId(), + knownState.getDeleteTransactionId(), + knownState.getContainerID()); + } // FIX ME: This can be optimized, we write twice to memory, where a // single write would work well. @@ -497,28 +560,22 @@ public void processContainerReports(DatanodeDetails datanodeDetails, // the updated State. containerStore.put(dbKey, newState.toByteArray()); - // If the container is closed, then state is already written to SCM - Pipeline pipeline = pipelineSelector.getPipeline(newState.getPipelineName(), newState.getReplicationType()); - if(pipeline == null) { - pipeline = pipelineSelector - .getReplicationPipeline(newState.getReplicationType(), - newState.getReplicationFactor()); - } - // DB.TODO: So can we can write only once to DB. - if (closeContainerIfNeeded(newState, pipeline)) { - LOG.info("Closing the Container: {}", newState.getContainerID()); - } } else { // Container not found in our container db. 
LOG.error("Error while processing container report from datanode :" + " {}, for container: {}, reason: container doesn't exist in" + "container database.", datanodeDetails, - datanodeState.getContainerID()); + contInfo.getContainerID()); } } finally { lock.unlock(); } } + if (pendingDeleteStatusList.getNumPendingDeletes() > 0) { + eventPublisher.fireEvent(SCMEvents.PENDING_DELETE_STATUS, + pendingDeleteStatusList); + } + } /** @@ -535,12 +592,12 @@ public void processContainerReports(DatanodeDetails datanodeDetails, HddsProtos.SCMContainerInfo.Builder builder = HddsProtos.SCMContainerInfo.newBuilder(); builder.setContainerID(knownState.getContainerID()) - .setPipelineName(knownState.getPipelineName()) + .setPipelineID(knownState.getPipelineID()) .setReplicationType(knownState.getReplicationType()) .setReplicationFactor(knownState.getReplicationFactor()); - // TODO: If current state doesn't have this DN in list of DataNodes with replica - // then add it in list of replicas. + // TODO: If current state doesn't have this DN in list of DataNodes with + // replica then add it in list of replicas. // If used size is greater than allocated size, we will be updating // allocated size with used size. This update is done as a fallback @@ -563,52 +620,6 @@ public void processContainerReports(DatanodeDetails datanodeDetails, return builder.build(); } - /** - * Queues the close container command, to datanode and writes the new state - * to container DB. - *

- * TODO : Remove this 2 ContainerInfo definitions. It is brain dead to have - * one protobuf in one file and another definition in another file. - * - * @param newState - This is the state we maintain in SCM. - * @param pipeline - * @throws IOException - */ - private boolean closeContainerIfNeeded(SCMContainerInfo newState, - Pipeline pipeline) - throws IOException { - float containerUsedPercentage = 1.0f * - newState.getUsedBytes() / this.size; - - ContainerInfo scmInfo = getContainer(newState.getContainerID()); - if (containerUsedPercentage >= containerCloseThreshold - && !isClosed(scmInfo)) { - // We will call closer till get to the closed state. - // That is SCM will make this call repeatedly until we reach the closed - // state. - closer.close(newState, pipeline); - - if (shouldClose(scmInfo)) { - // This event moves the Container from Open to Closing State, this is - // a state inside SCM. This is the desired state that SCM wants this - // container to reach. We will know that a container has reached the - // closed state from container reports. This state change should be - // invoked once and only once. - HddsProtos.LifeCycleState state = updateContainerState( - scmInfo.getContainerID(), - HddsProtos.LifeCycleEvent.FINALIZE); - if (state != HddsProtos.LifeCycleState.CLOSING) { - LOG.error("Failed to close container {}, reason : Not able " + - "to " + - "update container state, current container state: {}.", - newState.getContainerID(), state); - return false; - } - return true; - } - } - return false; - } /** * In Container is in closed state, if it is in closed, Deleting or Deleted @@ -625,11 +636,6 @@ private boolean isClosed(ContainerInfo info) { return info.getState() == HddsProtos.LifeCycleState.CLOSED; } - @VisibleForTesting - public ContainerCloser getCloser() { - return closer; - } - /** * Closes this stream and releases any system resources associated with it. * If the stream is @@ -656,6 +662,10 @@ public void close() throws IOException { if (containerStore != null) { containerStore.close(); } + + if (pipelineSelector != null) { + pipelineSelector.shutdown(); + } } /** @@ -679,21 +689,7 @@ public void flushContainerInfo() throws IOException { // return info of a deleted container. 
may revisit this in the future, // for now, just skip a not-found container if (containerBytes != null) { - HddsProtos.SCMContainerInfo oldInfoProto = - HddsProtos.SCMContainerInfo.PARSER.parseFrom(containerBytes); - ContainerInfo oldInfo = ContainerInfo.fromProtobuf(oldInfoProto); - ContainerInfo newInfo = new ContainerInfo.Builder() - .setAllocatedBytes(info.getAllocatedBytes()) - .setNumberOfKeys(oldInfo.getNumberOfKeys()) - .setOwner(oldInfo.getOwner()) - .setPipelineName(oldInfo.getPipelineName()) - .setState(oldInfo.getState()) - .setUsedBytes(oldInfo.getUsedBytes()) - .setDeleteTransactionId(oldInfo.getDeleteTransactionId()) - .setReplicationFactor(oldInfo.getReplicationFactor()) - .setReplicationType(oldInfo.getReplicationType()) - .build(); - containerStore.put(dbKey, newInfo.getProtobuf().toByteArray()); + containerStore.put(dbKey, info.getProtobuf().toByteArray()); } else { LOG.debug("Container state manager has container {} but not found " + "in container store, a deleted container?", @@ -709,13 +705,13 @@ public void flushContainerInfo() throws IOException { } } - @Override - public NodeManager getNodeManager() { - return nodeManager; - } - @VisibleForTesting public MetadataStore getContainerStore() { return containerStore; } + + @VisibleForTesting + public PipelineSelector getPipelineSelector() { + return pipelineSelector; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java new file mode 100644 index 00000000000..dcbd49c5ec7 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdds.scm.container; + +import java.io.IOException; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationActivityStatus; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap; +import org.apache.hadoop.hdds.scm.node.states.ReportResult; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ContainerReportFromDatanode; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Handles container reports from datanode. + */ +public class ContainerReportHandler implements + EventHandler { + + private static final Logger LOG = + LoggerFactory.getLogger(ContainerReportHandler.class); + + private final Node2ContainerMap node2ContainerMap; + + private final Mapping containerMapping; + + private ContainerStateManager containerStateManager; + + private ReplicationActivityStatus replicationStatus; + + public ContainerReportHandler(Mapping containerMapping, + Node2ContainerMap node2ContainerMap, + ReplicationActivityStatus replicationActivityStatus) { + Preconditions.checkNotNull(containerMapping); + Preconditions.checkNotNull(node2ContainerMap); + Preconditions.checkNotNull(replicationActivityStatus); + this.containerStateManager = containerMapping.getStateManager(); + this.containerMapping = containerMapping; + this.node2ContainerMap = node2ContainerMap; + this.replicationStatus = replicationActivityStatus; + } + + @Override + public void onMessage(ContainerReportFromDatanode containerReportFromDatanode, + EventPublisher publisher) { + + DatanodeDetails datanodeOrigin = + containerReportFromDatanode.getDatanodeDetails(); + + ContainerReportsProto containerReport = + containerReportFromDatanode.getReport(); + try { + + //update state in container db and trigger close container events + containerMapping + .processContainerReports(datanodeOrigin, containerReport, false); + + Set containerIds = containerReport.getReportsList().stream() + .map(StorageContainerDatanodeProtocolProtos + .ContainerInfo::getContainerID) + .map(ContainerID::new) + .collect(Collectors.toSet()); + + ReportResult reportResult = node2ContainerMap + .processReport(datanodeOrigin.getUuid(), containerIds); + + //we have the report, so we can update the states for the next iteration. 
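To make the report-processing flow above concrete, a hedged example of the delta this handler works with; the exact semantics of Node2ContainerMap.processReport are inferred from the calls that follow, and dnUuid and the imports are illustrative only.

    // Suppose the datanode previously reported containers {1, 2, 3} and now reports {2, 3, 4}.
    Set<ContainerID> reported = new HashSet<>(Arrays.asList(
        ContainerID.valueof(2L), ContainerID.valueof(3L), ContainerID.valueof(4L)));
    ReportResult result = node2ContainerMap.processReport(dnUuid, reported);
    // result.getMissingContainers() -> {1} : replica lost on this node, container may be under-replicated
    // result.getNewContainers()     -> {4} : replica gained on this node, container may be over-replicated
    node2ContainerMap.setContainersForDatanode(dnUuid, reported);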
+ node2ContainerMap + .setContainersForDatanode(datanodeOrigin.getUuid(), containerIds); + + for (ContainerID containerID : reportResult.getMissingContainers()) { + containerStateManager + .removeContainerReplica(containerID, datanodeOrigin); + checkReplicationState(containerID, publisher); + } + + for (ContainerID containerID : reportResult.getNewContainers()) { + containerStateManager.addContainerReplica(containerID, datanodeOrigin); + checkReplicationState(containerID, publisher); + } + + } catch (IOException e) { + //TODO: stop all the replication? + LOG.error("Error on processing container report from datanode {}", + datanodeOrigin, e); + } + + } + + private void checkReplicationState(ContainerID containerID, + EventPublisher publisher) + throws SCMException { + ContainerInfo container = containerStateManager.getContainer(containerID); + + if (container == null) { + //warning unknown container + LOG.warn( + "Container is missing from containerStateManager. Can't request " + + "replication. {}", + containerID); + return; + } + if (container.isContainerOpen()) { + return; + } + + ReplicationRequest replicationState = + containerStateManager.checkReplicationState(containerID); + if (replicationState != null) { + if (replicationStatus.isReplicationEnabled()) { + publisher.fireEvent(SCMEvents.REPLICATE_CONTAINER, + replicationState); + } else { + LOG.warn( + "Over/under replicated container but the replication is not " + + "(yet) enabled: " + + replicationState.toString()); + } + } + + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java index 870ab1d1e5e..930c098f0f5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManager.java @@ -17,12 +17,17 @@ package org.apache.hadoop.hdds.scm.container; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest; import org.apache.hadoop.hdds.scm.container.states.ContainerState; import org.apache.hadoop.hdds.scm.container.states.ContainerStateMap; import org.apache.hadoop.hdds.scm.exceptions.SCMException; @@ -32,11 +37,11 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; -import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.statemachine .InvalidStateTransitionException; import org.apache.hadoop.ozone.common.statemachine.StateMachine; import org.apache.hadoop.util.Time; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,6 +50,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.NavigableSet; import java.util.Set; 
import java.util.concurrent.ConcurrentHashMap; @@ -130,7 +136,7 @@ */ @SuppressWarnings("unchecked") public ContainerStateManager(Configuration configuration, - Mapping containerMapping) { + Mapping containerMapping, PipelineSelector pipelineSelector) { // Initialize the container state machine. Set finalStates = new HashSet(); @@ -144,17 +150,19 @@ public ContainerStateManager(Configuration configuration, finalStates); initializeStateMachine(); - this.containerSize = OzoneConsts.GB * configuration.getInt( - ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_GB, - ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT); + this.containerSize = (long) configuration.getStorageSize( + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, + StorageUnit.BYTES); lastUsedMap = new ConcurrentHashMap<>(); containerCount = new AtomicLong(0); containers = new ContainerStateMap(); - loadExistingContainers(containerMapping); + loadExistingContainers(containerMapping, pipelineSelector); } - private void loadExistingContainers(Mapping containerMapping) { + private void loadExistingContainers(Mapping containerMapping, + PipelineSelector pipelineSelector) { List containerList; try { @@ -176,6 +184,8 @@ private void loadExistingContainers(Mapping containerMapping) { long maxID = 0; for (ContainerInfo container : containerList) { containers.addContainer(container); + pipelineSelector.addContainerToPipeline( + container.getPipelineID(), container.getContainerID()); if (maxID < container.getContainerID()) { maxID = container.getContainerID(); @@ -283,9 +293,10 @@ private void initializeStateMachine() { * @return ContainerWithPipeline * @throws IOException on Failure. */ - public ContainerWithPipeline allocateContainer(PipelineSelector selector, HddsProtos - .ReplicationType type, HddsProtos.ReplicationFactor replicationFactor, - String owner) throws IOException { + public ContainerWithPipeline allocateContainer(PipelineSelector selector, + HddsProtos.ReplicationType type, + HddsProtos.ReplicationFactor replicationFactor, String owner) + throws IOException { Pipeline pipeline = selector.getReplicationPipeline(type, replicationFactor); @@ -294,9 +305,10 @@ public ContainerWithPipeline allocateContainer(PipelineSelector selector, HddsPr + "replication=%s couldn't be found for the new container. " + "Do you have enough nodes?", type, replicationFactor); + long containerID = containerCount.incrementAndGet(); ContainerInfo containerInfo = new ContainerInfo.Builder() .setState(HddsProtos.LifeCycleState.ALLOCATED) - .setPipelineName(pipeline.getPipelineName()) + .setPipelineID(pipeline.getId()) // This is bytes allocated for blocks inside container, not the // container size .setAllocatedBytes(0) @@ -304,11 +316,12 @@ public ContainerWithPipeline allocateContainer(PipelineSelector selector, HddsPr .setNumberOfKeys(0) .setStateEnterTime(Time.monotonicNow()) .setOwner(owner) - .setContainerID(containerCount.incrementAndGet()) + .setContainerID(containerID) .setDeleteTransactionId(0) .setReplicationFactor(replicationFactor) .setReplicationType(pipeline.getType()) .build(); + selector.addContainerToPipeline(pipeline.getId(), containerID); Preconditions.checkNotNull(containerInfo); containers.addContainer(containerInfo); LOG.trace("New container allocated: {}", containerInfo); @@ -358,13 +371,14 @@ public ContainerInfo updateContainerInfo(ContainerInfo info) /** * Update deleteTransactionId for a container. 
* - * @param containerID ContainerID of the container whose delete - * transactionId needs to be updated. - * @param transactionId latest transactionId to be updated for the container + * @param deleteTransactionMap maps containerId to its new + * deleteTransactionID */ - public void updateDeleteTransactionId(Long containerID, long transactionId) { - containers.getContainerMap().get(ContainerID.valueof(containerID)) - .updateDeleteTransactionId(transactionId); + public void updateDeleteTransactionId(Map deleteTransactionMap) { + for (Map.Entry entry : deleteTransactionMap.entrySet()) { + containers.getContainerMap().get(ContainerID.valueof(entry.getKey())) + .updateDeleteTransactionId(entry.getValue()); + } } /** @@ -392,7 +406,7 @@ public ContainerInfo getMatchingContainer(final long size, // container ID. ContainerState key = new ContainerState(owner, type, factor); ContainerID lastID = lastUsedMap.get(key); - if(lastID == null) { + if (lastID == null) { lastID = matchingSet.first(); } @@ -419,7 +433,7 @@ public ContainerInfo getMatchingContainer(final long size, selectedContainer = findContainerWithSpace(size, resultSet, owner); } // Update the allocated Bytes on this container. - if(selectedContainer != null) { + if (selectedContainer != null) { selectedContainer.updateAllocatedBytes(size); } return selectedContainer; @@ -430,7 +444,7 @@ private ContainerInfo findContainerWithSpace(long size, NavigableSet searchSet, String owner) { // Get the container with space to meet our request. for (ContainerID id : searchSet) { - ContainerInfo containerInfo = containers.getContainerInfo(id.getId()); + ContainerInfo containerInfo = containers.getContainerInfo(id); if (containerInfo.getAllocatedBytes() + size <= this.containerSize) { containerInfo.updateLastUsedTime(); @@ -468,9 +482,9 @@ private ContainerInfo findContainerWithSpace(long size, * @throws IOException */ public ContainerWithPipeline getContainer(PipelineSelector selector, - ContainerID containerID) throws IOException { + ContainerID containerID) { ContainerInfo info = containers.getContainerInfo(containerID.getId()); - Pipeline pipeline = selector.getPipeline(info.getPipelineName(), info.getReplicationType()); + Pipeline pipeline = selector.getPipeline(info.getPipelineID()); return new ContainerWithPipeline(info, pipeline); } @@ -481,11 +495,76 @@ public ContainerWithPipeline getContainer(PipelineSelector selector, * @throws IOException */ public ContainerInfo getContainer(ContainerID containerID) { - return containers.getContainerInfo(containerID.getId()); + return containers.getContainerInfo(containerID); } @Override public void close() throws IOException { } + /** + * Returns the latest list of DataNodes where replica for given containerId + * exist. Throws an SCMException if no entry is found for given containerId. + * + * @param containerID + * @return Set + */ + public Set getContainerReplicas(ContainerID containerID) + throws SCMException { + return containers.getContainerReplicas(containerID); + } + + /** + * Add a container Replica for given DataNode. + * + * @param containerID + * @param dn + */ + public void addContainerReplica(ContainerID containerID, DatanodeDetails dn) { + containers.addContainerReplica(containerID, dn); + } + + /** + * Remove a container Replica for given DataNode. + * + * @param containerID + * @param dn + * @return True of dataNode is removed successfully else false. 
+ */ + public boolean removeContainerReplica(ContainerID containerID, + DatanodeDetails dn) throws SCMException { + return containers.removeContainerReplica(containerID, dn); + } + + /** + * Compare the existing replication number with the expected one. + */ + public ReplicationRequest checkReplicationState(ContainerID containerID) + throws SCMException { + int existingReplicas = getContainerReplicas(containerID).size(); + int expectedReplicas = getContainer(containerID) + .getReplicationFactor().getNumber(); + if (existingReplicas != expectedReplicas) { + return new ReplicationRequest(containerID.getId(), existingReplicas, + expectedReplicas); + } + return null; + } + + /** + * Checks if the container is open. + */ + public boolean isOpen(ContainerID containerID) { + Preconditions.checkNotNull(containerID); + ContainerInfo container = Preconditions + .checkNotNull(getContainer(containerID), + "Container can't be found " + containerID); + return container.isContainerOpen(); + } + + @VisibleForTesting + public ContainerStateMap getContainerStateMap() { + return containers; + } + } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/Mapping.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/Mapping.java index f52eb05ce01..1b0c57c3522 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/Mapping.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/Mapping.java @@ -25,12 +25,13 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; -import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import java.io.Closeable; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Set; /** * Mapping class contains the mapping from a name to a pipeline mapping. This is @@ -116,7 +117,7 @@ ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType type, * @param reports Container report */ void processContainerReports(DatanodeDetails datanodeDetails, - ContainerReportsProto reports) + ContainerReportsProto reports, boolean isRegisterCall) throws IOException; /** @@ -129,17 +130,23 @@ void processContainerReports(DatanodeDetails datanodeDetails, void updateDeleteTransactionId(Map deleteTransactionMap) throws IOException; - /** - * Returns the nodeManager. - * @return NodeManager - */ - NodeManager getNodeManager(); - /** * Returns the ContainerWithPipeline. * @return NodeManager */ - public ContainerWithPipeline getMatchingContainerWithPipeline(final long size, + ContainerWithPipeline getMatchingContainerWithPipeline(long size, String owner, ReplicationType type, ReplicationFactor factor, LifeCycleState state) throws IOException; + + /** + * Handle a pipeline close event. + * @param pipelineID pipeline id + */ + void handlePipelineClose(PipelineID pipelineID); + + /** + * Get set of pipeline for a specific datanode. + * @param datanodeDetails datanode for which pipelines needs to be fetched. 
+ */ + Set getPipelineOnDatanode(DatanodeDetails datanodeDetails); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/closer/ContainerCloser.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/closer/ContainerCloser.java deleted file mode 100644 index 3ca8ba91e65..00000000000 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/closer/ContainerCloser.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package org.apache.hadoop.hdds.scm.container.closer; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.SCMContainerInfo; -import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; -import org.apache.hadoop.hdds.scm.node.NodeManager; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand; -import org.apache.hadoop.util.Time; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.apache.hadoop.ozone.OzoneConfigKeys - .OZONE_CONTAINER_REPORT_INTERVAL; -import static org.apache.hadoop.ozone.OzoneConfigKeys - .OZONE_CONTAINER_REPORT_INTERVAL_DEFAULT; - -/** - * A class that manages closing of containers. This allows transition from a - * open but full container to a closed container, to which no data is written. - */ -public class ContainerCloser { - private static final Logger LOG = - LoggerFactory.getLogger(ContainerCloser.class); - private static final long MULTIPLIER = 3L; - private static final int CLEANUP_WATER_MARK = 1000; - private final NodeManager nodeManager; - private final Map commandIssued; - private final Configuration configuration; - private final AtomicInteger mapCount; - private final long reportInterval; - private final AtomicInteger threadRunCount; - private final AtomicBoolean isRunning; - - /** - * Constructs the ContainerCloser class. 
- * - * @param nodeManager - NodeManager - * @param conf - Configuration - */ - public ContainerCloser(NodeManager nodeManager, Configuration conf) { - Preconditions.checkNotNull(nodeManager); - Preconditions.checkNotNull(conf); - this.nodeManager = nodeManager; - this.configuration = conf; - this.commandIssued = new ConcurrentHashMap<>(); - this.mapCount = new AtomicInteger(0); - this.threadRunCount = new AtomicInteger(0); - this.isRunning = new AtomicBoolean(false); - this.reportInterval = this.configuration.getTimeDuration( - OZONE_CONTAINER_REPORT_INTERVAL, - OZONE_CONTAINER_REPORT_INTERVAL_DEFAULT, TimeUnit.SECONDS); - Preconditions.checkState(this.reportInterval > 0, - "report interval has to be greater than 0"); - } - - @VisibleForTesting - public static int getCleanupWaterMark() { - return CLEANUP_WATER_MARK; - } - - /** - * Sends a Container Close command to the data nodes where this container - * lives. - * - * @param info - ContainerInfo. - * @param pipeline - */ - public void close(SCMContainerInfo info, - Pipeline pipeline) { - - if (commandIssued.containsKey(info.getContainerID())) { - // We check if we issued a close command in last 3 * reportInterval secs. - long commandQueueTime = commandIssued.get(info.getContainerID()); - long currentTime = TimeUnit.MILLISECONDS.toSeconds(Time.monotonicNow()); - if (currentTime > commandQueueTime + (MULTIPLIER * reportInterval)) { - commandIssued.remove(info.getContainerID()); - mapCount.decrementAndGet(); - } else { - // Ignore this request, since we just issued a close command. We - // should wait instead of sending a command to datanode again. - return; - } - } - - // if we reached here, it means that we have not issued a command to the - // data node in last (3 times report interval). We are presuming that is - // enough time to close the container. Let us go ahead and queue a close - // to all the datanodes that participate in the container. - // - // Three important things to note here: - // - // 1. It is ok to send this command multiple times to a datanode. Close - // container is an idempotent command, if the container is already closed - // then we have no issues. - // - // 2. The container close command is issued to all datanodes. But - // depending on the pipeline type, some of the datanodes might ignore it. - // - // 3. SCM will see that datanode is closed from container reports, but it - // is possible that datanodes might get close commands since - // this queue can be emptied by a datanode after a close report is send - // to SCM. In that case also, data node will ignore this command. - - for (DatanodeDetails datanodeDetails : pipeline.getMachines()) { - nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), - new CloseContainerCommand(info.getContainerID(), - info.getReplicationType())); - } - if (!commandIssued.containsKey(info.getContainerID())) { - commandIssued.put(info.getContainerID(), - TimeUnit.MILLISECONDS.toSeconds(Time.monotonicNow())); - mapCount.incrementAndGet(); - } - // run the hash map cleaner thread if needed, non-blocking call. - runCleanerThreadIfNeeded(); - } - - private void runCleanerThreadIfNeeded() { - // Let us check if we should run a cleaner thread, not using map.size - // since it runs a loop in the case of the concurrentMap. 
- if (mapCount.get() > CLEANUP_WATER_MARK && - isRunning.compareAndSet(false, true)) { - Runnable entryCleaner = () -> { - LOG.debug("Starting close container Hash map cleaner."); - try { - for (Map.Entry entry : commandIssued.entrySet()) { - long commandQueueTime = entry.getValue(); - if (commandQueueTime + (MULTIPLIER * reportInterval) > - TimeUnit.MILLISECONDS.toSeconds(Time.monotonicNow())) { - - // It is possible for this remove to fail due to race conditions. - // No big deal we will cleanup next time. - commandIssued.remove(entry.getKey()); - mapCount.decrementAndGet(); - } - } - isRunning.compareAndSet(true, false); - LOG.debug("Finished running, close container Hash map cleaner."); - } catch (Exception ex) { - LOG.error("Unable to finish cleaning the closed containers map.", ex); - } - }; - - // Launch the cleaner thread when we need instead of having a daemon - // thread that is sleeping all the time. We need to set the Daemon to - // true to avoid blocking clean exits. - Thread cleanerThread = new ThreadFactoryBuilder() - .setDaemon(true) - .setNameFormat("Closed Container Cleaner Thread - %d") - .build().newThread(entryCleaner); - threadRunCount.incrementAndGet(); - cleanerThread.start(); - } - } - - @VisibleForTesting - public int getThreadRunCount() { - return threadRunCount.get(); - } - - @VisibleForTesting - public int getCloseCount() { - return mapCount.get(); - } -} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java index 5d91ac5dad1..3336c8e80e7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicy.java @@ -31,11 +31,14 @@ /** * Given the replication factor and size required, return set of datanodes * that satisfy the nodes and size requirement. + * + * @param excludedNodes - list of nodes to be excluded. * @param nodesRequired - number of datanodes required. * @param sizeRequired - size required for the container or block. * @return list of datanodes chosen. * @throws IOException */ - List chooseDatanodes(int nodesRequired, long sizeRequired) + List chooseDatanodes(List excludedNodes, + int nodesRequired, long sizeRequired) throws IOException; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java index 0a595d55ba5..ba241dcabcf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java @@ -95,16 +95,20 @@ public Configuration getConf() { * 3. if a set of containers are requested, we either meet the required * number of nodes or we fail that request. * + * + * @param excludedNodes - datanodes with existing replicas * @param nodesRequired - number of datanodes required. * @param sizeRequired - size required for the container or block. * @return list of datanodes chosen. * @throws SCMException SCM exception. 
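The excludedNodes parameter threaded through the placement policy interface and its implementations in this hunk is easiest to see with a hedged usage sketch; placementPolicy, containerSize and the surrounding variables are illustrative, and exception handling is omitted.

    // Ask for one additional replica, excluding datanodes that already hold the container.
    List<DatanodeDetails> datanodesWithReplica = new ArrayList<>(
        containerStateManager.getContainerReplicas(containerID));
    List<DatanodeDetails> targets = placementPolicy.chooseDatanodes(
        datanodesWithReplica, // excludedNodes: never place a new copy where one already lives
        1,                    // nodesRequired
        containerSize);       // sizeRequired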
*/ - public List chooseDatanodes(int nodesRequired, final long - sizeRequired) throws SCMException { + public List chooseDatanodes( + List excludedNodes, + int nodesRequired, final long sizeRequired) throws SCMException { List healthyNodes = nodeManager.getNodes(HddsProtos.NodeState.HEALTHY); + healthyNodes.removeAll(excludedNodes); String msg; if (healthyNodes.size() == 0) { msg = "No healthy node found to allocate container."; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java index 85a6b544cce..8df8f6e034d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java @@ -17,17 +17,18 @@ package org.apache.hadoop.hdds.scm.container.placement.algorithms; -import com.google.common.annotations.VisibleForTesting; +import java.util.List; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.node.NodeManager; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; + +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.List; - /** * Container placement policy that randomly choose datanodes with remaining * space to satisfy the size constraints. @@ -83,6 +84,8 @@ public SCMContainerPlacementCapacity(final NodeManager nodeManager, /** * Called by SCM to choose datanodes. * + * + * @param excludedNodes - list of the datanodes to exclude. * @param nodesRequired - number of datanodes required. * @param sizeRequired - size required for the container or block. * @return List of datanodes. @@ -90,9 +93,10 @@ public SCMContainerPlacementCapacity(final NodeManager nodeManager, */ @Override public List chooseDatanodes( - final int nodesRequired, final long sizeRequired) throws SCMException { + List excludedNodes, final int nodesRequired, + final long sizeRequired) throws SCMException { List healthyNodes = - super.chooseDatanodes(nodesRequired, sizeRequired); + super.chooseDatanodes(excludedNodes, nodesRequired, sizeRequired); if (healthyNodes.size() == nodesRequired) { return healthyNodes; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java index 9903c84e317..76702d555ef 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java @@ -56,6 +56,8 @@ public SCMContainerPlacementRandom(final NodeManager nodeManager, /** * Choose datanodes called by the SCM to choose the datanode. * + * + * @param excludedNodes - list of the datanodes to exclude. * @param nodesRequired - number of datanodes required. 
* @param sizeRequired - size required for the container or block. * @return List of Datanodes. @@ -63,9 +65,10 @@ public SCMContainerPlacementRandom(final NodeManager nodeManager, */ @Override public List chooseDatanodes( - final int nodesRequired, final long sizeRequired) throws SCMException { + List excludedNodes, final int nodesRequired, + final long sizeRequired) throws SCMException { List healthyNodes = - super.chooseDatanodes(nodesRequired, sizeRequired); + super.chooseDatanodes(excludedNodes, nodesRequired, sizeRequired); if (healthyNodes.size() == nodesRequired) { return healthyNodes; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationActivityStatus.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationActivityStatus.java new file mode 100644 index 00000000000..993a9860244 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationActivityStatus.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import javax.management.ObjectName; +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.metrics2.util.MBeans; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Event listener to track the current state of replication. + */ +public class ReplicationActivityStatus implements + ReplicationActivityStatusMXBean, Closeable { + + private static final Logger LOG = + LoggerFactory.getLogger(ReplicationActivityStatus.class); + + private AtomicBoolean replicationEnabled = new AtomicBoolean(); + private AtomicBoolean replicationStatusSetExternally = new AtomicBoolean(); + private ObjectName jmxObjectName; + private ReplicationStatusListener replicationStatusListener; + private ChillModeStatusListener chillModeStatusListener; + + public ReplicationActivityStatus(){ + replicationStatusListener = new ReplicationStatusListener(); + chillModeStatusListener = new ChillModeStatusListener(); + } + + public boolean isReplicationEnabled() { + return replicationEnabled.get(); + } + + @VisibleForTesting + public void setReplicationEnabled(boolean enabled) { + replicationEnabled.set(enabled); + } + + @VisibleForTesting + public void enableReplication() { + replicationEnabled.set(true); + } + + + public void start() { + try { + this.jmxObjectName = + MBeans.register( + "StorageContainerManager", "ReplicationActivityStatus", this); + } catch (Exception ex) { + LOG.error("JMX bean for ReplicationActivityStatus can't be registered", + ex); + } + } + + @Override + public void close() throws IOException { + if (this.jmxObjectName != null) { + MBeans.unregister(jmxObjectName); + } + } + + /** + * Replication status listener. + */ + class ReplicationStatusListener implements EventHandler { + @Override + public void onMessage(Boolean status, EventPublisher publisher) { + replicationStatusSetExternally.set(true); + replicationEnabled.set(status); + } + } + + /** + * Replication status is influenced by Chill mode status as well. 
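A hedged behavioural sketch of the two listeners declared in this class (the EventPublisher argument is unused by them, so null is passed purely for illustration): chill-mode transitions only drive the flag until the replication status has been set explicitly.

    ReplicationActivityStatus status = new ReplicationActivityStatus();
    status.getChillModeStatusListener().onMessage(true, null);    // entering chill mode disables replication
    assert !status.isReplicationEnabled();
    status.getReplicationStatusListener().onMessage(true, null);  // explicit enable wins from now on
    status.getChillModeStatusListener().onMessage(true, null);    // ignored: status was set externally
    assert status.isReplicationEnabled();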
+ */ + class ChillModeStatusListener implements EventHandler { + + @Override + public void onMessage(Boolean inChillMode, EventPublisher publisher) { + if (!replicationStatusSetExternally.get()) { + replicationEnabled.set(!inChillMode); + } + } + } + + public ReplicationStatusListener getReplicationStatusListener() { + return replicationStatusListener; + } + + public ChillModeStatusListener getChillModeStatusListener() { + return chillModeStatusListener; + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationActivityStatusMXBean.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationActivityStatusMXBean.java new file mode 100644 index 00000000000..164bd247efb --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationActivityStatusMXBean.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +/** + * JMX interface to monitor replication status. + */ +public interface ReplicationActivityStatusMXBean { + + boolean isReplicationEnabled(); + + void setReplicationEnabled(boolean enabled); +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationCommandWatcher.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationCommandWatcher.java new file mode 100644 index 00000000000..03a81a7db86 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationCommandWatcher.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager + .ReplicationCompleted; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager + .ReplicationRequestToRepeat; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.Event; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventWatcher; +import org.apache.hadoop.ozone.lease.LeaseManager; + +/** + * Command watcher to track the replication commands. + */ +public class ReplicationCommandWatcher + extends + EventWatcher { + + public ReplicationCommandWatcher(Event startEvent, + Event completionEvent, + LeaseManager leaseManager) { + super(startEvent, completionEvent, leaseManager); + } + + @Override + protected void onTimeout(EventPublisher publisher, + ReplicationRequestToRepeat payload) { + //put back to the original queue + publisher.fireEvent(SCMEvents.REPLICATE_CONTAINER, + payload.getRequest()); + } + + @Override + protected void onFinished(EventPublisher publisher, + ReplicationRequestToRepeat payload) { + + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java new file mode 100644 index 00000000000..ddecdbcfa5c --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -0,0 +1,250 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ThreadFactory; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerStateManager; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.placement.algorithms + .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.hdds.server.events.IdentifiableEventPayload; +import org.apache.hadoop.ozone.lease.LeaseManager; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; +import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import static org.apache.hadoop.hdds.scm.events.SCMEvents + .TRACK_REPLICATE_COMMAND; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Replication Manager manages the replication of the closed container. 
+ */ +public class ReplicationManager implements Runnable { + + private static final Logger LOG = + LoggerFactory.getLogger(ReplicationManager.class); + + private ReplicationQueue replicationQueue; + + private ContainerPlacementPolicy containerPlacement; + + private EventPublisher eventPublisher; + + private ReplicationCommandWatcher replicationCommandWatcher; + + private boolean running = true; + + private ContainerStateManager containerStateManager; + + public ReplicationManager(ContainerPlacementPolicy containerPlacement, + ContainerStateManager containerStateManager, EventQueue eventQueue, + LeaseManager commandWatcherLeaseManager) { + + this.containerPlacement = containerPlacement; + this.containerStateManager = containerStateManager; + this.eventPublisher = eventQueue; + + this.replicationCommandWatcher = + new ReplicationCommandWatcher(TRACK_REPLICATE_COMMAND, + SCMEvents.REPLICATION_COMPLETE, commandWatcherLeaseManager); + + this.replicationQueue = new ReplicationQueue(); + + eventQueue.addHandler(SCMEvents.REPLICATE_CONTAINER, + (replicationRequest, publisher) -> replicationQueue + .add(replicationRequest)); + + this.replicationCommandWatcher.start(eventQueue); + + } + + public void start() { + + ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("Replication Manager").build(); + + threadFactory.newThread(this).start(); + } + + public void run() { + + while (running) { + ReplicationRequest request = null; + try { + //TODO: add throttling here + request = replicationQueue.take(); + + ContainerID containerID = new ContainerID(request.getContainerId()); + ContainerInfo containerInfo = + containerStateManager.getContainer(containerID); + + Preconditions.checkNotNull(containerInfo, + "No information about the container " + request.getContainerId()); + + Preconditions + .checkState(containerInfo.getState() == LifeCycleState.CLOSED, + "Container should be in closed state"); + + //check the current replication + List datanodesWithReplicas = + new ArrayList<>(getCurrentReplicas(request)); + + if (datanodesWithReplicas.size() == 0) { + LOG.warn( + "Container {} should be replicated but can't find any existing " + + "replicas", + containerID); + return; + } + + ReplicationRequest finalRequest = request; + + int inFlightReplications = replicationCommandWatcher.getTimeoutEvents( + e -> e.request.getContainerId() == finalRequest.getContainerId()) + .size(); + + int deficit = + request.getExpecReplicationCount() - datanodesWithReplicas.size() + - inFlightReplications; + + if (deficit > 0) { + + List selectedDatanodes = containerPlacement + .chooseDatanodes(datanodesWithReplicas, deficit, + containerInfo.getUsedBytes()); + + //send the command + for (DatanodeDetails datanode : selectedDatanodes) { + + ReplicateContainerCommand replicateCommand = + new ReplicateContainerCommand(containerID.getId(), + datanodesWithReplicas); + + eventPublisher.fireEvent(SCMEvents.DATANODE_COMMAND, + new CommandForDatanode<>( + datanode.getUuid(), replicateCommand)); + + ReplicationRequestToRepeat timeoutEvent = + new ReplicationRequestToRepeat(replicateCommand.getId(), + request); + + eventPublisher.fireEvent(TRACK_REPLICATE_COMMAND, timeoutEvent); + + } + + } else if (deficit < 0) { + //TODO: too many replicas. Not handled yet. 
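// Worked example with illustrative numbers (not part of the patch): with
// getExpecReplicationCount() == 3, one replica reported by the datanodes and
// one replication already tracked as in-flight by the command watcher,
// deficit = 3 - 1 - 1 = 1, so the placement policy is asked for one datanode
// and a single ReplicateContainerCommand is sent for it.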
+ } + + } catch (Exception e) { + LOG.error("Can't replicate container {}", request, e); + } + } + + } + + @VisibleForTesting + protected Set getCurrentReplicas(ReplicationRequest request) + throws IOException { + return containerStateManager + .getContainerReplicas(new ContainerID(request.getContainerId())); + } + + @VisibleForTesting + public ReplicationQueue getReplicationQueue() { + return replicationQueue; + } + + public void stop() { + running = false; + } + + /** + * Event for the ReplicationCommandWatcher to repeat the embedded request + * in case of timeout. + */ + public static class ReplicationRequestToRepeat + implements IdentifiableEventPayload { + + private final long commandId; + + private final ReplicationRequest request; + + public ReplicationRequestToRepeat(long commandId, + ReplicationRequest request) { + this.commandId = commandId; + this.request = request; + } + + public ReplicationRequest getRequest() { + return request; + } + + @Override + public long getId() { + return commandId; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ReplicationRequestToRepeat that = (ReplicationRequestToRepeat) o; + return Objects.equals(request, that.request); + } + + @Override + public int hashCode() { + + return Objects.hash(request); + } + } + + public static class ReplicationCompleted implements IdentifiableEventPayload { + + private final long uuid; + + public ReplicationCompleted(long uuid) { + this.uuid = uuid; + } + + @Override + public long getId() { + return uuid; + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationQueue.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java similarity index 68% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationQueue.java rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java index e0a235122e8..4ca67be4e11 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationQueue.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java @@ -15,11 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License.
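The round trip behind the watcher used above, as a sketch: every ReplicateContainerCommand is paired with a ReplicationRequestToRepeat fired on TRACK_REPLICATE_COMMAND, and the watcher holds it under a lease until a ReplicationCompleted event with the matching id arrives on REPLICATION_COMPLETE; if the lease expires first, onTimeout() re-fires the original request on REPLICATE_CONTAINER. The completion side is expected to come from command status handling, which is outside this hunk; a hypothetical publisher would look like:

    // the id must match the one used when the command was sent, i.e.
    // replicateCommand.getId(), so the watcher can mark the task finished
    eventQueue.fireEvent(SCMEvents.REPLICATION_COMPLETE,
        new ReplicationManager.ReplicationCompleted(replicateCommand.getId()));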
*/ -package org.apache.hadoop.ozone.container.replication; +package org.apache.hadoop.hdds.scm.container.replication; import java.util.List; -import java.util.PriorityQueue; -import java.util.Queue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.PriorityBlockingQueue; /** * Priority queue to handle under-replicated and over replicated containers @@ -28,13 +28,13 @@ */ public class ReplicationQueue { - private final Queue queue; + private final BlockingQueue queue; - ReplicationQueue() { - queue = new PriorityQueue<>(); + public ReplicationQueue() { + queue = new PriorityBlockingQueue<>(); } - public synchronized boolean add(ReplicationRequest repObj) { + public boolean add(ReplicationRequest repObj) { if (this.queue.contains(repObj)) { // Remove the earlier message and insert this one this.queue.remove(repObj); @@ -42,7 +42,7 @@ public synchronized boolean add(ReplicationRequest repObj) { return this.queue.add(repObj); } - public synchronized boolean remove(ReplicationRequest repObj) { + public boolean remove(ReplicationRequest repObj) { return queue.remove(repObj); } @@ -52,21 +52,18 @@ public synchronized boolean remove(ReplicationRequest repObj) { * * @return the head of this queue, or {@code null} if this queue is empty */ - public synchronized ReplicationRequest peek() { + public ReplicationRequest peek() { return queue.peek(); } /** - * Retrieves and removes the head of this queue, - * or returns {@code null} if this queue is empty. - * - * @return the head of this queue, or {@code null} if this queue is empty + * Retrieves and removes the head of this queue (blocking queue). */ - public synchronized ReplicationRequest poll() { - return queue.poll(); + public ReplicationRequest take() throws InterruptedException { + return queue.take(); } - public synchronized boolean removeAll(List repObjs) { + public boolean removeAll(List repObjs) { return queue.removeAll(repObjs); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationRequest.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationRequest.java similarity index 78% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationRequest.java rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationRequest.java index a6ccce13e0b..d40cd9cd17d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationRequest.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationRequest.java @@ -15,9 +15,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.ozone.container.replication; +package org.apache.hadoop.hdds.scm.container.replication; import java.io.Serializable; + import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.commons.lang3.builder.HashCodeBuilder; @@ -28,18 +29,24 @@ public class ReplicationRequest implements Comparable, Serializable { private final long containerId; - private final short replicationCount; - private final short expecReplicationCount; + private final int replicationCount; + private final int expecReplicationCount; private final long timestamp; - public ReplicationRequest(long containerId, short replicationCount, - long timestamp, short expecReplicationCount) { + public ReplicationRequest(long containerId, int replicationCount, + long timestamp, int expecReplicationCount) { this.containerId = containerId; this.replicationCount = replicationCount; this.timestamp = timestamp; this.expecReplicationCount = expecReplicationCount; } + public ReplicationRequest(long containerId, int replicationCount, + int expecReplicationCount) { + this(containerId, replicationCount, System.currentTimeMillis(), + expecReplicationCount); + } + /** * Compares this object with the specified object for order. Returns a * negative integer, zero, or a positive integer as this object is less @@ -92,7 +99,7 @@ public long getContainerId() { return containerId; } - public short getReplicationCount() { + public int getReplicationCount() { return replicationCount; } @@ -100,7 +107,17 @@ public long getTimestamp() { return timestamp; } - public short getExpecReplicationCount() { + public int getExpecReplicationCount() { return expecReplicationCount; } + + @Override + public String toString() { + return "ReplicationRequest{" + + "containerId=" + containerId + + ", replicationCount=" + replicationCount + + ", expecReplicationCount=" + expecReplicationCount + + ", timestamp=" + timestamp + + '}'; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/package-info.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/package-info.java new file mode 100644 index 00000000000..934b01e6231 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/package-info.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +/** + * HDDS (Closed) Container replicaton related classes. 
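Putting the pieces of this package together, a short sketch of how an under-replicated closed container would be handed to the manager, using the int-based convenience constructor added above (the event queue instance and the detection site are assumptions, not part of this patch):

    // container 42 has 2 live replicas but its replication factor expects 3;
    // ReplicationManager's REPLICATE_CONTAINER handler enqueues the request,
    // ReplicationQueue.add() deduplicates it and the blocking take() in the
    // manager's run loop picks it up
    eventQueue.fireEvent(SCMEvents.REPLICATE_CONTAINER,
        new ReplicationRequest(42L, 2, 3));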
+ */ \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerQueryKey.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerQueryKey.java new file mode 100644 index 00000000000..cd491154291 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerQueryKey.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + */ + +package org.apache.hadoop.hdds.scm.container.states; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; + +/** + * Key for the Caching layer for Container Query. + */ +public class ContainerQueryKey { + private final HddsProtos.LifeCycleState state; + private final String owner; + private final HddsProtos.ReplicationFactor factor; + private final HddsProtos.ReplicationType type; + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + ContainerQueryKey that = (ContainerQueryKey) o; + + return new EqualsBuilder() + .append(getState(), that.getState()) + .append(getOwner(), that.getOwner()) + .append(getFactor(), that.getFactor()) + .append(getType(), that.getType()) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(61, 71) + .append(getState()) + .append(getOwner()) + .append(getFactor()) + .append(getType()) + .toHashCode(); + } + + /** + * Constructor for ContainerQueryKey. + * @param state LifeCycleState + * @param owner - Name of the Owner. + * @param factor Replication Factor. + * @param type - Replication Type. + */ + public ContainerQueryKey(HddsProtos.LifeCycleState state, String owner, + HddsProtos.ReplicationFactor factor, HddsProtos.ReplicationType type) { + this.state = state; + this.owner = owner; + this.factor = factor; + this.type = type; + } + + /** + * Returns the state of containers which this key represents. + * @return LifeCycleState + */ + public HddsProtos.LifeCycleState getState() { + return state; + } + + /** + * Returns the owner of containers which this key represents. + * @return Owner + */ + public String getOwner() { + return owner; + } + + /** + * Returns the replication factor of containers which this key represents. + * @return ReplicationFactor + */ + public HddsProtos.ReplicationFactor getFactor() { + return factor; + } + + /** + * Returns the replication type of containers which this key represents. 
+ * @return ReplicationType + */ + public HddsProtos.ReplicationType getType() { + return type; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java index c23b1fd17dc..880a715f6bc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java @@ -18,14 +18,18 @@ package org.apache.hadoop.hdds.scm.container.states; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import java.util.HashSet; +import java.util.Set; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; -import org.apache.hadoop.util.AutoCloseableLock; +import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,6 +38,9 @@ import java.util.Map; import java.util.NavigableSet; import java.util.TreeSet; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.concurrent.ConcurrentHashMap; import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes .CONTAINER_EXISTS; @@ -46,7 +53,7 @@ * Container State Map acts like a unified map for various attributes that are * used to select containers when we need allocated blocks. *

- * This class provides the ability to query 4 classes of attributes. They are + * This class provides the ability to query 5 classes of attributes. They are *

* 1. LifeCycleStates - LifeCycle States of container describe in which state * a container is. For example, a container needs to be in Open State for a @@ -83,13 +90,16 @@ private final ContainerAttribute typeMap; private final Map containerMap; + // Map to hold replicas of given container. + private final Map> contReplicaMap; private final static NavigableSet EMPTY_SET = Collections.unmodifiableNavigableSet(new TreeSet<>()); + private final Map> resultCache; // Container State Map lock should be held before calling into // Update ContainerAttributes. The consistency of ContainerAttributes is // protected by this lock. - private final AutoCloseableLock autoLock; + private final ReadWriteLock lock; /** * Create a ContainerStateMap. @@ -100,11 +110,13 @@ public ContainerStateMap() { factorMap = new ContainerAttribute<>(); typeMap = new ContainerAttribute<>(); containerMap = new HashMap<>(); - autoLock = new AutoCloseableLock(); + lock = new ReentrantReadWriteLock(); + contReplicaMap = new HashMap<>(); // new InstrumentedLock(getClass().getName(), LOG, // new ReentrantLock(), // 1000, // 300)); + resultCache = new ConcurrentHashMap<>(); } /** @@ -119,7 +131,8 @@ public void addContainer(ContainerInfo info) Preconditions.checkArgument(info.getReplicationFactor().getNumber() > 0, "ExpectedReplicaCount should be greater than 0"); - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.writeLock().lock(); + try { ContainerID id = ContainerID.valueof(info.getContainerID()); if (containerMap.putIfAbsent(id, info) != null) { LOG.debug("Duplicate container ID detected. {}", id); @@ -132,7 +145,13 @@ public void addContainer(ContainerInfo info) ownerMap.insert(info.getOwner(), id); factorMap.insert(info.getReplicationFactor(), id); typeMap.insert(info.getReplicationType(), id); + + // Flush the cache of this container type, will be added later when + // get container queries are executed. + flushCache(info); LOG.trace("Created container with {} successfully.", id); + } finally { + lock.writeLock().unlock(); } } @@ -153,8 +172,107 @@ public ContainerInfo getContainerInfo(ContainerInfo info) { * @return container info, if found. */ public ContainerInfo getContainerInfo(long containerID) { - ContainerID id = new ContainerID(containerID); - return containerMap.get(id); + return getContainerInfo(ContainerID.valueof(containerID)); + } + + /** + * Returns the latest state of Container from SCM's Container State Map. + * + * @param containerID - ContainerID + * @return container info, if found. + */ + public ContainerInfo getContainerInfo(ContainerID containerID) { + lock.readLock().lock(); + try { + return containerMap.get(containerID); + } finally { + lock.readLock().unlock(); + } + } + + /** + * Returns the latest list of DataNodes where replica for given containerId + * exist. Throws an SCMException if no entry is found for given containerId. + * + * @param containerID + * @return Set + */ + public Set getContainerReplicas(ContainerID containerID) + throws SCMException { + Preconditions.checkNotNull(containerID); + lock.readLock().lock(); + try { + if (contReplicaMap.containsKey(containerID)) { + return Collections + .unmodifiableSet(contReplicaMap.get(containerID)); + } + } finally { + lock.readLock().unlock(); + } + throw new SCMException( + "No entry exist for containerId: " + containerID + " in replica map.", + ResultCodes.NO_REPLICA_FOUND); + } + + /** + * Adds given datanodes as nodes where replica for given containerId exist. 
+ * Logs a debug entry if a datanode is already added as replica for given + * ContainerId. + * + * @param containerID + * @param dnList + */ + public void addContainerReplica(ContainerID containerID, + DatanodeDetails... dnList) { + Preconditions.checkNotNull(containerID); + lock.writeLock().lock(); + try { + for (DatanodeDetails dn : dnList) { + Preconditions.checkNotNull(dn); + if (contReplicaMap.containsKey(containerID)) { + if(!contReplicaMap.get(containerID).add(dn)) { + LOG.debug("ReplicaMap already contains entry for container Id: " + + "{},DataNode: {}", containerID, dn); + } + } else { + Set dnSet = new HashSet<>(); + dnSet.add(dn); + contReplicaMap.put(containerID, dnSet); + } + } + } finally { + lock.writeLock().unlock(); + } + } + + /** + * Remove a container Replica for given DataNode. + * + * @param containerID + * @param dn + * @return True of dataNode is removed successfully else false. + */ + public boolean removeContainerReplica(ContainerID containerID, + DatanodeDetails dn) throws SCMException { + Preconditions.checkNotNull(containerID); + Preconditions.checkNotNull(dn); + + lock.writeLock().lock(); + try { + if (contReplicaMap.containsKey(containerID)) { + return contReplicaMap.get(containerID).remove(dn); + } + } finally { + lock.writeLock().unlock(); + } + throw new SCMException( + "No entry exist for containerId: " + containerID + " in replica map.", + ResultCodes.FAILED_TO_FIND_CONTAINER); + } + + @VisibleForTesting + public static Logger getLOG() { + return LOG; } /** @@ -163,8 +281,11 @@ public ContainerInfo getContainerInfo(long containerID) { * @return - Map */ public Map getContainerMap() { - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.readLock().lock(); + try { return Collections.unmodifiableMap(containerMap); + } finally { + lock.readLock().unlock(); } } @@ -175,14 +296,18 @@ public ContainerInfo getContainerInfo(long containerID) { public void updateContainerInfo(ContainerInfo info) throws SCMException { Preconditions.checkNotNull(info); ContainerInfo currentInfo = null; - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.writeLock().lock(); + try { currentInfo = containerMap.get( ContainerID.valueof(info.getContainerID())); if (currentInfo == null) { throw new SCMException("No such container.", FAILED_TO_FIND_CONTAINER); } + flushCache(info, currentInfo); containerMap.put(info.containerID(), info); + } finally { + lock.writeLock().unlock(); } } @@ -202,46 +327,56 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, ContainerID id = new ContainerID(info.getContainerID()); ContainerInfo currentInfo = null; - try (AutoCloseableLock lock = autoLock.acquire()) { - currentInfo = containerMap.get(id); + lock.writeLock().lock(); + try { + try { + // Just flush both old and new data sets from the result cache. + ContainerInfo newInfo = new ContainerInfo(info); + newInfo.setState(newState); + flushCache(newInfo, info); - if (currentInfo == null) { - throw new - SCMException("No such container.", FAILED_TO_FIND_CONTAINER); + currentInfo = containerMap.get(id); + + if (currentInfo == null) { + throw new + SCMException("No such container.", FAILED_TO_FIND_CONTAINER); + } + // We are updating two places before this update is done, these can + // fail independently, since the code needs to handle it. 
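A usage sketch for the replica map added above, from a hypothetical caller such as container report processing (dnA and dnB stand for DatanodeDetails instances; none of this is part of the hunk):

    ContainerID id = ContainerID.valueof(42L);
    containerStateMap.addContainerReplica(id, dnA, dnB); // a duplicate datanode only logs a debug line
    Set<DatanodeDetails> replicas = containerStateMap.getContainerReplicas(id);
    boolean removed = containerStateMap.removeContainerReplica(id, dnA);
    // getContainerReplicas and removeContainerReplica throw SCMException
    // (NO_REPLICA_FOUND / FAILED_TO_FIND_CONTAINER) if the container has no entry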
+ + // We update the attribute map, if that fails it will throw an + // exception, so no issues, if we are successful, we keep track of the + // fact that we have updated the lifecycle state in the map, and update + // the container state. If this second update fails, we will attempt to + // roll back the earlier change we did. If the rollback fails, we can + // be in an inconsistent state, + + info.setState(newState); + containerMap.put(id, info); + lifeCycleStateMap.update(currentState, newState, id); + LOG.trace("Updated the container {} to new state. Old = {}, new = " + + "{}", id, currentState, newState); + } catch (SCMException ex) { + LOG.error("Unable to update the container state. {}", ex); + // we need to revert the change in this attribute since we are not + // able to update the hash table. + LOG.info("Reverting the update to lifecycle state. Moving back to " + + "old state. Old = {}, Attempted state = {}", currentState, + newState); + + containerMap.put(id, currentInfo); + + // if this line throws, the state map can be in an inconsistent + // state, since we will have modified the attribute by the + // container state will not in sync since we were not able to put + // that into the hash table. + lifeCycleStateMap.update(newState, currentState, id); + + throw new SCMException("Updating the container map failed.", ex, + FAILED_TO_CHANGE_CONTAINER_STATE); } - // We are updating two places before this update is done, these can - // fail independently, since the code needs to handle it. - - // We update the attribute map, if that fails it will throw an exception, - // so no issues, if we are successful, we keep track of the fact that we - // have updated the lifecycle state in the map, and update the container - // state. If this second update fails, we will attempt to roll back the - // earlier change we did. If the rollback fails, we can be in an - // inconsistent state, - - info.setState(newState); - containerMap.put(id, info); - lifeCycleStateMap.update(currentState, newState, id); - LOG.trace("Updated the container {} to new state. Old = {}, new = " + - "{}", id, currentState, newState); - } catch (SCMException ex) { - LOG.error("Unable to update the container state. {}", ex); - // we need to revert the change in this attribute since we are not - // able to update the hash table. - LOG.info("Reverting the update to lifecycle state. Moving back to " + - "old state. Old = {}, Attempted state = {}", currentState, - newState); - - containerMap.put(id, currentInfo); - - // if this line throws, the state map can be in an inconsistent - // state, since we will have modified the attribute by the - // container state will not in sync since we were not able to put - // that into the hash table. 
- lifeCycleStateMap.update(newState, currentState, id); - - throw new SCMException("Updating the container map failed.", ex, - FAILED_TO_CHANGE_CONTAINER_STATE); + } finally { + lock.writeLock().unlock(); } } @@ -253,9 +388,11 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, */ NavigableSet getContainerIDsByOwner(String ownerName) { Preconditions.checkNotNull(ownerName); - - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.readLock().lock(); + try { return ownerMap.getCollection(ownerName); + } finally { + lock.readLock().unlock(); } } @@ -267,9 +404,11 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, */ NavigableSet getContainerIDsByType(ReplicationType type) { Preconditions.checkNotNull(type); - - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.readLock().lock(); + try { return typeMap.getCollection(type); + } finally { + lock.readLock().unlock(); } } @@ -281,9 +420,11 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, */ NavigableSet getContainerIDsByFactor(ReplicationFactor factor) { Preconditions.checkNotNull(factor); - - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.readLock().lock(); + try { return factorMap.getCollection(factor); + } finally { + lock.readLock().unlock(); } } @@ -293,11 +434,14 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, * @param state - State - Open, Closed etc. * @return List of containers by state. */ - NavigableSet getContainerIDsByState(LifeCycleState state) { + public NavigableSet getContainerIDsByState( + LifeCycleState state) { Preconditions.checkNotNull(state); - - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.readLock().lock(); + try { return lifeCycleStateMap.getCollection(state); + } finally { + lock.readLock().unlock(); } } @@ -319,7 +463,13 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, Preconditions.checkNotNull(factor, "Factor cannot be null"); Preconditions.checkNotNull(type, "Type cannot be null"); - try (AutoCloseableLock lock = autoLock.acquire()) { + lock.readLock().lock(); + try { + ContainerQueryKey queryKey = + new ContainerQueryKey(state, owner, factor, type); + if(resultCache.containsKey(queryKey)){ + return resultCache.get(queryKey); + } // If we cannot meet any one condition we return EMPTY_SET immediately. // Since when we intersect these sets, the result will be empty if any @@ -356,7 +506,10 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, for (int x = 1; x < sets.length; x++) { currentSet = intersectSets(currentSet, sets[x]); } + resultCache.put(queryKey, currentSet); return currentSet; + } finally { + lock.readLock().unlock(); } } @@ -403,4 +556,14 @@ public void updateState(ContainerInfo info, LifeCycleState currentState, } return sets; } + + private void flushCache(ContainerInfo... 
containerInfos) { + for (ContainerInfo containerInfo : containerInfos) { + ContainerQueryKey key = new ContainerQueryKey(containerInfo.getState(), + containerInfo.getOwner(), containerInfo.getReplicationFactor(), + containerInfo.getReplicationType()); + resultCache.remove(key); + } + } + } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java new file mode 100644 index 00000000000..9d72eb106a3 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.hdds.scm.events; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.block.PendingDeleteStatusList; +import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler + .CloseContainerStatus; +import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler + .DeleteBlockCommandStatus; +import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler + .ReplicationStatus; +import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler.CloseContainerRetryableReq; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .PipelineActionsFromDatanode; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .ContainerActionsFromDatanode; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .CommandStatusReportFromDatanode; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .ContainerReportFromDatanode; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .NodeReportFromDatanode; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager + .ReplicationCompleted; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest; + +import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport; +import org.apache.hadoop.hdds.server.events.Event; +import org.apache.hadoop.hdds.server.events.TypedEvent; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; + +/** + * Class that acts as the namespace for all SCM Events. + */ +public final class SCMEvents { + + /** + * NodeReports are sent out by Datanodes. This report is received by + * SCMDatanodeHeartbeatDispatcher and NodeReport Event is generated. 
+ */ + public static final TypedEvent NODE_REPORT = + new TypedEvent<>(NodeReportFromDatanode.class, "Node_Report"); + + /** + * Event generated on DataNode registration. + */ + public static final TypedEvent + NODE_REGISTRATION_CONT_REPORT = new TypedEvent<>( + NodeRegistrationContainerReport.class, + "Node_Registration_Container_Report"); + + /** + * ContainerReports are sent out by Datanodes. This report is received by + * SCMDatanodeHeartbeatDispatcher and Container_Report Event + * is generated. + */ + public static final TypedEvent CONTAINER_REPORT = + new TypedEvent<>(ContainerReportFromDatanode.class, "Container_Report"); + + /** + * ContainerActions are sent by Datanode. This event is received by + * SCMDatanodeHeartbeatDispatcher and CONTAINER_ACTIONS event is generated. + */ + public static final TypedEvent + CONTAINER_ACTIONS = new TypedEvent<>(ContainerActionsFromDatanode.class, + "Container_Actions"); + + /** + * PipelineActions are sent by Datanode. This event is received by + * SCMDatanodeHeartbeatDispatcher and PIPELINE_ACTIONS event is generated. + */ + public static final TypedEvent + PIPELINE_ACTIONS = new TypedEvent<>(PipelineActionsFromDatanode.class, + "Pipeline_Actions"); + + /** + * Pipeline close events are triggered to close a pipeline because of failure, + * stale node, decommissioning, etc. + */ + public static final TypedEvent + PIPELINE_CLOSE = new TypedEvent<>(PipelineID.class, + "Pipeline_Close"); + + /** + * A Command status report will be sent by datanodes. This report is received + * by SCMDatanodeHeartbeatDispatcher and CommandReport event is generated. + */ + public static final TypedEvent + CMD_STATUS_REPORT = + new TypedEvent<>(CommandStatusReportFromDatanode.class, + "Cmd_Status_Report"); + + /** + * Whenever a command for the Datanode needs to be issued by any component + * inside SCM, a Datanode_Command event is generated. NodeManager listens to + * these events and dispatches them to Datanode for further processing. + */ + public static final Event DATANODE_COMMAND = + new TypedEvent<>(CommandForDatanode.class, "Datanode_Command"); + + /** + * A Close Container Event can be triggered under many conditions. Some of them + * are: 1. A Container is full, so we stop writing further information to + * that container. DNs let SCM know the current state and send an + * informational message that allows SCM to close the container. + *

+ * 2. If a pipeline is open (for example a Ratis pipeline) and a single node fails, we + * will proactively close these containers. + *

+ * Once a command is dispatched to DN, we will also listen to updates from the + * datanode which lets us know that this command completed or timed out. + */ + public static final TypedEvent CLOSE_CONTAINER = + new TypedEvent<>(ContainerID.class, "Close_Container"); + + /** + * A CLOSE_CONTAINER_RETRYABLE_REQ will be triggered by + * CloseContainerEventHandler after sending a SCMCommand to DataNode. + * CloseContainerWatcher will track this event. Watcher will be responsible + * for retrying it in event of failure or timeout. + */ + public static final TypedEvent + CLOSE_CONTAINER_RETRYABLE_REQ = new TypedEvent<>( + CloseContainerRetryableReq.class, "Close_Container_Retryable"); + + /** + * This event will be triggered whenever a new datanode is registered with + * SCM. + */ + public static final TypedEvent NEW_NODE = + new TypedEvent<>(DatanodeDetails.class, "New_Node"); + + /** + * This event will be triggered whenever a datanode is moved from healthy to + * stale state. + */ + public static final TypedEvent STALE_NODE = + new TypedEvent<>(DatanodeDetails.class, "Stale_Node"); + + /** + * This event will be triggered whenever a datanode is moved from stale to + * dead state. + */ + public static final TypedEvent DEAD_NODE = + new TypedEvent<>(DatanodeDetails.class, "Dead_Node"); + + /** + * This event will be triggered by CommandStatusReportHandler whenever a + * status for Replication SCMCommand is received. + */ + public static final Event REPLICATION_STATUS = new + TypedEvent<>(ReplicationStatus.class, "ReplicateCommandStatus"); + /** + * This event will be triggered by CommandStatusReportHandler whenever a + * status for CloseContainer SCMCommand is received. + */ + public static final Event + CLOSE_CONTAINER_STATUS = + new TypedEvent<>(CloseContainerStatus.class, + "CloseContainerCommandStatus"); + /** + * This event will be triggered by CommandStatusReportHandler whenever a + * status for DeleteBlock SCMCommand is received. + */ + public static final Event + DELETE_BLOCK_STATUS = + new TypedEvent<>(DeleteBlockCommandStatus.class, + "DeleteBlockCommandStatus"); + + /** + * This event will be triggered while processing container reports from DN + * when deleteTransactionID of container in report mismatches with the + * deleteTransactionID on SCM. + */ + public static final Event PENDING_DELETE_STATUS = + new TypedEvent<>(PendingDeleteStatusList.class, "PendingDeleteStatus"); + + /** + * This is the command for ReplicationManager to handle under/over + * replication. Sent by the ContainerReportHandler after processing the + * heartbeat. + */ + public static final TypedEvent REPLICATE_CONTAINER = + new TypedEvent<>(ReplicationRequest.class); + + /** + * This event is sent by the ReplicaManager to the + * ReplicationCommandWatcher to track the in-progress replication. + */ + public static final TypedEvent + TRACK_REPLICATE_COMMAND = + new TypedEvent<>(ReplicationManager.ReplicationRequestToRepeat.class); + /** + * This event comes from the Heartbeat dispatcher (in fact from the + * datanode) to notify the scm that the replication is done. This is + * received by the replicate command watcher to mark in-progress task as + * finished. +

+ * TODO: Temporary event, should be replaced by specific Heartbeat + * ActionRequired event. + */ + public static final TypedEvent REPLICATION_COMPLETE = + new TypedEvent<>(ReplicationCompleted.class); + + /** + * Signal for all the components (but especially for the replication + * manager and container report handler) that the replication could be + * started. Should be sent only if (almost) all the container states are + * available from the datanodes. + */ + public static final TypedEvent START_REPLICATION = + new TypedEvent<>(Boolean.class); + public static final TypedEvent CHILL_MODE_STATUS = + new TypedEvent<>(Boolean.class); + + /** + * Private Ctor. Never Constructed. + */ + private SCMEvents() { + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/package-info.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/package-info.java new file mode 100644 index 00000000000..46181a3eb5f --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +/** + * Events Package contains all the Events used by SCM internally to + * communicate between different sub-systems that make up SCM.
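The declarations above all follow the same publish/subscribe pattern; a compact sketch of how a component uses them, mirroring the addHandler/fireEvent calls made elsewhere in this patch (the lambda body is illustrative only):

    // subscribe: handlers are single-method objects, so a lambda works,
    // just as ReplicationManager registers its REPLICATE_CONTAINER handler
    eventQueue.addHandler(SCMEvents.DEAD_NODE,
        (datanodeDetails, publisher) -> LOG.info("Lost node {}", datanodeDetails));
    // publish: every handler registered for DEAD_NODE receives the payload
    eventQueue.fireEvent(SCMEvents.DEAD_NODE, datanodeDetails);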
+ */ +package org.apache.hadoop.hdds.scm.events; \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java index d7d70ef98ce..87a29e3a5dc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java @@ -107,6 +107,7 @@ public ResultCodes getResult() { FAILED_TO_LOAD_OPEN_CONTAINER, FAILED_TO_ALLOCATE_CONTAINER, FAILED_TO_CHANGE_CONTAINER_STATE, + FAILED_TO_CHANGE_PIPELINE_STATE, CONTAINER_EXISTS, FAILED_TO_FIND_CONTAINER, FAILED_TO_FIND_CONTAINER_WITH_SPACE, @@ -116,6 +117,7 @@ public ResultCodes getResult() { UNEXPECTED_CONTAINER_STATE, SCM_NOT_INITIALIZED, DUPLICATE_DATANODE, - NO_SUCH_DATANODE + NO_SUCH_DATANODE, + NO_REPLICA_FOUND } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java index 51465ee95d8..26b8b95b040 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java @@ -106,4 +106,23 @@ public void updateStorageReports(List reports) { lock.readLock().unlock(); } } + + /** + * Returns the last updated time of datanode info. + * @return the last updated time of datanode info. + */ + public long getLastStatsUpdatedTime() { + return lastStatsUpdatedTime; + } + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return super.equals(obj); + } + } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java new file mode 100644 index 00000000000..7fda67d9af2 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.node; + +import java.util.Set; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerStateManager; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Handles Dead Node event. + */ +public class DeadNodeHandler implements EventHandler { + + private final Node2ContainerMap node2ContainerMap; + + private final ContainerStateManager containerStateManager; + + private static final Logger LOG = + LoggerFactory.getLogger(DeadNodeHandler.class); + + public DeadNodeHandler( + Node2ContainerMap node2ContainerMap, + ContainerStateManager containerStateManager) { + this.node2ContainerMap = node2ContainerMap; + this.containerStateManager = containerStateManager; + } + + @Override + public void onMessage(DatanodeDetails datanodeDetails, + EventPublisher publisher) { + Set containers = + node2ContainerMap.getContainers(datanodeDetails.getUuid()); + if (containers == null) { + LOG.info("There's no containers in dead datanode {}, no replica will be" + + " removed from the in-memory state.", datanodeDetails.getUuid()); + return; + } + LOG.info( + "Datanode {} is dead. Removing replications from the in-memory state.", + datanodeDetails.getUuid()); + for (ContainerID container : containers) { + try { + containerStateManager.removeContainerReplica(container, + datanodeDetails); + + if (!containerStateManager.isOpen(container)) { + ReplicationRequest replicationRequest = + containerStateManager.checkReplicationState(container); + + if (replicationRequest != null) { + publisher.fireEvent(SCMEvents.REPLICATE_CONTAINER, + replicationRequest); + } + } + } catch (SCMException e) { + LOG.error("Can't remove container from containerStateMap {}", container + .getId(), e); + } + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NewNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NewNodeHandler.java new file mode 100644 index 00000000000..79b75a5af00 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NewNodeHandler.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
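A wiring sketch for the handler above (the registration call site is an assumption; this hunk only defines the handler):

    eventQueue.addHandler(SCMEvents.DEAD_NODE,
        new DeadNodeHandler(node2ContainerMap, containerStateManager));
    // when a node dies, its replicas are removed from the in-memory state and,
    // for closed containers whose remaining replica count no longer matches the
    // expected factor, a REPLICATE_CONTAINER request is fired for ReplicationManager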
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.node; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +import java.util.Collections; + +/** + * Handles New Node event. + */ +public class NewNodeHandler implements EventHandler { + + private final Node2ContainerMap node2ContainerMap; + + public NewNodeHandler(Node2ContainerMap node2ContainerMap) { + this.node2ContainerMap = node2ContainerMap; + } + + @Override + public void onMessage(DatanodeDetails datanodeDetails, + EventPublisher publisher) { + try { + node2ContainerMap.insertNewDatanode(datanodeDetails.getUuid(), + Collections.emptySet()); + } catch (SCMException e) { + // TODO: log exception message. + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java index c13c37c136c..deb1628a362 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java @@ -17,12 +17,15 @@ */ package org.apache.hadoop.hdds.scm.node; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; +import org.apache.hadoop.hdds.server.events.EventHandler; import org.apache.hadoop.ozone.protocol.StorageContainerNodeProtocol; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import java.io.Closeable; @@ -53,7 +56,7 @@ * list, by calling removeNode. We will throw away this nodes info soon. */ public interface NodeManager extends StorageContainerNodeProtocol, - NodeManagerMXBean, Closeable { + EventHandler, NodeManagerMXBean, Closeable { /** * Removes a data node from the management of this Node Manager. * @@ -136,4 +139,12 @@ * @param command */ void addDatanodeCommand(UUID dnId, SCMCommand command); + + /** + * Process node report. 
+ * + * @param dnUuid + * @param nodeReport + */ + void processNodeReport(UUID dnUuid, NodeReportProto nodeReport); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeReportHandler.java new file mode 100644 index 00000000000..331bfed1ab3 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeReportHandler.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.node; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .NodeReportFromDatanode; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Handles Node Reports from datanode. 
+ */ +public class NodeReportHandler implements EventHandler { + + private static final Logger LOGGER = LoggerFactory + .getLogger(NodeReportHandler.class); + private final NodeManager nodeManager; + + public NodeReportHandler(NodeManager nodeManager) { + Preconditions.checkNotNull(nodeManager); + this.nodeManager = nodeManager; + } + + @Override + public void onMessage(NodeReportFromDatanode nodeReportFromDatanode, + EventPublisher publisher) { + Preconditions.checkNotNull(nodeReportFromDatanode); + DatanodeDetails dn = nodeReportFromDatanode.getDatanodeDetails(); + Preconditions.checkNotNull(dn, "NodeReport is " + + "missing DatanodeDetails."); + LOGGER.trace("Processing node report for dn: {}", dn); + nodeManager + .processNodeReport(dn.getUuid(), nodeReportFromDatanode.getReport()); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index 5543c04039c..77f939eab18 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -24,9 +24,12 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.scm.HddsServerUtil; +import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.node.states.NodeAlreadyExistsException; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; import org.apache.hadoop.hdds.scm.node.states.NodeStateMap; +import org.apache.hadoop.hdds.server.events.Event; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.ozone.common.statemachine .InvalidStateTransitionException; import org.apache.hadoop.ozone.common.statemachine.StateMachine; @@ -36,9 +39,11 @@ import org.slf4j.LoggerFactory; import java.io.Closeable; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.ScheduledExecutorService; @@ -86,6 +91,14 @@ * This is the map which maintains the current state of all datanodes. */ private final NodeStateMap nodeStateMap; + /** + * Used for publishing node state change events. + */ + private final EventPublisher eventPublisher; + /** + * Maps the event to be triggered when a node state us updated. + */ + private final Map> state2EventMap; /** * ExecutorService used for scheduling heartbeat processing thread. */ @@ -108,8 +121,11 @@ * * @param conf Configuration */ - public NodeStateManager(Configuration conf) { - nodeStateMap = new NodeStateMap(); + public NodeStateManager(Configuration conf, EventPublisher eventPublisher) { + this.nodeStateMap = new NodeStateMap(); + this.eventPublisher = eventPublisher; + this.state2EventMap = new HashMap<>(); + initialiseState2EventMap(); Set finalStates = new HashSet<>(); finalStates.add(NodeState.DECOMMISSIONED); this.stateMachine = new StateMachine<>(NodeState.HEALTHY, finalStates); @@ -130,6 +146,14 @@ public NodeStateManager(Configuration conf) { TimeUnit.MILLISECONDS); } + /** + * Populates state2event map. 
+ */ + private void initialiseState2EventMap() { + state2EventMap.put(NodeState.STALE, SCMEvents.STALE_NODE); + state2EventMap.put(NodeState.DEAD, SCMEvents.DEAD_NODE); + } + /* * * Node and State Transition Mapping: @@ -220,6 +244,7 @@ private void initializeStateMachine() { public void addNode(DatanodeDetails datanodeDetails) throws NodeAlreadyExistsException { nodeStateMap.addNode(datanodeDetails, stateMachine.getInitialState()); + eventPublisher.fireEvent(SCMEvents.NEW_NODE, datanodeDetails); } /** @@ -548,6 +573,9 @@ private void updateNodeState(DatanodeInfo node, Predicate condition, if (condition.test(node.getLastHeartbeatTime())) { NodeState newState = stateMachine.getNextState(state, lifeCycleEvent); nodeStateMap.updateNodeState(node.getUuid(), state, newState); + if (state2EventMap.containsKey(newState)) { + eventPublisher.fireEvent(state2EventMap.get(newState), node); + } } } catch (InvalidStateTransitionException e) { LOG.warn("Invalid state transition of node {}." + diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 15ac3f283e9..fca08bd1775 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -25,10 +25,7 @@ import org.apache.hadoop.hdds.scm.VersionInfo; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; -import org.apache.hadoop.hdds.server.events.Event; -import org.apache.hadoop.hdds.server.events.EventHandler; import org.apache.hadoop.hdds.server.events.EventPublisher; -import org.apache.hadoop.hdds.server.events.TypedEvent; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; @@ -43,6 +40,7 @@ .StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.metrics2.util.MBeans; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.protocol.StorageContainerNodeProtocol; import org.apache.hadoop.ozone.protocol.VersionResponse; import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; @@ -79,8 +77,7 @@ * as soon as you read it. */ public class SCMNodeManager - implements NodeManager, StorageContainerNodeProtocol, - EventHandler { + implements NodeManager, StorageContainerNodeProtocol { @VisibleForTesting static final Logger LOG = @@ -118,15 +115,13 @@ // Node pool manager. private final StorageContainerManager scmManager; - public static final Event DATANODE_COMMAND = - new TypedEvent<>(CommandForDatanode.class, "DATANODE_COMMAND"); - /** * Constructs SCM machine Manager. 
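NodeStateManager now publishes a STALE_NODE or DEAD_NODE event whenever the heartbeat check moves a node into one of those states, by looking the new state up in state2EventMap and firing the mapped event. A small sketch of that lookup-and-fire step, using plain enums and a functional publisher as stand-ins for the SCM event types (illustrative, not the real API):

```java
import java.util.EnumMap;
import java.util.Map;
import java.util.UUID;
import java.util.function.BiConsumer;

final class StateEventSketch {
  enum NodeState { HEALTHY, STALE, DEAD, DECOMMISSIONED }
  enum NodeEvent { STALE_NODE, DEAD_NODE }

  private final Map<NodeState, NodeEvent> state2Event = new EnumMap<>(NodeState.class);
  private final BiConsumer<NodeEvent, UUID> publisher;

  StateEventSketch(BiConsumer<NodeEvent, UUID> publisher) {
    this.publisher = publisher;
    // Mirrors initialiseState2EventMap: only STALE and DEAD are published.
    state2Event.put(NodeState.STALE, NodeEvent.STALE_NODE);
    state2Event.put(NodeState.DEAD, NodeEvent.DEAD_NODE);
  }

  // Called after a heartbeat check decides the node's new state.
  void updateState(UUID nodeId, NodeState newState) {
    NodeEvent event = state2Event.get(newState);
    if (event != null) {
      publisher.accept(event, nodeId);
    }
  }
}
```

A transition back to HEALTHY produces no event, which matches the two entries populated in the map above.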
*/ public SCMNodeManager(OzoneConfiguration conf, String clusterID, - StorageContainerManager scmManager) throws IOException { - this.nodeStateManager = new NodeStateManager(conf); + StorageContainerManager scmManager, EventPublisher eventPublisher) + throws IOException { + this.nodeStateManager = new NodeStateManager(conf, eventPublisher); this.nodeStats = new ConcurrentHashMap<>(); this.scmStat = new SCMNodeStat(); this.clusterID = clusterID; @@ -347,6 +342,10 @@ public void close() throws IOException { public VersionResponse getVersion(SCMVersionRequestProto versionRequest) { return VersionResponse.newBuilder() .setVersion(this.version.getVersion()) + .addValue(OzoneConsts.SCM_ID, + this.scmManager.getScmStorage().getScmId()) + .addValue(OzoneConsts.CLUSTER_ID, this.scmManager.getScmStorage() + .getClusterID()) .build(); } @@ -366,15 +365,11 @@ public VersionResponse getVersion(SCMVersionRequestProto versionRequest) { public RegisteredCommand register( DatanodeDetails datanodeDetails, NodeReportProto nodeReport) { - String hostname = null; - String ip = null; InetAddress dnAddress = Server.getRemoteIp(); if (dnAddress != null) { // Mostly called inside an RPC, update ip and peer hostname - hostname = dnAddress.getHostName(); - ip = dnAddress.getHostAddress(); - datanodeDetails.setHostName(hostname); - datanodeDetails.setIpAddress(ip); + datanodeDetails.setHostName(dnAddress.getHostName()); + datanodeDetails.setIpAddress(dnAddress.getHostAddress()); } UUID dnId = datanodeDetails.getUuid(); try { @@ -392,14 +387,12 @@ public RegisteredCommand register( LOG.trace("Datanode is already registered. Datanode: {}", datanodeDetails.toString()); } - RegisteredCommand.Builder builder = - RegisteredCommand.newBuilder().setErrorCode(ErrorCode.success) - .setDatanodeUUID(datanodeDetails.getUuidString()) - .setClusterID(this.clusterID); - if (hostname != null && ip != null) { - builder.setHostname(hostname).setIpAddress(ip); - } - return builder.build(); + return RegisteredCommand.newBuilder().setErrorCode(ErrorCode.success) + .setDatanodeUUID(datanodeDetails.getUuidString()) + .setClusterID(this.clusterID) + .setHostname(datanodeDetails.getHostName()) + .setIpAddress(datanodeDetails.getIpAddress()) + .build(); } /** @@ -424,6 +417,17 @@ public RegisteredCommand register( return commandQueue.getCommand(datanodeDetails.getUuid()); } + /** + * Process node report. + * + * @param dnUuid + * @param nodeReport + */ + @Override + public void processNodeReport(UUID dnUuid, NodeReportProto nodeReport) { + this.updateNodeStat(dnUuid, nodeReport); + } + /** * Returns the aggregated node stats. * @return the aggregated node stats. @@ -461,14 +465,25 @@ public SCMNodeMetric getNodeStat(DatanodeDetails datanodeDetails) { return nodeCountMap; } + // TODO: + // Since datanode commands are added through event queue, onMessage method + // should take care of adding commands to command queue. + // Refactor and remove all the usage of this method and delete this method. @Override public void addDatanodeCommand(UUID dnId, SCMCommand command) { this.commandQueue.addCommand(dnId, command); } + /** + * This method is called by EventQueue whenever someone adds a new + * DATANODE_COMMAND to the Queue. 
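With DATANODE_COMMAND now delivered through the event queue, SCMNodeManager.onMessage simply appends the command to the per-datanode command queue. The toy event bus below models that dispatch pattern end to end; it is a simplified illustration of the idea, not the hdds EventQueue API, and the DatanodeCommand type here is a hypothetical stand-in.

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.BiConsumer;

// Minimal event bus keyed by event name; assumed for illustration only.
final class MiniEventQueue {
  static final class DatanodeCommand {
    final UUID datanodeId;
    final String command;
    DatanodeCommand(UUID datanodeId, String command) {
      this.datanodeId = datanodeId;
      this.command = command;
    }
  }

  private final Map<String, List<BiConsumer<Object, MiniEventQueue>>> handlers =
      new HashMap<>();

  void addHandler(String event, BiConsumer<Object, MiniEventQueue> handler) {
    handlers.computeIfAbsent(event, k -> new ArrayList<>()).add(handler);
  }

  void fireEvent(String event, Object payload) {
    handlers.getOrDefault(event, List.of()).forEach(h -> h.accept(payload, this));
  }

  public static void main(String[] args) {
    MiniEventQueue queue = new MiniEventQueue();
    Map<UUID, List<String>> commandQueue = new HashMap<>();

    // The onMessage equivalent: just queue the command for the target datanode.
    queue.addHandler("DATANODE_COMMAND", (payload, ignored) -> {
      DatanodeCommand cmd = (DatanodeCommand) payload;
      commandQueue.computeIfAbsent(cmd.datanodeId, k -> new ArrayList<>())
          .add(cmd.command);
    });

    queue.fireEvent("DATANODE_COMMAND",
        new DatanodeCommand(UUID.randomUUID(), "closeContainer"));
    System.out.println(commandQueue);
  }
}
```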
+ * + * @param commandForDatanode DatanodeCommand + * @param ignored publisher + */ @Override public void onMessage(CommandForDatanode commandForDatanode, - EventPublisher publisher) { + EventPublisher ignored) { addDatanodeCommand(commandForDatanode.getDatanodeId(), commandForDatanode.getCommand()); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/StaleNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/StaleNodeHandler.java new file mode 100644 index 00000000000..b435e777aee --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/StaleNodeHandler.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.node; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.container.Mapping; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Set; + +/** + * Handles Stale node event. 
+ */ +public class StaleNodeHandler implements EventHandler { + static final Logger LOG = LoggerFactory.getLogger(StaleNodeHandler.class); + + private final Node2ContainerMap node2ContainerMap; + private final Mapping containerManager; + + public StaleNodeHandler(Node2ContainerMap node2ContainerMap, + Mapping containerManager) { + this.node2ContainerMap = node2ContainerMap; + this.containerManager = containerManager; + } + + @Override + public void onMessage(DatanodeDetails datanodeDetails, + EventPublisher publisher) { + Set pipelineIDs = + containerManager.getPipelineOnDatanode(datanodeDetails); + for (PipelineID id : pipelineIDs) { + LOG.info("closing pipeline {}.", id); + publisher.fireEvent(SCMEvents.PIPELINE_CLOSE, id); + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ContainerMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ContainerMap.java index 1960604906c..97c254be316 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ContainerMap.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/Node2ContainerMap.java @@ -18,19 +18,23 @@ package org.apache.hadoop.hdds.scm.node.states; -import com.google.common.base.Preconditions; -import org.apache.hadoop.hdds.scm.container.ContainerID; -import org.apache.hadoop.hdds.scm.exceptions.SCMException; - import java.util.Collections; +import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; -import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.DUPLICATE_DATANODE; -import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.NO_SUCH_DATANODE; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes + .DUPLICATE_DATANODE; +import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes + .NO_SUCH_DATANODE; /** * This data structure maintains the list of containers that is on a datanode. @@ -61,16 +65,17 @@ public boolean isKnownDatanode(UUID datanodeID) { /** * Insert a new datanode into Node2Container Map. * - * @param datanodeID -- Datanode UUID + * @param datanodeID -- Datanode UUID * @param containerIDs - List of ContainerIDs. */ public void insertNewDatanode(UUID datanodeID, Set containerIDs) throws SCMException { Preconditions.checkNotNull(containerIDs); Preconditions.checkNotNull(datanodeID); - if(dn2ContainerMap.putIfAbsent(datanodeID, containerIDs) != null) { + if (dn2ContainerMap.putIfAbsent(datanodeID, new HashSet<>(containerIDs)) + != null) { throw new SCMException("Node already exists in the map", - DUPLICATE_DATANODE); + DUPLICATE_DATANODE); } } @@ -82,17 +87,20 @@ public void insertNewDatanode(UUID datanodeID, Set containerIDs) * @throws SCMException - if we don't know about this datanode, for new DN * use insertNewDatanode. 
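Both insertNewDatanode and the renamed setContainersForDatanode now copy the caller's set into a fresh HashSet before storing it, so later mutation of the caller's collection cannot corrupt the map. A small sketch of that defensive-copy idiom, with plain Java types standing in for the SCM classes and hypothetical exception messages:

```java
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative only: a node -> container-id map that never stores caller-owned sets.
final class NodeToContainersSketch {
  private final Map<UUID, Set<Long>> map = new ConcurrentHashMap<>();

  void insertNewDatanode(UUID datanodeId, Set<Long> containerIds) {
    // Copy on the way in; putIfAbsent keeps the first registration.
    if (map.putIfAbsent(datanodeId, new HashSet<>(containerIds)) != null) {
      throw new IllegalStateException("Node already exists in the map");
    }
  }

  void setContainersForDatanode(UUID datanodeId, Set<Long> containerIds) {
    // Replace the stored set; fails if the node was never registered.
    if (map.computeIfPresent(datanodeId, (k, v) -> new HashSet<>(containerIds)) == null) {
      throw new IllegalStateException("No such datanode");
    }
  }

  Set<Long> getContainers(UUID datanodeId) {
    Set<Long> s = map.get(datanodeId);
    return s == null ? null : Collections.unmodifiableSet(s);
  }
}
```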
*/ - public void updateDatanodeMap(UUID datanodeID, Set containers) - throws SCMException { + public void setContainersForDatanode(UUID datanodeID, + Set containers) throws SCMException { Preconditions.checkNotNull(datanodeID); Preconditions.checkNotNull(containers); - if(dn2ContainerMap.computeIfPresent(datanodeID, (k, v) -> v) == null){ + if (dn2ContainerMap + .computeIfPresent(datanodeID, (k, v) -> new HashSet<>(containers)) + == null) { throw new SCMException("No such datanode", NO_SUCH_DATANODE); } } /** * Removes datanode Entry from the map. + * * @param datanodeID - Datanode ID. */ public void removeDatanode(UUID datanodeID) { @@ -166,10 +174,6 @@ public ReportResult processReport(UUID datanodeID, Set .build(); } - - - - /** * Results possible from processing a container report by * Node2ContainerMapper. @@ -181,4 +185,9 @@ public ReportResult processReport(UUID datanodeID, Set MISSING_AND_NEW_CONTAINERS_FOUND, NEW_DATANODE_FOUND } + + @VisibleForTesting + public int size() { + return dn2ContainerMap.size(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeNotFoundException.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeNotFoundException.java index 52a352e854b..c44a08cf51e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeNotFoundException.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeNotFoundException.java @@ -17,8 +17,6 @@ package org.apache.hadoop.hdds.scm.node.states; -import java.io.IOException; - /** * This exception represents that the node that is being accessed does not * exist in NodeStateMap. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/ReportResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/ReportResult.java index cb06cb3eeac..9bb6cf15875 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/ReportResult.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/ReportResult.java @@ -21,10 +21,13 @@ import org.apache.hadoop.hdds.scm.container.ContainerID; +import java.util.Collections; import java.util.Set; +import com.google.common.base.Preconditions; + /** - * A Container Report gets processsed by the Node2Container and returns the + * A Container Report gets processsed by the Node2Container and returns * Report Result class. 
*/ public class ReportResult { @@ -36,6 +39,8 @@ Set missingContainers, Set newContainers) { this.status = status; + Preconditions.checkNotNull(missingContainers); + Preconditions.checkNotNull(newContainers); this.missingContainers = missingContainers; this.newContainers = newContainers; } @@ -80,7 +85,17 @@ public ReportResultBuilder setNewContainers( } ReportResult build() { - return new ReportResult(status, missingContainers, newContainers); + + Set nullSafeMissingContainers = this.missingContainers; + Set nullSafeNewContainers = this.newContainers; + if (nullSafeNewContainers == null) { + nullSafeNewContainers = Collections.emptySet(); + } + if (nullSafeMissingContainers == null) { + nullSafeMissingContainers = Collections.emptySet(); + } + return new ReportResult(status, nullSafeMissingContainers, + nullSafeNewContainers); } } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/Node2PipelineMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/Node2PipelineMap.java new file mode 100644 index 00000000000..363ce715566 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/Node2PipelineMap.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + */ + +package org.apache.hadoop.hdds.scm.pipelines; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; + +/** + * This data structure maintains the list of pipelines which the given datanode is a part of. This + * information will be added whenever a new pipeline allocation happens. + * + *
TODO: this information needs to be regenerated from pipeline reports on SCM restart + */ +public class Node2PipelineMap { + private final Map> dn2PipelineMap; + + /** Constructs a Node2PipelineMap Object. */ + public Node2PipelineMap() { + dn2PipelineMap = new ConcurrentHashMap<>(); + } + + /** + * Returns true if this a datanode that is already tracked by Node2PipelineMap. + * + * @param datanodeID - UUID of the Datanode. + * @return True if this is tracked, false if this map does not know about it. + */ + private boolean isKnownDatanode(UUID datanodeID) { + Preconditions.checkNotNull(datanodeID); + return dn2PipelineMap.containsKey(datanodeID); + } + + /** + * Removes datanode Entry from the map. + * + * @param datanodeID - Datanode ID. + */ + public synchronized void removeDatanode(UUID datanodeID) { + Preconditions.checkNotNull(datanodeID); + dn2PipelineMap.computeIfPresent(datanodeID, (k, v) -> null); + } + + /** + * Returns null if there no pipelines associated with this datanode ID. + * + * @param datanode - UUID + * @return Set of pipelines or Null. + */ + public Set getPipelines(UUID datanode) { + Preconditions.checkNotNull(datanode); + final Set s = dn2PipelineMap.get(datanode); + return s != null? Collections.unmodifiableSet(s): Collections.emptySet(); + } + + /** + * Adds a pipeline entry to a given dataNode in the map. + * + * @param pipeline Pipeline to be added + */ + public synchronized void addPipeline(Pipeline pipeline) { + for (DatanodeDetails details : pipeline.getDatanodes().values()) { + UUID dnId = details.getUuid(); + dn2PipelineMap.computeIfAbsent(dnId, k -> new HashSet<>()) + .add(pipeline.getId()); + } + } + + public synchronized void removePipeline(Pipeline pipeline) { + for (DatanodeDetails details : pipeline.getDatanodes().values()) { + UUID dnId = details.getUuid(); + dn2PipelineMap.computeIfPresent( + dnId, + (k, v) -> { + v.remove(pipeline.getId()); + return v; + }); + } + } + + public Map> getDn2PipelineMap() { + return Collections.unmodifiableMap(dn2PipelineMap); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineActionEventHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineActionEventHandler.java new file mode 100644 index 00000000000..1053149bba8 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineActionEventHandler.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.hdds.scm.pipelines; + +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.PipelineAction; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .PipelineActionsFromDatanode; + +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Handles pipeline actions from datanode. + */ +public class PipelineActionEventHandler implements + EventHandler { + + public static final Logger LOG = LoggerFactory.getLogger( + PipelineActionEventHandler.class); + + public PipelineActionEventHandler() { + + } + + @Override + public void onMessage(PipelineActionsFromDatanode report, + EventPublisher publisher) { + for (PipelineAction action : report.getReport().getPipelineActionsList()) { + switch (action.getAction()) { + case CLOSE: + PipelineID pipelineID = PipelineID. + getFromProtobuf(action.getClosePipeline().getPipelineID()); + LOG.info("Closing pipeline " + pipelineID + " for reason:" + action + .getClosePipeline().getDetailedReason()); + publisher.fireEvent(SCMEvents.PIPELINE_CLOSE, pipelineID); + break; + default: + LOG.error("unknown pipeline action:{}" + action.getAction()); + } + } + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineCloseHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineCloseHandler.java new file mode 100644 index 00000000000..733dec56339 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineCloseHandler.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.hdds.scm.pipelines; + +import org.apache.hadoop.hdds.scm.container.Mapping; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +/** + * Handles pipeline close event. + */ +public class PipelineCloseHandler implements EventHandler { + private final Mapping mapping; + public PipelineCloseHandler(Mapping mapping) { + this.mapping = mapping; + } + + @Override + public void onMessage(PipelineID pipelineID, EventPublisher publisher) { + mapping.handlePipelineClose(pipelineID); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineManager.java index a1fbce6c9ee..07ff2b0918f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineManager.java @@ -16,14 +16,13 @@ */ package org.apache.hadoop.hdds.scm.pipelines; +import java.util.ArrayList; import java.util.LinkedList; -import java.util.Map; -import java.util.WeakHashMap; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,14 +36,53 @@ public abstract class PipelineManager { private static final Logger LOG = LoggerFactory.getLogger(PipelineManager.class); - private final List activePipelines; - private final Map activePipelineMap; - private final AtomicInteger pipelineIndex; + private final ArrayList activePipelines; public PipelineManager() { - activePipelines = new LinkedList<>(); - pipelineIndex = new AtomicInteger(0); - activePipelineMap = new WeakHashMap<>(); + activePipelines = new ArrayList<>(); + for (ReplicationFactor factor : ReplicationFactor.values()) { + activePipelines.add(factor.ordinal(), new ActivePipelines()); + } + } + + private static class ActivePipelines { + private final List activePipelines; + private final AtomicInteger pipelineIndex; + + ActivePipelines() { + activePipelines = new LinkedList<>(); + pipelineIndex = new AtomicInteger(0); + } + + void addPipeline(PipelineID pipelineID) { + activePipelines.add(pipelineID); + } + + void removePipeline(PipelineID pipelineID) { + activePipelines.remove(pipelineID); + } + + /** + * Find a Pipeline that is operational. + * + * @return - Pipeline or null + */ + PipelineID findOpenPipeline() { + if (activePipelines.size() == 0) { + LOG.error("No Operational pipelines found. Returning null."); + return null; + } + return activePipelines.get(getNextIndex()); + } + + /** + * gets the next index of the Pipeline to get. + * + * @return index in the link list to get. 
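PipelineManager now keeps one ActivePipelines bucket per replication factor (indexed by factor.ordinal()) and hands out open pipelines from that bucket in round-robin order via an incrementing index modulo the list size. The sketch below shows just that selection logic with simple types; it is a stand-in for illustration, not the SCM class itself.

```java
import java.util.ArrayList;
import java.util.List;

// Illustrative round-robin selection over the currently open pipelines.
final class ActivePipelinesSketch {
  private final List<String> active = new ArrayList<>();
  private int nextIndex = 0;

  synchronized void addPipeline(String pipelineId) {
    active.add(pipelineId);
  }

  synchronized void removePipeline(String pipelineId) {
    active.remove(pipelineId);
  }

  // Returns null when nothing is open, mirroring the behaviour described above.
  synchronized String findOpenPipeline() {
    if (active.isEmpty()) {
      return null;
    }
    nextIndex = (nextIndex + 1) % active.size();
    return active.get(nextIndex);
  }
}
```

In the real class one such bucket exists per ReplicationFactor, so ONE and THREE pipelines are rotated independently.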
+ */ + private int getNextIndex() { + return pipelineIndex.incrementAndGet() % activePipelines.size(); + } } /** @@ -56,70 +94,30 @@ public PipelineManager() { * @param replicationFactor - Replication Factor * @return a Pipeline. */ - public synchronized final Pipeline getPipeline( - ReplicationFactor replicationFactor, ReplicationType replicationType) - throws IOException { - /** - * In the Ozone world, we have a very simple policy. - * - * 1. Try to create a pipeline if there are enough free nodes. - * - * 2. This allows all nodes to part of a pipeline quickly. - * - * 3. if there are not enough free nodes, return conduits in a - * round-robin fashion. - * - * TODO: Might have to come up with a better algorithm than this. - * Create a new placement policy that returns conduits in round robin - * fashion. - */ - Pipeline pipeline = - allocatePipeline(replicationFactor); - if (pipeline != null) { - LOG.debug("created new pipeline:{} for container with " + + public synchronized final PipelineID getPipeline( + ReplicationFactor replicationFactor, ReplicationType replicationType) { + PipelineID id = + activePipelines.get(replicationFactor.ordinal()).findOpenPipeline(); + if (id != null) { + LOG.debug("re-used pipeline:{} for container with " + "replicationType:{} replicationFactor:{}", - pipeline.getPipelineName(), replicationType, replicationFactor); - activePipelines.add(pipeline); - activePipelineMap.put(pipeline.getPipelineName(), pipeline); - } else { - pipeline = - findOpenPipeline(replicationType, replicationFactor); - if (pipeline != null) { - LOG.debug("re-used pipeline:{} for container with " + - "replicationType:{} replicationFactor:{}", - pipeline.getPipelineName(), replicationType, replicationFactor); - } + id, replicationType, replicationFactor); } - if (pipeline == null) { + if (id == null) { LOG.error("Get pipeline call failed. We are not able to find" + - "free nodes or operational pipeline."); + " operational pipeline."); return null; } else { - return pipeline; + return id; } } - /** - * This function to get pipeline with given pipeline name. - * - * @param pipelineName - * @return a Pipeline. - */ - public synchronized final Pipeline getPipeline(String pipelineName) { - Pipeline pipeline = null; - - // 1. Check if pipeline channel already exists - if (activePipelineMap.containsKey(pipelineName)) { - pipeline = activePipelineMap.get(pipelineName); - LOG.debug("Returning pipeline for pipelineName:{}", pipelineName); - return pipeline; - } else { - LOG.debug("Unable to find pipeline for pipelineName:{}", pipelineName); - } - return pipeline; + void addOpenPipeline(Pipeline pipeline) { + activePipelines.get(pipeline.getFactor().ordinal()) + .addPipeline(pipeline.getId()); } - protected int getReplicationCount(ReplicationFactor factor) { + protected static int getReplicationCount(ReplicationFactor factor) { switch (factor) { case ONE: return 1; @@ -131,70 +129,55 @@ protected int getReplicationCount(ReplicationFactor factor) { } public abstract Pipeline allocatePipeline( - ReplicationFactor replicationFactor) throws IOException; + ReplicationFactor replicationFactor); /** - * Find a Pipeline that is operational. - * - * @return - Pipeline or null + * Initialize the pipeline. + * TODO: move the initialization to Ozone Client later */ - private Pipeline findOpenPipeline( - ReplicationType type, ReplicationFactor factor) { - Pipeline pipeline = null; - final int sentinal = -1; - if (activePipelines.size() == 0) { - LOG.error("No Operational conduits found. 
Returning null."); - return null; - } - int startIndex = getNextIndex(); - int nextIndex = sentinal; - for (; startIndex != nextIndex; nextIndex = getNextIndex()) { - // Just walk the list in a circular way. - Pipeline temp = - activePipelines - .get(nextIndex != sentinal ? nextIndex : startIndex); - // if we find an operational pipeline just return that. - if ((temp.getLifeCycleState() == LifeCycleState.OPEN) && - (temp.getFactor() == factor) && (temp.getType() == type)) { - pipeline = temp; - break; - } - } - return pipeline; - } + public abstract void initializePipeline(Pipeline pipeline) throws IOException; /** - * gets the next index of the Pipeline to get. - * - * @return index in the link list to get. + * Creates a pipeline with a specified replication factor and type. + * @param replicationFactor - Replication Factor. + * @param replicationType - Replication Type. */ - private int getNextIndex() { - return pipelineIndex.incrementAndGet() % activePipelines.size(); + public Pipeline createPipeline(ReplicationFactor replicationFactor, + ReplicationType replicationType) throws IOException { + Pipeline pipeline = allocatePipeline(replicationFactor); + if (pipeline != null) { + LOG.debug("created new pipeline:{} for container with " + + "replicationType:{} replicationFactor:{}", + pipeline.getId(), replicationType, replicationFactor); + } + return pipeline; } /** - * Creates a pipeline from a specified set of Nodes. - * @param pipelineID - Name of the pipeline - * @param datanodes - The list of datanodes that make this pipeline. + * Remove the pipeline from active allocation. + * @param pipeline pipeline to be finalized */ - public abstract void createPipeline(String pipelineID, - List datanodes) throws IOException; + public synchronized void finalizePipeline(Pipeline pipeline) { + activePipelines.get(pipeline.getFactor().ordinal()) + .removePipeline(pipeline.getId()); + } /** - * Close the pipeline with the given clusterId. + * + * @param pipeline */ - public abstract void closePipeline(String pipelineID) throws IOException; + public abstract void closePipeline(Pipeline pipeline) throws IOException; /** - * list members in the pipeline . + * list members in the pipeline. * @return the datanode */ - public abstract List getMembers(String pipelineID) + public abstract List getMembers(PipelineID pipelineID) throws IOException; /** * Update the datanode list of the pipeline. 
*/ - public abstract void updatePipeline(String pipelineID, + public abstract void updatePipeline(PipelineID pipelineID, List newDatanodes) throws IOException; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineSelector.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineSelector.java index 3846a8426dd..c9f51f7a420 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineSelector.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineSelector.java @@ -18,13 +18,17 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.algorithms .ContainerPlacementPolicy; import org.apache.hadoop.hdds.scm.container.placement.algorithms .SCMContainerPlacementRandom; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipelines.ratis.RatisManagerImpl; import org.apache.hadoop.hdds.scm.pipelines.standalone.StandaloneManagerImpl; @@ -33,15 +37,35 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.lease.Lease; +import org.apache.hadoop.ozone.lease.LeaseException; +import org.apache.hadoop.ozone.lease.LeaseManager; +import org.apache.hadoop.utils.MetadataStore; +import org.apache.hadoop.utils.MetadataStoreBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; +import java.util.HashSet; import java.util.List; -import java.util.stream.Collectors; +import java.util.HashMap; +import java.util.Set; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes + .FAILED_TO_CHANGE_PIPELINE_STATE; +import static org.apache.hadoop.hdds.server + .ServerUtils.getOzoneMetaDirPath; +import static org.apache.hadoop.ozone + .OzoneConsts.SCM_PIPELINE_DB; /** * Sends the request to the right pipeline manager. 
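PipelineSelector now persists every pipeline into a dedicated key-value store and re-reads it in reloadExistingPipelines on SCM restart, keeping only pipelines the datanodes may actually know about. The sketch below models that persist-and-reload round trip with a HashMap standing in for the MetadataStore and strings standing in for the protobuf payloads; it is illustrative only.

```java
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

// Illustrative persist-and-reload round trip for pipeline metadata.
final class PipelineStoreSketch {
  enum State { ALLOCATED, CREATING, OPEN, CLOSING, CLOSED }

  private final Map<String, State> store = new HashMap<>();       // stands in for the DB
  private final Map<String, State> pipelineMap = new HashMap<>(); // in-memory view

  void persist(String pipelineId, State state) {
    store.put(pipelineId, state);
  }

  // On restart: ALLOCATED pipelines never reached the datanodes and CLOSED ones
  // need no tracking, so both are skipped when rebuilding the in-memory maps.
  void reloadExistingPipelines() {
    for (Map.Entry<String, State> e : store.entrySet()) {
      State s = e.getValue();
      if (s == State.ALLOCATED || s == State.CLOSED) {
        continue;
      }
      pipelineMap.put(e.getKey(), s);
    }
  }

  Set<String> inMemoryPipelines() {
    return pipelineMap.keySet();
  }
}
```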
@@ -52,9 +76,16 @@ private final ContainerPlacementPolicy placementPolicy; private final NodeManager nodeManager; private final Configuration conf; + private final EventPublisher eventPublisher; private final RatisManagerImpl ratisManager; private final StandaloneManagerImpl standaloneManager; private final long containerSize; + private final MetadataStore pipelineStore; + private final PipelineStateManager stateManager; + private final Node2PipelineMap node2PipelineMap; + private final Map> pipeline2ContainerMap; + private final Map pipelineMap; + private final LeaseManager pipelineLeaseManager; /** * Constructs a pipeline Selector. @@ -62,19 +93,80 @@ * @param nodeManager - node manager * @param conf - Ozone Config */ - public PipelineSelector(NodeManager nodeManager, Configuration conf) { + public PipelineSelector(NodeManager nodeManager, Configuration conf, + EventPublisher eventPublisher, int cacheSizeMB) throws IOException { this.nodeManager = nodeManager; this.conf = conf; + this.eventPublisher = eventPublisher; this.placementPolicy = createContainerPlacementPolicy(nodeManager, conf); - this.containerSize = OzoneConsts.GB * this.conf.getInt( - ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_GB, - ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT); + this.containerSize = (long)this.conf.getStorageSize( + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE, + ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, + StorageUnit.BYTES); + node2PipelineMap = new Node2PipelineMap(); + pipelineMap = new ConcurrentHashMap<>(); this.standaloneManager = new StandaloneManagerImpl(this.nodeManager, placementPolicy, containerSize); this.ratisManager = new RatisManagerImpl(this.nodeManager, placementPolicy, containerSize, conf); + long pipelineCreationLeaseTimeout = conf.getTimeDuration( + ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_LEASE_TIMEOUT, + ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_LEASE_TIMEOUT_DEFAULT, + TimeUnit.MILLISECONDS); + pipelineLeaseManager = new LeaseManager<>("PipelineCreation", + pipelineCreationLeaseTimeout); + pipelineLeaseManager.start(); + + stateManager = new PipelineStateManager(); + pipeline2ContainerMap = new HashMap<>(); + + // Write the container name to pipeline mapping. + File metaDir = getOzoneMetaDirPath(conf); + File containerDBPath = new File(metaDir, SCM_PIPELINE_DB); + pipelineStore = MetadataStoreBuilder.newBuilder() + .setConf(conf) + .setDbFile(containerDBPath) + .setCacheSize(cacheSizeMB * OzoneConsts.MB) + .build(); + + reloadExistingPipelines(); + } + + private void reloadExistingPipelines() throws IOException { + if (pipelineStore.isEmpty()) { + // Nothing to do just return + return; + } + + List> range = + pipelineStore.getSequentialRangeKVs(null, Integer.MAX_VALUE, null); + + // Transform the values into the pipelines. 
+ // TODO: filter by pipeline state + for (Map.Entry entry : range) { + Pipeline pipeline = Pipeline.getFromProtoBuf( + HddsProtos.Pipeline.PARSER.parseFrom(entry.getValue())); + Preconditions.checkNotNull(pipeline); + addExistingPipeline(pipeline); + } + } + + public Set getOpenContainerIDsByPipeline(PipelineID pipelineID) { + return pipeline2ContainerMap.get(pipelineID); + } + + public void addContainerToPipeline(PipelineID pipelineID, long containerID) { + pipeline2ContainerMap.get(pipelineID) + .add(ContainerID.valueof(containerID)); + } + + public void removeContainerFromPipeline(PipelineID pipelineID, + long containerID) throws IOException { + pipeline2ContainerMap.get(pipelineID) + .remove(ContainerID.valueof(containerID)); + closePipelineIfNoOpenContainers(pipelineMap.get(pipelineID)); } /** @@ -86,15 +178,14 @@ public PipelineSelector(NodeManager nodeManager, Configuration conf) { * @return pipeline corresponding to nodes */ public static Pipeline newPipelineFromNodes( - List nodes, LifeCycleState state, - ReplicationType replicationType, ReplicationFactor replicationFactor, - String name) { + List nodes, ReplicationType replicationType, + ReplicationFactor replicationFactor, PipelineID id) { Preconditions.checkNotNull(nodes); Preconditions.checkArgument(nodes.size() > 0); String leaderId = nodes.get(0).getUuidString(); - Pipeline - pipeline = new Pipeline(leaderId, state, replicationType, - replicationFactor, name); + // A new pipeline always starts in allocated state + Pipeline pipeline = new Pipeline(leaderId, LifeCycleState.ALLOCATED, + replicationType, replicationFactor, id); for (DatanodeDetails node : nodes) { pipeline.addMember(node); } @@ -173,74 +264,207 @@ public Pipeline getReplicationPipeline(ReplicationType replicationType, LOG.debug("Getting replication pipeline forReplicationType {} :" + " ReplicationFactor {}", replicationType.toString(), replicationFactor.toString()); - return manager. - getPipeline(replicationFactor, replicationType); + + /** + * In the Ozone world, we have a very simple policy. + * + * 1. Try to create a pipeline if there are enough free nodes. + * + * 2. This allows all nodes to part of a pipeline quickly. + * + * 3. if there are not enough free nodes, return already allocated pipeline + * in a round-robin fashion. + * + * TODO: Might have to come up with a better algorithm than this. + * Create a new placement policy that returns pipelines in round robin + * fashion. + */ + Pipeline pipeline = + manager.createPipeline(replicationFactor, replicationType); + if (pipeline == null) { + // try to return a pipeline from already allocated pipelines + PipelineID pipelineId = + manager.getPipeline(replicationFactor, replicationType); + pipeline = pipelineMap.get(pipelineId); + Preconditions.checkArgument(pipeline.getLifeCycleState() == + LifeCycleState.OPEN); + } else { + pipelineStore.put(pipeline.getId().getProtobuf().toByteArray(), + pipeline.getProtobufMessage().toByteArray()); + // if a new pipeline is created, initialize its state machine + updatePipelineState(pipeline, HddsProtos.LifeCycleEvent.CREATE); + + //TODO: move the initialization of pipeline to Ozone Client + manager.initializePipeline(pipeline); + updatePipelineState(pipeline, HddsProtos.LifeCycleEvent.CREATED); + } + return pipeline; } /** - * This function to return pipeline for given pipeline name and replication - * type. + * This function to return pipeline for given pipeline id. 
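The allocation policy in getReplicationPipeline above is: try to build a brand-new pipeline from free nodes first; only when that fails fall back to an already open pipeline chosen round-robin; a freshly created pipeline is persisted and walked through CREATE/CREATED before it is handed out. A condensed sketch of that decision flow, using illustrative types rather than the SCM API:

```java
import java.util.ArrayDeque;
import java.util.Deque;

// Illustrative decision flow for handing out a pipeline.
final class PipelineAllocationSketch {
  private final Deque<String> openPipelines = new ArrayDeque<>();
  private int freeNodes = 9;   // assumed starting pool, for illustration
  private int sequence = 0;

  String getReplicationPipeline(int replicationFactor) {
    String created = tryCreatePipeline(replicationFactor);
    if (created != null) {
      // In SCM this is where the pipeline is persisted and moved
      // through CREATE/CREATED; here we just mark it open.
      openPipelines.addLast(created);
      return created;
    }
    // Not enough free nodes: reuse an open pipeline in round-robin order.
    String reused = openPipelines.pollFirst();
    if (reused == null) {
      return null;                    // nothing operational at all
    }
    openPipelines.addLast(reused);    // rotate it to the back of the queue
    return reused;
  }

  private String tryCreatePipeline(int replicationFactor) {
    if (freeNodes < replicationFactor) {
      return null;
    }
    freeNodes -= replicationFactor;   // members are excluded from future pipelines
    return "pipeline-" + (sequence++);
  }
}
```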
*/ - public Pipeline getPipeline(String pipelineName, - ReplicationType replicationType) throws IOException { - if (pipelineName == null) { - return null; - } - PipelineManager manager = getPipelineManager(replicationType); - Preconditions.checkNotNull(manager, "Found invalid pipeline manager"); - LOG.debug("Getting replication pipeline forReplicationType {} :" + - " pipelineName:{}", replicationType, pipelineName); - return manager.getPipeline(pipelineName); + public Pipeline getPipeline(PipelineID pipelineID) { + return pipelineMap.get(pipelineID); } + /** - * Creates a pipeline from a specified set of Nodes. + * Finalize a given pipeline. */ - - public void createPipeline(ReplicationType replicationType, String - pipelineID, List datanodes) throws IOException { - PipelineManager manager = getPipelineManager(replicationType); + public void finalizePipeline(Pipeline pipeline) throws IOException { + PipelineManager manager = getPipelineManager(pipeline.getType()); Preconditions.checkNotNull(manager, "Found invalid pipeline manager"); - LOG.debug("Creating a pipeline: {} with nodes:{}", pipelineID, - datanodes.stream().map(DatanodeDetails::toString) - .collect(Collectors.joining(","))); - manager.createPipeline(pipelineID, datanodes); + if (pipeline.getLifeCycleState() == LifeCycleState.CLOSING || + pipeline.getLifeCycleState() == LifeCycleState.CLOSED) { + LOG.debug("pipeline:{} already in closing state, skipping", + pipeline.getId()); + // already in closing/closed state + return; + } + + // Remove the pipeline from active allocation + manager.finalizePipeline(pipeline); + + LOG.info("Finalizing pipeline. pipelineID: {}", pipeline.getId()); + updatePipelineState(pipeline, HddsProtos.LifeCycleEvent.FINALIZE); + closePipelineIfNoOpenContainers(pipeline); } /** - * Close the pipeline with the given clusterId. + * Close a given pipeline. */ + public void closePipelineIfNoOpenContainers(Pipeline pipeline) + throws IOException { + if (pipeline.getLifeCycleState() != LifeCycleState.CLOSING) { + return; + } + HashSet containerIDS = + pipeline2ContainerMap.get(pipeline.getId()); + if (containerIDS.size() == 0) { + updatePipelineState(pipeline, HddsProtos.LifeCycleEvent.CLOSE); + LOG.info("Closing pipeline. pipelineID: {}", pipeline.getId()); + } + } - public void closePipeline(ReplicationType replicationType, String - pipelineID) throws IOException { - PipelineManager manager = getPipelineManager(replicationType); + /** + * Close a given pipeline. + */ + private void closePipeline(Pipeline pipeline) throws IOException { + PipelineManager manager = getPipelineManager(pipeline.getType()); Preconditions.checkNotNull(manager, "Found invalid pipeline manager"); - LOG.debug("Closing pipeline. pipelineID: {}", pipelineID); - manager.closePipeline(pipelineID); + LOG.debug("Closing pipeline. pipelineID: {}", pipeline.getId()); + HashSet containers = + pipeline2ContainerMap.get(pipeline.getId()); + Preconditions.checkArgument(containers.size() == 0); + manager.closePipeline(pipeline); } /** - * list members in the pipeline . + * Add to a given pipeline. 
*/ - - public List getDatanodes(ReplicationType replicationType, - String pipelineID) throws IOException { - PipelineManager manager = getPipelineManager(replicationType); + private void addOpenPipeline(Pipeline pipeline) { + PipelineManager manager = getPipelineManager(pipeline.getType()); Preconditions.checkNotNull(manager, "Found invalid pipeline manager"); - LOG.debug("Getting data nodes from pipeline : {}", pipelineID); - return manager.getMembers(pipelineID); + LOG.debug("Adding Open pipeline. pipelineID: {}", pipeline.getId()); + manager.addOpenPipeline(pipeline); + } + + private void closeContainersByPipeline(Pipeline pipeline) { + HashSet containers = + pipeline2ContainerMap.get(pipeline.getId()); + for (ContainerID id : containers) { + eventPublisher.fireEvent(SCMEvents.CLOSE_CONTAINER, id); + } + } + + public Set getPipelineId(UUID dnId) { + return node2PipelineMap.getPipelines(dnId); + } + + private void addExistingPipeline(Pipeline pipeline) throws IOException { + LifeCycleState state = pipeline.getLifeCycleState(); + switch (state) { + case ALLOCATED: + // a pipeline in allocated state is only present in SCM and does not exist + // on datanode, on SCM restart, this pipeline can be ignored. + break; + case CREATING: + case OPEN: + case CLOSING: + //TODO: process pipeline report and move pipeline to active queue + // when all the nodes have reported. + pipelineMap.put(pipeline.getId(), pipeline); + pipeline2ContainerMap.put(pipeline.getId(), new HashSet<>()); + node2PipelineMap.addPipeline(pipeline); + break; + case CLOSED: + // if the pipeline is in closed state, nothing to do. + break; + default: + throw new IOException("invalid pipeline state:" + state); + } } /** - * Update the datanodes in the list of the pipeline. + * Update the Pipeline State to the next state. + * + * @param pipeline - Pipeline + * @param event - LifeCycle Event + * @throws SCMException on Failure. 
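The updatePipelineState method that follows acquires a lease on the pipeline when the CREATE event is processed and registers a callback that drives the pipeline to TIMEOUT if CREATED never arrives within the configured lease period; on CREATED the lease is released. The same acquire/register/release shape can be sketched with a ScheduledExecutorService; this is only an analogy for the Ozone LeaseManager, not its API.

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

// Illustrative lease-with-timeout: if release() is not called in time,
// the registered callback runs and can fire the TIMEOUT transition.
final class CreationLeaseSketch {
  private final ScheduledExecutorService scheduler =
      Executors.newSingleThreadScheduledExecutor();
  private final Map<String, ScheduledFuture<?>> leases = new ConcurrentHashMap<>();

  void acquire(String pipelineId, long timeoutMillis, Runnable onTimeout) {
    ScheduledFuture<?> timeout =
        scheduler.schedule(onTimeout, timeoutMillis, TimeUnit.MILLISECONDS);
    leases.put(pipelineId, timeout);
  }

  void release(String pipelineId) {
    ScheduledFuture<?> timeout = leases.remove(pipelineId);
    if (timeout != null) {
      timeout.cancel(false);   // CREATED arrived in time, drop the timeout
    }
  }

  void shutdown() {
    scheduler.shutdownNow();
  }
}
```

Usage mirrors the CREATE/CREATED handling: acquire on CREATE with a callback that applies TIMEOUT, release on CREATED, and shutdown when the selector shuts down.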
*/ + public void updatePipelineState(Pipeline pipeline, + HddsProtos.LifeCycleEvent event) throws IOException { + try { + switch (event) { + case CREATE: + pipelineMap.put(pipeline.getId(), pipeline); + pipeline2ContainerMap.put(pipeline.getId(), new HashSet<>()); + node2PipelineMap.addPipeline(pipeline); + // Acquire lease on pipeline + Lease pipelineLease = pipelineLeaseManager.acquire(pipeline); + // Register callback to be executed in case of timeout + pipelineLease.registerCallBack(() -> { + updatePipelineState(pipeline, HddsProtos.LifeCycleEvent.TIMEOUT); + return null; + }); + break; + case CREATED: + // Release the lease on pipeline + pipelineLeaseManager.release(pipeline); + addOpenPipeline(pipeline); + break; - public void updateDatanodes(ReplicationType replicationType, String - pipelineID, List newDatanodes) throws IOException { - PipelineManager manager = getPipelineManager(replicationType); - Preconditions.checkNotNull(manager, "Found invalid pipeline manager"); - LOG.debug("Updating pipeline: {} with new nodes:{}", pipelineID, - newDatanodes.stream().map(DatanodeDetails::toString) - .collect(Collectors.joining(","))); - manager.updatePipeline(pipelineID, newDatanodes); + case FINALIZE: + closeContainersByPipeline(pipeline); + break; + + case CLOSE: + case TIMEOUT: + closePipeline(pipeline); + pipeline2ContainerMap.remove(pipeline.getId()); + node2PipelineMap.removePipeline(pipeline); + pipelineMap.remove(pipeline.getId()); + break; + default: + throw new SCMException("Unsupported pipeline LifeCycleEvent.", + FAILED_TO_CHANGE_PIPELINE_STATE); + } + + stateManager.updatePipelineState(pipeline, event); + pipelineStore.put(pipeline.getId().getProtobuf().toByteArray(), + pipeline.getProtobufMessage().toByteArray()); + } catch (LeaseException e) { + throw new IOException("Lease Exception.", e); + } + } + + public void shutdown() throws IOException { + if (pipelineLeaseManager != null) { + pipelineLeaseManager.shutdown(); + } + + if (pipelineStore != null) { + pipelineStore.close(); + } } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineStateManager.java new file mode 100644 index 00000000000..6054f165444 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/PipelineStateManager.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.pipelines; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.ozone.common.statemachine + .InvalidStateTransitionException; +import org.apache.hadoop.ozone.common.statemachine.StateMachine; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes + .FAILED_TO_CHANGE_PIPELINE_STATE; + +/** + * Manages Pipeline states. + */ +public class PipelineStateManager { + private static final Logger LOG = + LoggerFactory.getLogger(PipelineStateManager.class); + + private final StateMachine stateMachine; + + PipelineStateManager() { + // Initialize the container state machine. + Set finalStates = new HashSet<>(); + // These are the steady states of a container. + finalStates.add(HddsProtos.LifeCycleState.OPEN); + finalStates.add(HddsProtos.LifeCycleState.CLOSED); + + this.stateMachine = new StateMachine<>(HddsProtos.LifeCycleState.ALLOCATED, + finalStates); + initializeStateMachine(); + } + + /** + * Event and State Transition Mapping. + * + * State: ALLOCATED ---------------> CREATING + * Event: CREATE + * + * State: CREATING ---------------> OPEN + * Event: CREATED + * + * State: OPEN ---------------> CLOSING + * Event: FINALIZE + * + * State: CLOSING ---------------> CLOSED + * Event: CLOSE + * + * State: CREATING ---------------> CLOSED + * Event: TIMEOUT + * + * + * Container State Flow: + * + * [ALLOCATED]---->[CREATING]------>[OPEN]-------->[CLOSING] + * (CREATE) | (CREATED) (FINALIZE) | + * | | + * | | + * |(TIMEOUT) |(CLOSE) + * | | + * +--------> [CLOSED] <--------+ + */ + private void initializeStateMachine() { + stateMachine.addTransition(HddsProtos.LifeCycleState.ALLOCATED, + HddsProtos.LifeCycleState.CREATING, + HddsProtos.LifeCycleEvent.CREATE); + + stateMachine.addTransition(HddsProtos.LifeCycleState.CREATING, + HddsProtos.LifeCycleState.OPEN, + HddsProtos.LifeCycleEvent.CREATED); + + stateMachine.addTransition(HddsProtos.LifeCycleState.OPEN, + HddsProtos.LifeCycleState.CLOSING, + HddsProtos.LifeCycleEvent.FINALIZE); + + stateMachine.addTransition(HddsProtos.LifeCycleState.CLOSING, + HddsProtos.LifeCycleState.CLOSED, + HddsProtos.LifeCycleEvent.CLOSE); + + stateMachine.addTransition(HddsProtos.LifeCycleState.CREATING, + HddsProtos.LifeCycleState.CLOSED, + HddsProtos.LifeCycleEvent.TIMEOUT); + } + + + /** + * Update the Pipeline State to the next state. + * + * @param pipeline - Pipeline + * @param event - LifeCycle Event + * @throws SCMException on Failure. 
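The transitions documented in the javadoc above (ALLOCATED to CREATING on CREATE, CREATING to OPEN on CREATED, OPEN to CLOSING on FINALIZE, CLOSING to CLOSED on CLOSE, and CREATING to CLOSED on TIMEOUT) can be captured in a small transition table. The sketch below mirrors that diagram with enums and rejects any other combination, roughly what the StateMachine helper does; it is illustrative, not the actual helper.

```java
import java.util.EnumMap;
import java.util.Map;

// Illustrative transition table for the pipeline life cycle described above.
final class PipelineLifecycleSketch {
  enum State { ALLOCATED, CREATING, OPEN, CLOSING, CLOSED }
  enum Event { CREATE, CREATED, FINALIZE, CLOSE, TIMEOUT }

  private static final Map<State, Map<Event, State>> TRANSITIONS =
      new EnumMap<>(State.class);
  static {
    for (State s : State.values()) {
      TRANSITIONS.put(s, new EnumMap<>(Event.class));
    }
    TRANSITIONS.get(State.ALLOCATED).put(Event.CREATE, State.CREATING);
    TRANSITIONS.get(State.CREATING).put(Event.CREATED, State.OPEN);
    TRANSITIONS.get(State.CREATING).put(Event.TIMEOUT, State.CLOSED);
    TRANSITIONS.get(State.OPEN).put(Event.FINALIZE, State.CLOSING);
    TRANSITIONS.get(State.CLOSING).put(Event.CLOSE, State.CLOSED);
  }

  static State nextState(State current, Event event) {
    State next = TRANSITIONS.get(current).get(event);
    if (next == null) {
      throw new IllegalStateException(
          "Invalid transition from " + current + " on " + event);
    }
    return next;
  }
}
```

OPEN and CLOSED have no outgoing entries apart from FINALIZE, matching the two steady states configured for the state machine.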
+ */ + public void updatePipelineState(Pipeline pipeline, + HddsProtos.LifeCycleEvent event) throws IOException { + HddsProtos.LifeCycleState newState; + try { + newState = stateMachine.getNextState(pipeline.getLifeCycleState(), event); + } catch (InvalidStateTransitionException ex) { + String error = String.format("Failed to update pipeline state %s, " + + "reason: invalid state transition from state: %s upon " + + "event: %s.", + pipeline.getId(), pipeline.getLifeCycleState(), event); + LOG.error(error); + throw new SCMException(error, FAILED_TO_CHANGE_PIPELINE_STATE); + } + + // This is a post condition after executing getNextState. + Preconditions.checkNotNull(newState); + Preconditions.checkNotNull(pipeline); + pipeline.setLifeCycleState(newState); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/ratis/RatisManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/ratis/RatisManagerImpl.java index 189060e0f6f..d3cec882bb5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/ratis/RatisManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/ratis/RatisManagerImpl.java @@ -19,15 +19,14 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.scm.XceiverClientRatis; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.algorithms .ContainerPlacementPolicy; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipelines.PipelineManager; import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; @@ -39,7 +38,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Set; -import java.util.UUID; /** * Implementation of {@link PipelineManager}. @@ -49,7 +47,6 @@ public class RatisManagerImpl extends PipelineManager { private static final Logger LOG = LoggerFactory.getLogger(RatisManagerImpl.class); - private static final String PREFIX = "Ratis-"; private final Configuration conf; private final NodeManager nodeManager; private final Set ratisMembers; @@ -71,7 +68,7 @@ public RatisManagerImpl(NodeManager nodeManager, * Allocates a new ratis Pipeline from the free nodes. * * @param factor - One or Three - * @return PipelineChannel. + * @return Pipeline. */ public Pipeline allocatePipeline(ReplicationFactor factor) { List newNodesList = new LinkedList<>(); @@ -87,46 +84,37 @@ public Pipeline allocatePipeline(ReplicationFactor factor) { // once a datanode has been added to a pipeline, exclude it from // further allocations ratisMembers.addAll(newNodesList); - LOG.info("Allocating a new ratis pipeline of size: {}", count); - // Start all channel names with "Ratis", easy to grep the logs. 
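[Editor's note] The PipelineStateManager introduced above drives each pipeline through a small event-driven state machine. As a reading aid, here is a minimal, self-contained sketch of the same transition table in plain Java; the enums and lookup helper are stand-ins for HddsProtos and the HDDS StateMachine class, so names and error handling are illustrative only.

import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

public final class PipelineLifecycleSketch {
  enum State { ALLOCATED, CREATING, OPEN, CLOSING, CLOSED }
  enum Event { CREATE, CREATED, FINALIZE, CLOSE, TIMEOUT }

  // Same transitions as documented in PipelineStateManager above.
  private static final Map<State, Map<Event, State>> TRANSITIONS =
      new EnumMap<>(State.class);

  static {
    addTransition(State.ALLOCATED, Event.CREATE, State.CREATING);
    addTransition(State.CREATING, Event.CREATED, State.OPEN);
    addTransition(State.OPEN, Event.FINALIZE, State.CLOSING);
    addTransition(State.CLOSING, Event.CLOSE, State.CLOSED);
    addTransition(State.CREATING, Event.TIMEOUT, State.CLOSED);
  }

  private static void addTransition(State from, Event event, State to) {
    TRANSITIONS.computeIfAbsent(from, s -> new EnumMap<>(Event.class))
        .put(event, to);
  }

  static State getNextState(State current, Event event) {
    State next = TRANSITIONS
        .getOrDefault(current, Collections.<Event, State>emptyMap())
        .get(event);
    if (next == null) {
      // The real state machine throws InvalidStateTransitionException here.
      throw new IllegalStateException(
          "Invalid transition from " + current + " on " + event);
    }
    return next;
  }

  public static void main(String[] args) {
    State state = State.ALLOCATED;
    state = getNextState(state, Event.CREATE);   // CREATING
    state = getNextState(state, Event.CREATED);  // OPEN
    state = getNextState(state, Event.FINALIZE); // CLOSING
    state = getNextState(state, Event.CLOSE);    // CLOSED
    System.out.println("Final state: " + state);
  }

  private PipelineLifecycleSketch() { }
}

The real code does more than compute the next state: PipelineSelector.updatePipelineState above also persists the pipeline to the pipeline store after each successful transition.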
- String conduitName = PREFIX + - UUID.randomUUID().toString().substring(PREFIX.length()); - Pipeline pipeline= - PipelineSelector.newPipelineFromNodes(newNodesList, - LifeCycleState.OPEN, ReplicationType.RATIS, factor, conduitName); - try (XceiverClientRatis client = - XceiverClientRatis.newXceiverClientRatis(pipeline, conf)) { - client.createPipeline(pipeline.getPipelineName(), newNodesList); - } catch (IOException e) { - return null; - } - return pipeline; + PipelineID pipelineID = PipelineID.randomId(); + LOG.info("Allocating a new ratis pipeline of size: {} id: {}", + count, pipelineID); + return PipelineSelector.newPipelineFromNodes(newNodesList, + ReplicationType.RATIS, factor, pipelineID); } } } return null; } - /** - * Creates a pipeline from a specified set of Nodes. - * - * @param pipelineID - Name of the pipeline - * @param datanodes - The list of datanodes that make this pipeline. - */ - @Override - public void createPipeline(String pipelineID, - List datanodes) { - + public void initializePipeline(Pipeline pipeline) throws IOException { + //TODO:move the initialization from SCM to client + try (XceiverClientRatis client = + XceiverClientRatis.newXceiverClientRatis(pipeline, conf)) { + client.createPipeline(); + } } /** - * Close the pipeline with the given clusterId. - * - * @param pipelineID + * Close the pipeline. */ - @Override - public void closePipeline(String pipelineID) throws IOException { - + public void closePipeline(Pipeline pipeline) throws IOException { + try (XceiverClientRatis client = + XceiverClientRatis.newXceiverClientRatis(pipeline, conf)) { + client.destroyPipeline(); + } + for (DatanodeDetails node : pipeline.getMachines()) { + // A node should always be the in ratis members list. + Preconditions.checkArgument(ratisMembers.remove(node)); + } } /** @@ -136,7 +124,7 @@ public void closePipeline(String pipelineID) throws IOException { * @return the datanode */ @Override - public List getMembers(String pipelineID) + public List getMembers(PipelineID pipelineID) throws IOException { return null; } @@ -148,7 +136,7 @@ public void closePipeline(String pipelineID) throws IOException { * @param newDatanodes */ @Override - public void updatePipeline(String pipelineID, + public void updatePipeline(PipelineID pipelineID, List newDatanodes) throws IOException { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/standalone/StandaloneManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/standalone/StandaloneManagerImpl.java index 579a3a260db..ed2fc2fe683 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/standalone/StandaloneManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipelines/standalone/StandaloneManagerImpl.java @@ -17,15 +17,14 @@ package org.apache.hadoop.hdds.scm.pipelines.standalone; import com.google.common.base.Preconditions; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.algorithms .ContainerPlacementPolicy; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipelines.PipelineManager; import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; @@ -37,7 +36,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Set; -import java.util.UUID; /** * Standalone Manager Impl to prove that pluggable interface @@ -85,39 +83,29 @@ public Pipeline allocatePipeline(ReplicationFactor factor) { // once a datanode has been added to a pipeline, exclude it from // further allocations standAloneMembers.addAll(newNodesList); - LOG.info("Allocating a new standalone pipeline channel of size: {}", - count); - String channelName = - "SA-" + UUID.randomUUID().toString().substring(3); + PipelineID pipelineID = PipelineID.randomId(); + LOG.info("Allocating a new standalone pipeline of size: {} id: {}", + count, pipelineID); return PipelineSelector.newPipelineFromNodes(newNodesList, - LifeCycleState.OPEN, ReplicationType.STAND_ALONE, - ReplicationFactor.ONE, channelName); + ReplicationType.STAND_ALONE, ReplicationFactor.ONE, pipelineID); } } } return null; } - /** - * Creates a pipeline from a specified set of Nodes. - * - * @param pipelineID - Name of the pipeline - * @param datanodes - The list of datanodes that make this pipeline. - */ - @Override - public void createPipeline(String pipelineID, - List datanodes) { - //return newPipelineFromNodes(datanodes, pipelineID); + public void initializePipeline(Pipeline pipeline) { + // Nothing to be done for standalone pipeline } /** - * Close the pipeline with the given clusterId. - * - * @param pipelineID + * Close the pipeline. */ - @Override - public void closePipeline(String pipelineID) throws IOException { - + public void closePipeline(Pipeline pipeline) throws IOException { + for (DatanodeDetails node : pipeline.getMachines()) { + // A node should always be the in standalone members list. + Preconditions.checkArgument(standAloneMembers.remove(node)); + } } /** @@ -127,7 +115,7 @@ public void closePipeline(String pipelineID) throws IOException { * @return the datanode */ @Override - public List getMembers(String pipelineID) + public List getMembers(PipelineID pipelineID) throws IOException { return null; } @@ -139,7 +127,7 @@ public void closePipeline(String pipelineID) throws IOException { * @param newDatanodes */ @Override - public void updatePipeline(String pipelineID, List + public void updatePipeline(PipelineID pipelineID, List newDatanodes) throws IOException { } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java new file mode 100644 index 00000000000..b92413e80ca --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ChillModePrecheck.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  
You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.server; + +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; +import org.apache.hadoop.hdds.scm.server.SCMChillModeManager.ChillModeRestrictedOps; + +/** + * Chill mode pre-check for SCM operations. + * */ +public class ChillModePrecheck implements Precheck { + + private AtomicBoolean inChillMode = new AtomicBoolean(true); + public static final String PRECHECK_TYPE = "ChillModePrecheck"; + + public boolean check(ScmOps op) throws SCMException { + if (inChillMode.get() && ChillModeRestrictedOps + .isRestrictedInChillMode(op)) { + throw new SCMException("ChillModePrecheck failed for " + op, + ResultCodes.CHILL_MODE_EXCEPTION); + } + return inChillMode.get(); + } + + @Override + public String type() { + return PRECHECK_TYPE; + } + + public boolean isInChillMode() { + return inChillMode.get(); + } + + public void setInChillMode(boolean inChillMode) { + this.inChillMode.set(inChillMode); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/Precheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/Precheck.java new file mode 100644 index 00000000000..16549908309 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/Precheck.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.server; + +import org.apache.hadoop.hdds.scm.exceptions.SCMException; + +/** + * Precheck for SCM operations. + * */ +public interface Precheck { + boolean check(T t) throws SCMException; + String type(); +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java new file mode 100644 index 00000000000..b35ac1bdc0d --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.server; + +import com.google.common.annotations.VisibleForTesting; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer + .NodeRegistrationContainerReport; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * StorageContainerManager enters chill mode on startup to allow system to + * reach a stable state before becoming fully functional. SCM will wait + * for certain resources to be reported before coming out of chill mode. + * + * ChillModeExitRule defines format to define new rules which must be satisfied + * to exit Chill mode. + * ContainerChillModeRule defines the only exit criteria right now. + * On every new datanode registration event this class adds replicas + * for reported containers and validates if cutoff threshold for + * containers is meet. + */ +public class SCMChillModeManager implements + EventHandler { + + private static final Logger LOG = + LoggerFactory.getLogger(SCMChillModeManager.class); + private AtomicBoolean inChillMode = new AtomicBoolean(true); + private AtomicLong containerWithMinReplicas = new AtomicLong(0); + private Map exitRules = new HashMap(1); + private Configuration config; + private static final String CONT_EXIT_RULE = "ContainerChillModeRule"; + private final EventQueue eventPublisher; + + SCMChillModeManager(Configuration conf, List allContainers, + EventQueue eventQueue) { + this.config = conf; + this.eventPublisher = eventQueue; + exitRules + .put(CONT_EXIT_RULE, new ContainerChillModeRule(config, allContainers)); + if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, + HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) { + exitChillMode(eventQueue); + } + emitChillModeStatus(); + } + + /** + * Emit Chill mode status. 
+ */ + @VisibleForTesting + public void emitChillModeStatus() { + eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, inChillMode.get()); + } + + private void validateChillModeExitRules(EventPublisher eventQueue) { + for (ChillModeExitRule exitRule : exitRules.values()) { + if (!exitRule.validate()) { + return; + } + } + exitChillMode(eventQueue); + } + + /** + * Exit chill mode. It does following actions: + * 1. Set chill mode status to fale. + * 2. Emits START_REPLICATION for ReplicationManager. + * 3. Cleanup resources. + * 4. Emit chill mode status. + * @param eventQueue + */ + @VisibleForTesting + public void exitChillMode(EventPublisher eventQueue) { + LOG.info("SCM exiting chill mode."); + setInChillMode(false); + + // TODO: Remove handler registration as there is no need to listen to + // register events anymore. + + for (ChillModeExitRule e : exitRules.values()) { + e.cleanup(); + } + emitChillModeStatus(); + } + + @Override + public void onMessage( + NodeRegistrationContainerReport nodeRegistrationContainerReport, + EventPublisher publisher) { + if (getInChillMode()) { + exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport); + validateChillModeExitRules(publisher); + } + } + + public boolean getInChillMode() { + return inChillMode.get(); + } + + /** + * Set chill mode status. + */ + public void setInChillMode(boolean inChillMode) { + this.inChillMode.set(inChillMode); + } + + /** + * Interface for defining chill mode exit rules. + * + * @param + */ + public interface ChillModeExitRule { + + boolean validate(); + + void process(T report); + + void cleanup(); + } + + /** + * Class defining Chill mode exit criteria for Containers. + */ + public class ContainerChillModeRule implements + ChillModeExitRule { + + // Required cutoff % for containers with at least 1 reported replica. + private double chillModeCutoff; + // Containers read from scm db. + private Map containerMap; + private double maxContainer; + + public ContainerChillModeRule(Configuration conf, + List containers) { + chillModeCutoff = conf + .getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT, + HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT); + containerMap = new ConcurrentHashMap<>(); + if(containers != null) { + containers.forEach(c -> { + if (c != null) { + containerMap.put(c.getContainerID(), c); + } + }); + maxContainer = containers.size(); + } + } + + @Override + public boolean validate() { + if (maxContainer == 0) { + return true; + } + return getCurrentContainerThreshold() >= chillModeCutoff; + } + + @VisibleForTesting + public double getCurrentContainerThreshold() { + return (containerWithMinReplicas.doubleValue() / maxContainer); + } + + @Override + public void process(NodeRegistrationContainerReport reportsProto) { + if (maxContainer == 0) { + // No container to check. + return; + } + + reportsProto.getReport().getReportsList().forEach(c -> { + if (containerMap.containsKey(c.getContainerID())) { + if(containerMap.remove(c.getContainerID()) != null) { + containerWithMinReplicas.getAndAdd(1); + } + } + }); + + LOG.info("SCM in chill mode. 
{} % containers have at least one reported " + + "replica.", (containerWithMinReplicas.get() / maxContainer) * 100); + } + + @Override + public void cleanup() { + containerMap.clear(); + } + } + + @VisibleForTesting + public static Logger getLogger() { + return LOG; + } + + @VisibleForTesting + public double getCurrentContainerThreshold() { + return ((ContainerChillModeRule) exitRules.get(CONT_EXIT_RULE)) + .getCurrentContainerThreshold(); + } + + /** + * Operations restricted in SCM chill mode. + */ + public static class ChillModeRestrictedOps { + private static EnumSet restrictedOps = EnumSet.noneOf(ScmOps.class); + + static { + restrictedOps.add(ScmOps.allocateBlock); + restrictedOps.add(ScmOps.allocateContainer); + } + + public static boolean isRestrictedInChillMode(ScmOps opName) { + return restrictedOps.contains(opName); + } + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index aefcf1b6ec1..3f1943ce2f0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -27,15 +27,21 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps; import org.apache.hadoop.hdds.protocol.proto .StorageContainerLocationProtocolProtos; import org.apache.hadoop.hdds.scm.HddsServerUtil; import org.apache.hadoop.hdds.scm.ScmInfo; +import org.apache.hadoop.hdds.scm.ScmUtils; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; @@ -71,13 +77,14 @@ * The RPC server that listens to requests from clients. 
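[Editor's note] ContainerChillModeRule above lets SCM leave chill mode once the fraction of known containers with at least one reported replica reaches the configured cutoff. The sketch below restates that check in isolation; the cutoff of 0.99 in main() is only an example, the real value comes from HDDS_SCM_CHILLMODE_THRESHOLD_PCT with its default defined in HddsConfigKeys.

public final class ChillModeThresholdSketch {

  // Mirrors ContainerChillModeRule.validate()/getCurrentContainerThreshold().
  static boolean canExitChillMode(long containersWithReportedReplica,
      long containersInScmDb, double cutoff) {
    if (containersInScmDb == 0) {
      return true; // nothing to wait for, as in the rule's maxContainer == 0 case
    }
    double currentThreshold =
        (double) containersWithReportedReplica / containersInScmDb;
    return currentThreshold >= cutoff;
  }

  public static void main(String[] args) {
    // 95 of 100 containers have a reported replica: stay in chill mode.
    System.out.println(canExitChillMode(95, 100, 0.99));
    // 99 of 100: the threshold is reached and chill mode can be exited.
    System.out.println(canExitChillMode(99, 100, 0.99));
  }

  private ChillModeThresholdSketch() { }
}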
*/ public class SCMClientProtocolServer implements - StorageContainerLocationProtocol { + StorageContainerLocationProtocol, EventHandler { private static final Logger LOG = LoggerFactory.getLogger(SCMClientProtocolServer.class); private final RPC.Server clientRpcServer; private final InetSocketAddress clientRpcAddress; private final StorageContainerManager scm; private final OzoneConfiguration conf; + private ChillModePrecheck chillModePrecheck = new ChillModePrecheck(); public SCMClientProtocolServer(OzoneConfiguration conf, StorageContainerManager scm) throws IOException { @@ -149,6 +156,7 @@ public String getRpcRemoteUsername() { public ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType replicationType, HddsProtos.ReplicationFactor factor, String owner) throws IOException { + ScmUtils.preCheck(ScmOps.allocateContainer, chillModePrecheck); String remoteUser = getRpcRemoteUsername(); getScm().checkAdminAccess(remoteUser); @@ -165,13 +173,41 @@ public ContainerInfo getContainer(long containerID) throws IOException { } @Override - public ContainerWithPipeline getContainerWithPipeline(long containerID) throws IOException { + public ContainerWithPipeline getContainerWithPipeline(long containerID) + throws IOException { + if (chillModePrecheck.isInChillMode()) { + ContainerInfo contInfo = scm.getScmContainerManager() + .getContainer(containerID); + if (contInfo.isContainerOpen()) { + if (!hasRequiredReplicas(contInfo)) { + throw new SCMException("Open container " + containerID + " doesn't" + + " have enough replicas to service this operation in " + + "Chill mode.", ResultCodes.CHILL_MODE_EXCEPTION); + } + } + } String remoteUser = getRpcRemoteUsername(); getScm().checkAdminAccess(remoteUser); return scm.getScmContainerManager() .getContainerWithPipeline(containerID); } + /** + * Check if container reported replicas are equal or greater than required + * replication factor. + */ + private boolean hasRequiredReplicas(ContainerInfo contInfo) { + try{ + return getScm().getScmContainerManager().getStateManager() + .getContainerReplicas(contInfo.containerID()) + .size() >= contInfo.getReplicationFactor().getNumber(); + } catch (SCMException ex) { + // getContainerReplicas throws exception if no replica's exist for given + // container. + return false; + } + } + @Override public List listContainer(long startContainerID, int count) throws IOException { @@ -289,6 +325,22 @@ public StorageContainerManager getScm() { return scm; } + /** + * Set chill mode status based on SCMEvents.CHILL_MODE_STATUS event. + */ + @Override + public void onMessage(Boolean inChillMOde, EventPublisher publisher) { + chillModePrecheck.setInChillMode(inChillMOde); + } + + /** + * Set chill mode status based on . + */ + public boolean getChillModeStatus() { + return chillModePrecheck.isInChillMode(); + } + + /** * Query the System for Nodes. 
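[Editor's note] As shown above, SCMClientProtocolServer guards write-path operations with ChillModePrecheck (via ScmUtils.preCheck) and rejects restricted operations while SCM is still in chill mode. The sketch below models that guard with plain Java; the enum values mirror ChillModeRestrictedOps, but the exception type and helper method are placeholders, not the HDDS API (the real check throws SCMException with CHILL_MODE_EXCEPTION).

import java.util.EnumSet;
import java.util.concurrent.atomic.AtomicBoolean;

public final class ChillModeGuardSketch {

  enum ScmOps { allocateBlock, allocateContainer, listContainer }

  // Same restricted set as ChillModeRestrictedOps above.
  private static final EnumSet<ScmOps> RESTRICTED_IN_CHILL_MODE =
      EnumSet.of(ScmOps.allocateBlock, ScmOps.allocateContainer);

  private final AtomicBoolean inChillMode = new AtomicBoolean(true);

  void preCheck(ScmOps op) {
    if (inChillMode.get() && RESTRICTED_IN_CHILL_MODE.contains(op)) {
      throw new IllegalStateException("ChillModePrecheck failed for " + op);
    }
  }

  public static void main(String[] args) {
    ChillModeGuardSketch guard = new ChillModeGuardSketch();
    guard.preCheck(ScmOps.listContainer);      // read path, allowed in chill mode
    guard.inChillMode.set(false);              // analogous to CHILL_MODE_STATUS=false
    guard.preCheck(ScmOps.allocateContainer);  // allowed once chill mode is exited
  }
}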
* diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java index f221584bddf..a651f62371a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java @@ -17,20 +17,36 @@ package org.apache.hadoop.hdds.scm.server; +import com.google.common.base.Preconditions; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.PipelineActionsProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerActionsProto; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; +import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.server.events.EventPublisher; -import org.apache.hadoop.hdds.server.events.TypedEvent; import com.google.protobuf.GeneratedMessage; +import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.List; + +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CONTAINER_ACTIONS; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CONTAINER_REPORT; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.NODE_REPORT; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CMD_STATUS_REPORT; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.PIPELINE_ACTIONS; + /** * This class is responsible for dispatching heartbeat from datanode to * appropriate EventHandler at SCM. @@ -40,15 +56,15 @@ private static final Logger LOG = LoggerFactory.getLogger(SCMDatanodeHeartbeatDispatcher.class); - private EventPublisher eventPublisher; - - public static final TypedEvent NODE_REPORT = - new TypedEvent<>(NodeReportFromDatanode.class); + private final NodeManager nodeManager; + private final EventPublisher eventPublisher; - public static final TypedEvent CONTAINER_REPORT = - new TypedEvent(ContainerReportFromDatanode.class); - public SCMDatanodeHeartbeatDispatcher(EventPublisher eventPublisher) { + public SCMDatanodeHeartbeatDispatcher(NodeManager nodeManager, + EventPublisher eventPublisher) { + Preconditions.checkNotNull(nodeManager); + Preconditions.checkNotNull(eventPublisher); + this.nodeManager = nodeManager; this.eventPublisher = eventPublisher; } @@ -57,23 +73,50 @@ public SCMDatanodeHeartbeatDispatcher(EventPublisher eventPublisher) { * Dispatches heartbeat to registered event handlers. * * @param heartbeat heartbeat to be dispatched. + * + * @return list of SCMCommand */ - public void dispatch(SCMHeartbeatRequestProto heartbeat) { + public List dispatch(SCMHeartbeatRequestProto heartbeat) { DatanodeDetails datanodeDetails = DatanodeDetails.getFromProtoBuf(heartbeat.getDatanodeDetails()); // should we dispatch heartbeat through eventPublisher? 
+ List commands = nodeManager.processHeartbeat(datanodeDetails); if (heartbeat.hasNodeReport()) { + LOG.debug("Dispatching Node Report."); eventPublisher.fireEvent(NODE_REPORT, new NodeReportFromDatanode(datanodeDetails, heartbeat.getNodeReport())); } if (heartbeat.hasContainerReport()) { + LOG.debug("Dispatching Container Report."); eventPublisher.fireEvent(CONTAINER_REPORT, new ContainerReportFromDatanode(datanodeDetails, heartbeat.getContainerReport())); } + + if (heartbeat.hasContainerActions()) { + LOG.debug("Dispatching Container Actions."); + eventPublisher.fireEvent(CONTAINER_ACTIONS, + new ContainerActionsFromDatanode(datanodeDetails, + heartbeat.getContainerActions())); + } + + if (heartbeat.hasPipelineActions()) { + LOG.debug("Dispatching Pipeline Actions."); + eventPublisher.fireEvent(PIPELINE_ACTIONS, + new PipelineActionsFromDatanode(datanodeDetails, + heartbeat.getPipelineActions())); + } + + if (heartbeat.hasCommandStatusReport()) { + eventPublisher.fireEvent(CMD_STATUS_REPORT, + new CommandStatusReportFromDatanode(datanodeDetails, + heartbeat.getCommandStatusReport())); + } + + return commands; } /** @@ -123,4 +166,40 @@ public ContainerReportFromDatanode(DatanodeDetails datanodeDetails, } } + /** + * Container action event payload with origin. + */ + public static class ContainerActionsFromDatanode + extends ReportFromDatanode { + + public ContainerActionsFromDatanode(DatanodeDetails datanodeDetails, + ContainerActionsProto actions) { + super(datanodeDetails, actions); + } + } + + /** + * Pipeline action event payload with origin. + */ + public static class PipelineActionsFromDatanode + extends ReportFromDatanode { + + public PipelineActionsFromDatanode(DatanodeDetails datanodeDetails, + PipelineActionsProto actions) { + super(datanodeDetails, actions); + } + } + + /** + * Container report event payload with origin. 
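[Editor's note] The dispatcher above turns one heartbeat into a series of typed events (node report, container report, container actions, pipeline actions, command status) and returns the commands queued by the NodeManager for that datanode. A stripped-down sketch of that fan-out follows; protobuf messages and TypedEvents are replaced by plain strings, so only the control flow mirrors the real dispatcher.

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.function.BiConsumer;

public final class HeartbeatDispatchSketch {

  static final class Heartbeat {
    final String datanodeUuid;
    final Optional<String> nodeReport;
    final Optional<String> containerReport;

    Heartbeat(String datanodeUuid, Optional<String> nodeReport,
        Optional<String> containerReport) {
      this.datanodeUuid = datanodeUuid;
      this.nodeReport = nodeReport;
      this.containerReport = containerReport;
    }
  }

  // Fires one typed event per report section and returns queued commands.
  static List<String> dispatch(Heartbeat heartbeat,
      BiConsumer<String, String> fireEvent) {
    // Stands in for nodeManager.processHeartbeat(datanodeDetails).
    List<String> commands = new ArrayList<>();
    commands.add("commands for " + heartbeat.datanodeUuid);

    heartbeat.nodeReport
        .ifPresent(report -> fireEvent.accept("NODE_REPORT", report));
    heartbeat.containerReport
        .ifPresent(report -> fireEvent.accept("CONTAINER_REPORT", report));
    return commands;
  }

  public static void main(String[] args) {
    Heartbeat heartbeat = new Heartbeat("dn-1",
        Optional.of("storage stats"), Optional.<String>empty());
    List<String> commands = dispatch(heartbeat,
        (event, payload) -> System.out.println(event + " <- " + payload));
    System.out.println(commands);
  }

  private HeartbeatDispatchSketch() { }
}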
+ */ + public static class CommandStatusReportFromDatanode + extends ReportFromDatanode { + + public CommandStatusReportFromDatanode(DatanodeDetails datanodeDetails, + CommandStatusReportsProto report) { + super(datanodeDetails, report); + } + } + } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java index aef5b03ce22..8a09dc899d8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeProtocolServer.java @@ -73,6 +73,8 @@ import org.apache.hadoop.hdds.scm.HddsServerUtil; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ReportFromDatanode; import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.ProtobufRpcEngine; @@ -91,9 +93,9 @@ import java.io.IOException; import java.net.InetSocketAddress; -import java.util.Collections; import java.util.LinkedList; import java.util.List; +import java.util.UUID; import java.util.stream.Collectors; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_ADDRESS_KEY; @@ -120,6 +122,7 @@ private final StorageContainerManager scm; private final InetSocketAddress datanodeRpcAddress; private final SCMDatanodeHeartbeatDispatcher heartbeatDispatcher; + private final EventPublisher eventPublisher; public SCMDatanodeProtocolServer(final OzoneConfiguration conf, StorageContainerManager scm, EventPublisher eventPublisher) @@ -129,11 +132,13 @@ public SCMDatanodeProtocolServer(final OzoneConfiguration conf, Preconditions.checkNotNull(eventPublisher, "EventPublisher cannot be null"); this.scm = scm; + this.eventPublisher = eventPublisher; final int handlerCount = conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_DEFAULT); - heartbeatDispatcher = new SCMDatanodeHeartbeatDispatcher(eventPublisher); + heartbeatDispatcher = new SCMDatanodeHeartbeatDispatcher( + scm.getScmNodeManager(), eventPublisher); RPC.setProtocolEngine(conf, StorageContainerDatanodeProtocolPB.class, ProtobufRpcEngine.class); @@ -195,7 +200,10 @@ public SCMRegisteredResponseProto register( if (registeredCommand.getError() == SCMRegisteredResponseProto.ErrorCode.success) { scm.getScmContainerManager().processContainerReports(datanodeDetails, - containerReportsProto); + containerReportsProto, true); + eventPublisher.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + new NodeRegistrationContainerReport(datanodeDetails, + containerReportsProto)); } return getRegisteredResponse(registeredCommand); } @@ -214,22 +222,13 @@ public static SCMRegisteredResponseProto getRegisteredResponse( @Override public SCMHeartbeatResponseProto sendHeartbeat( - SCMHeartbeatRequestProto heartbeat) - throws IOException { - heartbeatDispatcher.dispatch(heartbeat); - - // TODO: Remove the below code after SCM refactoring. 
- DatanodeDetails datanodeDetails = DatanodeDetails - .getFromProtoBuf(heartbeat.getDatanodeDetails()); - NodeReportProto nodeReport = heartbeat.getNodeReport(); - List commands = - scm.getScmNodeManager().processHeartbeat(datanodeDetails); + SCMHeartbeatRequestProto heartbeat) throws IOException { List cmdResponses = new LinkedList<>(); - for (SCMCommand cmd : commands) { + for (SCMCommand cmd : heartbeatDispatcher.dispatch(heartbeat)) { cmdResponses.add(getCommandResponse(cmd)); } return SCMHeartbeatResponseProto.newBuilder() - .setDatanodeUUID(datanodeDetails.getUuidString()) + .setDatanodeUUID(heartbeat.getDatanodeDetails().getUuid()) .addAllCommands(cmdResponses).build(); } @@ -238,21 +237,8 @@ public ContainerBlocksDeletionACKResponseProto sendContainerBlocksDeletionACK( ContainerBlocksDeletionACKProto acks) throws IOException { if (acks.getResultsCount() > 0) { List resultList = acks.getResultsList(); - for (DeleteBlockTransactionResult result : resultList) { - if (LOG.isDebugEnabled()) { - LOG.debug("Got block deletion ACK from datanode, TXIDs={}, " - + "success={}", result.getTxID(), result.getSuccess()); - } - if (result.getSuccess()) { - LOG.debug("Purging TXID={} from block deletion log", - result.getTxID()); - scm.getScmBlockManager().getDeletedBlockLog() - .commitTransactions(Collections.singletonList(result.getTxID())); - } else { - LOG.warn("Got failed ACK for TXID={}, prepare to resend the " - + "TX in next interval", result.getTxID()); - } - } + scm.getScmBlockManager().getDeletedBlockLog() + .commitTransactions(resultList, UUID.fromString(acks.getDnId())); } return ContainerBlocksDeletionACKResponseProto.newBuilder() .getDefaultInstanceForType(); @@ -326,4 +312,16 @@ public void stop() { IOUtils.cleanupWithLogger(LOG, scm.getScmNodeManager()); } + /** + * Wrapper class for events with the datanode origin. 
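[Editor's note] Every report is published wrapped together with its origin datanode, using ReportFromDatanode and subclasses such as NodeRegistrationContainerReport (defined just below), so that handlers like SCMChillModeManager subscribe to exactly one payload type. A toy version of that pattern, with all types here being placeholders rather than HDDS classes:

public final class ReportWrapperSketch {

  static class ReportFromDatanode<T> {
    private final String datanodeUuid; // stands in for DatanodeDetails
    private final T report;

    ReportFromDatanode(String datanodeUuid, T report) {
      this.datanodeUuid = datanodeUuid;
      this.report = report;
    }

    String getDatanodeUuid() { return datanodeUuid; }
    T getReport() { return report; }
  }

  // Analogue of NodeRegistrationContainerReport: same payload shape, but a
  // distinct type so a handler can subscribe to exactly this event.
  static class NodeRegistrationContainerReport
      extends ReportFromDatanode<String> {
    NodeRegistrationContainerReport(String datanodeUuid, String report) {
      super(datanodeUuid, report);
    }
  }

  interface EventHandler<P> {
    void onMessage(P payload);
  }

  public static void main(String[] args) {
    EventHandler<NodeRegistrationContainerReport> chillModeHandler =
        payload -> System.out.println("registration report from "
            + payload.getDatanodeUuid() + ": " + payload.getReport());

    chillModeHandler.onMessage(
        new NodeRegistrationContainerReport("dn-uuid-1", "3 containers"));
  }

  private ReportWrapperSketch() { }
}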
+ */ + public static class NodeRegistrationContainerReport extends + ReportFromDatanode { + + public NodeRegistrationContainerReport(DatanodeDetails datanodeDetails, + ContainerReportsProto report) { + super(datanodeDetails, report); + } + } + } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 568a86ab4fc..8e76606387d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -31,18 +31,41 @@ import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.block.BlockManager; import org.apache.hadoop.hdds.scm.block.BlockManagerImpl; +import org.apache.hadoop.hdds.scm.block.PendingDeleteHandler; +import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler; +import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler; +import org.apache.hadoop.hdds.scm.container.CloseContainerWatcher; +import org.apache.hadoop.hdds.scm.container.ContainerActionsHandler; import org.apache.hadoop.hdds.scm.container.ContainerMapping; +import org.apache.hadoop.hdds.scm.container.ContainerReportHandler; import org.apache.hadoop.hdds.scm.container.Mapping; +import org.apache.hadoop.hdds.scm.container.replication + .ReplicationActivityStatus; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.placement.algorithms + .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.container.placement.algorithms + .SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.scm.container.placement.metrics.ContainerStat; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMMetrics; +import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; +import org.apache.hadoop.hdds.scm.node.DeadNodeHandler; +import org.apache.hadoop.hdds.scm.node.NewNodeHandler; import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeReportHandler; import org.apache.hadoop.hdds.scm.node.SCMNodeManager; +import org.apache.hadoop.hdds.scm.node.StaleNodeHandler; +import org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap; +import org.apache.hadoop.hdds.scm.pipelines.PipelineCloseHandler; +import org.apache.hadoop.hdds.scm.pipelines.PipelineActionEventHandler; import org.apache.hadoop.hdds.server.ServiceRuntimeInfoImpl; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.io.IOUtils; @@ -52,9 +75,13 @@ import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.common.Storage.StorageState; import org.apache.hadoop.ozone.common.StorageInfo; +import org.apache.hadoop.ozone.lease.LeaseManager; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.StringUtils; + +import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys + .HDDS_SCM_WATCHER_TIMEOUT_DEFAULT; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,6 +97,7 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DB_CACHE_SIZE_MB; + import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ENABLED; import static org.apache.hadoop.util.ExitUtil.terminate; @@ -124,6 +152,8 @@ private final Mapping scmContainerManager; private final BlockManager scmBlockManager; private final SCMStorage scmStorage; + + private final EventQueue eventQueue; /* * HTTP endpoint for JMX access. */ @@ -142,6 +172,13 @@ */ private Cache containerReportCache; + private final ReplicationManager replicationManager; + + private final LeaseManager commandWatcherLeaseManager; + + private final ReplicationActivityStatus replicationStatus; + private final SCMChillModeManager scmChillModeManager; + /** * Creates a new StorageContainerManager. Configuration will be updated * with information on the @@ -162,17 +199,70 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException { throw new SCMException("SCM not initialized.", ResultCodes .SCM_NOT_INITIALIZED); } - EventQueue eventQueue = new EventQueue(); - SCMNodeManager nm = new SCMNodeManager(conf, scmStorage.getClusterID(), this); - scmNodeManager = nm; - eventQueue.addHandler(SCMNodeManager.DATANODE_COMMAND, nm); + eventQueue = new EventQueue(); + + scmNodeManager = new SCMNodeManager( + conf, scmStorage.getClusterID(), this, eventQueue); + scmContainerManager = new ContainerMapping( + conf, getScmNodeManager(), cacheSize, eventQueue); + scmBlockManager = new BlockManagerImpl( + conf, getScmNodeManager(), scmContainerManager, eventQueue); + + Node2ContainerMap node2ContainerMap = new Node2ContainerMap(); + + replicationStatus = new ReplicationActivityStatus(); + + CloseContainerEventHandler closeContainerHandler = + new CloseContainerEventHandler(scmContainerManager); + NodeReportHandler nodeReportHandler = + new NodeReportHandler(scmNodeManager); + + CommandStatusReportHandler cmdStatusReportHandler = + new CommandStatusReportHandler(); + + NewNodeHandler newNodeHandler = new NewNodeHandler(node2ContainerMap); + StaleNodeHandler staleNodeHandler = + new StaleNodeHandler(node2ContainerMap, scmContainerManager); + DeadNodeHandler deadNodeHandler = new DeadNodeHandler(node2ContainerMap, + getScmContainerManager().getStateManager()); + ContainerActionsHandler actionsHandler = new ContainerActionsHandler(); + PendingDeleteHandler pendingDeleteHandler = + new PendingDeleteHandler(scmBlockManager.getSCMBlockDeletingService()); + + ContainerReportHandler containerReportHandler = + new ContainerReportHandler(scmContainerManager, node2ContainerMap, + replicationStatus); + scmChillModeManager = new SCMChillModeManager(conf, + getScmContainerManager().getStateManager().getAllContainers(), + eventQueue); + PipelineActionEventHandler pipelineActionEventHandler = + new PipelineActionEventHandler(); + + PipelineCloseHandler pipelineCloseHandler = + new PipelineCloseHandler(scmContainerManager); + + long watcherTimeout = + conf.getTimeDuration(ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT, + HDDS_SCM_WATCHER_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); + + commandWatcherLeaseManager = new LeaseManager<>("CommandWatcher", + watcherTimeout); + + //TODO: support configurable containerPlacement policy + ContainerPlacementPolicy containerPlacementPolicy = + new 
SCMContainerPlacementCapacity(scmNodeManager, conf); - scmContainerManager = new ContainerMapping(conf, getScmNodeManager(), - cacheSize); + replicationManager = new ReplicationManager(containerPlacementPolicy, + scmContainerManager.getStateManager(), eventQueue, + commandWatcherLeaseManager); - scmBlockManager = - new BlockManagerImpl(conf, getScmNodeManager(), scmContainerManager); + // setup CloseContainer watcher + CloseContainerWatcher closeContainerWatcher = + new CloseContainerWatcher(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + SCMEvents.CLOSE_CONTAINER_STATUS, commandWatcherLeaseManager, + scmContainerManager); + closeContainerWatcher.start(eventQueue); scmAdminUsernames = conf.getTrimmedStringCollection(OzoneConfigKeys .OZONE_ADMINISTRATORS); @@ -187,6 +277,30 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException { clientProtocolServer = new SCMClientProtocolServer(conf, this); httpServer = new StorageContainerManagerHttpServer(conf); + eventQueue.addHandler(SCMEvents.DATANODE_COMMAND, scmNodeManager); + eventQueue.addHandler(SCMEvents.NODE_REPORT, nodeReportHandler); + eventQueue.addHandler(SCMEvents.CONTAINER_REPORT, containerReportHandler); + eventQueue.addHandler(SCMEvents.CONTAINER_ACTIONS, actionsHandler); + eventQueue.addHandler(SCMEvents.CLOSE_CONTAINER, closeContainerHandler); + eventQueue.addHandler(SCMEvents.NEW_NODE, newNodeHandler); + eventQueue.addHandler(SCMEvents.STALE_NODE, staleNodeHandler); + eventQueue.addHandler(SCMEvents.DEAD_NODE, deadNodeHandler); + eventQueue.addHandler(SCMEvents.CMD_STATUS_REPORT, cmdStatusReportHandler); + eventQueue.addHandler(SCMEvents.START_REPLICATION, + replicationStatus.getReplicationStatusListener()); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, + replicationStatus.getChillModeStatusListener()); + eventQueue + .addHandler(SCMEvents.PENDING_DELETE_STATUS, pendingDeleteHandler); + eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, + pipelineActionEventHandler); + eventQueue.addHandler(SCMEvents.PIPELINE_CLOSE, pipelineCloseHandler); + eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + scmChillModeManager); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, + (BlockManagerImpl) scmBlockManager); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, clientProtocolServer); + registerMXBean(); } @@ -255,8 +369,6 @@ public static void main(String[] argv) throws IOException { hParser.printGenericCommandUsage(System.err); System.exit(1); } - StringUtils.startupShutdownMessage(StorageContainerManager.class, argv, - LOG); StorageContainerManager scm = createSCM(hParser.getRemainingArgs(), conf); if (scm != null) { scm.start(); @@ -272,9 +384,10 @@ private static void printUsage(PrintStream out) { out.println(USAGE + "\n"); } - public static StorageContainerManager createSCM(String[] argv, + public static StorageContainerManager createSCM(String[] args, OzoneConfiguration conf) throws IOException { + String[] argv = (args == null) ? new String[0] : args; if (!HddsUtils.isHddsEnabled(conf)) { System.err.println( "SCM cannot be started in secure mode or when " + OZONE_ENABLED + "" + @@ -289,9 +402,13 @@ public static StorageContainerManager createSCM(String[] argv, } switch (startOpt) { case INIT: + StringUtils.startupShutdownMessage(StorageContainerManager.class, argv, + LOG); terminate(scmInit(conf) ? 
0 : 1); return null; case GENCLUSTERID: + StringUtils.startupShutdownMessage(StorageContainerManager.class, argv, + LOG); System.out.println("Generating new cluster id:"); System.out.println(StorageInfo.newClusterID()); terminate(0); @@ -301,6 +418,8 @@ public static StorageContainerManager createSCM(String[] argv, terminate(0); return null; default: + StringUtils.startupShutdownMessage(StorageContainerManager.class, argv, + LOG); return new StorageContainerManager(conf); } } @@ -508,6 +627,8 @@ public void start() throws IOException { "StorageContainerLocationProtocol RPC server", getClientRpcAddress())); DefaultMetricsSystem.initialize("StorageContainerManager"); + + commandWatcherLeaseManager.start(); getClientProtocolServer().start(); LOG.info(buildRpcServerStartMessage("ScmBlockLocationProtocol RPC " + @@ -520,7 +641,8 @@ public void start() throws IOException { httpServer.start(); scmBlockManager.start(); - + replicationStatus.start(); + replicationManager.start(); setStartTime(); } @@ -529,6 +651,28 @@ public void start() throws IOException { */ public void stop() { + try { + LOG.info("Stopping Replication Activity Status tracker."); + replicationStatus.close(); + } catch (Exception ex) { + LOG.error("Replication Activity Status tracker stop failed.", ex); + } + + + try { + LOG.info("Stopping Replication Manager Service."); + replicationManager.stop(); + } catch (Exception ex) { + LOG.error("Replication manager service stop failed.", ex); + } + + try { + LOG.info("Stopping Lease Manager of the command watchers"); + commandWatcherLeaseManager.shutdown(); + } catch (Exception ex) { + LOG.error("Lease Manager of the command watchers stop failed"); + } + try { LOG.info("Stopping datanode service RPC server"); getDatanodeProtocolServer().stop(); @@ -685,6 +829,22 @@ public ContainerStat getContainerReport(String datanodeUuid) { return id2StatMap; } + public boolean isInChillMode() { + return scmChillModeManager.getInChillMode(); + } + + /** + * Returns EventPublisher. + */ + public EventPublisher getEventQueue(){ + return eventQueue; + } + + @VisibleForTesting + public double getCurrentContainerThreshold() { + return scmChillModeManager.getCurrentContainerThreshold(); + } + /** * Startup options. */ diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java new file mode 100644 index 00000000000..50d1eedbbe7 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/HddsTestUtils.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdds.scm; + +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer + .NodeRegistrationContainerReport; + +/** + * Stateless helper functions for Hdds tests. + */ +public final class HddsTestUtils { + + private HddsTestUtils() { + } + + /** + * Create Command Status report object. + * + * @param numOfContainers number of containers to be included in report. + * @return CommandStatusReportsProto + */ + public static NodeRegistrationContainerReport + createNodeRegistrationContainerReport(int numOfContainers) { + return new NodeRegistrationContainerReport( + TestUtils.randomDatanodeDetails(), + TestUtils.getRandomContainerReports(numOfContainers)); + } + + /** + * Create NodeRegistrationContainerReport object. + * + * @param dnContainers List of containers to be included in report + * @return NodeRegistrationContainerReport + */ + public static NodeRegistrationContainerReport + createNodeRegistrationContainerReport(List dnContainers) { + List + containers = new ArrayList<>(); + dnContainers.forEach(c -> { + containers.add(TestUtils.getRandomContainerInfo(c.getContainerID())); + }); + return new NodeRegistrationContainerReport( + TestUtils.randomDatanodeDetails(), + TestUtils.getContainerReports(containers)); + } + + /** + * Creates list of ContainerInfo. + * + * @param numContainers number of ContainerInfo to be included in list. + * @return List + */ + public static List getContainerInfo(int numContainers) { + List containerInfoList = new ArrayList<>(); + for (int i = 0; i < numContainers; i++) { + ContainerInfo.Builder builder = new ContainerInfo.Builder(); + containerInfoList.add(builder + .setContainerID(RandomUtils.nextLong()) + .build()); + } + return containerInfoList; + } + +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestUtils.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestUtils.java index 7568bf313ba..7af9dda4fb0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestUtils.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/TestUtils.java @@ -17,113 +17,101 @@ package org.apache.hadoop.hdds.scm; import com.google.common.base.Preconditions; +import org.mockito.Mockito; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerInfo; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol .proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; +import org.apache.hadoop.hdds.protocol + .proto.StorageContainerDatanodeProtocolProtos.CommandStatus; +import org.apache.hadoop.hdds.protocol + .proto.StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.StorageTypeProto; +import org.apache.hadoop.hdds.scm.container.ContainerStateManager; +import 
org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.node.SCMNodeManager; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand; +import java.io.File; +import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; -import java.util.Random; import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; /** * Stateless helper functions to handler scm/datanode connection. */ public final class TestUtils { - private TestUtils() { - } - - public static DatanodeDetails getDatanodeDetails(SCMNodeManager nodeManager) { + private static ThreadLocalRandom random = ThreadLocalRandom.current(); - return getDatanodeDetails(nodeManager, UUID.randomUUID().toString()); + private TestUtils() { } /** - * Create a new DatanodeDetails with NodeID set to the string. + * Creates DatanodeDetails with random UUID. * - * @param uuid - node ID, it is generally UUID. - * @return DatanodeID. - */ - public static DatanodeDetails getDatanodeDetails(SCMNodeManager nodeManager, - String uuid) { - DatanodeDetails datanodeDetails = getDatanodeDetails(uuid); - nodeManager.register(datanodeDetails, null); - return datanodeDetails; - } - - /** - * Create Node Report object. - * @return NodeReportProto + * @return DatanodeDetails */ - public static NodeReportProto createNodeReport( - List reports) { - NodeReportProto.Builder nodeReport = NodeReportProto.newBuilder(); - nodeReport.addAllStorageReport(reports); - return nodeReport.build(); + public static DatanodeDetails randomDatanodeDetails() { + return createDatanodeDetails(UUID.randomUUID()); } /** - * Create SCM Storage Report object. - * @return list of SCMStorageReport + * Creates DatanodeDetails using the given UUID. + * + * @param uuid Datanode's UUID + * + * @return DatanodeDetails */ - public static List createStorageReport(long capacity, - long used, long remaining, String path, StorageTypeProto type, String id, - int count) { - List reportList = new ArrayList<>(); - for (int i = 0; i < count; i++) { - Preconditions.checkNotNull(path); - Preconditions.checkNotNull(id); - StorageReportProto.Builder srb = StorageReportProto.newBuilder(); - srb.setStorageUuid(id).setStorageLocation(path).setCapacity(capacity) - .setScmUsed(used).setRemaining(remaining); - StorageTypeProto storageTypeProto = - type == null ? StorageTypeProto.DISK : type; - srb.setStorageType(storageTypeProto); - reportList.add(srb.build()); - } - return reportList; + private static DatanodeDetails createDatanodeDetails(UUID uuid) { + String ipAddress = random.nextInt(256) + + "." + random.nextInt(256) + + "." + random.nextInt(256) + + "." + random.nextInt(256); + return createDatanodeDetails(uuid.toString(), "localhost", ipAddress); } - /** - * Get specified number of DatanodeDetails and registered them with node - * manager. + * Generates DatanodeDetails from RegisteredCommand. + * + * @param registeredCommand registration response from SCM * - * @param nodeManager - node manager to register the datanode ids. - * @param count - number of DatanodeDetails needed. 
- * @return + * @return DatanodeDetails */ - public static List getListOfRegisteredDatanodeDetails( - SCMNodeManager nodeManager, int count) { - ArrayList datanodes = new ArrayList<>(); - for (int i = 0; i < count; i++) { - datanodes.add(getDatanodeDetails(nodeManager)); - } - return datanodes; + public static DatanodeDetails getDatanodeDetails( + RegisteredCommand registeredCommand) { + return createDatanodeDetails(registeredCommand.getDatanodeUUID(), + registeredCommand.getHostName(), registeredCommand.getIpAddress()); } /** - * Get a datanode details. + * Creates DatanodeDetails with the given information. + * + * @param uuid Datanode's UUID + * @param hostname hostname of Datanode + * @param ipAddress ip address of Datanode * * @return DatanodeDetails */ - public static DatanodeDetails getDatanodeDetails() { - return getDatanodeDetails(UUID.randomUUID().toString()); - } - - private static DatanodeDetails getDatanodeDetails(String uuid) { - Random random = new Random(); - String ipAddress = - random.nextInt(256) + "." + random.nextInt(256) + "." + random - .nextInt(256) + "." + random.nextInt(256); - - String hostName = uuid; + private static DatanodeDetails createDatanodeDetails(String uuid, + String hostname, String ipAddress) { DatanodeDetails.Port containerPort = DatanodeDetails.newPort( DatanodeDetails.Port.Name.STANDALONE, 0); DatanodeDetails.Port ratisPort = DatanodeDetails.newPort( @@ -132,7 +120,7 @@ private static DatanodeDetails getDatanodeDetails(String uuid) { DatanodeDetails.Port.Name.REST, 0); DatanodeDetails.Builder builder = DatanodeDetails.newBuilder(); builder.setUuid(uuid) - .setHostName("localhost") + .setHostName(hostname) .setIpAddress(ipAddress) .addPort(containerPort) .addPort(ratisPort) @@ -141,16 +129,297 @@ private static DatanodeDetails getDatanodeDetails(String uuid) { } /** - * Get specified number of list of DatanodeDetails. + * Creates a random DatanodeDetails and register it with the given + * NodeManager. * - * @param count - number of datanode IDs needed. - * @return + * @param nodeManager NodeManager + * + * @return DatanodeDetails + */ + public static DatanodeDetails createRandomDatanodeAndRegister( + SCMNodeManager nodeManager) { + return getDatanodeDetails( + nodeManager.register(randomDatanodeDetails(), null)); + } + + /** + * Get specified number of DatanodeDetails and register them with node + * manager. + * + * @param nodeManager node manager to register the datanode ids. + * @param count number of DatanodeDetails needed. + * + * @return list of DatanodeDetails */ - public static List getListOfDatanodeDetails(int count) { + public static List getListOfRegisteredDatanodeDetails( + SCMNodeManager nodeManager, int count) { ArrayList datanodes = new ArrayList<>(); for (int i = 0; i < count; i++) { - datanodes.add(getDatanodeDetails()); + datanodes.add(createRandomDatanodeAndRegister(nodeManager)); } return datanodes; } + + /** + * Generates a random NodeReport. + * + * @return NodeReportProto + */ + public static NodeReportProto getRandomNodeReport() { + return getRandomNodeReport(1); + } + + /** + * Generates random NodeReport with the given number of storage report in it. 
+ * + * @param numberOfStorageReport number of storage report this node report + * should have + * @return NodeReportProto + */ + public static NodeReportProto getRandomNodeReport(int numberOfStorageReport) { + UUID nodeId = UUID.randomUUID(); + return getRandomNodeReport(nodeId, File.separator + nodeId, + numberOfStorageReport); + } + + /** + * Generates random NodeReport for the given nodeId with the given + * base path and number of storage report in it. + * + * @param nodeId datanode id + * @param basePath base path of storage directory + * @param numberOfStorageReport number of storage report + * + * @return NodeReportProto + */ + public static NodeReportProto getRandomNodeReport(UUID nodeId, + String basePath, int numberOfStorageReport) { + List storageReports = new ArrayList<>(); + for (int i = 0; i < numberOfStorageReport; i++) { + storageReports.add(getRandomStorageReport(nodeId, + basePath + File.separator + i)); + } + return createNodeReport(storageReports); + } + + /** + * Creates NodeReport with the given storage reports. + * + * @param reports one or more storage report + * + * @return NodeReportProto + */ + public static NodeReportProto createNodeReport( + StorageReportProto... reports) { + return createNodeReport(Arrays.asList(reports)); + } + + /** + * Creates NodeReport with the given storage reports. + * + * @param reports storage reports to be included in the node report. + * + * @return NodeReportProto + */ + public static NodeReportProto createNodeReport( + List reports) { + NodeReportProto.Builder nodeReport = NodeReportProto.newBuilder(); + nodeReport.addAllStorageReport(reports); + return nodeReport.build(); + } + + /** + * Generates random storage report. + * + * @param nodeId datanode id for which the storage report belongs to + * @param path path of the storage + * + * @return StorageReportProto + */ + public static StorageReportProto getRandomStorageReport(UUID nodeId, + String path) { + return createStorageReport(nodeId, path, + random.nextInt(1000), + random.nextInt(500), + random.nextInt(500), + StorageTypeProto.DISK); + } + + /** + * Creates storage report with the given information. + * + * @param nodeId datanode id + * @param path storage dir + * @param capacity storage size + * @param used space used + * @param remaining space remaining + * @param type type of storage + * + * @return StorageReportProto + */ + public static StorageReportProto createStorageReport(UUID nodeId, String path, + long capacity, long used, long remaining, StorageTypeProto type) { + Preconditions.checkNotNull(nodeId); + Preconditions.checkNotNull(path); + StorageReportProto.Builder srb = StorageReportProto.newBuilder(); + srb.setStorageUuid(nodeId.toString()) + .setStorageLocation(path) + .setCapacity(capacity) + .setScmUsed(used) + .setRemaining(remaining); + StorageTypeProto storageTypeProto = + type == null ? StorageTypeProto.DISK : type; + srb.setStorageType(storageTypeProto); + return srb.build(); + } + + + /** + * Generates random container reports. + * + * @return ContainerReportsProto + */ + public static ContainerReportsProto getRandomContainerReports() { + return getRandomContainerReports(1); + } + + /** + * Generates random container report with the given number of containers. 
+ * + * @param numberOfContainers number of containers to be in container report + * + * @return ContainerReportsProto + */ + public static ContainerReportsProto getRandomContainerReports( + int numberOfContainers) { + List containerInfos = new ArrayList<>(); + for (int i = 0; i < numberOfContainers; i++) { + containerInfos.add(getRandomContainerInfo(i)); + } + return getContainerReports(containerInfos); + } + + /** + * Creates container report with the given ContainerInfo(s). + * + * @param containerInfos one or more ContainerInfo + * + * @return ContainerReportsProto + */ + public static ContainerReportsProto getContainerReports( + ContainerInfo... containerInfos) { + return getContainerReports(Arrays.asList(containerInfos)); + } + + /** + * Creates container report with the given ContainerInfo(s). + * + * @param containerInfos list of ContainerInfo + * + * @return ContainerReportsProto + */ + public static ContainerReportsProto getContainerReports( + List containerInfos) { + ContainerReportsProto.Builder + reportsBuilder = ContainerReportsProto.newBuilder(); + for (ContainerInfo containerInfo : containerInfos) { + reportsBuilder.addReports(containerInfo); + } + return reportsBuilder.build(); + } + + /** + * Generates random ContainerInfo. + * + * @param containerId container id of the ContainerInfo + * + * @return ContainerInfo + */ + public static ContainerInfo getRandomContainerInfo(long containerId) { + return createContainerInfo(containerId, + OzoneConsts.GB * 5, + random.nextLong(1000), + OzoneConsts.GB * random.nextInt(5), + random.nextLong(1000), + OzoneConsts.GB * random.nextInt(2), + random.nextLong(1000), + OzoneConsts.GB * random.nextInt(5)); + } + + /** + * Creates ContainerInfo with the given details. + * + * @param containerId id of the container + * @param size size of container + * @param keyCount number of keys + * @param bytesUsed bytes used by the container + * @param readCount number of reads + * @param readBytes bytes read + * @param writeCount number of writes + * @param writeBytes bytes written + * + * @return ContainerInfo + */ + public static ContainerInfo createContainerInfo( + long containerId, long size, long keyCount, long bytesUsed, + long readCount, long readBytes, long writeCount, long writeBytes) { + return ContainerInfo.newBuilder() + .setContainerID(containerId) + .setSize(size) + .setKeyCount(keyCount) + .setUsed(bytesUsed) + .setReadCount(readCount) + .setReadBytes(readBytes) + .setWriteCount(writeCount) + .setWriteBytes(writeBytes) + .build(); + } + + /** + * Create Command Status report object. 
+ * @return CommandStatusReportsProto + */ + public static CommandStatusReportsProto createCommandStatusReport( + List reports) { + CommandStatusReportsProto.Builder report = CommandStatusReportsProto + .newBuilder(); + report.addAllCmdStatus(reports); + return report.build(); + } + + public static + org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo + allocateContainer(ContainerStateManager containerStateManager) + throws IOException { + + PipelineSelector pipelineSelector = Mockito.mock(PipelineSelector.class); + + Pipeline pipeline = new Pipeline("leader", HddsProtos.LifeCycleState.CLOSED, + HddsProtos.ReplicationType.STAND_ALONE, + HddsProtos.ReplicationFactor.THREE, + PipelineID.randomId()); + + when(pipelineSelector + .getReplicationPipeline(HddsProtos.ReplicationType.STAND_ALONE, + HddsProtos.ReplicationFactor.THREE)).thenReturn(pipeline); + + return containerStateManager + .allocateContainer(pipelineSelector, + HddsProtos.ReplicationType.STAND_ALONE, + HddsProtos.ReplicationFactor.THREE, "root").getContainerInfo(); + + } + + public static void closeContainer(ContainerStateManager containerStateManager, + org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo + container) + throws SCMException { + + containerStateManager.getContainerStateMap() + .updateState(container, container.getState(), LifeCycleState.CLOSING); + + containerStateManager.getContainerStateMap() + .updateState(container, container.getState(), LifeCycleState.CLOSED); + + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java index 9fbb9fa5f55..e70e44405d2 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java @@ -17,20 +17,28 @@ package org.apache.hadoop.hdds.scm.block; +import java.util.UUID; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerMapping; import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.server.SCMStorage; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.common.Storage.StorageState; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.test.GenericTestUtils; -import org.junit.AfterClass; +import org.junit.After; import org.junit.Assert; import org.junit.Before; -import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -38,8 +46,8 @@ import java.io.File; import java.io.IOException; import java.nio.file.Paths; -import java.util.Collections; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ENABLED; import static org.apache.hadoop.ozone.OzoneConsts.GB; import static 
org.apache.hadoop.ozone.OzoneConsts.MB; @@ -47,7 +55,7 @@ /** * Tests for SCM Block Manager. */ -public class TestBlockManager { +public class TestBlockManager implements EventHandler { private static ContainerMapping mapping; private static MockNodeManager nodeManager; private static BlockManagerImpl blockManager; @@ -56,25 +64,30 @@ private static HddsProtos.ReplicationFactor factor; private static HddsProtos.ReplicationType type; private static String containerOwner = "OZONE"; + private static EventQueue eventQueue; @Rule public ExpectedException thrown = ExpectedException.none(); - @BeforeClass - public static void setUp() throws Exception { + + @Before + public void setUp() throws Exception { Configuration conf = SCMTestUtils.getConf(); String path = GenericTestUtils .getTempPath(TestBlockManager.class.getSimpleName()); - - conf.set(OzoneConfigKeys.OZONE_METADATA_DIRS, path); testDir = Paths.get(path).toFile(); + conf.set(OzoneConfigKeys.OZONE_METADATA_DIRS, path); + eventQueue = new EventQueue(); boolean folderExisted = testDir.exists() || testDir.mkdirs(); if (!folderExisted) { throw new IOException("Unable to create test directory path"); } nodeManager = new MockNodeManager(true, 10); - mapping = new ContainerMapping(conf, nodeManager, 128); - blockManager = new BlockManagerImpl(conf, nodeManager, mapping); + mapping = new ContainerMapping(conf, nodeManager, 128, eventQueue); + blockManager = new BlockManagerImpl(conf, + nodeManager, mapping, eventQueue); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, blockManager); + eventQueue.addHandler(SCMEvents.START_REPLICATION, this); if(conf.getBoolean(ScmConfigKeys.DFS_CONTAINER_RATIS_ENABLED_KEY, ScmConfigKeys.DFS_CONTAINER_RATIS_ENABLED_DEFAULT)){ factor = HddsProtos.ReplicationFactor.THREE; @@ -85,52 +98,45 @@ public static void setUp() throws Exception { } } - @AfterClass - public static void cleanup() throws IOException { + @After + public void cleanup() throws IOException { blockManager.close(); mapping.close(); FileUtil.fullyDelete(testDir); } - @Before - public void clearChillMode() { - nodeManager.setChillmode(false); + private static StorageContainerManager getScm(OzoneConfiguration conf) + throws IOException { + conf.setBoolean(OZONE_ENABLED, true); + SCMStorage scmStore = new SCMStorage(conf); + if(scmStore.getState() != StorageState.INITIALIZED) { + String clusterId = UUID.randomUUID().toString(); + String scmId = UUID.randomUUID().toString(); + scmStore.setClusterId(clusterId); + scmStore.setScmId(scmId); + // writes the version file properties + scmStore.initialize(); + } + return StorageContainerManager.createSCM(null, conf); } @Test public void testAllocateBlock() throws Exception { + eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, false); + GenericTestUtils.waitFor(() -> { + return !blockManager.isScmInChillMode(); + }, 10, 1000 * 5); AllocatedBlock block = blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, type, factor, containerOwner); Assert.assertNotNull(block); } @Test - public void testDeleteBlock() throws Exception { - AllocatedBlock block = blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, - type, factor, containerOwner); - Assert.assertNotNull(block); - long transactionId = - mapping.getContainer(block.getBlockID().getContainerID()) - .getDeleteTransactionId(); - Assert.assertEquals(0, transactionId); - blockManager.deleteBlocks(Collections.singletonList( - block.getBlockID())); - Assert.assertEquals(++transactionId, - mapping.getContainer(block.getBlockID().getContainerID()) - .getDeleteTransactionId()); - - 
block = blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, - type, factor, containerOwner); - Assert.assertNotNull(block); - blockManager.deleteBlocks(Collections.singletonList( - block.getBlockID())); - Assert.assertEquals(++transactionId, - mapping.getContainer(block.getBlockID().getContainerID()) - .getDeleteTransactionId()); - } - - @Test - public void testAllocateOversizedBlock() throws IOException { + public void testAllocateOversizedBlock() throws Exception { + eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, false); + GenericTestUtils.waitFor(() -> { + return !blockManager.isScmInChillMode(); + }, 10, 1000 * 5); long size = 6 * GB; thrown.expectMessage("Unsupported block size"); AllocatedBlock block = blockManager.allocateBlock(size, @@ -139,10 +145,31 @@ public void testAllocateOversizedBlock() throws IOException { @Test - public void testChillModeAllocateBlockFails() throws IOException { - nodeManager.setChillmode(true); - thrown.expectMessage("Unable to create block while in chill mode"); + public void testAllocateBlockFailureInChillMode() throws Exception { + eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, true); + GenericTestUtils.waitFor(() -> { + return blockManager.isScmInChillMode(); + }, 10, 1000 * 5); + // Test1: In chill mode expect an SCMException. + thrown.expectMessage("ChillModePrecheck failed for " + + "allocateBlock"); blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, type, factor, containerOwner); } + + @Test + public void testAllocateBlockSucInChillMode() throws Exception { + // Test2: Exit chill mode and then try allocateBock again. + eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, false); + GenericTestUtils.waitFor(() -> { + return !blockManager.isScmInChillMode(); + }, 10, 1000 * 5); + Assert.assertNotNull(blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, + type, factor, containerOwner)); + } + + @Override + public void onMessage(Boolean aBoolean, EventPublisher publisher) { + System.out.println("test"); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java index 9255ec701ff..9f0e336df1f 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -32,6 +33,9 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto + .DeleteBlockTransactionResult; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.utils.MetadataKeyFilters; import org.apache.hadoop.utils.MetadataStore; @@ -45,6 +49,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import 
java.util.LinkedList; import java.util.List; @@ -56,7 +61,8 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_BLOCK_DELETION_MAX_RETRY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_DIRS; -import static org.mockito.Mockito.mock; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.when; /** * Tests for DeletedBlockLog. @@ -66,6 +72,8 @@ private static DeletedBlockLogImpl deletedBlockLog; private OzoneConfiguration conf; private File testDir; + private Mapping containerManager; + private List dnList; @Before public void setup() throws Exception { @@ -74,7 +82,36 @@ public void setup() throws Exception { conf = new OzoneConfiguration(); conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20); conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); - deletedBlockLog = new DeletedBlockLogImpl(conf); + containerManager = Mockito.mock(ContainerMapping.class); + deletedBlockLog = new DeletedBlockLogImpl(conf, containerManager); + dnList = new ArrayList<>(3); + setupContainerManager(); + } + + private void setupContainerManager() throws IOException { + dnList.add( + DatanodeDetails.newBuilder().setUuid(UUID.randomUUID().toString()) + .build()); + dnList.add( + DatanodeDetails.newBuilder().setUuid(UUID.randomUUID().toString()) + .build()); + dnList.add( + DatanodeDetails.newBuilder().setUuid(UUID.randomUUID().toString()) + .build()); + + ContainerInfo containerInfo = + new ContainerInfo.Builder().setContainerID(1).build(); + Pipeline pipeline = + new Pipeline(null, LifeCycleState.CLOSED, + ReplicationType.RATIS, ReplicationFactor.THREE, null); + pipeline.addMember(dnList.get(0)); + pipeline.addMember(dnList.get(1)); + pipeline.addMember(dnList.get(2)); + ContainerWithPipeline containerWithPipeline = + new ContainerWithPipeline(containerInfo, pipeline); + when(containerManager.getContainerWithPipeline(anyLong())) + .thenReturn(containerWithPipeline); + when(containerManager.getContainer(anyLong())).thenReturn(containerInfo); } @After @@ -101,45 +138,50 @@ public void tearDown() throws Exception { return blockMap; } - @Test - public void testGetTransactions() throws Exception { - List blocks = - deletedBlockLog.getTransactions(30); - Assert.assertEquals(0, blocks.size()); - - // Creates 40 TX in the log. - for (Map.Entry> entry : generateData(40).entrySet()){ - deletedBlockLog.addTransaction(entry.getKey(), entry.getValue()); + private void commitTransactions( + List transactionResults, + DatanodeDetails... dns) { + for (DatanodeDetails dnDetails : dns) { + deletedBlockLog + .commitTransactions(transactionResults, dnDetails.getUuid()); } + } - // Get first 30 TXs. - blocks = deletedBlockLog.getTransactions(30); - Assert.assertEquals(30, blocks.size()); - for (int i = 0; i < 30; i++) { - Assert.assertEquals(i + 1, blocks.get(i).getTxID()); - } + private void commitTransactions( + List transactionResults) { + commitTransactions(transactionResults, + dnList.toArray(new DatanodeDetails[3])); + } - // Get another 30 TXs. - // The log only 10 left, so this time it will only return 10 TXs. - blocks = deletedBlockLog.getTransactions(30); - Assert.assertEquals(10, blocks.size()); - for (int i = 30; i < 40; i++) { - Assert.assertEquals(i + 1, blocks.get(i - 30).getTxID()); - } + private void commitTransactions( + Collection deletedBlocksTransactions, + DatanodeDetails... 
dns) { + commitTransactions(deletedBlocksTransactions.stream() + .map(this::createDeleteBlockTransactionResult) + .collect(Collectors.toList()), dns); + } - // Get another 50 TXs. - // By now the position should have moved to the beginning, - // this call will return all 40 TXs. - blocks = deletedBlockLog.getTransactions(50); - Assert.assertEquals(40, blocks.size()); - for (int i = 0; i < 40; i++) { - Assert.assertEquals(i + 1, blocks.get(i).getTxID()); - } - List txIDs = new ArrayList<>(); - for (DeletedBlocksTransaction block : blocks) { - txIDs.add(block.getTxID()); - } - deletedBlockLog.commitTransactions(txIDs); + private void commitTransactions( + Collection deletedBlocksTransactions) { + commitTransactions(deletedBlocksTransactions.stream() + .map(this::createDeleteBlockTransactionResult) + .collect(Collectors.toList())); + } + + private DeleteBlockTransactionResult createDeleteBlockTransactionResult( + DeletedBlocksTransaction transaction) { + return DeleteBlockTransactionResult.newBuilder() + .setContainerID(transaction.getContainerID()).setSuccess(true) + .setTxID(transaction.getTxID()).build(); + } + + private List getTransactions( + int maximumAllowedTXNum) throws IOException { + DatanodeDeletedBlockTransactions transactions = + new DatanodeDeletedBlockTransactions(containerManager, + maximumAllowedTXNum, 3); + deletedBlockLog.getTransactions(transactions); + return transactions.getDatanodeTransactions(dnList.get(0).getUuid()); } @Test @@ -153,7 +195,7 @@ public void testIncrementCount() throws Exception { // This will return all TXs, total num 30. List blocks = - deletedBlockLog.getTransactions(40); + getTransactions(40); List txIDs = blocks.stream().map(DeletedBlocksTransaction::getTxID) .collect(Collectors.toList()); @@ -164,13 +206,13 @@ public void testIncrementCount() throws Exception { // Increment another time so it exceed the maxRetry. // On this call, count will be set to -1 which means TX eventually fails. deletedBlockLog.incrementCount(txIDs); - blocks = deletedBlockLog.getTransactions(40); + blocks = getTransactions(40); for (DeletedBlocksTransaction block : blocks) { Assert.assertEquals(-1, block.getCount()); } // If all TXs are failed, getTransactions call will always return nothing. - blocks = deletedBlockLog.getTransactions(40); + blocks = getTransactions(40); Assert.assertEquals(blocks.size(), 0); } @@ -180,16 +222,26 @@ public void testCommitTransactions() throws Exception { deletedBlockLog.addTransaction(entry.getKey(), entry.getValue()); } List blocks = - deletedBlockLog.getTransactions(20); - List txIDs = new ArrayList<>(); - for (DeletedBlocksTransaction block : blocks) { - txIDs.add(block.getTxID()); - } - // Add an invalid txID. - txIDs.add(70L); - deletedBlockLog.commitTransactions(txIDs); - blocks = deletedBlockLog.getTransactions(50); + getTransactions(20); + // Add an invalid txn. 
+ blocks.add( + DeletedBlocksTransaction.newBuilder().setContainerID(1).setTxID(70) + .setCount(0).addLocalID(0).build()); + commitTransactions(blocks); + blocks.remove(blocks.size() - 1); + + blocks = getTransactions(50); + Assert.assertEquals(30, blocks.size()); + commitTransactions(blocks, dnList.get(1), dnList.get(2), + DatanodeDetails.newBuilder().setUuid(UUID.randomUUID().toString()) + .build()); + + blocks = getTransactions(50); Assert.assertEquals(30, blocks.size()); + commitTransactions(blocks, dnList.get(0)); + + blocks = getTransactions(50); + Assert.assertEquals(0, blocks.size()); } @Test @@ -213,20 +265,16 @@ public void testRandomOperateTransactions() throws Exception { } added += 10; } else if (state == 1) { - blocks = deletedBlockLog.getTransactions(20); + blocks = getTransactions(20); txIDs = new ArrayList<>(); for (DeletedBlocksTransaction block : blocks) { txIDs.add(block.getTxID()); } deletedBlockLog.incrementCount(txIDs); } else if (state == 2) { - txIDs = new ArrayList<>(); - for (DeletedBlocksTransaction block : blocks) { - txIDs.add(block.getTxID()); - } + commitTransactions(blocks); + committed += blocks.size(); blocks = new ArrayList<>(); - committed += txIDs.size(); - deletedBlockLog.commitTransactions(txIDs); } else { // verify the number of added and committed. List> result = @@ -234,6 +282,8 @@ public void testRandomOperateTransactions() throws Exception { Assert.assertEquals(added, result.size() + committed); } } + blocks = getTransactions(1000); + commitTransactions(blocks); } @Test @@ -244,16 +294,13 @@ public void testPersistence() throws Exception { // close db and reopen it again to make sure // transactions are stored persistently. deletedBlockLog.close(); - deletedBlockLog = new DeletedBlockLogImpl(conf); + deletedBlockLog = new DeletedBlockLogImpl(conf, containerManager); List blocks = - deletedBlockLog.getTransactions(10); - List txIDs = new ArrayList<>(); - for (DeletedBlocksTransaction block : blocks) { - txIDs.add(block.getTxID()); - } - deletedBlockLog.commitTransactions(txIDs); - blocks = deletedBlockLog.getTransactions(10); - Assert.assertEquals(10, blocks.size()); + getTransactions(10); + commitTransactions(blocks); + blocks = getTransactions(100); + Assert.assertEquals(40, blocks.size()); + commitTransactions(blocks); } @Test @@ -262,32 +309,11 @@ public void testDeletedBlockTransactions() throws IOException { int maximumAllowedTXNum = 5; List blocks = null; List containerIDs = new LinkedList<>(); + DatanodeDetails dnId1 = dnList.get(0), dnId2 = dnList.get(1); int count = 0; long containerID = 0L; - DatanodeDetails.Port containerPort = DatanodeDetails.newPort( - DatanodeDetails.Port.Name.STANDALONE, 0); - DatanodeDetails.Port ratisPort = DatanodeDetails.newPort( - DatanodeDetails.Port.Name.RATIS, 0); - DatanodeDetails.Port restPort = DatanodeDetails.newPort( - DatanodeDetails.Port.Name.REST, 0); - DatanodeDetails dnId1 = DatanodeDetails.newBuilder() - .setUuid(UUID.randomUUID().toString()) - .setIpAddress("127.0.0.1") - .setHostName("localhost") - .addPort(containerPort) - .addPort(ratisPort) - .addPort(restPort) - .build(); - DatanodeDetails dnId2 = DatanodeDetails.newBuilder() - .setUuid(UUID.randomUUID().toString()) - .setIpAddress("127.0.0.1") - .setHostName("localhost") - .addPort(containerPort) - .addPort(ratisPort) - .addPort(restPort) - .build(); - Mapping mappingService = mock(ContainerMapping.class); + // Creates {TXNum} TX in the log. 
for (Map.Entry> entry : generateData(txNum) .entrySet()) { @@ -298,29 +324,25 @@ public void testDeletedBlockTransactions() throws IOException { // make TX[1-6] for datanode1; TX[7-10] for datanode2 if (count <= (maximumAllowedTXNum + 1)) { - mockContainerInfo(mappingService, containerID, dnId1); + mockContainerInfo(containerID, dnId1); } else { - mockContainerInfo(mappingService, containerID, dnId2); + mockContainerInfo(containerID, dnId2); } } DatanodeDeletedBlockTransactions transactions = - new DatanodeDeletedBlockTransactions(mappingService, + new DatanodeDeletedBlockTransactions(containerManager, maximumAllowedTXNum, 2); deletedBlockLog.getTransactions(transactions); - List txIDs = new LinkedList<>(); for (UUID id : transactions.getDatanodeIDs()) { List txs = transactions .getDatanodeTransactions(id); - for (DeletedBlocksTransaction tx : txs) { - txIDs.add(tx.getTxID()); - } + // delete TX ID + commitTransactions(txs); } - // delete TX ID - deletedBlockLog.commitTransactions(txIDs); - blocks = deletedBlockLog.getTransactions(txNum); + blocks = getTransactions(txNum); // There should be one block remained since dnID1 reaches // the maximum value (5). Assert.assertEquals(1, blocks.size()); @@ -337,7 +359,8 @@ public void testDeletedBlockTransactions() throws IOException { builder.setTxID(11); builder.setContainerID(containerID); builder.setCount(0); - transactions.addTransaction(builder.build()); + transactions.addTransaction(builder.build(), + null); // The number of TX in dnID2 should not be changed. Assert.assertEquals(size, @@ -349,30 +372,32 @@ public void testDeletedBlockTransactions() throws IOException { builder.setTxID(12); builder.setContainerID(containerID); builder.setCount(0); - mockContainerInfo(mappingService, containerID, dnId2); - transactions.addTransaction(builder.build()); + mockContainerInfo(containerID, dnId2); + transactions.addTransaction(builder.build(), + null); // Since all node are full, then transactions is full. 
Assert.assertTrue(transactions.isFull()); } - private void mockContainerInfo(Mapping mappingService, long containerID, - DatanodeDetails dd) throws IOException { + private void mockContainerInfo(long containerID, DatanodeDetails dd) + throws IOException { Pipeline pipeline = new Pipeline("fake", LifeCycleState.OPEN, - ReplicationType.STAND_ALONE, ReplicationFactor.ONE, "fake"); + ReplicationType.STAND_ALONE, ReplicationFactor.ONE, + PipelineID.randomId()); pipeline.addMember(dd); ContainerInfo.Builder builder = new ContainerInfo.Builder(); - builder.setPipelineName(pipeline.getPipelineName()) + builder.setPipelineID(pipeline.getId()) .setReplicationType(pipeline.getType()) .setReplicationFactor(pipeline.getFactor()); ContainerInfo containerInfo = builder.build(); ContainerWithPipeline containerWithPipeline = new ContainerWithPipeline( containerInfo, pipeline); - Mockito.doReturn(containerInfo).when(mappingService) + Mockito.doReturn(containerInfo).when(containerManager) .getContainer(containerID); - Mockito.doReturn(containerWithPipeline).when(mappingService) + Mockito.doReturn(containerWithPipeline).when(containerManager) .getContainerWithPipeline(containerID); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/command/TestCommandStatusReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/command/TestCommandStatusReportHandler.java new file mode 100644 index 00000000000..65a2e296318 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/command/TestCommandStatusReportHandler.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdds.scm.command; + +import org.apache.hadoop.hdds.HddsIdFactory; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatus; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .CommandStatusReportFromDatanode; + +import org.apache.hadoop.hdds.server.events.Event; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; + +public class TestCommandStatusReportHandler implements EventPublisher { + + private static final Logger LOG = LoggerFactory + .getLogger(TestCommandStatusReportHandler.class); + private CommandStatusReportHandler cmdStatusReportHandler; + private String storagePath = GenericTestUtils.getRandomizedTempPath() + .concat("/" + UUID.randomUUID().toString()); + + @Before + public void setup() { + cmdStatusReportHandler = new CommandStatusReportHandler(); + } + + @Test + public void testCommandStatusReport() { + GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer + .captureLogs(LOG); + + CommandStatusReportFromDatanode report = this.getStatusReport(Collections + .emptyList()); + cmdStatusReportHandler.onMessage(report, this); + assertFalse(logCapturer.getOutput().contains("DeleteBlockCommandStatus")); + assertFalse(logCapturer.getOutput().contains( + "CloseContainerCommandStatus")); + assertFalse(logCapturer.getOutput().contains("ReplicateCommandStatus")); + + + report = this.getStatusReport(this.getCommandStatusList()); + cmdStatusReportHandler.onMessage(report, this); + assertTrue(logCapturer.getOutput().contains("firing event of type " + + "DeleteBlockCommandStatus")); + assertTrue(logCapturer.getOutput().contains("firing event of type " + + "CloseContainerCommandStatus")); + assertTrue(logCapturer.getOutput().contains("firing event of type " + + "ReplicateCommandStatus")); + + assertTrue(logCapturer.getOutput().contains("type: " + + "closeContainerCommand")); + assertTrue(logCapturer.getOutput().contains("type: " + + "deleteBlocksCommand")); + assertTrue(logCapturer.getOutput().contains("type: " + + "replicateContainerCommand")); + + } + + private CommandStatusReportFromDatanode getStatusReport( + List reports) { + CommandStatusReportsProto report = TestUtils.createCommandStatusReport( + reports); + DatanodeDetails dn = TestUtils.randomDatanodeDetails(); + return new SCMDatanodeHeartbeatDispatcher.CommandStatusReportFromDatanode( + dn, report); + } + + @Override + public > void fireEvent + (EVENT_TYPE event, PAYLOAD payload) { + LOG.info("firing event of type {}, payload {}", event.getName(), payload + .toString()); + } + + private List getCommandStatusList() { + List reports = new ArrayList<>(3); + + // Add status message for replication, close container and delete block + // command. 
+ CommandStatus.Builder builder = CommandStatus.newBuilder(); + + builder.setCmdId(HddsIdFactory.getLongId()) + .setStatus(CommandStatus.Status.EXECUTED) + .setType(Type.deleteBlocksCommand); + reports.add(builder.build()); + + builder.setCmdId(HddsIdFactory.getLongId()) + .setStatus(CommandStatus.Status.EXECUTED) + .setType(Type.closeContainerCommand); + reports.add(builder.build()); + + builder.setMsg("Not enough space") + .setCmdId(HddsIdFactory.getLongId()) + .setStatus(CommandStatus.Status.FAILED) + .setType(Type.replicateContainerCommand); + reports.add(builder.build()); + return reports; + } + +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/command/package-info.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/command/package-info.java new file mode 100644 index 00000000000..f529c20e74e --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/command/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +/** + * Make CheckStyle Happy. 
+ */ +package org.apache.hadoop.hdds.scm.command; \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java index 33579928798..088b7005cd0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.hdds.scm.container; +import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.scm.node.NodeManager; @@ -26,8 +27,10 @@ .StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.protocol.VersionResponse; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.assertj.core.util.Preconditions; @@ -39,7 +42,6 @@ import java.util.Map; import java.util.UUID; -import static org.apache.hadoop.hdds.scm.TestUtils.getDatanodeDetails; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState .HEALTHY; @@ -78,7 +80,7 @@ public MockNodeManager(boolean initializeFakeNodes, int nodeCount) { aggregateStat = new SCMNodeStat(); if (initializeFakeNodes) { for (int x = 0; x < nodeCount; x++) { - DatanodeDetails dd = getDatanodeDetails(); + DatanodeDetails dd = TestUtils.randomDatanodeDetails(); populateNodeMetric(dd, x); } } @@ -293,6 +295,17 @@ public void addDatanodeCommand(UUID dnId, SCMCommand command) { } } + /** + * Empty implementation for processNodeReport. + * + * @param dnUuid + * @param nodeReport + */ + @Override + public void processNodeReport(UUID dnUuid, NodeReportProto nodeReport) { + // do nothing + } + // Returns the number of commands that is queued to this node manager. public int getCommandCount(DatanodeDetails dd) { List list = commandMap.get(dd.getUuid()); @@ -399,6 +412,13 @@ public void delContainer(DatanodeDetails datanodeDetails, long size) { } } + @Override + public void onMessage(CommandForDatanode commandForDatanode, + EventPublisher publisher) { + addDatanodeCommand(commandForDatanode.getDatanodeId(), + commandForDatanode.getCommand()); + } + /** * A class to declare some values for the nodes so that our tests * won't fail. 
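The MockNodeManager change just above plugs the mock into SCM's event wiring: it now implements onMessage(CommandForDatanode, EventPublisher) so that commands fired on the event queue land in its per-datanode command map. The tests in this patch all follow the same cycle of addHandler, fireEvent and processAll. The following standalone sketch is not part of the patch; the class name is invented for illustration, and it assumes only the EventQueue, EventHandler and SCMEvents.CHILL_MODE_STATUS calls already used in these tests:

import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.server.events.EventHandler;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;

public final class ChillModeEventSketch {
  public static void main(String[] args) {
    EventQueue queue = new EventQueue();

    // A handler receives the typed payload plus a publisher it can use to
    // fire follow-up events, the same shape as MockNodeManager#onMessage.
    EventHandler<Boolean> handler =
        (Boolean inChillMode, EventPublisher publisher) ->
            System.out.println("chill mode status received: " + inChillMode);

    // CHILL_MODE_STATUS carries a Boolean payload in this patch, which is
    // why TestBlockManager fires it with true/false.
    queue.addHandler(SCMEvents.CHILL_MODE_STATUS, handler);
    queue.fireEvent(SCMEvents.CHILL_MODE_STATUS, Boolean.FALSE);

    // Handlers run asynchronously; processAll(timeoutMillis) waits for the
    // queued events to be handled, which is why the tests call it (or
    // GenericTestUtils.waitFor) before asserting on side effects.
    queue.processAll(1000);
  }
}

This is the same mechanism that lets TestBlockManager toggle chill mode purely through CHILL_MODE_STATUS events rather than calling setChillmode() on the node manager, as the removed clearChillMode() setup method used to do.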
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java index 721dbf6b582..38050c9d037 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestCloseContainerEventHandler.java @@ -17,11 +17,14 @@ package org.apache.hadoop.hdds.scm.container; +import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers + .ContainerWithPipeline; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.container.common.SCMTestUtils; @@ -33,12 +36,12 @@ import java.io.File; import java.io.IOException; -import java.util.Random; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.CREATE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.CREATED; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_GB; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CLOSE_CONTAINER; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND; /** * Tests the closeContainerEventHandler class. 
@@ -55,18 +58,19 @@ @BeforeClass public static void setUp() throws Exception { configuration = SCMTestUtils.getConf(); - size = configuration - .getLong(OZONE_SCM_CONTAINER_SIZE_GB, OZONE_SCM_CONTAINER_SIZE_DEFAULT) - * 1024 * 1024 * 1024; + size = (long)configuration.getStorageSize(OZONE_SCM_CONTAINER_SIZE, + OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES); testDir = GenericTestUtils .getTestDir(TestCloseContainerEventHandler.class.getSimpleName()); configuration .set(OzoneConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath()); nodeManager = new MockNodeManager(true, 10); - mapping = new ContainerMapping(configuration, nodeManager, 128); + mapping = new ContainerMapping(configuration, nodeManager, 128, + new EventQueue()); eventQueue = new EventQueue(); - eventQueue.addHandler(CloseContainerEventHandler.CLOSE_CONTAINER_EVENT, + eventQueue.addHandler(CLOSE_CONTAINER, new CloseContainerEventHandler(mapping)); + eventQueue.addHandler(DATANODE_COMMAND, nodeManager); } @AfterClass @@ -81,8 +85,8 @@ public static void tearDown() throws Exception { public void testIfCloseContainerEventHadnlerInvoked() { GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer .captureLogs(CloseContainerEventHandler.LOG); - eventQueue.fireEvent(CloseContainerEventHandler.CLOSE_CONTAINER_EVENT, - new ContainerID(Math.abs(new Random().nextLong()))); + eventQueue.fireEvent(CLOSE_CONTAINER, + new ContainerID(Math.abs(RandomUtils.nextInt()))); eventQueue.processAll(1000); Assert.assertTrue(logCapturer.getOutput() .contains("Close container Event triggered for container")); @@ -90,10 +94,10 @@ public void testIfCloseContainerEventHadnlerInvoked() { @Test public void testCloseContainerEventWithInvalidContainer() { - long id = Math.abs(new Random().nextLong()); + long id = Math.abs(RandomUtils.nextInt()); GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer .captureLogs(CloseContainerEventHandler.LOG); - eventQueue.fireEvent(CloseContainerEventHandler.CLOSE_CONTAINER_EVENT, + eventQueue.fireEvent(CLOSE_CONTAINER, new ContainerID(id)); eventQueue.processAll(1000); Assert.assertTrue(logCapturer.getOutput() @@ -112,24 +116,20 @@ public void testCloseContainerEventWithValidContainers() throws IOException { containerWithPipeline.getContainerInfo().getContainerID()); DatanodeDetails datanode = containerWithPipeline.getPipeline().getLeader(); int closeCount = nodeManager.getCommandCount(datanode); - eventQueue.fireEvent(CloseContainerEventHandler.CLOSE_CONTAINER_EVENT, id); + eventQueue.fireEvent(CLOSE_CONTAINER, id); eventQueue.processAll(1000); // At this point of time, the allocated container is not in open // state, so firing close container event should not queue CLOSE // command in the Datanode Assert.assertEquals(0, nodeManager.getCommandCount(datanode)); - // Make sure the information is logged - Assert.assertTrue(logCapturer.getOutput().contains( - "container with id : " + id.getId() - + " is in ALLOCATED state and need not be closed")); //Execute these state transitions so that we can close the container. 
- mapping.updateContainerState(id.getId(), CREATE); mapping.updateContainerState(id.getId(), CREATED); - eventQueue.fireEvent(CloseContainerEventHandler.CLOSE_CONTAINER_EVENT, + eventQueue.fireEvent(CLOSE_CONTAINER, new ContainerID( containerWithPipeline.getContainerInfo().getContainerID())); eventQueue.processAll(1000); - Assert.assertEquals(closeCount + 1, nodeManager.getCommandCount(datanode)); + Assert.assertEquals(closeCount + 1, + nodeManager.getCommandCount(datanode)); Assert.assertEquals(HddsProtos.LifeCycleState.CLOSING, mapping.getStateManager().getContainer(id).getState()); } @@ -145,7 +145,7 @@ public void testCloseContainerEventWithRatis() throws IOException { ContainerID id = new ContainerID( containerWithPipeline.getContainerInfo().getContainerID()); int[] closeCount = new int[3]; - eventQueue.fireEvent(CloseContainerEventHandler.CLOSE_CONTAINER_EVENT, id); + eventQueue.fireEvent(CLOSE_CONTAINER, id); eventQueue.processAll(1000); int i = 0; for (DatanodeDetails details : containerWithPipeline.getPipeline() @@ -159,14 +159,9 @@ public void testCloseContainerEventWithRatis() throws IOException { Assert.assertEquals(closeCount[i], nodeManager.getCommandCount(details)); i++; } - // Make sure the information is logged - Assert.assertTrue(logCapturer.getOutput().contains( - "container with id : " + id.getId() - + " is in ALLOCATED state and need not be closed")); //Execute these state transitions so that we can close the container. - mapping.updateContainerState(id.getId(), CREATE); mapping.updateContainerState(id.getId(), CREATED); - eventQueue.fireEvent(CloseContainerEventHandler.CLOSE_CONTAINER_EVENT, id); + eventQueue.fireEvent(CLOSE_CONTAINER, id); eventQueue.processAll(1000); i = 0; // Make sure close is queued for each datanode on the pipeline diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerActionsHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerActionsHandler.java new file mode 100644 index 00000000000..0997e1f5bc9 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerActionsHandler.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.container; + +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerActionsProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerAction; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ContainerActionsFromDatanode; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.junit.Test; +import org.mockito.Mockito; + +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +/** + * Tests ContainerActionsHandler. + */ +public class TestContainerActionsHandler { + + @Test + public void testCloseContainerAction() { + EventQueue queue = new EventQueue(); + ContainerActionsHandler actionsHandler = new ContainerActionsHandler(); + CloseContainerEventHandler closeContainerEventHandler = Mockito.mock( + CloseContainerEventHandler.class); + queue.addHandler(SCMEvents.CLOSE_CONTAINER, closeContainerEventHandler); + queue.addHandler(SCMEvents.CONTAINER_ACTIONS, actionsHandler); + + ContainerAction action = ContainerAction.newBuilder() + .setContainerID(1L) + .setAction(ContainerAction.Action.CLOSE) + .setReason(ContainerAction.Reason.CONTAINER_FULL) + .build(); + + ContainerActionsProto cap = ContainerActionsProto.newBuilder() + .addContainerActions(action) + .build(); + + ContainerActionsFromDatanode containerActions = + new ContainerActionsFromDatanode( + TestUtils.randomDatanodeDetails(), cap); + + queue.fireEvent(SCMEvents.CONTAINER_ACTIONS, containerActions); + + verify(closeContainerEventHandler, times(1)) + .onMessage(ContainerID.valueof(1L), queue); + + } + +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerMapping.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerMapping.java index 42ab126d4d7..224f6dddcac 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerMapping.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerMapping.java @@ -18,6 +18,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.scm.XceiverClientManager; @@ -30,9 +32,12 @@ .StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; @@ -84,7 +89,8 @@ public static void setUp() throws Exception { throw new IOException("Unable to create test directory path"); } nodeManager = new MockNodeManager(true, 10); - mapping = new ContainerMapping(conf, nodeManager, 128); + mapping = new ContainerMapping(conf, nodeManager, 128, + new EventQueue()); 
xceiverClientManager = new XceiverClientManager(conf); random = new Random(); } @@ -148,17 +154,55 @@ public void testGetContainer() throws IOException { } @Test - public void testgetNoneExistentContainer() throws IOException { - thrown.expectMessage("Specified key does not exist."); - mapping.getContainer(random.nextLong()); + public void testGetContainerWithPipeline() throws Exception { + ContainerWithPipeline containerWithPipeline = mapping.allocateContainer( + xceiverClientManager.getType(), + xceiverClientManager.getFactor(), + containerOwner); + ContainerInfo contInfo = containerWithPipeline.getContainerInfo(); + // Add dummy replicas for container. + DatanodeDetails dn1 = DatanodeDetails.newBuilder() + .setHostName("host1") + .setIpAddress("1.1.1.1") + .setUuid(UUID.randomUUID().toString()).build(); + DatanodeDetails dn2 = DatanodeDetails.newBuilder() + .setHostName("host2") + .setIpAddress("2.2.2.2") + .setUuid(UUID.randomUUID().toString()).build(); + mapping + .updateContainerState(contInfo.getContainerID(), LifeCycleEvent.CREATE); + mapping.updateContainerState(contInfo.getContainerID(), + LifeCycleEvent.CREATED); + mapping.updateContainerState(contInfo.getContainerID(), + LifeCycleEvent.FINALIZE); + mapping + .updateContainerState(contInfo.getContainerID(), LifeCycleEvent.CLOSE); + ContainerInfo finalContInfo = contInfo; + LambdaTestUtils.intercept(SCMException.class, "No entry exist for " + + "containerId:", () -> mapping.getContainerWithPipeline( + finalContInfo.getContainerID())); + + mapping.getStateManager().getContainerStateMap() + .addContainerReplica(contInfo.containerID(), dn1, dn2); + + contInfo = mapping.getContainer(contInfo.getContainerID()); + Assert.assertEquals(contInfo.getState(), LifeCycleState.CLOSED); + Pipeline pipeline = containerWithPipeline.getPipeline(); + mapping.getPipelineSelector().finalizePipeline(pipeline); + + ContainerWithPipeline containerWithPipeline2 = mapping + .getContainerWithPipeline(contInfo.getContainerID()); + pipeline = containerWithPipeline2.getPipeline(); + Assert.assertNotEquals(containerWithPipeline, containerWithPipeline2); + Assert.assertNotNull("Pipeline should not be null", pipeline); + Assert.assertTrue(pipeline.getDatanodeHosts().contains(dn1.getHostName())); + Assert.assertTrue(pipeline.getDatanodeHosts().contains(dn2.getHostName())); } @Test - public void testChillModeAllocateContainerFails() throws IOException { - nodeManager.setChillmode(true); - thrown.expectMessage("Unable to create container while in chill mode"); - mapping.allocateContainer(xceiverClientManager.getType(), - xceiverClientManager.getFactor(), containerOwner); + public void testgetNoneExistentContainer() throws IOException { + thrown.expectMessage("Specified key does not exist."); + mapping.getContainer(random.nextLong()); } @Test @@ -190,9 +234,9 @@ public void testContainerCreationLeaseTimeout() throws IOException, } @Test - public void testFullContainerReport() throws IOException { + public void testFullContainerReport() throws Exception { ContainerInfo info = createContainer(); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(); + DatanodeDetails datanodeDetails = TestUtils.randomDatanodeDetails(); List reports = new ArrayList<>(); StorageContainerDatanodeProtocolProtos.ContainerInfo.Builder ciBuilder = @@ -214,56 +258,26 @@ public void testFullContainerReport() throws IOException { .newBuilder(); crBuilder.addAllReports(reports); - mapping.processContainerReports(datanodeDetails, crBuilder.build()); + 
mapping.processContainerReports(datanodeDetails, crBuilder.build(), false); ContainerInfo updatedContainer = mapping.getContainer(info.getContainerID()); Assert.assertEquals(100000000L, updatedContainer.getNumberOfKeys()); Assert.assertEquals(2000000000L, updatedContainer.getUsedBytes()); - } - - @Test - public void testContainerCloseWithContainerReport() throws IOException { - ContainerInfo info = createContainer(); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(); - List reports = - new ArrayList<>(); - - StorageContainerDatanodeProtocolProtos.ContainerInfo.Builder ciBuilder = - StorageContainerDatanodeProtocolProtos.ContainerInfo.newBuilder(); - ciBuilder.setFinalhash("7c45eb4d7ed5e0d2e89aaab7759de02e") - .setSize(5368709120L) - .setUsed(5368705120L) - .setKeyCount(500000000L) - .setReadCount(500000000L) - .setWriteCount(500000000L) - .setReadBytes(5368705120L) - .setWriteBytes(5368705120L) - .setContainerID(info.getContainerID()) - .setDeleteTransactionId(0); - - reports.add(ciBuilder.build()); - ContainerReportsProto.Builder crBuilder = - ContainerReportsProto.newBuilder(); - crBuilder.addAllReports(reports); - - mapping.processContainerReports(datanodeDetails, crBuilder.build()); + for (StorageContainerDatanodeProtocolProtos.ContainerInfo c : reports) { + LambdaTestUtils.intercept(SCMException.class, "No entry " + + "exist for containerId:", () -> mapping.getStateManager() + .getContainerReplicas(ContainerID.valueof(c.getContainerID()))); + } - ContainerInfo updatedContainer = - mapping.getContainer(info.getContainerID()); - Assert.assertEquals(500000000L, - updatedContainer.getNumberOfKeys()); - Assert.assertEquals(5368705120L, updatedContainer.getUsedBytes()); - NavigableSet pendingCloseContainers = mapping.getStateManager() - .getMatchingContainerIDs( - containerOwner, - xceiverClientManager.getType(), - xceiverClientManager.getFactor(), - HddsProtos.LifeCycleState.CLOSING); - Assert.assertTrue( - pendingCloseContainers.contains(updatedContainer.containerID())); + mapping.processContainerReports(TestUtils.randomDatanodeDetails(), + crBuilder.build(), true); + for (StorageContainerDatanodeProtocolProtos.ContainerInfo c : reports) { + Assert.assertTrue(mapping.getStateManager().getContainerReplicas( + ContainerID.valueof(c.getContainerID())).size() > 0); + } } @Test @@ -308,4 +322,13 @@ private ContainerInfo createContainer() return containerInfo; } + @Test + public void testFlushAllContainers() throws IOException { + ContainerInfo info = createContainer(); + List containers = mapping.getStateManager() + .getAllContainers(); + Assert.assertTrue(containers.size() > 0); + mapping.flushContainerInfo(); + } + } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java new file mode 100644 index 00000000000..a59179bdffa --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java @@ -0,0 +1,239 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo + .Builder; +import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.container.replication + .ReplicationActivityStatus; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest; +import org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap; +import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher + .ContainerReportFromDatanode; +import org.apache.hadoop.hdds.server.events.Event; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import static org.mockito.Matchers.anyLong; +import org.mockito.Mockito; +import static org.mockito.Mockito.when; +import org.mockito.stubbing.Answer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test the behaviour of the ContainerReportHandler. 
+ */ +public class TestContainerReportHandler implements EventPublisher { + + private List publishedEvents = new ArrayList<>(); + + private static final Logger LOG = + LoggerFactory.getLogger(TestContainerReportHandler.class); + + @Before + public void resetEventCollector() { + publishedEvents.clear(); + } + + @Test + public void test() throws IOException { + //GIVEN + OzoneConfiguration conf = new OzoneConfiguration(); + Node2ContainerMap node2ContainerMap = new Node2ContainerMap(); + Mapping mapping = Mockito.mock(Mapping.class); + PipelineSelector selector = Mockito.mock(PipelineSelector.class); + + when(mapping.getContainer(anyLong())) + .thenAnswer( + (Answer) invocation -> + new Builder() + .setReplicationFactor(ReplicationFactor.THREE) + .setContainerID((Long) invocation.getArguments()[0]) + .setState(LifeCycleState.CLOSED) + .build() + ); + + ContainerStateManager containerStateManager = + new ContainerStateManager(conf, mapping, selector); + + when(mapping.getStateManager()).thenReturn(containerStateManager); + + ReplicationActivityStatus replicationActivityStatus = + new ReplicationActivityStatus(); + + ContainerReportHandler reportHandler = + new ContainerReportHandler(mapping, node2ContainerMap, + replicationActivityStatus); + + DatanodeDetails dn1 = TestUtils.randomDatanodeDetails(); + DatanodeDetails dn2 = TestUtils.randomDatanodeDetails(); + DatanodeDetails dn3 = TestUtils.randomDatanodeDetails(); + DatanodeDetails dn4 = TestUtils.randomDatanodeDetails(); + node2ContainerMap.insertNewDatanode(dn1.getUuid(), new HashSet<>()); + node2ContainerMap.insertNewDatanode(dn2.getUuid(), new HashSet<>()); + node2ContainerMap.insertNewDatanode(dn3.getUuid(), new HashSet<>()); + node2ContainerMap.insertNewDatanode(dn4.getUuid(), new HashSet<>()); + PipelineSelector pipelineSelector = Mockito.mock(PipelineSelector.class); + + Pipeline pipeline = new Pipeline("leader", LifeCycleState.CLOSED, + ReplicationType.STAND_ALONE, ReplicationFactor.THREE, + PipelineID.randomId()); + + when(pipelineSelector.getReplicationPipeline(ReplicationType.STAND_ALONE, + ReplicationFactor.THREE)).thenReturn(pipeline); + + ContainerInfo cont1 = containerStateManager + .allocateContainer(pipelineSelector, ReplicationType.STAND_ALONE, + ReplicationFactor.THREE, "root").getContainerInfo(); + ContainerInfo cont2 = containerStateManager + .allocateContainer(pipelineSelector, ReplicationType.STAND_ALONE, + ReplicationFactor.THREE, "root").getContainerInfo(); + // Open Container + ContainerInfo cont3 = containerStateManager + .allocateContainer(pipelineSelector, ReplicationType.STAND_ALONE, + ReplicationFactor.THREE, "root").getContainerInfo(); + + long c1 = cont1.getContainerID(); + long c2 = cont2.getContainerID(); + long c3 = cont3.getContainerID(); + + // Close remaining containers + TestUtils.closeContainer(containerStateManager, cont1); + TestUtils.closeContainer(containerStateManager, cont2); + + //when + + //initial reports before replication is enabled. 2 containers w 3 replicas. 
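The new handler test above does not mock the event publisher; the test class itself implements the publisher interface, and fireEvent simply appends each payload to publishedEvents, which keeps the later event-count assertions deterministic. A minimal, self-contained sketch of that recording pattern follows; the Publisher and RecordingPublisher names are invented for illustration and are not the Ozone EventPublisher API. The report sequence of the test continues right after the sketch.

    import java.util.ArrayList;
    import java.util.List;

    // Invented stand-in for an event publisher interface, illustration only.
    interface Publisher {
      void fire(Object payload);
    }

    // Records every fired payload instead of dispatching it, so a test can
    // assert on how many events were published and what they carried.
    final class RecordingPublisher implements Publisher {
      private final List<Object> published = new ArrayList<>();

      @Override
      public void fire(Object payload) {
        published.add(payload);
      }

      List<Object> published() {
        return published;
      }
    }

Because the code under test only sees the interface, no real event queue, threads, or timing are involved, so assertions such as Assert.assertEquals(1, publishedEvents.size()) stay deterministic.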
+ reportHandler.onMessage( + new ContainerReportFromDatanode(dn1, + createContainerReport(new long[] {c1, c2, c3})), this); + + reportHandler.onMessage( + new ContainerReportFromDatanode(dn2, + createContainerReport(new long[] {c1, c2, c3})), this); + + reportHandler.onMessage( + new ContainerReportFromDatanode(dn3, + createContainerReport(new long[] {c1, c2})), this); + + reportHandler.onMessage( + new ContainerReportFromDatanode(dn4, + createContainerReport(new long[] {})), this); + + Assert.assertEquals(0, publishedEvents.size()); + + replicationActivityStatus.enableReplication(); + + //no problem here + reportHandler.onMessage( + new ContainerReportFromDatanode(dn1, + createContainerReport(new long[] {c1, c2})), this); + + Assert.assertEquals(0, publishedEvents.size()); + + //container is missing from d2 + reportHandler.onMessage( + new ContainerReportFromDatanode(dn2, + createContainerReport(new long[] {c1})), this); + + Assert.assertEquals(1, publishedEvents.size()); + ReplicationRequest replicationRequest = + (ReplicationRequest) publishedEvents.get(0); + + Assert.assertEquals(c2, replicationRequest.getContainerId()); + Assert.assertEquals(3, replicationRequest.getExpecReplicationCount()); + Assert.assertEquals(2, replicationRequest.getReplicationCount()); + + //container was replicated to dn4 + reportHandler.onMessage( + new ContainerReportFromDatanode(dn4, + createContainerReport(new long[] {c2})), this); + + //no more event, everything is perfect + Assert.assertEquals(1, publishedEvents.size()); + + //c2 was found at dn2 (it was missing before, magic) + reportHandler.onMessage( + new ContainerReportFromDatanode(dn2, + createContainerReport(new long[] {c1, c2})), this); + + //c2 is over replicated (dn1,dn2,dn3,dn4) + Assert.assertEquals(2, publishedEvents.size()); + + replicationRequest = + (ReplicationRequest) publishedEvents.get(1); + + Assert.assertEquals(c2, replicationRequest.getContainerId()); + Assert.assertEquals(3, replicationRequest.getExpecReplicationCount()); + Assert.assertEquals(4, replicationRequest.getReplicationCount()); + + } + + private ContainerReportsProto createContainerReport(long[] containerIds) { + + ContainerReportsProto.Builder crBuilder = + ContainerReportsProto.newBuilder(); + + for (long containerId : containerIds) { + org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerInfo.Builder + ciBuilder = org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.ContainerInfo.newBuilder(); + ciBuilder.setFinalhash("e16cc9d6024365750ed8dbd194ea46d2") + .setSize(5368709120L) + .setUsed(2000000000L) + .setKeyCount(100000000L) + .setReadCount(100000000L) + .setWriteCount(100000000L) + .setReadBytes(2000000000L) + .setWriteBytes(2000000000L) + .setContainerID(containerId) + .setDeleteTransactionId(0); + + crBuilder.addReports(ciBuilder.build()); + } + + return crBuilder.build(); + } + + @Override + public > void fireEvent( + EVENT_TYPE event, PAYLOAD payload) { + LOG.info("Event is published: {}", payload); + publishedEvents.add(payload); + } +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java new file mode 100644 index 00000000000..b857740a5fd --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java @@ -0,0 +1,98 @@ +/** + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.hdds.scm.container; + +import java.io.IOException; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest; + +import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +/** + * Testing ContainerStatemanager. + */ +public class TestContainerStateManager { + + private ContainerStateManager containerStateManager; + + @Before + public void init() throws IOException { + OzoneConfiguration conf = new OzoneConfiguration(); + Mapping mapping = Mockito.mock(Mapping.class); + PipelineSelector selector = Mockito.mock(PipelineSelector.class); + containerStateManager = new ContainerStateManager(conf, mapping, selector); + + } + + @Test + public void checkReplicationStateOK() throws IOException { + //GIVEN + ContainerInfo c1 = TestUtils.allocateContainer(containerStateManager); + + DatanodeDetails d1 = TestUtils.randomDatanodeDetails(); + DatanodeDetails d2 = TestUtils.randomDatanodeDetails(); + DatanodeDetails d3 = TestUtils.randomDatanodeDetails(); + + addReplica(c1, d1); + addReplica(c1, d2); + addReplica(c1, d3); + + //WHEN + ReplicationRequest replicationRequest = containerStateManager + .checkReplicationState(new ContainerID(c1.getContainerID())); + + //THEN + Assert.assertNull(replicationRequest); + } + + @Test + public void checkReplicationStateMissingReplica() throws IOException { + //GIVEN + + ContainerInfo c1 = TestUtils.allocateContainer(containerStateManager); + + DatanodeDetails d1 = TestUtils.randomDatanodeDetails(); + DatanodeDetails d2 = TestUtils.randomDatanodeDetails(); + + addReplica(c1, d1); + addReplica(c1, d2); + + //WHEN + ReplicationRequest replicationRequest = containerStateManager + .checkReplicationState(new ContainerID(c1.getContainerID())); + + Assert + .assertEquals(c1.getContainerID(), replicationRequest.getContainerId()); + Assert.assertEquals(2, replicationRequest.getReplicationCount()); + Assert.assertEquals(3, replicationRequest.getExpecReplicationCount()); + } + + private void addReplica(ContainerInfo c1, DatanodeDetails d1) { + containerStateManager + .addContainerReplica(new ContainerID(c1.getContainerID()), d1); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/closer/TestContainerCloser.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/closer/TestContainerCloser.java deleted file mode 100644 index 74238a77084..00000000000 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/closer/TestContainerCloser.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. 
The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package org.apache.hadoop.hdds.scm.container.closer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hdds.scm.TestUtils; -import org.apache.hadoop.hdds.scm.container.ContainerMapping; -import org.apache.hadoop.hdds.scm.container.MockNodeManager; -import org.apache.hadoop.hdds.scm.container.TestContainerMapping; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; -import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.container.common.SCMTestUtils; -import org.apache.hadoop.test.GenericTestUtils; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.util.concurrent.TimeUnit; - -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_CONTAINER_SIZE_DEFAULT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_CONTAINER_SIZE_GB; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent - .CREATE; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent - .CREATED; -import static org.apache.hadoop.ozone.OzoneConfigKeys - .OZONE_CONTAINER_REPORT_INTERVAL; - -/** - * Test class for Closing Container. 
- */ -public class TestContainerCloser { - - private static final long GIGABYTE = 1024L * 1024L * 1024L; - private static Configuration configuration; - private static MockNodeManager nodeManager; - private static ContainerMapping mapping; - private static long size; - private static File testDir; - - @BeforeClass - public static void setUp() throws Exception { - configuration = SCMTestUtils.getConf(); - size = configuration.getLong(OZONE_SCM_CONTAINER_SIZE_GB, - OZONE_SCM_CONTAINER_SIZE_DEFAULT) * 1024 * 1024 * 1024; - configuration.setTimeDuration(OZONE_CONTAINER_REPORT_INTERVAL, - 1, TimeUnit.SECONDS); - testDir = GenericTestUtils - .getTestDir(TestContainerMapping.class.getSimpleName()); - configuration.set(OzoneConfigKeys.OZONE_METADATA_DIRS, - testDir.getAbsolutePath()); - nodeManager = new MockNodeManager(true, 10); - mapping = new ContainerMapping(configuration, nodeManager, 128); - } - - @AfterClass - public static void tearDown() throws Exception { - if (mapping != null) { - mapping.close(); - } - FileUtil.fullyDelete(testDir); - } - - @Test - public void testClose() throws IOException { - ContainerWithPipeline containerWithPipeline = mapping.allocateContainer( - HddsProtos.ReplicationType.STAND_ALONE, - HddsProtos.ReplicationFactor.ONE, "ozone"); - ContainerInfo info = containerWithPipeline.getContainerInfo(); - - //Execute these state transitions so that we can close the container. - mapping.updateContainerState(info.getContainerID(), CREATE); - mapping.updateContainerState(info.getContainerID(), CREATED); - long currentCount = mapping.getCloser().getCloseCount(); - long runCount = mapping.getCloser().getThreadRunCount(); - - DatanodeDetails datanode = containerWithPipeline.getPipeline().getLeader(); - // Send a container report with used set to 1 GB. This should not close. - sendContainerReport(info, 1 * GIGABYTE); - - // with only one container the cleaner thread should not run. - Assert.assertEquals(0, mapping.getCloser().getThreadRunCount()); - - // With only 1 GB, the container should not be queued for closing. - Assert.assertEquals(0, mapping.getCloser().getCloseCount()); - - // Assert that the Close command was not queued for this Datanode. - Assert.assertEquals(0, nodeManager.getCommandCount(datanode)); - - long newUsed = (long) (size * 0.91f); - sendContainerReport(info, newUsed); - - // with only one container the cleaner thread should not run. - Assert.assertEquals(runCount, mapping.getCloser().getThreadRunCount()); - - // and close count will be one. - Assert.assertEquals(1, - mapping.getCloser().getCloseCount() - currentCount); - - // Assert that the Close command was Queued for this Datanode. - Assert.assertEquals(1, nodeManager.getCommandCount(datanode)); - } - - @Test - public void testRepeatedClose() throws IOException, - InterruptedException { - // This test asserts that if we queue more than one report then the - // second report is discarded by the system if it lands in the 3 * report - // frequency window. - - configuration.setTimeDuration(OZONE_CONTAINER_REPORT_INTERVAL, 1, - TimeUnit.SECONDS); - - ContainerWithPipeline containerWithPipeline = mapping.allocateContainer( - HddsProtos.ReplicationType.STAND_ALONE, - HddsProtos.ReplicationFactor.ONE, "ozone"); - ContainerInfo info = containerWithPipeline.getContainerInfo(); - - //Execute these state transitions so that we can close the container. 
- mapping.updateContainerState(info.getContainerID(), CREATE); - - long currentCount = mapping.getCloser().getCloseCount(); - long runCount = mapping.getCloser().getThreadRunCount(); - - DatanodeDetails datanodeDetails = containerWithPipeline.getPipeline() - .getLeader(); - - // Send this command twice and assert we have only one command in queue. - sendContainerReport(info, 5 * GIGABYTE); - sendContainerReport(info, 5 * GIGABYTE); - - // Assert that the Close command was Queued for this Datanode. - Assert.assertEquals(1, - nodeManager.getCommandCount(datanodeDetails)); - // And close count will be one. - Assert.assertEquals(1, - mapping.getCloser().getCloseCount() - currentCount); - Thread.sleep(TimeUnit.SECONDS.toMillis(4)); - - //send another close and the system will queue this to the command queue. - sendContainerReport(info, 5 * GIGABYTE); - Assert.assertEquals(2, - nodeManager.getCommandCount(datanodeDetails)); - // but the close count will still be one, since from the point of view of - // closer we are closing only one container even if we have send multiple - // close commands to the datanode. - Assert.assertEquals(1, mapping.getCloser().getCloseCount() - - currentCount); - } - - @Test - public void testCleanupThreadRuns() throws IOException, - InterruptedException { - // This test asserts that clean up thread runs once we have closed a - // number above cleanup water mark. - - long runCount = mapping.getCloser().getThreadRunCount(); - - for (int x = 0; x < ContainerCloser.getCleanupWaterMark() + 10; x++) { - ContainerWithPipeline containerWithPipeline = mapping.allocateContainer( - HddsProtos.ReplicationType.STAND_ALONE, - HddsProtos.ReplicationFactor.ONE, "ozone"); - ContainerInfo info = containerWithPipeline.getContainerInfo(); - mapping.updateContainerState(info.getContainerID(), CREATE); - mapping.updateContainerState(info.getContainerID(), CREATED); - sendContainerReport(info, 5 * GIGABYTE); - } - - Thread.sleep(TimeUnit.SECONDS.toMillis(1)); - - // Assert that cleanup thread ran at least once. - Assert.assertTrue(mapping.getCloser().getThreadRunCount() - runCount > 0); - } - - private void sendContainerReport(ContainerInfo info, long used) throws - IOException { - ContainerReportsProto.Builder - reports = ContainerReportsProto.newBuilder(); - - StorageContainerDatanodeProtocolProtos.ContainerInfo.Builder ciBuilder = - StorageContainerDatanodeProtocolProtos.ContainerInfo.newBuilder(); - ciBuilder.setContainerID(info.getContainerID()) - .setFinalhash("e16cc9d6024365750ed8dbd194ea46d2") - .setSize(size) - .setUsed(used) - .setKeyCount(100000000L) - .setReadCount(100000000L) - .setWriteCount(100000000L) - .setReadBytes(2000000000L) - .setWriteBytes(2000000000L) - .setDeleteTransactionId(0); - reports.addReports(ciBuilder); - mapping.processContainerReports(TestUtils.getDatanodeDetails(), - reports.build()); - } -} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementCapacity.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementCapacity.java new file mode 100644 index 00000000000..764daff7758 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementCapacity.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container.placement.algorithms; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; + +import org.junit.Assert; +import org.junit.Test; +import static org.mockito.Matchers.anyObject; +import org.mockito.Mockito; +import static org.mockito.Mockito.when; + +public class TestSCMContainerPlacementCapacity { + @Test + public void chooseDatanodes() throws SCMException { + //given + Configuration conf = new OzoneConfiguration(); + + List datanodes = new ArrayList<>(); + for (int i = 0; i < 7; i++) { + datanodes.add(TestUtils.randomDatanodeDetails()); + } + + NodeManager mockNodeManager = Mockito.mock(NodeManager.class); + when(mockNodeManager.getNodes(NodeState.HEALTHY)) + .thenReturn(new ArrayList<>(datanodes)); + + when(mockNodeManager.getNodeStat(anyObject())) + .thenReturn(new SCMNodeMetric(100L, 0L, 100L)); + when(mockNodeManager.getNodeStat(datanodes.get(2))) + .thenReturn(new SCMNodeMetric(100L, 90L, 10L)); + when(mockNodeManager.getNodeStat(datanodes.get(3))) + .thenReturn(new SCMNodeMetric(100L, 80L, 20L)); + when(mockNodeManager.getNodeStat(datanodes.get(4))) + .thenReturn(new SCMNodeMetric(100L, 70L, 30L)); + + SCMContainerPlacementCapacity scmContainerPlacementRandom = + new SCMContainerPlacementCapacity(mockNodeManager, conf); + + List existingNodes = new ArrayList<>(); + existingNodes.add(datanodes.get(0)); + existingNodes.add(datanodes.get(1)); + + Map selectedCount = new HashMap<>(); + for (DatanodeDetails datanode : datanodes) { + selectedCount.put(datanode, 0); + } + + for (int i = 0; i < 1000; i++) { + + //when + List datanodeDetails = + scmContainerPlacementRandom.chooseDatanodes(existingNodes, 1, 15); + + //then + Assert.assertEquals(1, datanodeDetails.size()); + DatanodeDetails datanode0Details = datanodeDetails.get(0); + + Assert.assertNotEquals( + "Datanode 0 should not been selected: excluded by parameter", + datanodes.get(0), datanode0Details); + Assert.assertNotEquals( + "Datanode 1 should not been selected: excluded by parameter", + datanodes.get(1), datanode0Details); + Assert.assertNotEquals( + "Datanode 2 should not been selected: not enough space there", + datanodes.get(2), datanode0Details); + + selectedCount + .put(datanode0Details, selectedCount.get(datanode0Details) + 1); + + } + + //datanode 4 has less space. Should be selected less times. 
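The loop above only records how often each datanode wins; the assertions that follow this sketch compare relative selection counts over the 1000 rounds rather than exact values, because the placement policy is randomized. One plausible shape for such a capacity-aware choice, shown purely as an illustration and not as the SCMContainerPlacementCapacity implementation, is a two-random-candidates pick that keeps the node with more remaining space:

    import java.util.List;
    import java.util.Random;

    // Illustration only: pick two random candidate indexes and keep the one
    // with more remaining space, so emptier nodes win more often over many
    // rounds without ever excluding fuller nodes completely.
    final class TwoChoiceCapacityPick {
      private final Random random = new Random();

      int pick(List<Long> remainingBytes) {
        int first = random.nextInt(remainingBytes.size());
        int second = random.nextInt(remainingBytes.size());
        return remainingBytes.get(first) >= remainingBytes.get(second)
            ? first
            : second;
      }
    }
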
+ Assert.assertTrue(selectedCount.get(datanodes.get(3)) > selectedCount + .get(datanodes.get(6))); + Assert.assertTrue(selectedCount.get(datanodes.get(4)) > selectedCount + .get(datanodes.get(6))); + } +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRandom.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRandom.java new file mode 100644 index 00000000000..b652b6b76b5 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRandom.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container.placement.algorithms; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.NodeManager; + +import org.junit.Assert; +import org.junit.Test; +import static org.mockito.Matchers.anyObject; +import org.mockito.Mockito; +import static org.mockito.Mockito.when; + +public class TestSCMContainerPlacementRandom { + + @Test + public void chooseDatanodes() throws SCMException { + //given + Configuration conf = new OzoneConfiguration(); + + List datanodes = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + datanodes.add(TestUtils.randomDatanodeDetails()); + } + + NodeManager mockNodeManager = Mockito.mock(NodeManager.class); + when(mockNodeManager.getNodes(NodeState.HEALTHY)) + .thenReturn(new ArrayList<>(datanodes)); + + when(mockNodeManager.getNodeStat(anyObject())) + .thenReturn(new SCMNodeMetric(100L, 0L, 100L)); + when(mockNodeManager.getNodeStat(datanodes.get(2))) + .thenReturn(new SCMNodeMetric(100L, 90L, 10L)); + + SCMContainerPlacementRandom scmContainerPlacementRandom = + new SCMContainerPlacementRandom(mockNodeManager, conf); + + List existingNodes = new ArrayList<>(); + existingNodes.add(datanodes.get(0)); + existingNodes.add(datanodes.get(1)); + + for (int i = 0; i < 100; i++) { + //when + List datanodeDetails = + scmContainerPlacementRandom.chooseDatanodes(existingNodes, 1, 15); + + //then + Assert.assertEquals(1, datanodeDetails.size()); + DatanodeDetails datanode0Details = datanodeDetails.get(0); + + Assert.assertNotEquals( + "Datanode 0 should not been selected: excluded by parameter", + datanodes.get(0), datanode0Details); + Assert.assertNotEquals( + "Datanode 1 should not been selected: excluded by parameter", + datanodes.get(1), datanode0Details); + Assert.assertNotEquals( + "Datanode 2 should not been selected: not enough space there", + datanodes.get(2), datanode0Details); + + } + } +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationActivityStatus.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationActivityStatus.java new file mode 100644 index 00000000000..a4615fc1a7b --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationActivityStatus.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  
You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.replication; + +import static org.junit.Assert.*; + +import java.util.concurrent.TimeoutException; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Tests for ReplicationActivityStatus. + */ +public class TestReplicationActivityStatus { + + private static EventQueue eventQueue; + private static ReplicationActivityStatus replicationActivityStatus; + + @BeforeClass + public static void setup() { + eventQueue = new EventQueue(); + replicationActivityStatus = new ReplicationActivityStatus(); + eventQueue.addHandler(SCMEvents.START_REPLICATION, + replicationActivityStatus.getReplicationStatusListener()); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, + replicationActivityStatus.getChillModeStatusListener()); + } + + @Test + public void testReplicationStatusForChillMode() + throws TimeoutException, InterruptedException { + assertFalse(replicationActivityStatus.isReplicationEnabled()); + // In chill mode replication process should be stopped. + eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, true); + assertFalse(replicationActivityStatus.isReplicationEnabled()); + + // Replication should be enabled when chill mode if off. + eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, false); + GenericTestUtils.waitFor(() -> { + return replicationActivityStatus.isReplicationEnabled(); + }, 10, 1000*5); + assertTrue(replicationActivityStatus.isReplicationEnabled()); + } +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java new file mode 100644 index 00000000000..06beb7c1742 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.container.replication; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ReplicateContainerCommandProto; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerStateManager; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager.ReplicationRequestToRepeat; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.ozone.lease.LeaseManager; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; + +import com.google.common.base.Preconditions; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.TRACK_REPLICATE_COMMAND; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import static org.mockito.Matchers.anyObject; +import org.mockito.Mockito; +import static org.mockito.Mockito.when; + +/** + * Test behaviour of the TestReplication. 
+ */ +public class TestReplicationManager { + + private EventQueue queue; + + private List trackReplicationEvents; + + private List> copyEvents; + + private ContainerStateManager containerStateManager; + + private ContainerPlacementPolicy containerPlacementPolicy; + private List listOfDatanodeDetails; + private LeaseManager leaseManager; + private ReplicationManager replicationManager; + + @Before + public void initReplicationManager() throws IOException { + + listOfDatanodeDetails = new ArrayList<>(); + listOfDatanodeDetails.add(TestUtils.randomDatanodeDetails()); + listOfDatanodeDetails.add(TestUtils.randomDatanodeDetails()); + listOfDatanodeDetails.add(TestUtils.randomDatanodeDetails()); + listOfDatanodeDetails.add(TestUtils.randomDatanodeDetails()); + listOfDatanodeDetails.add(TestUtils.randomDatanodeDetails()); + + containerPlacementPolicy = + (excludedNodes, nodesRequired, sizeRequired) -> listOfDatanodeDetails + .subList(2, 2 + nodesRequired); + + containerStateManager = Mockito.mock(ContainerStateManager.class); + + ContainerInfo containerInfo = new ContainerInfo.Builder() + .setState(LifeCycleState.CLOSED) + .build(); + + when(containerStateManager.getContainer(anyObject())) + .thenReturn(containerInfo); + + when(containerStateManager.getContainerReplicas(new ContainerID(1L))) + .thenReturn(new HashSet<>(Arrays.asList( + listOfDatanodeDetails.get(0), + listOfDatanodeDetails.get(1) + ))); + + + when(containerStateManager.getContainerReplicas(new ContainerID(3L))) + .thenReturn(new HashSet<>()); + + queue = new EventQueue(); + + trackReplicationEvents = new ArrayList<>(); + queue.addHandler(TRACK_REPLICATE_COMMAND, + (event, publisher) -> trackReplicationEvents.add(event)); + + copyEvents = new ArrayList<>(); + queue.addHandler(SCMEvents.DATANODE_COMMAND, + (event, publisher) -> copyEvents.add(event)); + + leaseManager = new LeaseManager<>("Test", 100000L); + + replicationManager = new ReplicationManager(containerPlacementPolicy, + containerStateManager, queue, leaseManager); + + + + } + + /** + * Container should be replicated but no source replicas. 
+ */ + @Test() + public void testNoExistingReplicas() throws InterruptedException { + try { + leaseManager.start(); + replicationManager.start(); + + //WHEN + queue.fireEvent(SCMEvents.REPLICATE_CONTAINER, + new ReplicationRequest(3L, (short) 2, System.currentTimeMillis(), + (short) 3)); + + Thread.sleep(500L); + queue.processAll(1000L); + + //THEN + Assert.assertEquals(0, trackReplicationEvents.size()); + Assert.assertEquals(0, copyEvents.size()); + + } finally { + if (leaseManager != null) { + leaseManager.shutdown(); + } + } + } + + @Test + public void testEventSending() throws InterruptedException, IOException { + + //GIVEN + try { + leaseManager.start(); + + replicationManager.start(); + + //WHEN + queue.fireEvent(SCMEvents.REPLICATE_CONTAINER, + new ReplicationRequest(1L, (short) 2, System.currentTimeMillis(), + (short) 3)); + + Thread.sleep(500L); + queue.processAll(1000L); + + //THEN + Assert.assertEquals(1, trackReplicationEvents.size()); + Assert.assertEquals(1, copyEvents.size()); + } finally { + if (leaseManager != null) { + leaseManager.shutdown(); + } + } + } + + @Test + public void testCommandWatcher() throws InterruptedException, IOException { + LeaseManager rapidLeaseManager = + new LeaseManager<>("Test", 1000L); + + replicationManager = new ReplicationManager(containerPlacementPolicy, + containerStateManager, queue, rapidLeaseManager); + + try { + rapidLeaseManager.start(); + replicationManager.start(); + + queue.fireEvent(SCMEvents.REPLICATE_CONTAINER, + new ReplicationRequest(1L, (short) 2, System.currentTimeMillis(), + (short) 3)); + + Thread.sleep(500L); + + queue.processAll(1000L); + + Assert.assertEquals(1, trackReplicationEvents.size()); + Assert.assertEquals(1, copyEvents.size()); + + Assert.assertEquals(trackReplicationEvents.get(0).getId(), + copyEvents.get(0).getCommand().getId()); + + //event is timed out + Thread.sleep(1500); + + queue.processAll(1000L); + + //original copy command + retry + Assert.assertEquals(2, trackReplicationEvents.size()); + Assert.assertEquals(2, copyEvents.size()); + + } finally { + if (rapidLeaseManager != null) { + rapidLeaseManager.shutdown(); + } + } + } + + public static Pipeline createPipeline(Iterable ids) + throws IOException { + Objects.requireNonNull(ids, "ids == null"); + final Iterator i = ids.iterator(); + Preconditions.checkArgument(i.hasNext()); + final DatanodeDetails leader = i.next(); + final Pipeline pipeline = + new Pipeline(leader.getUuidString(), LifeCycleState.OPEN, + ReplicationType.STAND_ALONE, ReplicationFactor.ONE, + PipelineID.randomId()); + pipeline.addMember(leader); + while (i.hasNext()) { + pipeline.addMember(i.next()); + } + return pipeline; + } + +} \ No newline at end of file diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationQueue.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationQueue.java similarity index 91% rename from hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationQueue.java rename to hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationQueue.java index 6d74c683eeb..9dd4fe31c50 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationQueue.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationQueue.java @@ -15,7 +15,7 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.ozone.container.replication; +package org.apache.hadoop.hdds.scm.container.replication; import java.util.Random; import java.util.UUID; @@ -39,7 +39,7 @@ public void setUp() { } @Test - public void testDuplicateAddOp() { + public void testDuplicateAddOp() throws InterruptedException { long contId = random.nextLong(); String nodeId = UUID.randomUUID().toString(); ReplicationRequest obj1, obj2, obj3; @@ -53,12 +53,12 @@ public void testDuplicateAddOp() { replicationQueue.add(obj3); Assert.assertEquals("Should add only 1 msg as second one is duplicate", 1, replicationQueue.size()); - ReplicationRequest temp = replicationQueue.poll(); + ReplicationRequest temp = replicationQueue.take(); Assert.assertEquals(temp, obj3); } @Test - public void testPollOp() { + public void testPollOp() throws InterruptedException { long contId = random.nextLong(); String nodeId = UUID.randomUUID().toString(); ReplicationRequest msg1, msg2, msg3, msg4, msg5; @@ -82,19 +82,19 @@ public void testPollOp() { // Since Priority queue orders messages according to replication count, // message with lowest replication should be first ReplicationRequest temp; - temp = replicationQueue.poll(); + temp = replicationQueue.take(); Assert.assertEquals("Should have 2 objects", 2, replicationQueue.size()); Assert.assertEquals(temp, msg3); - temp = replicationQueue.poll(); + temp = replicationQueue.take(); Assert.assertEquals("Should have 1 objects", 1, replicationQueue.size()); Assert.assertEquals(temp, msg5); - // Message 2 should be ordered before message 5 as both have same replication - // number but message 2 has earlier timestamp. - temp = replicationQueue.poll(); + // Message 2 should be ordered before message 5 as both have same + // replication number but message 2 has earlier timestamp. + temp = replicationQueue.take(); Assert.assertEquals("Should have 0 objects", replicationQueue.size(), 0); Assert.assertEquals(temp, msg4); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/package-info.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/package-info.java similarity index 89% rename from hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/package-info.java rename to hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/package-info.java index 5b1fd0f43a9..1423c999381 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/package-info.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/package-info.java @@ -19,5 +19,5 @@ /** * SCM Testing and Mocking Utils. */ -package org.apache.hadoop.ozone.container.replication; +package org.apache.hadoop.hdds.scm.container.replication; // Test classes for Replication functionality. 
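The switch from poll() to take() in the queue test above is why the test methods now declare throws InterruptedException: a blocking take waits for an element instead of returning null on an empty queue. A small, self-contained sketch of the same idea on top of java.util.concurrent.PriorityBlockingQueue is below; the Request type and its ordering (fewer existing replicas first, then earlier timestamp) are stand-ins chosen to mirror what the assertions above expect, not the actual ReplicationRequest class.

    import java.util.concurrent.PriorityBlockingQueue;

    // Stand-in request type: ordered by existing replica count (lowest first),
    // then by timestamp (oldest first). Not the real ReplicationRequest.
    final class Request implements Comparable<Request> {
      final long containerId;
      final int replicaCount;
      final long timestampMs;

      Request(long containerId, int replicaCount, long timestampMs) {
        this.containerId = containerId;
        this.replicaCount = replicaCount;
        this.timestampMs = timestampMs;
      }

      @Override
      public int compareTo(Request other) {
        int byCount = Integer.compare(replicaCount, other.replicaCount);
        return byCount != 0 ? byCount
            : Long.compare(timestampMs, other.timestampMs);
      }
    }

    public class BlockingQueueSketch {
      public static void main(String[] args) throws InterruptedException {
        PriorityBlockingQueue<Request> queue = new PriorityBlockingQueue<>();
        queue.add(new Request(1L, 3, 100L));
        queue.add(new Request(2L, 1, 200L));

        // take() blocks until an element is available and returns the most
        // urgent request first (here container 2, which has fewer replicas).
        Request next = queue.take();
        System.out.println(next.containerId); // prints 2
      }
    }

Callers that cannot afford to block can still use the timed poll(timeout, unit) variant of BlockingQueue, but the tests above deliberately exercise the blocking path.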
\ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java index c6ea2af1216..6f8534d9be2 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestContainerPlacement.java @@ -32,8 +32,8 @@ .SCMContainerPlacementCapacity; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.test.PathUtils; @@ -41,6 +41,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.mockito.Mockito; import java.io.File; import java.io.IOException; @@ -86,8 +87,15 @@ OzoneConfiguration getConf() { SCMNodeManager createNodeManager(OzoneConfiguration config) throws IOException { + EventQueue eventQueue = new EventQueue(); + eventQueue.addHandler(SCMEvents.NEW_NODE, + Mockito.mock(NewNodeHandler.class)); + eventQueue.addHandler(SCMEvents.STALE_NODE, + Mockito.mock(StaleNodeHandler.class)); + eventQueue.addHandler(SCMEvents.DEAD_NODE, + Mockito.mock(DeadNodeHandler.class)); SCMNodeManager nodeManager = new SCMNodeManager(config, - UUID.randomUUID().toString(), null); + UUID.randomUUID().toString(), null, eventQueue); assertFalse("Node manager should be in chill mode", nodeManager.isOutOfChillMode()); return nodeManager; @@ -95,9 +103,10 @@ SCMNodeManager createNodeManager(OzoneConfiguration config) ContainerMapping createContainerManager(Configuration config, NodeManager scmNodeManager) throws IOException { + EventQueue eventQueue = new EventQueue(); final int cacheSize = config.getInt(OZONE_SCM_DB_CACHE_SIZE_MB, OZONE_SCM_DB_CACHE_SIZE_DEFAULT); - return new ContainerMapping(config, scmNodeManager, cacheSize); + return new ContainerMapping(config, scmNodeManager, cacheSize, eventQueue); } @@ -132,10 +141,6 @@ public void testContainerPlacementCapacity() throws IOException, TestUtils.getListOfRegisteredDatanodeDetails(nodeManager, nodeCount); try { for (DatanodeDetails datanodeDetails : datanodes) { - String id = UUID.randomUUID().toString(); - String path = testDir.getAbsolutePath() + "/" + id; - List reports = TestUtils - .createStorageReport(capacity, used, remaining, path, null, id, 1); nodeManager.processHeartbeat(datanodeDetails); } @@ -151,7 +156,8 @@ public void testContainerPlacementCapacity() throws IOException, assertTrue(nodeManager.isOutOfChillMode()); - ContainerWithPipeline containerWithPipeline = containerManager.allocateContainer( + ContainerWithPipeline containerWithPipeline = containerManager + .allocateContainer( xceiverClientManager.getType(), xceiverClientManager.getFactor(), "OZONE"); assertEquals(xceiverClientManager.getFactor().getNumber(), diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java new file mode 100644 index 00000000000..5ca9cb769a4 --- /dev/null +++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.node; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerStateManager; +import org.apache.hadoop.hdds.scm.container.Mapping; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.replication.ReplicationRequest; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap; +import org.apache.hadoop.hdds.scm.pipelines.PipelineSelector; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +import org.junit.Assert; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import static org.mockito.Matchers.eq; +import org.mockito.Mockito; + +/** + * Test DeadNodeHandler. + */ +public class TestDeadNodeHandler { + + private List sentEvents = new ArrayList<>(); + + @Test + public void testOnMessage() throws IOException { + //GIVEN + DatanodeDetails datanode1 = TestUtils.randomDatanodeDetails(); + DatanodeDetails datanode2 = TestUtils.randomDatanodeDetails(); + + Node2ContainerMap node2ContainerMap = new Node2ContainerMap(); + ContainerStateManager containerStateManager = new ContainerStateManager( + new OzoneConfiguration(), + Mockito.mock(Mapping.class), + Mockito.mock(PipelineSelector.class) + ); + + ContainerInfo container1 = + TestUtils.allocateContainer(containerStateManager); + ContainerInfo container2 = + TestUtils.allocateContainer(containerStateManager); + ContainerInfo container3 = + TestUtils.allocateContainer(containerStateManager); + + DeadNodeHandler handler = + new DeadNodeHandler(node2ContainerMap, containerStateManager); + + registerReplicas(node2ContainerMap, datanode1, container1, container2); + registerReplicas(node2ContainerMap, datanode2, container1, container3); + + registerReplicas(containerStateManager, container1, datanode1, datanode2); + registerReplicas(containerStateManager, container2, datanode1); + registerReplicas(containerStateManager, container3, datanode2); + + TestUtils.closeContainer(containerStateManager, container1); + + EventPublisher publisher = Mockito.mock(EventPublisher.class); + + //WHEN datanode1 is dead + handler.onMessage(datanode1, publisher); + + //THEN + //node2ContainerMap has not been changed + Assert.assertEquals(2, node2ContainerMap.size()); + + Set container1Replicas = + containerStateManager.getContainerStateMap() + .getContainerReplicas(new ContainerID(container1.getContainerID())); + Assert.assertEquals(1, container1Replicas.size()); + Assert.assertEquals(datanode2, container1Replicas.iterator().next()); + + Set container2Replicas = + containerStateManager.getContainerStateMap() + .getContainerReplicas(new ContainerID(container2.getContainerID())); + Assert.assertEquals(0, 
container2Replicas.size()); + + Set container3Replicas = + containerStateManager.getContainerStateMap() + .getContainerReplicas(new ContainerID(container3.getContainerID())); + Assert.assertEquals(1, container3Replicas.size()); + Assert.assertEquals(datanode2, container3Replicas.iterator().next()); + + ArgumentCaptor replicationRequestParameter = + ArgumentCaptor.forClass(ReplicationRequest.class); + + Mockito.verify(publisher) + .fireEvent(eq(SCMEvents.REPLICATE_CONTAINER), + replicationRequestParameter.capture()); + + Assert + .assertEquals(container1.getContainerID(), + replicationRequestParameter.getValue().getContainerId()); + Assert + .assertEquals(1, + replicationRequestParameter.getValue().getReplicationCount()); + Assert + .assertEquals(3, + replicationRequestParameter.getValue().getExpecReplicationCount()); + } + + private void registerReplicas(ContainerStateManager containerStateManager, + ContainerInfo container, DatanodeDetails... datanodes) { + containerStateManager.getContainerStateMap() + .addContainerReplica(new ContainerID(container.getContainerID()), + datanodes); + } + + private void registerReplicas(Node2ContainerMap node2ContainerMap, + DatanodeDetails datanode, + ContainerInfo... containers) + throws SCMException { + node2ContainerMap + .insertNewDatanode(datanode.getUuid(), + Arrays.stream(containers) + .map(container -> new ContainerID(container.getContainerID())) + .collect(Collectors.toSet())); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeManager.java index 0a4e33d444c..f438c8bc238 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeManager.java @@ -24,12 +24,14 @@ .StorageContainerDatanodeProtocolProtos.SCMCommandProto; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand; @@ -45,6 +47,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.mockito.Mockito; import java.io.File; import java.io.IOException; @@ -56,10 +59,9 @@ import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_DEADNODE_INTERVAL; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys @@ -68,6 +70,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState .HEALTHY; import 
static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.DATANODE_COMMAND; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.core.StringStartsWith.startsWith; import static org.junit.Assert.assertEquals; @@ -123,8 +126,15 @@ OzoneConfiguration getConf() { SCMNodeManager createNodeManager(OzoneConfiguration config) throws IOException { + EventQueue eventQueue = new EventQueue(); + eventQueue.addHandler(SCMEvents.NEW_NODE, + Mockito.mock(NewNodeHandler.class)); + eventQueue.addHandler(SCMEvents.STALE_NODE, + Mockito.mock(StaleNodeHandler.class)); + eventQueue.addHandler(SCMEvents.DEAD_NODE, + Mockito.mock(DeadNodeHandler.class)); SCMNodeManager nodeManager = new SCMNodeManager(config, - UUID.randomUUID().toString(), null); + UUID.randomUUID().toString(), null, eventQueue); assertFalse("Node manager should be in chill mode", nodeManager.isOutOfChillMode()); return nodeManager; @@ -145,8 +155,8 @@ public void testScmHeartbeat() throws IOException, try (SCMNodeManager nodeManager = createNodeManager(getConf())) { // Send some heartbeats from different nodes. for (int x = 0; x < nodeManager.getMinimumChillModeNodes(); x++) { - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails( - nodeManager); + DatanodeDetails datanodeDetails = TestUtils + .createRandomDatanodeAndRegister(nodeManager); nodeManager.processHeartbeat(datanodeDetails); } @@ -191,7 +201,8 @@ public void testScmNotEnoughHeartbeats() throws IOException, // Need 100 nodes to come out of chill mode, only one node is sending HB. nodeManager.setMinimumChillModeNodes(100); - nodeManager.processHeartbeat(TestUtils.getDatanodeDetails(nodeManager)); + nodeManager.processHeartbeat(TestUtils + .createRandomDatanodeAndRegister(nodeManager)); //TODO: wait for heartbeat to be processed Thread.sleep(4 * 1000); assertFalse("Not enough heartbeat, Node manager should have" + @@ -214,7 +225,7 @@ public void testScmSameNodeHeartbeats() throws IOException, try (SCMNodeManager nodeManager = createNodeManager(getConf())) { nodeManager.setMinimumChillModeNodes(3); DatanodeDetails datanodeDetails = TestUtils - .getDatanodeDetails(nodeManager); + .createRandomDatanodeAndRegister(nodeManager); // Send 10 heartbeat from same node, and assert we never leave chill mode. for (int x = 0; x < 10; x++) { @@ -244,7 +255,8 @@ public void testScmShutdown() throws IOException, InterruptedException, conf.getTimeDuration(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, TimeUnit.MILLISECONDS); SCMNodeManager nodeManager = createNodeManager(conf); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(nodeManager); + DatanodeDetails datanodeDetails = TestUtils + .createRandomDatanodeAndRegister(nodeManager); nodeManager.close(); // These should never be processed. 
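The dead-node test above checks not only that a replication event was fired but also what it carried, by capturing the argument passed to the mocked publisher. A compact, self-contained version of that capture-and-inspect pattern, using an invented AlertSink collaborator rather than the real SCM event types, might look like this:

    import static org.mockito.Matchers.eq;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.verify;

    import org.junit.Assert;
    import org.junit.Test;
    import org.mockito.ArgumentCaptor;

    public class CaptorPatternSketch {

      // Invented collaborator, illustration only.
      interface AlertSink {
        void send(String topic, String detail);
      }

      @Test
      public void capturesTheFiredArgument() {
        AlertSink sink = mock(AlertSink.class);

        // The code under test would normally make this call.
        sink.send("dead-node", "container 1 under-replicated");

        // Capture the actual second argument instead of matching it exactly,
        // then assert on its content.
        ArgumentCaptor<String> detail = ArgumentCaptor.forClass(String.class);
        verify(sink).send(eq("dead-node"), detail.capture());
        Assert.assertEquals("container 1 under-replicated", detail.getValue());
      }
    }
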
@@ -267,14 +279,14 @@ public void testScmHeartbeatAfterRestart() throws Exception { OzoneConfiguration conf = getConf(); conf.getTimeDuration(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, TimeUnit.MILLISECONDS); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(); - String dnId = datanodeDetails.getUuidString(); + DatanodeDetails datanodeDetails = TestUtils.randomDatanodeDetails(); + UUID dnId = datanodeDetails.getUuid(); String storagePath = testDir.getAbsolutePath() + "/" + dnId; - List reports = - TestUtils.createStorageReport(100, 10, 90, storagePath, null, dnId, 1); + StorageReportProto report = + TestUtils.createStorageReport(dnId, storagePath, 100, 10, 90, null); try (SCMNodeManager nodemanager = createNodeManager(conf)) { nodemanager.register(datanodeDetails, - TestUtils.createNodeReport(reports)); + TestUtils.createNodeReport(report)); List command = nodemanager.processHeartbeat(datanodeDetails); Assert.assertTrue(nodemanager.getAllNodes().contains(datanodeDetails)); Assert.assertTrue("On regular HB calls, SCM responses a " @@ -322,8 +334,8 @@ public void testScmHealthyNodeCount() throws IOException, try (SCMNodeManager nodeManager = createNodeManager(conf)) { for (int x = 0; x < count; x++) { - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails( - nodeManager); + DatanodeDetails datanodeDetails = TestUtils + .createRandomDatanodeAndRegister(nodeManager); nodeManager.processHeartbeat(datanodeDetails); } //TODO: wait for heartbeat to be processed @@ -349,7 +361,7 @@ public void testScmSanityOfUserConfig1() throws IOException, final int interval = 100; conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, interval, MILLISECONDS); - conf.setTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, 1, SECONDS); + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS); // This should be 5 times more than OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL // and 3 times more than OZONE_SCM_HEARTBEAT_INTERVAL @@ -378,7 +390,7 @@ public void testScmSanityOfUserConfig2() throws IOException, final int interval = 100; conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, interval, TimeUnit.MILLISECONDS); - conf.setTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, 1, TimeUnit.SECONDS); + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, TimeUnit.SECONDS); // This should be 5 times more than OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL // and 3 times more than OZONE_SCM_HEARTBEAT_INTERVAL @@ -403,7 +415,7 @@ public void testScmDetectStaleAndDeadNode() throws IOException, OzoneConfiguration conf = getConf(); conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, interval, MILLISECONDS); - conf.setTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, 1, SECONDS); + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS); conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS); conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS); @@ -412,7 +424,8 @@ public void testScmDetectStaleAndDeadNode() throws IOException, List nodeList = createNodeSet(nodeManager, nodeCount); - DatanodeDetails staleNode = TestUtils.getDatanodeDetails(nodeManager); + DatanodeDetails staleNode = TestUtils.createRandomDatanodeAndRegister( + nodeManager); // Heartbeat once nodeManager.processHeartbeat(staleNode); @@ -541,7 +554,7 @@ public void testScmClusterIsInExpectedState1() throws IOException, OzoneConfiguration conf = getConf(); conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, MILLISECONDS); - conf.setTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, 1, SECONDS); + 
conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS); conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS); conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS); @@ -551,11 +564,11 @@ public void testScmClusterIsInExpectedState1() throws IOException, */ try (SCMNodeManager nodeManager = createNodeManager(conf)) { DatanodeDetails healthyNode = - TestUtils.getDatanodeDetails(nodeManager); + TestUtils.createRandomDatanodeAndRegister(nodeManager); DatanodeDetails staleNode = - TestUtils.getDatanodeDetails(nodeManager); + TestUtils.createRandomDatanodeAndRegister(nodeManager); DatanodeDetails deadNode = - TestUtils.getDatanodeDetails(nodeManager); + TestUtils.createRandomDatanodeAndRegister(nodeManager); nodeManager.processHeartbeat(healthyNode); nodeManager.processHeartbeat(staleNode); nodeManager.processHeartbeat(deadNode); @@ -684,8 +697,9 @@ private void heartbeatNodeSet(SCMNodeManager manager, count) { List list = new LinkedList<>(); for (int x = 0; x < count; x++) { - list.add(TestUtils.getDatanodeDetails(nodeManager, UUID.randomUUID() - .toString())); + DatanodeDetails datanodeDetails = TestUtils + .createRandomDatanodeAndRegister(nodeManager); + list.add(datanodeDetails); } return list; } @@ -719,7 +733,7 @@ public void testScmClusterIsInExpectedState2() throws IOException, OzoneConfiguration conf = getConf(); conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, MILLISECONDS); - conf.setTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, 1, SECONDS); + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS); conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS); conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS); @@ -810,7 +824,7 @@ public void testScmCanHandleScale() throws IOException, OzoneConfiguration conf = getConf(); conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, MILLISECONDS); - conf.setTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, 1, + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS); conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3 * 1000, MILLISECONDS); @@ -867,8 +881,8 @@ public void testScmEnterAndExitChillMode() throws IOException, try (SCMNodeManager nodeManager = createNodeManager(conf)) { nodeManager.setMinimumChillModeNodes(10); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails( - nodeManager); + DatanodeDetails datanodeDetails = TestUtils + .createRandomDatanodeAndRegister(nodeManager); nodeManager.processHeartbeat(datanodeDetails); String status = nodeManager.getChillModeStatus(); Assert.assertThat(status, containsString("Still in chill " + @@ -895,7 +909,8 @@ public void testScmEnterAndExitChillMode() throws IOException, // Assert that node manager force enter cannot be overridden by nodes HBs. 
for (int x = 0; x < 20; x++) { - DatanodeDetails datanode = TestUtils.getDatanodeDetails(nodeManager); + DatanodeDetails datanode = TestUtils + .createRandomDatanodeAndRegister(nodeManager); nodeManager.processHeartbeat(datanode); } @@ -934,14 +949,13 @@ public void testScmStatsFromNodeReport() throws IOException, try (SCMNodeManager nodeManager = createNodeManager(conf)) { for (int x = 0; x < nodeCount; x++) { - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails( - nodeManager); - String dnId = datanodeDetails.getUuidString(); + DatanodeDetails datanodeDetails = TestUtils + .createRandomDatanodeAndRegister(nodeManager); + UUID dnId = datanodeDetails.getUuid(); long free = capacity - used; String storagePath = testDir.getAbsolutePath() + "/" + dnId; - List reports = TestUtils - .createStorageReport(capacity, used, free, storagePath, - null, dnId, 1); + StorageReportProto report = TestUtils + .createStorageReport(dnId, storagePath, capacity, used, free, null); nodeManager.processHeartbeat(datanodeDetails); } //TODO: wait for heartbeat to be processed @@ -975,23 +989,23 @@ public void testScmNodeReportUpdate() throws IOException, conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, interval, MILLISECONDS); - conf.setTimeDuration(OZONE_SCM_HEARTBEAT_INTERVAL, 1, SECONDS); + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 1, SECONDS); conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, SECONDS); conf.setTimeDuration(OZONE_SCM_DEADNODE_INTERVAL, 6, SECONDS); try (SCMNodeManager nodeManager = createNodeManager(conf)) { DatanodeDetails datanodeDetails = - TestUtils.getDatanodeDetails(nodeManager); + TestUtils.createRandomDatanodeAndRegister(nodeManager); final long capacity = 2000; final long usedPerHeartbeat = 100; - String dnId = datanodeDetails.getUuidString(); + UUID dnId = datanodeDetails.getUuid(); for (int x = 0; x < heartbeatCount; x++) { long scmUsed = x * usedPerHeartbeat; long remaining = capacity - scmUsed; String storagePath = testDir.getAbsolutePath() + "/" + dnId; - List reports = TestUtils - .createStorageReport(capacity, scmUsed, remaining, storagePath, - null, dnId, 1); + StorageReportProto report = TestUtils + .createStorageReport(dnId, storagePath, capacity, scmUsed, + remaining, null); nodeManager.processHeartbeat(datanodeDetails); Thread.sleep(100); @@ -1068,11 +1082,6 @@ public void testScmNodeReportUpdate() throws IOException, foundRemaining = nodeManager.getStats().getRemaining().get(); assertEquals(0, foundRemaining); - // Send a new report to bring the dead node back to healthy - String storagePath = testDir.getAbsolutePath() + "/" + dnId; - List reports = TestUtils - .createStorageReport(capacity, expectedScmUsed, expectedRemaining, - storagePath, null, dnId, 1); nodeManager.processHeartbeat(datanodeDetails); // Wait up to 5 seconds so that the dead node becomes healthy @@ -1102,22 +1111,22 @@ public void testHandlingSCMCommandEvent() { conf.getTimeDuration(ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 100, TimeUnit.MILLISECONDS); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(); - String dnId = datanodeDetails.getUuidString(); + DatanodeDetails datanodeDetails = TestUtils.randomDatanodeDetails(); + UUID dnId = datanodeDetails.getUuid(); String storagePath = testDir.getAbsolutePath() + "/" + dnId; - List reports = - TestUtils.createStorageReport(100, 10, 90, - storagePath, null, dnId, 1); + StorageReportProto report = + TestUtils.createStorageReport(dnId, storagePath, 100, 10, 90, null); EventQueue eq = new EventQueue(); try 
(SCMNodeManager nodemanager = createNodeManager(conf)) { - eq.addHandler(SCMNodeManager.DATANODE_COMMAND, nodemanager); + eq.addHandler(DATANODE_COMMAND, nodemanager); nodemanager - .register(datanodeDetails, TestUtils.createNodeReport(reports)); - eq.fireEvent(SCMNodeManager.DATANODE_COMMAND, - new CommandForDatanode(datanodeDetails.getUuid(), - new CloseContainerCommand(1L, ReplicationType.STAND_ALONE))); + .register(datanodeDetails, TestUtils.createNodeReport(report)); + eq.fireEvent(DATANODE_COMMAND, + new CommandForDatanode<>(datanodeDetails.getUuid(), + new CloseContainerCommand(1L, ReplicationType.STAND_ALONE, + PipelineID.randomId()))); eq.processAll(1000L); List command = diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeReportHandler.java new file mode 100644 index 00000000000..e50b8234bcc --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeReportHandler.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdds.scm.node; + +import java.io.IOException; +import java.util.UUID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.NodeReportFromDatanode; +import org.apache.hadoop.hdds.server.events.Event; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestNodeReportHandler implements EventPublisher { + + private static final Logger LOG = LoggerFactory + .getLogger(TestNodeReportHandler.class); + private NodeReportHandler nodeReportHandler; + private SCMNodeManager nodeManager; + private String storagePath = GenericTestUtils.getRandomizedTempPath() + .concat("/" + UUID.randomUUID().toString()); + + @Before + public void resetEventCollector() throws IOException { + OzoneConfiguration conf = new OzoneConfiguration(); + nodeManager = new SCMNodeManager(conf, "cluster1", null, new EventQueue()); + nodeReportHandler = new NodeReportHandler(nodeManager); + } + + @Test + public void testNodeReport() throws IOException { + DatanodeDetails dn = TestUtils.randomDatanodeDetails(); + StorageReportProto storageOne = TestUtils + .createStorageReport(dn.getUuid(), storagePath, 100, 10, 90, null); + + nodeReportHandler.onMessage( + getNodeReport(dn, storageOne), this); + SCMNodeMetric nodeMetric = nodeManager.getNodeStat(dn); + + Assert.assertTrue(nodeMetric.get().getCapacity().get() == 100); + Assert.assertTrue(nodeMetric.get().getRemaining().get() == 90); + Assert.assertTrue(nodeMetric.get().getScmUsed().get() == 10); + + StorageReportProto storageTwo = TestUtils + .createStorageReport(dn.getUuid(), storagePath, 100, 10, 90, null); + nodeReportHandler.onMessage( + getNodeReport(dn, storageOne, storageTwo), this); + nodeMetric = nodeManager.getNodeStat(dn); + + Assert.assertTrue(nodeMetric.get().getCapacity().get() == 200); + Assert.assertTrue(nodeMetric.get().getRemaining().get() == 180); + Assert.assertTrue(nodeMetric.get().getScmUsed().get() == 20); + + } + + private NodeReportFromDatanode getNodeReport(DatanodeDetails dn, + StorageReportProto... 
reports) { + NodeReportProto nodeReportProto = TestUtils.createNodeReport(reports); + return new NodeReportFromDatanode(dn, nodeReportProto); + } + + @Override + public > void fireEvent( + EVENT_TYPE event, PAYLOAD payload) { + LOG.info("Event is published: {}", payload); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeStorageStatMap.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeStorageStatMap.java index 072dee7c7fe..623fc16a924 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeStorageStatMap.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeStorageStatMap.java @@ -139,18 +139,17 @@ public void testProcessNodeReportCheckOneNode() throws IOException { SCMNodeStorageStatMap map = new SCMNodeStorageStatMap(conf); map.insertNewDatanode(key, reportSet); Assert.assertTrue(map.isKnownDatanode(key)); - String storageId = UUID.randomUUID().toString(); + UUID storageId = UUID.randomUUID(); String path = GenericTestUtils.getRandomizedTempPath().concat("/" + storageId); StorageLocationReport report = reportSet.iterator().next(); long reportCapacity = report.getCapacity(); long reportScmUsed = report.getScmUsed(); long reportRemaining = report.getRemaining(); - List reports = TestUtils - .createStorageReport(reportCapacity, reportScmUsed, reportRemaining, - path, null, storageId, 1); + StorageReportProto storageReport = TestUtils.createStorageReport(storageId, + path, reportCapacity, reportScmUsed, reportRemaining, null); StorageReportResult result = - map.processNodeReport(key, TestUtils.createNodeReport(reports)); + map.processNodeReport(key, TestUtils.createNodeReport(storageReport)); Assert.assertEquals(result.getStatus(), SCMNodeStorageStatMap.ReportStatus.ALL_IS_WELL); StorageContainerDatanodeProtocolProtos.NodeReportProto.Builder nrb = @@ -162,8 +161,8 @@ public void testProcessNodeReportCheckOneNode() throws IOException { SCMNodeStorageStatMap.ReportStatus.ALL_IS_WELL); reportList.add(TestUtils - .createStorageReport(reportCapacity, reportCapacity, 0, path, null, - UUID.randomUUID().toString(), 1).get(0)); + .createStorageReport(UUID.randomUUID(), path, reportCapacity, + reportCapacity, 0, null)); result = map.processNodeReport(key, TestUtils.createNodeReport(reportList)); Assert.assertEquals(result.getStatus(), SCMNodeStorageStatMap.ReportStatus.STORAGE_OUT_OF_SPACE); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/states/Node2ContainerMapTest.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/states/TestNode2ContainerMap.java similarity index 91% rename from hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/states/Node2ContainerMapTest.java rename to hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/states/TestNode2ContainerMap.java index 79f1b40db03..14a74e9b16c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/states/Node2ContainerMapTest.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/states/TestNode2ContainerMap.java @@ -38,7 +38,7 @@ /** * Test classes for Node2ContainerMap. 
*/ -public class Node2ContainerMapTest { +public class TestNode2ContainerMap { private final static int DATANODE_COUNT = 300; private final static int CONTAINER_COUNT = 1000; private final Map> testData = new @@ -119,6 +119,26 @@ public void testProcessReportCheckOneNode() throws SCMException { Node2ContainerMap.ReportStatus.ALL_IS_WELL); } + @Test + public void testUpdateDatanodeMap() throws SCMException { + UUID datanodeId = getFirstKey(); + Set values = testData.get(datanodeId); + Node2ContainerMap map = new Node2ContainerMap(); + map.insertNewDatanode(datanodeId, values); + Assert.assertTrue(map.isKnownDatanode(datanodeId)); + Assert.assertEquals(CONTAINER_COUNT, map.getContainers(datanodeId).size()); + + //remove one container + values.remove(values.iterator().next()); + Assert.assertEquals(CONTAINER_COUNT - 1, values.size()); + Assert.assertEquals(CONTAINER_COUNT, map.getContainers(datanodeId).size()); + + map.setContainersForDatanode(datanodeId, values); + + Assert.assertEquals(values.size(), map.getContainers(datanodeId).size()); + Assert.assertEquals(values, map.getContainers(datanodeId)); + } + @Test public void testProcessReportInsertAll() throws SCMException { Node2ContainerMap map = new Node2ContainerMap(); @@ -182,8 +202,7 @@ public void testProcessReportDetectNewContainers() throws SCMException { map.insertNewDatanode(key, values); final int newCount = 100; - // This is not a mistake, the treeset seems to be reverse sorted. - ContainerID last = values.pollFirst(); + ContainerID last = values.last(); TreeSet addedContainers = new TreeSet<>(); for (int x = 1; x <= newCount; x++) { long cTemp = last.getId() + x; @@ -224,7 +243,7 @@ public void testProcessReportDetectMissingContainers() throws SCMException { final int removeCount = 100; Random r = new Random(); - ContainerID first = values.pollLast(); + ContainerID first = values.first(); TreeSet removedContainers = new TreeSet<>(); // Pick a random container to remove it is ok to collide no issues. @@ -270,7 +289,7 @@ public void testProcessReportDetectNewAndMissingContainers() throws final int removeCount = 100; Random r = new Random(); - ContainerID first = values.pollLast(); + ContainerID first = values.first(); TreeSet removedContainers = new TreeSet<>(); // Pick a random container to remove it is ok to collide no issues. diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java new file mode 100644 index 00000000000..486c604cd06 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdds.scm.server; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.HddsTestUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; + +/** Test class for SCMChillModeManager. + */ +public class TestSCMChillModeManager { + + private static EventQueue queue; + private SCMChillModeManager scmChillModeManager; + private static Configuration config; + private List containers; + + @Rule + public Timeout timeout = new Timeout(1000 * 20); + + @BeforeClass + public static void setUp() { + queue = new EventQueue(); + config = new OzoneConfiguration(); + } + + @Test + public void testChillModeState() throws Exception { + // Test 1: test for 0 containers + testChillMode(0); + + // Test 2: test for 20 containers + testChillMode(20); + } + + @Test + public void testChillModeStateWithNullContainers() { + new SCMChillModeManager(config, null, queue); + } + + private void testChillMode(int numContainers) throws Exception { + containers = new ArrayList<>(); + containers.addAll(HddsTestUtils.getContainerInfo(numContainers)); + scmChillModeManager = new SCMChillModeManager(config, containers, queue); + queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + scmChillModeManager); + assertTrue(scmChillModeManager.getInChillMode()); + queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + HddsTestUtils.createNodeRegistrationContainerReport(containers)); + GenericTestUtils.waitFor(() -> { + return !scmChillModeManager.getInChillMode(); + }, 100, 1000 * 5); + } + + @Test + public void testChillModeExitRule() throws Exception { + containers = new ArrayList<>(); + containers.addAll(HddsTestUtils.getContainerInfo(25 * 4)); + scmChillModeManager = new SCMChillModeManager(config, containers, queue); + queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + scmChillModeManager); + assertTrue(scmChillModeManager.getInChillMode()); + + testContainerThreshold(containers.subList(0, 25), 0.25); + assertTrue(scmChillModeManager.getInChillMode()); + testContainerThreshold(containers.subList(25, 50), 0.50); + assertTrue(scmChillModeManager.getInChillMode()); + testContainerThreshold(containers.subList(50, 75), 0.75); + assertTrue(scmChillModeManager.getInChillMode()); + testContainerThreshold(containers.subList(75, 100), 1.0); + + GenericTestUtils.waitFor(() -> { + return !scmChillModeManager.getInChillMode(); + }, 100, 1000 * 5); + } + + @Test + public void testDisableChillMode() { + OzoneConfiguration conf = new OzoneConfiguration(config); + conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false); + scmChillModeManager = new SCMChillModeManager(conf, containers, queue); + assertFalse(scmChillModeManager.getInChillMode()); + } + + private void testContainerThreshold(List dnContainers, + double expectedThreshold) + throws Exception { + queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + HddsTestUtils.createNodeRegistrationContainerReport(dnContainers)); + GenericTestUtils.waitFor(() -> { + double 
threshold = scmChillModeManager.getCurrentContainerThreshold(); + return threshold == expectedThreshold; + }, 100, 2000 * 9); + } + +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java new file mode 100644 index 00000000000..4b2001848b1 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMClientProtocolServer.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.server; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test class for @{@link SCMClientProtocolServer}. 
+ * */ +public class TestSCMClientProtocolServer { + private SCMClientProtocolServer scmClientProtocolServer; + private OzoneConfiguration config; + private EventQueue eventQueue; + + @Before + public void setUp() throws Exception { + config = new OzoneConfiguration(); + eventQueue = new EventQueue(); + scmClientProtocolServer = new SCMClientProtocolServer(config, null); + eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, scmClientProtocolServer); + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void testAllocateContainerFailureInChillMode() throws Exception { + LambdaTestUtils.intercept(SCMException.class, + "hillModePrecheck failed for allocateContainer", () -> { + scmClientProtocolServer.allocateContainer( + ReplicationType.STAND_ALONE, ReplicationFactor.ONE, ""); + }); + } +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMDatanodeHeartbeatDispatcher.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMDatanodeHeartbeatDispatcher.java index 326a34b7920..6a0b9091796 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMDatanodeHeartbeatDispatcher.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMDatanodeHeartbeatDispatcher.java @@ -20,9 +20,11 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto; +import org.apache.hadoop.hdds.scm.server. + SCMDatanodeHeartbeatDispatcher.CommandStatusReportFromDatanode; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol.proto @@ -30,6 +32,7 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher .ContainerReportFromDatanode; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher @@ -39,6 +42,11 @@ import org.junit.Assert; import org.junit.Test; +import org.mockito.Mockito; + +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CONTAINER_REPORT; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.NODE_REPORT; +import static org.apache.hadoop.hdds.scm.events.SCMEvents.CMD_STATUS_REPORT; /** * This class tests the behavior of SCMDatanodeHeartbeatDispatcher. 
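The dispatcher hunks below exercise a reworked SCMDatanodeHeartbeatDispatcher that takes a NodeManager and fans each report carried by a heartbeat out as a typed SCM event. A condensed sketch of that flow (illustrative only, not part of this patch; it reuses the constructor, protobuf builder methods, and imports visible in this file's diff):

    SCMDatanodeHeartbeatDispatcher dispatcher =
        new SCMDatanodeHeartbeatDispatcher(Mockito.mock(NodeManager.class),
            Mockito.mock(EventPublisher.class));

    // A heartbeat carrying both a container report and a command status report
    // is expected to surface as separate CONTAINER_REPORT and CMD_STATUS_REPORT
    // events on the publisher (and NODE_REPORT when a node report is attached).
    SCMHeartbeatRequestProto heartbeat = SCMHeartbeatRequestProto.newBuilder()
        .setDatanodeDetails(TestUtils.randomDatanodeDetails().getProtoBufMessage())
        .setContainerReport(ContainerReportsProto.getDefaultInstance())
        .setCommandStatusReport(CommandStatusReportsProto.getDefaultInstance())
        .build();
    dispatcher.dispatch(heartbeat);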
@@ -49,26 +57,25 @@ @Test public void testNodeReportDispatcher() throws IOException { - Configuration conf = new OzoneConfiguration(); - AtomicInteger eventReceived = new AtomicInteger(); NodeReportProto nodeReport = NodeReportProto.getDefaultInstance(); SCMDatanodeHeartbeatDispatcher dispatcher = - new SCMDatanodeHeartbeatDispatcher(new EventPublisher() { + new SCMDatanodeHeartbeatDispatcher(Mockito.mock(NodeManager.class), + new EventPublisher() { @Override public > void fireEvent( EVENT_TYPE event, PAYLOAD payload) { - Assert.assertEquals(event, - SCMDatanodeHeartbeatDispatcher.NODE_REPORT); + Assert.assertEquals(event, NODE_REPORT); eventReceived.incrementAndGet(); - Assert.assertEquals(nodeReport, ((NodeReportFromDatanode)payload).getReport()); + Assert.assertEquals(nodeReport, + ((NodeReportFromDatanode)payload).getReport()); } }); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(); + DatanodeDetails datanodeDetails = TestUtils.randomDatanodeDetails(); SCMHeartbeatRequestProto heartbeat = SCMHeartbeatRequestProto.newBuilder() @@ -84,34 +91,46 @@ public void testNodeReportDispatcher() throws IOException { @Test public void testContainerReportDispatcher() throws IOException { - Configuration conf = new OzoneConfiguration(); AtomicInteger eventReceived = new AtomicInteger(); ContainerReportsProto containerReport = ContainerReportsProto.getDefaultInstance(); + CommandStatusReportsProto commandStatusReport = + CommandStatusReportsProto.getDefaultInstance(); SCMDatanodeHeartbeatDispatcher dispatcher = - new SCMDatanodeHeartbeatDispatcher(new EventPublisher() { + new SCMDatanodeHeartbeatDispatcher(Mockito.mock(NodeManager.class), + new EventPublisher() { @Override public > void fireEvent( EVENT_TYPE event, PAYLOAD payload) { - Assert.assertEquals(event, - SCMDatanodeHeartbeatDispatcher.CONTAINER_REPORT); - Assert.assertEquals(containerReport, ((ContainerReportFromDatanode)payload).getReport()); + Assert.assertTrue( + event.equals(CONTAINER_REPORT) + || event.equals(CMD_STATUS_REPORT)); + + if (payload instanceof ContainerReportFromDatanode) { + Assert.assertEquals(containerReport, + ((ContainerReportFromDatanode) payload).getReport()); + } + if (payload instanceof CommandStatusReportFromDatanode) { + Assert.assertEquals(commandStatusReport, + ((CommandStatusReportFromDatanode) payload).getReport()); + } eventReceived.incrementAndGet(); } }); - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(); + DatanodeDetails datanodeDetails = TestUtils.randomDatanodeDetails(); SCMHeartbeatRequestProto heartbeat = SCMHeartbeatRequestProto.newBuilder() .setDatanodeDetails(datanodeDetails.getProtoBufMessage()) .setContainerReport(containerReport) + .setCommandStatusReport(commandStatusReport) .build(); dispatcher.dispatch(heartbeat); - Assert.assertEquals(1, eventReceived.get()); + Assert.assertEquals(2, eventReceived.get()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/TestCloseContainerWatcher.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/TestCloseContainerWatcher.java new file mode 100644 index 00000000000..56c3830c9b8 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/TestCloseContainerWatcher.java @@ -0,0 +1,287 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. 
The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.container; + +import org.apache.hadoop.hdds.HddsIdFactory; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.CommandStatus; +import org.apache.hadoop.hdds.protocol.proto + .StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; +import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler + .CloseContainerStatus; +import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler + .CloseContainerRetryableReq; +import org.apache.hadoop.hdds.scm.container.CloseContainerWatcher; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerMapping; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.hdds.server.events.EventWatcher; +import org.apache.hadoop.ozone.lease.LeaseManager; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.event.Level; + +import java.io.IOException; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +/** + * Test class for {@link CloseContainerWatcher}. + * */ +public class TestCloseContainerWatcher implements EventHandler { + + private static final Logger LOG = LoggerFactory + .getLogger(TestCloseContainerWatcher.class); + private static EventWatcher + watcher; + private static LeaseManager leaseManager; + private static ContainerMapping containerMapping = Mockito + .mock(ContainerMapping.class); + private static EventQueue queue; + @Rule + public Timeout timeout = new Timeout(1000*15); + + @After + public void stop() { + leaseManager.shutdown(); + queue.close(); + } + + /* + * This test will test watcher for Failure status event. 
+ * */ + @Test + public void testWatcherForFailureStatusEvent() throws + InterruptedException, IOException { + setupWatcher(90000L); + long id1 = HddsIdFactory.getLongId(); + long id2 = HddsIdFactory.getLongId(); + queue.addHandler(SCMEvents.CLOSE_CONTAINER, this); + setupMock(id1, id2, true); + GenericTestUtils.LogCapturer testLogger = GenericTestUtils.LogCapturer + .captureLogs(LOG); + GenericTestUtils.LogCapturer watcherLogger = GenericTestUtils.LogCapturer + .captureLogs(CloseContainerWatcher.LOG); + GenericTestUtils.setLogLevel(CloseContainerWatcher.LOG, Level.TRACE); + testLogger.clearOutput(); + watcherLogger.clearOutput(); + + CommandStatus cmdStatus1 = CommandStatus.newBuilder() + .setCmdId(id1) + .setStatus(CommandStatus.Status.FAILED) + .setType(Type.closeContainerCommand).build(); + CommandStatus cmdStatus2 = CommandStatus.newBuilder() + .setCmdId(id2) + .setStatus(CommandStatus.Status.FAILED) + .setType(Type.closeContainerCommand).build(); + + // File events to watcher + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id1))); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id2))); + Thread.sleep(10L); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_STATUS, new + CloseContainerStatus(cmdStatus1)); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_STATUS, new + CloseContainerStatus(cmdStatus2)); + + Thread.sleep(1000*4L); + // validation + assertTrue(watcherLogger.getOutput().contains("CloseContainerCommand for " + + "containerId: " + id1 + " executed")); + assertTrue(watcherLogger.getOutput().contains("CloseContainerCommand for " + + "containerId: " + id2 + " executed")); + assertTrue( + testLogger.getOutput().contains("Handling closeContainerEvent " + + "for containerId: id=" + id1)); + assertTrue(testLogger.getOutput().contains("Handling closeContainerEvent " + + "for containerId: id=" + id2)); + + } + + @Test + public void testWatcherForPendingStatusEvent() throws + InterruptedException, IOException { + setupWatcher(90000L); + long id1 = HddsIdFactory.getLongId(); + long id2 = HddsIdFactory.getLongId(); + queue.addHandler(SCMEvents.CLOSE_CONTAINER, this); + setupMock(id1, id2, true); + GenericTestUtils.LogCapturer testLogger = GenericTestUtils.LogCapturer + .captureLogs(LOG); + GenericTestUtils.LogCapturer watcherLogger = GenericTestUtils.LogCapturer + .captureLogs(CloseContainerWatcher.LOG); + GenericTestUtils.setLogLevel(CloseContainerWatcher.LOG, Level.TRACE); + testLogger.clearOutput(); + watcherLogger.clearOutput(); + + CommandStatus cmdStatus1 = CommandStatus.newBuilder() + .setCmdId(id1) + .setStatus(CommandStatus.Status.PENDING) + .setType(Type.closeContainerCommand).build(); + CommandStatus cmdStatus2 = CommandStatus.newBuilder() + .setCmdId(id2) + .setStatus(CommandStatus.Status.PENDING) + .setType(Type.closeContainerCommand).build(); + + // File events to watcher + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id1))); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id2))); + Thread.sleep(10L); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_STATUS, new + CloseContainerStatus(cmdStatus1)); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_STATUS, new + CloseContainerStatus(cmdStatus2)); + + Thread.sleep(1000*2L); + // validation + assertFalse(watcherLogger.getOutput().contains("CloseContainerCommand " + + "for containerId: " + id1 + " 
executed")); + assertFalse(watcherLogger.getOutput().contains("CloseContainerCommand " + + "for containerId: " + id2 + " executed")); + assertFalse(testLogger.getOutput().contains("Handling " + + "closeContainerEvent for containerId: id=" + id1)); + assertFalse(testLogger.getOutput().contains("Handling " + + "closeContainerEvent for containerId: id=" + id2)); + + } + + @Test + public void testWatcherForExecutedStatusEvent() + throws IOException, InterruptedException { + setupWatcher(90000L); + long id1 = HddsIdFactory.getLongId(); + long id2 = HddsIdFactory.getLongId(); + queue.addHandler(SCMEvents.CLOSE_CONTAINER, this); + setupMock(id1, id2, true); + GenericTestUtils.LogCapturer testLogger = GenericTestUtils.LogCapturer + .captureLogs(LOG); + GenericTestUtils.LogCapturer watcherLogger = GenericTestUtils.LogCapturer + .captureLogs(CloseContainerWatcher.LOG); + GenericTestUtils.setLogLevel(CloseContainerWatcher.LOG, Level.TRACE); + testLogger.clearOutput(); + watcherLogger.clearOutput(); + + // When both of the pending event are executed successfully by DataNode + CommandStatus cmdStatus1 = CommandStatus.newBuilder() + .setCmdId(id1) + .setStatus(CommandStatus.Status.EXECUTED) + .setType(Type.closeContainerCommand).build(); + CommandStatus cmdStatus2 = CommandStatus.newBuilder() + .setCmdId(id2) + .setStatus(CommandStatus.Status.EXECUTED) + .setType(Type.closeContainerCommand).build(); + // File events to watcher + testLogger.clearOutput(); + watcherLogger.clearOutput(); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id1))); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id2))); + Thread.sleep(10L); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_STATUS, + new CloseContainerStatus(cmdStatus1)); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_STATUS, + new CloseContainerStatus(cmdStatus2)); + + Thread.sleep(1000*3L); + // validation + assertTrue(watcherLogger.getOutput().contains("CloseContainerCommand " + + "for containerId: " + id1 + " executed")); + assertTrue(watcherLogger.getOutput().contains("CloseContainerCommand " + + "for containerId: " + id2 + " executed")); + assertFalse(testLogger.getOutput().contains("Handling " + + "closeContainerEvent for containerId: id=" + id1)); + assertFalse(testLogger.getOutput().contains("Handling " + + "closeContainerEvent for containerId: id=" + id2)); + } + + private void setupWatcher(long time) { + leaseManager = new LeaseManager<>("TestCloseContainerWatcher#LeaseManager", + time); + leaseManager.start(); + watcher = new CloseContainerWatcher(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + SCMEvents.CLOSE_CONTAINER_STATUS, leaseManager, containerMapping); + queue = new EventQueue(); + watcher.start(queue); + } + + /* + * This test will fire two retryable closeContainer events. Both will timeout. + * First event container will be open at time of handling so it should be + * sent back to appropriate handler. Second event container will be closed, + * so it should not be retried. 
+ * */ + @Test + public void testWatcherRetryableTimeoutHandling() throws InterruptedException, + IOException { + + long id1 = HddsIdFactory.getLongId(); + long id2 = HddsIdFactory.getLongId(); + setupWatcher(1000L); + queue.addHandler(SCMEvents.CLOSE_CONTAINER, this); + setupMock(id1, id2, false); + GenericTestUtils.LogCapturer testLogger = GenericTestUtils.LogCapturer + .captureLogs(LOG); + testLogger.clearOutput(); + + // File events to watcher + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id1))); + queue.fireEvent(SCMEvents.CLOSE_CONTAINER_RETRYABLE_REQ, + new CloseContainerRetryableReq(ContainerID.valueof(id2))); + + Thread.sleep(1000L + 10); + + // validation + assertTrue(testLogger.getOutput().contains("Handling " + + "closeContainerEvent for containerId: id=" + id1)); + assertFalse(testLogger.getOutput().contains("Handling " + + "closeContainerEvent for containerId: id=" + id2)); + } + + + private void setupMock(long id1, long id2, boolean isOpen) + throws IOException { + ContainerInfo containerInfo = Mockito.mock(ContainerInfo.class); + ContainerInfo containerInfo2 = Mockito.mock(ContainerInfo.class); + when(containerMapping.getContainer(id1)).thenReturn(containerInfo); + when(containerMapping.getContainer(id2)).thenReturn(containerInfo2); + when(containerInfo.isContainerOpen()).thenReturn(true); + when(containerInfo2.isContainerOpen()).thenReturn(isOpen); + } + + @Override + public void onMessage(ContainerID containerID, EventPublisher publisher) { + LOG.info("Handling closeContainerEvent for containerId: {}", containerID); + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java index 34779daf947..a513f6c54e4 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java @@ -16,17 +16,31 @@ */ package org.apache.hadoop.ozone.container.common; -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.commons.lang3.RandomUtils; +import java.util.List; +import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.CloseContainerCommandProto; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.CommandStatus.Status; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.DeleteBlocksCommandProto; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.ReplicateContainerCommandProto; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.SCMCommandProto; +import org.apache.hadoop.hdds.protocol.proto. 
+ StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type; import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.scm.VersionInfo; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMHeartbeatRequestProto; import org.apache.hadoop.hdds.protocol.proto @@ -37,10 +51,9 @@ .StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ozone.OzoneConfigKeys; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.container.common.helpers.ContainerReport; import org.apache.hadoop.ozone.container.common.statemachine .DatanodeStateMachine; import org.apache.hadoop.ozone.container.common.statemachine @@ -52,7 +65,10 @@ .RegisterEndpointTask; import org.apache.hadoop.ozone.container.common.states.endpoint .VersionEndpointTask; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; +import org.apache.hadoop.ozone.protocol.commands.CommandStatus; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; import org.apache.hadoop.util.Time; import org.junit.AfterClass; @@ -63,16 +79,17 @@ import java.io.File; import java.net.InetSocketAddress; -import java.util.List; import java.util.UUID; -import static org.apache.hadoop.hdds.scm.TestUtils.getDatanodeDetails; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils .createEndpoint; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.mockito.Mockito.when; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; /** * Tests the endpoints. @@ -82,6 +99,7 @@ private static RPC.Server scmServer; private static ScmTestMock scmServerImpl; private static File testDir; + private static Configuration config; @AfterClass public static void tearDown() throws Exception { @@ -98,6 +116,12 @@ public static void setUp() throws Exception { scmServer = SCMTestUtils.startScmRpcServer(SCMTestUtils.getConf(), scmServerImpl, serverAddress, 10); testDir = PathUtils.getTestDir(TestEndPoint.class); + config = SCMTestUtils.getConf(); + config.set(DFS_DATANODE_DATA_DIR_KEY, testDir.getAbsolutePath()); + config.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); + config + .setBoolean(OzoneConfigKeys.DFS_CONTAINER_RATIS_IPC_RANDOM_PORT, true); + config.set(HddsConfigKeys.HDDS_COMMAND_STATUS_REPORT_INTERVAL, "1s"); } @Test @@ -125,12 +149,14 @@ public void testGetVersion() throws Exception { * how the state machine would make the call. 
*/ public void testGetVersionTask() throws Exception { - Configuration conf = SCMTestUtils.getConf(); + OzoneConfiguration conf = SCMTestUtils.getConf(); try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, serverAddress, 1000)) { + OzoneContainer ozoneContainer = new OzoneContainer( + TestUtils.randomDatanodeDetails(), conf, null); rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); VersionEndpointTask versionTask = new VersionEndpointTask(rpcEndPoint, - conf); + conf, ozoneContainer); EndpointStateMachine.EndPointStates newState = versionTask.call(); // if version call worked the endpoint should automatically move to the @@ -143,20 +169,69 @@ public void testGetVersionTask() throws Exception { } } + @Test + public void testCheckVersionResponse() throws Exception { + OzoneConfiguration conf = SCMTestUtils.getConf(); + try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, + serverAddress, 1000)) { + GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer + .captureLogs(VersionEndpointTask.LOG); + OzoneContainer ozoneContainer = new OzoneContainer(TestUtils + .randomDatanodeDetails(), conf, null); + rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); + VersionEndpointTask versionTask = new VersionEndpointTask(rpcEndPoint, + conf, ozoneContainer); + EndpointStateMachine.EndPointStates newState = versionTask.call(); + + // if version call worked the endpoint should automatically move to the + // next state. + Assert.assertEquals(EndpointStateMachine.EndPointStates.REGISTER, + newState); + + // Now rpcEndpoint should remember the version it got from SCM + Assert.assertNotNull(rpcEndPoint.getVersion()); + + // Now change server scmId, so datanode scmId will be + // different from SCM server response scmId + String newScmId = UUID.randomUUID().toString(); + scmServerImpl.setScmId(newScmId); + newState = versionTask.call(); + Assert.assertEquals(EndpointStateMachine.EndPointStates.SHUTDOWN, + newState); + List volumesList = ozoneContainer.getVolumeSet() + .getFailedVolumesList(); + Assert.assertTrue(volumesList.size() == 1); + File expectedScmDir = new File(volumesList.get(0).getHddsRootDir(), + scmServerImpl.getScmId()); + Assert.assertTrue(logCapturer.getOutput().contains("expected scm " + + "directory " + expectedScmDir.getAbsolutePath() + " does not " + + "exist")); + Assert.assertTrue(ozoneContainer.getVolumeSet().getVolumesList().size() + == 0); + Assert.assertTrue(ozoneContainer.getVolumeSet().getFailedVolumesList() + .size() == 1); + + } + } + + + @Test /** * This test makes a call to end point where there is no SCM server. We * expect that versionTask should be able to handle it. 
*/ public void testGetVersionToInvalidEndpoint() throws Exception { - Configuration conf = SCMTestUtils.getConf(); + OzoneConfiguration conf = SCMTestUtils.getConf(); InetSocketAddress nonExistentServerAddress = SCMTestUtils .getReuseableAddress(); try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, nonExistentServerAddress, 1000)) { rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); + OzoneContainer ozoneContainer = new OzoneContainer( + TestUtils.randomDatanodeDetails(), conf, null); VersionEndpointTask versionTask = new VersionEndpointTask(rpcEndPoint, - conf); + conf, ozoneContainer); EndpointStateMachine.EndPointStates newState = versionTask.call(); // This version call did NOT work, so endpoint should remain in the same @@ -175,13 +250,15 @@ public void testGetVersionToInvalidEndpoint() throws Exception { public void testGetVersionAssertRpcTimeOut() throws Exception { final long rpcTimeout = 1000; final long tolerance = 100; - Configuration conf = SCMTestUtils.getConf(); + OzoneConfiguration conf = SCMTestUtils.getConf(); try (EndpointStateMachine rpcEndPoint = createEndpoint(conf, serverAddress, (int) rpcTimeout)) { rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); + OzoneContainer ozoneContainer = new OzoneContainer( + TestUtils.randomDatanodeDetails(), conf, null); VersionEndpointTask versionTask = new VersionEndpointTask(rpcEndPoint, - conf); + conf, ozoneContainer); scmServerImpl.setRpcResponseDelay(1500); long start = Time.monotonicNow(); @@ -196,14 +273,14 @@ public void testGetVersionAssertRpcTimeOut() throws Exception { @Test public void testRegister() throws Exception { - DatanodeDetails nodeToRegister = getDatanodeDetails(); + DatanodeDetails nodeToRegister = TestUtils.randomDatanodeDetails(); try (EndpointStateMachine rpcEndPoint = createEndpoint( SCMTestUtils.getConf(), serverAddress, 1000)) { SCMRegisteredResponseProto responseProto = rpcEndPoint.getEndPoint() .register(nodeToRegister.getProtoBufMessage(), TestUtils .createNodeReport( - getStorageReports(nodeToRegister.getUuidString())), - createContainerReport(10, nodeToRegister)); + getStorageReports(nodeToRegister.getUuid())), + TestUtils.getRandomContainerReports(10)); Assert.assertNotNull(responseProto); Assert.assertEquals(nodeToRegister.getUuidString(), responseProto.getDatanodeUUID()); @@ -214,9 +291,9 @@ public void testRegister() throws Exception { } } - private List getStorageReports(String id) { + private StorageReportProto getStorageReports(UUID id) { String storagePath = testDir.getAbsolutePath() + "/" + id; - return TestUtils.createStorageReport(100, 10, 90, storagePath, null, id, 1); + return TestUtils.createStorageReport(id, storagePath, 100, 10, 90, null); } private EndpointStateMachine registerTaskHelper(InetSocketAddress scmAddress, @@ -228,13 +305,14 @@ private EndpointStateMachine registerTaskHelper(InetSocketAddress scmAddress, rpcEndPoint.setState(EndpointStateMachine.EndPointStates.REGISTER); OzoneContainer ozoneContainer = mock(OzoneContainer.class); when(ozoneContainer.getNodeReport()).thenReturn(TestUtils - .createNodeReport(getStorageReports(UUID.randomUUID().toString()))); + .createNodeReport(getStorageReports(UUID.randomUUID()))); when(ozoneContainer.getContainerReport()).thenReturn( - createContainerReport(10, null)); + TestUtils.getRandomContainerReports(10)); RegisterEndpointTask endpointTask = - new RegisterEndpointTask(rpcEndPoint, conf, ozoneContainer); + new RegisterEndpointTask(rpcEndPoint, conf, ozoneContainer, + 
mock(StateContext.class)); if (!clearDatanodeDetails) { - DatanodeDetails datanodeDetails = TestUtils.getDatanodeDetails(); + DatanodeDetails datanodeDetails = TestUtils.randomDatanodeDetails(); endpointTask.setDatanodeDetails(datanodeDetails); } endpointTask.call(); @@ -287,15 +365,14 @@ public void testRegisterRpcTimeout() throws Exception { @Test public void testHeartbeat() throws Exception { - DatanodeDetails dataNode = getDatanodeDetails(); + DatanodeDetails dataNode = TestUtils.randomDatanodeDetails(); try (EndpointStateMachine rpcEndPoint = createEndpoint(SCMTestUtils.getConf(), serverAddress, 1000)) { - String storageId = UUID.randomUUID().toString(); SCMHeartbeatRequestProto request = SCMHeartbeatRequestProto.newBuilder() .setDatanodeDetails(dataNode.getProtoBufMessage()) .setNodeReport(TestUtils.createNodeReport( - getStorageReports(storageId))) + getStorageReports(UUID.randomUUID()))) .build(); SCMHeartbeatResponseProto responseProto = rpcEndPoint.getEndPoint() @@ -305,7 +382,88 @@ public void testHeartbeat() throws Exception { } } - private void heartbeatTaskHelper(InetSocketAddress scmAddress, + @Test + public void testHeartbeatWithCommandStatusReport() throws Exception { + DatanodeDetails dataNode = TestUtils.randomDatanodeDetails(); + try (EndpointStateMachine rpcEndPoint = + createEndpoint(SCMTestUtils.getConf(), + serverAddress, 1000)) { + // Add some scmCommands for heartbeat response + addScmCommands(); + + + SCMHeartbeatRequestProto request = SCMHeartbeatRequestProto.newBuilder() + .setDatanodeDetails(dataNode.getProtoBufMessage()) + .setNodeReport(TestUtils.createNodeReport( + getStorageReports(UUID.randomUUID()))) + .build(); + + SCMHeartbeatResponseProto responseProto = rpcEndPoint.getEndPoint() + .sendHeartbeat(request); + assertNotNull(responseProto); + assertEquals(3, responseProto.getCommandsCount()); + assertEquals(0, scmServerImpl.getCommandStatusReportCount()); + + // Send heartbeat again from heartbeat endpoint task + final StateContext stateContext = heartbeatTaskHelper( + serverAddress, 3000); + Map map = stateContext.getCommandStatusMap(); + assertNotNull(map); + assertEquals("Should have 3 objects", 3, map.size()); + assertTrue(map.containsKey(Long.valueOf(1))); + assertTrue(map.containsKey(Long.valueOf(2))); + assertTrue(map.containsKey(Long.valueOf(3))); + assertTrue(map.get(Long.valueOf(1)).getType() + .equals(Type.closeContainerCommand)); + assertTrue(map.get(Long.valueOf(2)).getType() + .equals(Type.replicateContainerCommand)); + assertTrue( + map.get(Long.valueOf(3)).getType().equals(Type.deleteBlocksCommand)); + assertTrue(map.get(Long.valueOf(1)).getStatus().equals(Status.PENDING)); + assertTrue(map.get(Long.valueOf(2)).getStatus().equals(Status.PENDING)); + assertTrue(map.get(Long.valueOf(3)).getStatus().equals(Status.PENDING)); + + scmServerImpl.clearScmCommandRequests(); + } + } + + private void addScmCommands() { + SCMCommandProto closeCommand = SCMCommandProto.newBuilder() + .setCloseContainerCommandProto( + CloseContainerCommandProto.newBuilder().setCmdId(1) + .setContainerID(1) + .setReplicationType(ReplicationType.RATIS) + .setPipelineID(PipelineID.randomId().getProtobuf()) + .build()) + .setCommandType(Type.closeContainerCommand) + .build(); + SCMCommandProto replicationCommand = SCMCommandProto.newBuilder() + .setReplicateContainerCommandProto( + ReplicateContainerCommandProto.newBuilder() + .setCmdId(2) + .setContainerID(2) + .build()) + .setCommandType(Type.replicateContainerCommand) + .build(); + SCMCommandProto deleteBlockCommand = 
SCMCommandProto.newBuilder() + .setDeleteBlocksCommandProto( + DeleteBlocksCommandProto.newBuilder() + .setCmdId(3) + .addDeletedBlocksTransactions( + DeletedBlocksTransaction.newBuilder() + .setContainerID(45) + .setCount(1) + .setTxID(23) + .build()) + .build()) + .setCommandType(Type.deleteBlocksCommand) + .build(); + scmServerImpl.addScmCommandRequest(closeCommand); + scmServerImpl.addScmCommandRequest(deleteBlockCommand); + scmServerImpl.addScmCommandRequest(replicationCommand); + } + + private StateContext heartbeatTaskHelper(InetSocketAddress scmAddress, int rpcTimeout) throws Exception { Configuration conf = SCMTestUtils.getConf(); conf.set(DFS_DATANODE_DATA_DIR_KEY, testDir.getAbsolutePath()); @@ -318,11 +476,11 @@ private void heartbeatTaskHelper(InetSocketAddress scmAddress, // Create a datanode state machine for stateConext used by endpoint task try (DatanodeStateMachine stateMachine = new DatanodeStateMachine( - TestUtils.getDatanodeDetails(), conf); - EndpointStateMachine rpcEndPoint = + TestUtils.randomDatanodeDetails(), conf); + EndpointStateMachine rpcEndPoint = createEndpoint(conf, scmAddress, rpcTimeout)) { HddsProtos.DatanodeDetailsProto datanodeDetailsProto = - getDatanodeDetails().getProtoBufMessage(); + TestUtils.randomDatanodeDetails().getProtoBufMessage(); rpcEndPoint.setState(EndpointStateMachine.EndPointStates.HEARTBEAT); final StateContext stateContext = @@ -337,6 +495,7 @@ private void heartbeatTaskHelper(InetSocketAddress scmAddress, Assert.assertEquals(EndpointStateMachine.EndPointStates.HEARTBEAT, rpcEndPoint.getState()); + return stateContext; } } @@ -365,25 +524,4 @@ public void testHeartbeatTaskRpcTimeOut() throws Exception { lessThanOrEqualTo(rpcTimeout + tolerance)); } - private ContainerReportsProto createContainerReport( - int count, DatanodeDetails datanodeDetails) { - StorageContainerDatanodeProtocolProtos.ContainerReportsProto.Builder - reportsBuilder = StorageContainerDatanodeProtocolProtos - .ContainerReportsProto.newBuilder(); - for (int x = 0; x < count; x++) { - long containerID = RandomUtils.nextLong(); - ContainerReport report = new ContainerReport(containerID, - DigestUtils.sha256Hex("Simulated")); - report.setKeyCount(1000); - report.setSize(OzoneConsts.GB * 5); - report.setBytesUsed(OzoneConsts.GB * 2); - report.setReadCount(100); - report.setReadBytes(OzoneConsts.GB * 1); - report.setWriteCount(50); - report.setWriteBytes(OzoneConsts.GB * 2); - - reportsBuilder.addReports(report.getProtoBufMessage()); - } - return reportsBuilder.build(); - } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/placement/TestContainerPlacement.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/placement/TestContainerPlacement.java index 651b77618e4..1c80880d01d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/placement/TestContainerPlacement.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/placement/TestContainerPlacement.java @@ -30,6 +30,7 @@ import org.junit.Assert; import org.junit.Test; +import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -86,11 +87,13 @@ public void testCapacityPlacementYieldsBetterDataDistribution() throws for (int x = 0; x < opsCount; x++) { long containerSize = random.nextInt(100) * OzoneConsts.GB; List nodesCapacity = - capacityPlacer.chooseDatanodes(nodesRequired, containerSize); + capacityPlacer.chooseDatanodes(new ArrayList<>(), nodesRequired, + containerSize); 
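The TestContainerPlacement hunk here (the capacityPlacer call above and the randomPlacer call just below) shows the new shape of chooseDatanodes: an exclusion list is now passed as the first argument, ahead of the required node count and the container size. Below is a minimal sketch of that call shape; it uses a locally declared interface so it stays self-contained, because the real placement interface and its package are not named in this hunk and should be treated as illustrative only.

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hdds.protocol.DatanodeDetails;

/**
 * Illustrative only: mirrors the chooseDatanodes(excluded, required, size)
 * signature used by capacityPlacer/randomPlacer in the test hunk above.
 */
interface PlacementPolicySketch {

  List<DatanodeDetails> chooseDatanodes(List<DatanodeDetails> excludedNodes,
      int nodesRequired, long sizeRequired) throws IOException;

  static List<DatanodeDetails> pickTwice(PlacementPolicySketch placer,
      int nodesRequired, long containerSize) throws IOException {
    // First pick: nothing to exclude yet, so pass an empty list, exactly as
    // the updated test does for capacityPlacer.
    List<DatanodeDetails> first =
        placer.chooseDatanodes(new ArrayList<>(), nodesRequired,
            containerSize);
    // Second pick: exclude the nodes already chosen, the way the test feeds
    // nodesCapacity into randomPlacer.chooseDatanodes(...).
    List<DatanodeDetails> second =
        placer.chooseDatanodes(first, nodesRequired, containerSize);
    List<DatanodeDetails> all = new ArrayList<>(first);
    all.addAll(second);
    return all;
  }
}
```

The empty ArrayList in the first call matches what the updated test passes when there is nothing to exclude yet.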
assertEquals(nodesRequired, nodesCapacity.size()); List nodesRandom = - randomPlacer.chooseDatanodes(nodesRequired, containerSize); + randomPlacer.chooseDatanodes(nodesCapacity, nodesRequired, + containerSize); // One fifth of all calls are delete if (x % 5 == 0) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java index e15e0fcfc4a..a0249aaa0e7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/testutils/ReplicationNodeManagerMock.java @@ -28,7 +28,9 @@ .StorageContainerDatanodeProtocolProtos.NodeReportProto; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto; +import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.ozone.protocol.VersionResponse; +import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; @@ -287,4 +289,19 @@ public void addDatanodeCommand(UUID dnId, SCMCommand command) { this.commandQueue.addCommand(dnId, command); } + /** + * Empty implementation for processNodeReport. + * @param dnUuid + * @param nodeReport + */ + @Override + public void processNodeReport(UUID dnUuid, NodeReportProto nodeReport) { + // do nothing. + } + + @Override + public void onMessage(CommandForDatanode commandForDatanode, + EventPublisher publisher) { + // do nothing. + } } diff --git a/hadoop-hdds/tools/pom.xml b/hadoop-hdds/tools/pom.xml index 9017d3ecd58..852c81d0522 100644 --- a/hadoop-hdds/tools/pom.xml +++ b/hadoop-hdds/tools/pom.xml @@ -20,11 +20,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-hdds - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-hdds-tools - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Distributed Data Store Tools Apache Hadoop HDDS Tools jar diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/OzoneCommandHandler.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/OzoneCommandHandler.java deleted file mode 100644 index 641dd0ea9a7..00000000000 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/OzoneCommandHandler.java +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
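ReplicationNodeManagerMock above now has to satisfy two more NodeManager responsibilities: processNodeReport (a no-op in the mock) and onMessage(CommandForDatanode, EventPublisher), the event-handler entry point through which SCM queues commands for a datanode. Together with the heartbeat test earlier in this patch, the flow is: SCM queues SCMCommands, the datanode receives them in the heartbeat response, and it records each one as PENDING in its StateContext command-status map until the command is executed and reported back. A rough sketch of the datanode-side check follows; the exact import paths for StateContext, CommandStatus and the proto Status enum are not visible in these hunks and are assumptions here.

```java
import java.util.Map;

// Assumed import paths; only the method and enum names appear in the hunks.
import org.apache.hadoop.hdds.protocol.proto
    .StorageContainerDatanodeProtocolProtos.CommandStatus.Status;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.protocol.commands.CommandStatus;

/**
 * Sketch of the check performed by testHeartbeatWithCommandStatusReport:
 * every command handed back by SCM should be tracked as PENDING until the
 * datanode has acted on it.
 */
final class CommandStatusSketch {

  private CommandStatusSketch() {
  }

  static boolean allPending(StateContext context) {
    Map<Long, CommandStatus> statusMap = context.getCommandStatusMap();
    for (CommandStatus status : statusMap.values()) {
      if (!Status.PENDING.equals(status.getStatus())) {
        return false;
      }
    }
    return true;
  }
}
```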
- */ -package org.apache.hadoop.hdds.scm.cli; - -import org.apache.commons.cli.CommandLine; -import org.apache.hadoop.hdds.scm.client.ScmClient; - -import java.io.IOException; -import java.io.PrintStream; - -/** - * The abstract class of all SCM CLI commands. - */ -public abstract class OzoneCommandHandler { - - private ScmClient scmClient; - private PrintStream out = System.out; - private PrintStream err = System.err; - - /** - * Constructs a handler object. - */ - public OzoneCommandHandler(ScmClient scmClient) { - this.scmClient = scmClient; - } - - protected ScmClient getScmClient() { - return scmClient; - } - - /** - * Sets customized output stream to redirect the stdout to somewhere else. - * @param out - */ - public void setOut(PrintStream out) { - this.out = out; - } - - /** - * Sets customized error stream to redirect the stderr to somewhere else. - * @param err - */ - public void setErr(PrintStream err) { - this.err = err; - } - - public void logOut(String msg, String... variable) { - this.out.println(String.format(msg, variable)); - } - - /** - * Executes the Client command. - * - * @param cmd - CommandLine. - * @throws IOException throws exception. - */ - public abstract void execute(CommandLine cmd) throws IOException; - - /** - * Display a help message describing the options the command takes. - * TODO : currently only prints to standard out, may want to change this. - */ - public abstract void displayHelp(); - - public PrintStream getOut() { - return out; - } - - public PrintStream getErr() { - return err; - } -} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java index 8d71d00551e..59cd0ba9a46 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/SCMCLI.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,68 +17,73 @@ */ package org.apache.hadoop.hdds.scm.cli; -import org.apache.commons.cli.BasicParser; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; +import java.io.IOException; +import java.net.InetSocketAddress; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.XceiverClientManager; -import org.apache.hadoop.hdds.scm.cli.container.ContainerCommandHandler; -import org.apache.hadoop.hdds.scm.cli.container.CreateContainerHandler; +import org.apache.hadoop.hdds.scm.cli.container.CloseSubcommand; +import org.apache.hadoop.hdds.scm.cli.container.CreateSubcommand; +import org.apache.hadoop.hdds.scm.cli.container.DeleteSubcommand; +import org.apache.hadoop.hdds.scm.cli.container.InfoSubcommand; +import org.apache.hadoop.hdds.scm.cli.container.ListSubcommand; import org.apache.hadoop.hdds.scm.client.ContainerOperationClient; import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.protocolPB .StorageContainerLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ipc.Client; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.ToolRunner; - -import java.io.IOException; -import java.io.PrintStream; -import java.net.InetSocketAddress; -import java.net.URISyntaxException; -import java.util.Arrays; +import org.apache.hadoop.util.NativeCodeLoader; +import org.apache.commons.lang3.StringUtils; +import static org.apache.hadoop.hdds.HddsUtils.getScmAddressForClients; import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_CONTAINER_SIZE_DEFAULT; + .OZONE_SCM_CLIENT_ADDRESS_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_CONTAINER_SIZE_GB; -import static org.apache.hadoop.hdds.HddsUtils.getScmAddressForClients; -import static org.apache.hadoop.hdds.scm.cli.ResultCode.EXECUTION_ERROR; -import static org.apache.hadoop.hdds.scm.cli.ResultCode.SUCCESS; -import static org.apache.hadoop.hdds.scm.cli.ResultCode.UNRECOGNIZED_CMD; + .OZONE_SCM_CONTAINER_SIZE_DEFAULT; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; /** * This class is the CLI of SCM. 
*/ -public class SCMCLI extends OzoneBaseCLI { - - public static final String HELP_OP = "help"; - public static final int CMD_WIDTH = 80; - private final ScmClient scmClient; - private final PrintStream out; - private final PrintStream err; - - private final Options options; - - public SCMCLI(ScmClient scmClient) { - this(scmClient, System.out, System.err); - } - - public SCMCLI(ScmClient scmClient, PrintStream out, PrintStream err) { - this.scmClient = scmClient; - this.out = out; - this.err = err; - this.options = getOptions(); - } +/** + * Container subcommand. + */ +@Command(name = "ozone scmcli", hidden = true, description = + "Developer tools to handle SCM specific " + + "operations.", + versionProvider = HddsVersionProvider.class, + subcommands = { + ListSubcommand.class, + InfoSubcommand.class, + DeleteSubcommand.class, + CreateSubcommand.class, + CloseSubcommand.class + }, + mixinStandardHelpOptions = true) +public class SCMCLI extends GenericCli { + + @Option(names = {"--scm"}, description = "The destination scm (host:port)") + private String scm = ""; /** * Main for the scm shell Command handling. @@ -87,29 +92,40 @@ public SCMCLI(ScmClient scmClient, PrintStream out, PrintStream err) { * @throws Exception */ public static void main(String[] argv) throws Exception { - OzoneConfiguration conf = new OzoneConfiguration(); - ScmClient scmClient = getScmClient(conf); - SCMCLI shell = new SCMCLI(scmClient); - conf.setQuietMode(false); - shell.setConf(conf); - int res = 0; - try { - res = ToolRunner.run(shell, argv); - } catch (Exception ex) { - System.exit(1); - } - System.exit(res); + + LogManager.resetConfiguration(); + Logger.getRootLogger().setLevel(Level.INFO); + Logger.getRootLogger() + .addAppender(new ConsoleAppender(new PatternLayout("%m%n"))); + Logger.getLogger(NativeCodeLoader.class).setLevel(Level.ERROR); + + new SCMCLI().run(argv); } - private static ScmClient getScmClient(OzoneConfiguration ozoneConf) + public ScmClient createScmClient() throws IOException { + + OzoneConfiguration ozoneConf = createOzoneConfiguration(); + if (StringUtils.isNotEmpty(scm)) { + ozoneConf.set(OZONE_SCM_CLIENT_ADDRESS_KEY, scm); + } + if (!HddsUtils.getHostNameFromConfigKeys(ozoneConf, + ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY).isPresent()) { + + throw new IllegalArgumentException( + ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY + + " should be set in ozone-site.xml or with the --scm option"); + } + long version = RPC.getProtocolVersion( StorageContainerLocationProtocolPB.class); InetSocketAddress scmAddress = getScmAddressForClients(ozoneConf); - int containerSizeGB = ozoneConf.getInt(OZONE_SCM_CONTAINER_SIZE_GB, - OZONE_SCM_CONTAINER_SIZE_DEFAULT); - ContainerOperationClient.setContainerSizeB(containerSizeGB*OzoneConsts.GB); + int containerSizeGB = (int) ozoneConf.getStorageSize( + OZONE_SCM_CONTAINER_SIZE, OZONE_SCM_CONTAINER_SIZE_DEFAULT, + StorageUnit.GB); + ContainerOperationClient + .setContainerSizeB(containerSizeGB * OzoneConsts.GB); RPC.setProtocolEngine(ozoneConf, StorageContainerLocationProtocolPB.class, ProtobufRpcEngine.class); @@ -119,116 +135,16 @@ private static ScmClient getScmClient(OzoneConfiguration ozoneConf) scmAddress, UserGroupInformation.getCurrentUser(), ozoneConf, NetUtils.getDefaultSocketFactory(ozoneConf), Client.getRpcTimeout(ozoneConf))); - ScmClient storageClient = new ContainerOperationClient( + return new ContainerOperationClient( client, new XceiverClientManager(ozoneConf)); - return storageClient; - } - - /** - * Adds ALL the options that hdfs scm command 
supports. Given the hierarchy - * of commands, the options are added in a cascading manner, e.g.: - * {@link SCMCLI} asks {@link ContainerCommandHandler} to add it's options, - * which then asks it's sub command, such as - * {@link CreateContainerHandler} - * to add it's own options. - * - * We need to do this because {@link BasicParser} need to take all the options - * when paring args. - * @return ALL the options supported by this CLI. - */ - @Override - protected Options getOptions() { - Options newOptions = new Options(); - // add the options - addTopLevelOptions(newOptions); - ContainerCommandHandler.addOptions(newOptions); - // TODO : add pool, node and pipeline commands. - addHelpOption(newOptions); - return newOptions; - } - - private static void addTopLevelOptions(Options options) { - Option containerOps = - new Option(ContainerCommandHandler.CONTAINER_CMD, false, - "Container related options"); - options.addOption(containerOps); - // TODO : add pool, node and pipeline commands. - } - - private static void addHelpOption(Options options) { - Option helpOp = new Option(HELP_OP, false, "display help message"); - options.addOption(helpOp); } - @Override - protected void displayHelp() { - HelpFormatter helpFormatter = new HelpFormatter(); - Options topLevelOptions = new Options(); - addTopLevelOptions(topLevelOptions); - helpFormatter.printHelp(CMD_WIDTH, "hdfs scmcli []", - "where can be one of the following", - topLevelOptions, ""); - } - - @Override - public int run(String[] args) throws Exception { - CommandLine cmd = parseArgs(args, options); - if (cmd == null) { - err.println("Unrecognized options:" + Arrays.asList(args)); - displayHelp(); - return UNRECOGNIZED_CMD; - } - return dispatch(cmd, options); - } - - /** - * This function parses all command line arguments - * and returns the appropriate values. - * - * @param argv - Argv from main - * - * @return CommandLine - */ - @Override - protected CommandLine parseArgs(String[] argv, Options opts) - throws ParseException { - try { - BasicParser parser = new BasicParser(); - return parser.parse(opts, argv); - } catch (ParseException ex) { - err.println(ex.getMessage()); + public void checkContainerExists(ScmClient scmClient, long containerId) + throws IOException { + ContainerInfo container = scmClient.getContainer(containerId); + if (container == null) { + throw new IllegalArgumentException("No such container " + containerId); } - return null; } - @Override - protected int dispatch(CommandLine cmd, Options opts) - throws IOException, URISyntaxException { - OzoneCommandHandler handler = null; - try { - if (cmd.hasOption(ContainerCommandHandler.CONTAINER_CMD)) { - handler = new ContainerCommandHandler(scmClient); - } - - if (handler == null) { - if (cmd.hasOption(HELP_OP)) { - displayHelp(); - return SUCCESS; - } else { - displayHelp(); - err.println("Unrecognized command: " + Arrays.asList(cmd.getArgs())); - return UNRECOGNIZED_CMD; - } - } else { - // Redirect stdout and stderr if necessary. 
- handler.setOut(this.out); - handler.setErr(this.err); - handler.execute(cmd); - return SUCCESS; - } - } catch (IOException ioe) { - err.println("Error executing command:" + ioe); - return EXECUTION_ERROR; - } - } } diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CloseContainerHandler.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CloseContainerHandler.java deleted file mode 100644 index e2267daba54..00000000000 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CloseContainerHandler.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.scm.cli.container; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.hadoop.hdds.scm.cli.OzoneCommandHandler; -import org.apache.hadoop.hdds.scm.cli.SCMCLI; -import org.apache.hadoop.hdds.scm.client.ScmClient; - -import java.io.IOException; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; - -/** - * The handler of close container command. - */ -public class CloseContainerHandler extends OzoneCommandHandler { - - public static final String CONTAINER_CLOSE = "close"; - public static final String OPT_CONTAINER_ID = "c"; - - @Override - public void execute(CommandLine cmd) throws IOException { - if (!cmd.hasOption(CONTAINER_CLOSE)) { - throw new IOException("Expecting container close"); - } - if (!cmd.hasOption(OPT_CONTAINER_ID)) { - displayHelp(); - if (!cmd.hasOption(SCMCLI.HELP_OP)) { - throw new IOException("Expecting container id"); - } else { - return; - } - } - String containerID = cmd.getOptionValue(OPT_CONTAINER_ID); - - ContainerWithPipeline container = getScmClient(). - getContainerWithPipeline(Long.parseLong(containerID)); - if (container == null) { - throw new IOException("Cannot close an non-exist container " - + containerID); - } - logOut("Closing container : %s.", containerID); - getScmClient() - .closeContainer(container.getContainerInfo().getContainerID()); - logOut("Container closed."); - } - - @Override - public void displayHelp() { - Options options = new Options(); - addOptions(options); - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter - .printHelp(SCMCLI.CMD_WIDTH, "hdfs scm -container -close

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.container; + +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import org.apache.hadoop.hdds.scm.client.ScmClient; + +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; +import picocli.CommandLine.ParentCommand; + +/** + * The handler of close container command. + */ +@Command( + name = "close", + description = "close container", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class CloseSubcommand implements Callable { + + @ParentCommand + private SCMCLI parent; + + @Parameters(description = "Id of the container to close") + private long containerId; + + @Override + public Void call() throws Exception { + try (ScmClient scmClient = parent.createScmClient()) { + parent.checkContainerExists(scmClient, containerId); + scmClient.closeContainer(containerId); + return null; + } + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommandHandler.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommandHandler.java deleted file mode 100644 index 428f179932d..00000000000 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ContainerCommandHandler.java +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
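CloseSubcommand above is the template that every container handler follows after this refactoring: a picocli @Command that implements Callable<Void>, receives the parent SCMCLI through @ParentCommand, and opens the ScmClient in a try-with-resources block so the RPC client is always closed. As a sketch of how a further handler would be added under the same pattern, here is a hypothetical "exists" command; it is not part of this patch, and it would also need to be listed in the subcommands array of the SCMCLI @Command annotation to become reachable.

```java
package org.apache.hadoop.hdds.scm.cli.container;

import java.util.concurrent.Callable;

import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.hdds.scm.cli.SCMCLI;
import org.apache.hadoop.hdds.scm.client.ScmClient;

import picocli.CommandLine.Command;
import picocli.CommandLine.Parameters;
import picocli.CommandLine.ParentCommand;

/**
 * Hypothetical subcommand, shown only to illustrate the picocli wiring used
 * by CloseSubcommand and the other handlers in this patch.
 */
@Command(
    name = "exists",
    description = "Check whether a container is known to SCM",
    mixinStandardHelpOptions = true,
    versionProvider = HddsVersionProvider.class)
public class ExistsSubcommand implements Callable<Void> {

  @ParentCommand
  private SCMCLI parent;

  @Parameters(description = "Id of the container to check")
  private long containerId;

  @Override
  public Void call() throws Exception {
    // The parent owns configuration handling (including --scm) and builds
    // the RPC client; the subcommand only borrows it for one operation.
    try (ScmClient scmClient = parent.createScmClient()) {
      parent.checkContainerExists(scmClient, containerId);
      System.out.println("Container " + containerId + " exists.");
      return null;
    }
  }
}
```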
- */ -package org.apache.hadoop.hdds.scm.cli.container; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.hadoop.hdds.scm.cli.OzoneCommandHandler; -import org.apache.hadoop.hdds.scm.client.ScmClient; - -import java.io.IOException; -import java.util.Arrays; - -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.CMD_WIDTH; -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.HELP_OP; -import static org.apache.hadoop.hdds.scm.cli.container.CloseContainerHandler - .CONTAINER_CLOSE; -import static org.apache.hadoop.hdds.scm.cli.container.CreateContainerHandler - .CONTAINER_CREATE; -import static org.apache.hadoop.hdds.scm.cli.container.DeleteContainerHandler - .CONTAINER_DELETE; -import static org.apache.hadoop.hdds.scm.cli.container.InfoContainerHandler - .CONTAINER_INFO; -import static org.apache.hadoop.hdds.scm.cli.container.ListContainerHandler - .CONTAINER_LIST; - -/** - * The handler class of container-specific commands, e.g. addContainer. - */ -public class ContainerCommandHandler extends OzoneCommandHandler { - - public static final String CONTAINER_CMD = "container"; - - public ContainerCommandHandler(ScmClient scmClient) { - super(scmClient); - } - - @Override - public void execute(CommandLine cmd) throws IOException { - // all container commands should contain -container option - if (!cmd.hasOption(CONTAINER_CMD)) { - throw new IOException("Expecting container cmd"); - } - // check which each the sub command it is - OzoneCommandHandler handler = null; - if (cmd.hasOption(CONTAINER_CREATE)) { - handler = new CreateContainerHandler(getScmClient()); - } else if (cmd.hasOption(CONTAINER_DELETE)) { - handler = new DeleteContainerHandler(getScmClient()); - } else if (cmd.hasOption(CONTAINER_INFO)) { - handler = new InfoContainerHandler(getScmClient()); - } else if (cmd.hasOption(CONTAINER_LIST)) { - handler = new ListContainerHandler(getScmClient()); - } else if (cmd.hasOption(CONTAINER_CLOSE)) { - handler = new CloseContainerHandler(getScmClient()); - } - - // execute the sub command, throw exception if no sub command found - // unless -help option is given. - if (handler != null) { - handler.setOut(this.getOut()); - handler.setErr(this.getErr()); - handler.execute(cmd); - } else { - displayHelp(); - if (!cmd.hasOption(HELP_OP)) { - throw new IOException("Unrecognized command " - + Arrays.asList(cmd.getArgs())); - } - } - } - - @Override - public void displayHelp() { - Options options = new Options(); - addCommandsOption(options); - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter.printHelp(CMD_WIDTH, - "hdfs scm -container ", - "where can be one of the following", options, ""); - } - - private static void addCommandsOption(Options options) { - Option createContainer = - new Option(CONTAINER_CREATE, false, "Create container"); - Option infoContainer = - new Option(CONTAINER_INFO, false, "Info container"); - Option deleteContainer = - new Option(CONTAINER_DELETE, false, "Delete container"); - Option listContainer = - new Option(CONTAINER_LIST, false, "List container"); - Option closeContainer = - new Option(CONTAINER_CLOSE, false, "Close container"); - - options.addOption(createContainer); - options.addOption(deleteContainer); - options.addOption(infoContainer); - options.addOption(listContainer); - options.addOption(closeContainer); - // Every new option should add it's option here. 
- } - - public static void addOptions(Options options) { - addCommandsOption(options); - // for create container options. - DeleteContainerHandler.addOptions(options); - InfoContainerHandler.addOptions(options); - ListContainerHandler.addOptions(options); - CloseContainerHandler.addOptions(options); - // Every new option should add it's option here. - } -} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CreateContainerHandler.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CreateContainerHandler.java deleted file mode 100644 index 278ee30842f..00000000000 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CreateContainerHandler.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.scm.cli.container; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Options; -import org.apache.hadoop.hdds.scm.cli.OzoneCommandHandler; -import org.apache.hadoop.hdds.scm.client.ScmClient; - -import java.io.IOException; - -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.CMD_WIDTH; -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.HELP_OP; - -/** - * This is the handler that process container creation command. - */ -public class CreateContainerHandler extends OzoneCommandHandler { - - public static final String CONTAINER_CREATE = "create"; - public static final String CONTAINER_OWNER = "OZONE"; - // TODO Support an optional -p option to create - // container on given datanodes. - - public CreateContainerHandler(ScmClient scmClient) { - super(scmClient); - } - - @Override - public void execute(CommandLine cmd) throws IOException { - if (cmd.hasOption(HELP_OP)) { - displayHelp(); - } - - if (!cmd.hasOption(CONTAINER_CREATE)) { - throw new IOException("Expecting container create"); - } - - logOut("Creating container..."); - getScmClient().createContainer(CONTAINER_OWNER); - logOut("Container created."); - } - - @Override - public void displayHelp() { - Options options = new Options(); - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter.printHelp(CMD_WIDTH, "hdfs scm -container -create", - null, options, null); - } -} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CreateSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CreateSubcommand.java new file mode 100644 index 00000000000..1dda9c485ed --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/CreateSubcommand.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.container; + +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.common.helpers + .ContainerWithPipeline; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.ParentCommand; + +/** + * This is the handler that process container creation command. + */ +@Command( + name = "create", + description = "Create container", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class CreateSubcommand implements Callable { + + private static final Logger LOG = + LoggerFactory.getLogger(CreateSubcommand.class); + + @ParentCommand + private SCMCLI parent; + + @Option(description = "Owner of the new container", defaultValue = "OZONE", + required = false, names = { + "-o", "--owner"}) + + private String owner; + + @Override + public Void call() throws Exception { + try (ScmClient scmClient = parent.createScmClient()) { + ContainerWithPipeline container = scmClient.createContainer(owner); + LOG.info("Container {} is created.", + container.getContainerInfo().getContainerID()); + return null; + } + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/DeleteContainerHandler.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/DeleteContainerHandler.java deleted file mode 100644 index 1b266653f03..00000000000 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/DeleteContainerHandler.java +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hdds.scm.cli.container; - -import com.google.common.base.Preconditions; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.hadoop.hdds.scm.cli.OzoneCommandHandler; -import org.apache.hadoop.hdds.scm.client.ScmClient; - -import java.io.IOException; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; - -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.CMD_WIDTH; -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.HELP_OP; - -/** - * This is the handler that process delete container command. - */ -public class DeleteContainerHandler extends OzoneCommandHandler { - - protected static final String CONTAINER_DELETE = "delete"; - protected static final String OPT_FORCE = "f"; - protected static final String OPT_CONTAINER_ID = "c"; - - public DeleteContainerHandler(ScmClient scmClient) { - super(scmClient); - } - - @Override - public void execute(CommandLine cmd) throws IOException { - Preconditions.checkArgument(cmd.hasOption(CONTAINER_DELETE), - "Expecting command delete"); - if (!cmd.hasOption(OPT_CONTAINER_ID)) { - displayHelp(); - if (!cmd.hasOption(HELP_OP)) { - throw new IOException("Expecting container name"); - } else { - return; - } - } - - String containerID = cmd.getOptionValue(OPT_CONTAINER_ID); - - ContainerWithPipeline container = getScmClient().getContainerWithPipeline( - Long.parseLong(containerID)); - if (container == null) { - throw new IOException("Cannot delete an non-exist container " - + containerID); - } - - logOut("Deleting container : %s.", containerID); - getScmClient() - .deleteContainer(container.getContainerInfo().getContainerID(), - container.getPipeline(), cmd.hasOption(OPT_FORCE)); - logOut("Container %s deleted.", containerID); - } - - @Override - public void displayHelp() { - Options options = new Options(); - addOptions(options); - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter.printHelp(CMD_WIDTH, "hdfs scm -container -delete

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.cli.container; + +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import org.apache.hadoop.hdds.scm.client.ScmClient; + +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; +import picocli.CommandLine.ParentCommand; + +/** + * This is the handler that process delete container command. + */ +@Command( + name = "delete", + description = "Delete container", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class DeleteSubcommand implements Callable { + + @Parameters(description = "Id of the container to close") + private long containerId; + + @Option(names = {"-f", + "--force"}, description = "forcibly delete the container") + private boolean force; + + @ParentCommand + private SCMCLI parent; + + @Override + public Void call() throws Exception { + try (ScmClient scmClient = parent.createScmClient()) { + parent.checkContainerExists(scmClient, containerId); + scmClient.deleteContainer(containerId, force); + return null; + } + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/InfoContainerHandler.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/InfoContainerHandler.java deleted file mode 100644 index 3716aceaa4d..00000000000 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/InfoContainerHandler.java +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0
- *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.scm.cli.container; - -import com.google.common.base.Preconditions; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.hadoop.hdds.scm.cli.OzoneCommandHandler; -import org.apache.hadoop.hdds.scm.client.ScmClient; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerData; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerLifeCycleState; - -import java.io.IOException; -import java.util.stream.Collectors; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; - -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.CMD_WIDTH; -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.HELP_OP; - -/** - * This is the handler that process container info command. - */ -public class InfoContainerHandler extends OzoneCommandHandler { - - public static final String CONTAINER_INFO = "info"; - protected static final String OPT_CONTAINER_ID = "c"; - - /** - * Constructs a handler object. - * - * @param scmClient scm client. - */ - public InfoContainerHandler(ScmClient scmClient) { - super(scmClient); - } - - @Override - public void execute(CommandLine cmd) throws IOException { - if (!cmd.hasOption(CONTAINER_INFO)) { - throw new IOException("Expecting container info"); - } - if (!cmd.hasOption(OPT_CONTAINER_ID)) { - displayHelp(); - if (!cmd.hasOption(HELP_OP)) { - throw new IOException("Expecting container name"); - } else { - return; - } - } - String containerID = cmd.getOptionValue(OPT_CONTAINER_ID); - ContainerWithPipeline container = getScmClient(). - getContainerWithPipeline(Long.parseLong(containerID)); - Preconditions.checkNotNull(container, "Container cannot be null"); - - ContainerData containerData = getScmClient().readContainer(container - .getContainerInfo().getContainerID(), container.getPipeline()); - - // Print container report info. - logOut("Container id: %s", containerID); - String openStatus = - containerData.getState() == ContainerLifeCycleState.OPEN ? "OPEN" : - "CLOSED"; - logOut("Container State: %s", openStatus); - logOut("Container DB Path: %s", containerData.getDbPath()); - logOut("Container Path: %s", containerData.getContainerPath()); - - // Output meta data. - String metadataStr = containerData.getMetadataList().stream().map( - p -> p.getKey() + ":" + p.getValue()).collect(Collectors.joining(", ")); - logOut("Container Metadata: {%s}", metadataStr); - - // Print pipeline of an existing container. - logOut("LeaderID: %s", container.getPipeline() - .getLeader().getHostName()); - String machinesStr = container.getPipeline() - .getMachines().stream().map( - DatanodeDetails::getHostName).collect(Collectors.joining(",")); - logOut("Datanodes: [%s]", machinesStr); - } - - @Override - public void displayHelp() { - Options options = new Options(); - addOptions(options); - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter.printHelp(CMD_WIDTH, "hdfs scm -container -info

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.container; + +import java.util.concurrent.Callable; +import java.util.stream.Collectors; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerData; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos + .ContainerLifeCycleState; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.common.helpers + .ContainerWithPipeline; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; +import picocli.CommandLine.ParentCommand; + +/** + * This is the handler that process container info command. + */ +@Command( + name = "info", + description = "Show information about a specific container", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class InfoSubcommand implements Callable { + + private static final Logger LOG = + LoggerFactory.getLogger(InfoSubcommand.class); + + @ParentCommand + private SCMCLI parent; + + @Parameters(description = "Decimal id of the container.") + private long containerID; + + @Override + public Void call() throws Exception { + try (ScmClient scmClient = parent.createScmClient()) { + ContainerWithPipeline container = scmClient. + getContainerWithPipeline(containerID); + Preconditions.checkNotNull(container, "Container cannot be null"); + + ContainerData containerData = scmClient.readContainer(container + .getContainerInfo().getContainerID(), container.getPipeline()); + + // Print container report info. + LOG.info("Container id: {}", containerID); + String openStatus = + containerData.getState() == ContainerLifeCycleState.OPEN ? "OPEN" : + "CLOSED"; + LOG.info("Container State: {}", openStatus); + LOG.info("Container Path: {}", containerData.getContainerPath()); + + // Output meta data. + String metadataStr = containerData.getMetadataList().stream().map( + p -> p.getKey() + ":" + p.getValue()) + .collect(Collectors.joining(", ")); + LOG.info("Container Metadata: {}", metadataStr); + + // Print pipeline of an existing container. + LOG.info("LeaderID: {}", container.getPipeline() + .getLeader().getHostName()); + String machinesStr = container.getPipeline() + .getMachines().stream().map( + DatanodeDetails::getHostName).collect(Collectors.joining(",")); + LOG.info("Datanodes: [{}]", machinesStr); + return null; + } + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ListContainerHandler.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ListContainerHandler.java deleted file mode 100644 index 3483b3e69aa..00000000000 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ListContainerHandler.java +++ /dev/null @@ -1,117 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
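InfoSubcommand above, like the other new handlers, is a thin wrapper over ScmClient. For readers who want the client surface in one place, the sketch below strings together the calls the new subcommands make (create, list, close, delete). The signatures are taken from the calls visible in this patch, but whether SCM accepts each transition depends on the container's actual lifecycle state, so treat it as illustrative.

```java
import java.util.List;

import org.apache.hadoop.hdds.scm.cli.SCMCLI;
import org.apache.hadoop.hdds.scm.client.ScmClient;
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.common.helpers
    .ContainerWithPipeline;

/**
 * Walks through the ScmClient calls wrapped by the new subcommands.
 */
final class ScmClientWalkthrough {

  private ScmClientWalkthrough() {
  }

  static void exercise(SCMCLI cli) throws Exception {
    try (ScmClient scmClient = cli.createScmClient()) {
      // create: the same call CreateSubcommand issues, "OZONE" being its
      // default --owner value.
      ContainerWithPipeline created = scmClient.createContainer("OZONE");
      long id = created.getContainerInfo().getContainerID();

      // list: the list subcommand that follows pages with a start id and a
      // count (default 20).
      List<ContainerInfo> page = scmClient.listContainer(id, 20);
      System.out.println("Listed " + page.size() + " containers");

      // close and delete: CloseSubcommand / DeleteSubcommand equivalents,
      // here with --force semantics on the delete.
      scmClient.closeContainer(id);
      scmClient.deleteContainer(id, true);
    }
  }
}
```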
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0
- *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdds.scm.cli.container; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.hadoop.hdds.scm.cli.OzoneCommandHandler; -import org.apache.hadoop.hdds.scm.client.ScmClient; -import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; -import org.apache.hadoop.ozone.web.utils.JsonUtils; - -import java.io.IOException; -import java.util.List; - -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.CMD_WIDTH; -import static org.apache.hadoop.hdds.scm.cli.SCMCLI.HELP_OP; - -/** - * This is the handler that process container list command. - */ -public class ListContainerHandler extends OzoneCommandHandler { - - public static final String CONTAINER_LIST = "list"; - public static final String OPT_START_CONTAINER = "start"; - public static final String OPT_COUNT = "count"; - - /** - * Constructs a handler object. - * - * @param scmClient scm client - */ - public ListContainerHandler(ScmClient scmClient) { - super(scmClient); - } - - @Override - public void execute(CommandLine cmd) throws IOException { - if (!cmd.hasOption(CONTAINER_LIST)) { - throw new IOException("Expecting container list"); - } - if (cmd.hasOption(HELP_OP)) { - displayHelp(); - return; - } - - if (!cmd.hasOption(OPT_COUNT)) { - displayHelp(); - if (!cmd.hasOption(HELP_OP)) { - throw new IOException("Expecting container count"); - } else { - return; - } - } - - String startID = cmd.getOptionValue(OPT_START_CONTAINER); - int count = 0; - - if (cmd.hasOption(OPT_COUNT)) { - count = Integer.parseInt(cmd.getOptionValue(OPT_COUNT)); - if (count < 0) { - displayHelp(); - throw new IOException("-count should not be negative"); - } - } - - List containerList = - getScmClient().listContainer( - Long.parseLong(startID), count); - - // Output data list - for (ContainerInfo container : containerList) { - outputContainerInfo(container); - } - } - - private void outputContainerInfo(ContainerInfo containerInfo) - throws IOException { - // Print container report info. - logOut("%s", JsonUtils.toJsonStringWithDefaultPrettyPrinter( - containerInfo.toJsonString())); - } - - @Override - public void displayHelp() { - Options options = new Options(); - addOptions(options); - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter.printHelp(CMD_WIDTH, "hdfs scm -container -list

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.cli.container; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.scm.cli.SCMCLI; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; +import org.apache.hadoop.ozone.web.utils.JsonUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine.Command; +import picocli.CommandLine.Help.Visibility; +import picocli.CommandLine.Option; +import picocli.CommandLine.ParentCommand; + +/** + * This is the handler that process container list command. + */ +@Command( + name = "list", + description = "List containers", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class ListSubcommand implements Callable { + + private static final Logger LOG = + LoggerFactory.getLogger(ListSubcommand.class); + + @ParentCommand + private SCMCLI parent; + + @Option(names = {"-s", "--start"}, + description = "Container id to start the iteration", required = true) + private long startId; + + @Option(names = {"-c", "--count"}, + description = "Maximum number of containers to list", + defaultValue = "20", showDefaultValue = Visibility.ALWAYS) + private int count = 20; + + private void outputContainerInfo(ContainerInfo containerInfo) + throws IOException { + // Print container report info. + LOG.info("{}", JsonUtils.toJsonStringWithDefaultPrettyPrinter( + containerInfo.toJsonString())); + } + + @Override + public Void call() throws Exception { + try (ScmClient scmClient = parent.createScmClient()) { + + List containerList = + scmClient.listContainer(startId, count); + + // Output data list + for (ContainerInfo container : containerList) { + outputContainerInfo(container); + } + return null; + } + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/package-info.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/package-info.java index 0630df2d15d..ff8adbc56f1 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/package-info.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/container/package-info.java @@ -16,4 +16,7 @@ * limitations under the License. */ +/** + * Contains all of the container related scm commands. + */ package org.apache.hadoop.hdds.scm.cli.container; \ No newline at end of file diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/package-info.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/package-info.java index 4762d550fbe..d358b3cf6a5 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/package-info.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/package-info.java @@ -6,14 +6,18 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + *

+ * SCM related cli tools. + */ +/** + * SCM related cli tools. */ - package org.apache.hadoop.hdds.scm.cli; \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/AddBlockFlag.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/AddBlockFlag.java index 6a0805bb71b..b0686d7c4b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/AddBlockFlag.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/AddBlockFlag.java @@ -36,7 +36,16 @@ * * @see CreateFlag#NO_LOCAL_WRITE */ - NO_LOCAL_WRITE((short) 0x01); + NO_LOCAL_WRITE((short) 0x01), + + /** + * Advise that the first block replica NOT take into account DataNode + * locality. The first block replica should be placed randomly within the + * cluster. Subsequent block replicas should follow DataNode locality rules. + * + * @see CreateFlag#IGNORE_CLIENT_LOCALITY + */ + IGNORE_CLIENT_LOCALITY((short) 0x02); private final short mode; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 85d65124fa0..f4d11b9fd6f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -131,6 +131,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; +import org.apache.hadoop.hdfs.protocol.NoECPolicySetException; import org.apache.hadoop.hdfs.protocol.OpenFileEntry; import org.apache.hadoop.hdfs.protocol.OpenFilesIterator; import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType; @@ -953,8 +954,12 @@ public HdfsDataOutputStream createWrappedOutputStream(DFSOutputStream dfsos, final CryptoCodec codec = HdfsKMSUtil.getCryptoCodec(conf, feInfo); KeyVersion decrypted; try (TraceScope ignored = tracer.newScope("decryptEDEK")) { + LOG.debug("Start decrypting EDEK for file: {}, output stream: 0x{}", + dfsos.getSrc(), Integer.toHexString(dfsos.hashCode())); decrypted = HdfsKMSUtil.decryptEncryptedDataEncryptionKey(feInfo, getKeyProvider()); + LOG.debug("Decrypted EDEK for file: {}, output stream: 0x{}", + dfsos.getSrc(), Integer.toHexString(dfsos.hashCode())); } final CryptoOutputStream cryptoOut = new CryptoOutputStream(dfsos, codec, @@ -1868,7 +1873,7 @@ protected Type inferChecksumTypeByReading(LocatedBlock lb, DatanodeInfo dn) return PBHelperClient.convert( reply.getReadOpChecksumInfo().getChecksum().getType()); } finally { - IOUtilsClient.cleanup(null, pair.in, pair.out); + IOUtilsClient.cleanupWithLogger(LOG, pair.in, pair.out); } } @@ -2746,7 +2751,7 @@ public void unsetErasureCodingPolicy(String src) throws IOException { throw re.unwrapRemoteException(AccessControlException.class, SafeModeException.class, UnresolvedPathException.class, - FileNotFoundException.class); + FileNotFoundException.class, NoECPolicySetException.class); } } @@ -2929,7 +2934,7 @@ public Peer newConnectedPeer(InetSocketAddress addr, return peer; } finally { if (!success) { - IOUtilsClient.cleanup(LOG, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); IOUtils.closeSocket(sock); } } @@ -3079,7 +3084,6 @@ TraceScope newSrcDstTraceScope(String description, String src, String dst) { * has REPLICATION policy. 
* @throws IOException */ - public ErasureCodingPolicy getErasureCodingPolicy(String src) throws IOException { checkOpen(); @@ -3092,6 +3096,24 @@ public ErasureCodingPolicy getErasureCodingPolicy(String src) } } + /** + * Satisfy storage policy for an existing file/directory. + * @param src file/directory name + * @throws IOException + */ + public void satisfyStoragePolicy(String src) throws IOException { + checkOpen(); + try (TraceScope ignored = + newPathTraceScope("satisfyStoragePolicy", src)) { + namenode.satisfyStoragePolicy(src); + } catch (RemoteException re) { + throw re.unwrapRemoteException(AccessControlException.class, + FileNotFoundException.class, + SafeModeException.class, + UnresolvedPathException.class); + } + } + Tracer getTracer() { return tracer; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index 9734752052a..e9770548a7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -201,6 +201,9 @@ private DFSOutputStream(DFSClient dfsClient, String src, if (flag.contains(CreateFlag.NO_LOCAL_WRITE)) { this.addBlockFlags.add(AddBlockFlag.NO_LOCAL_WRITE); } + if (flag.contains(CreateFlag.IGNORE_CLIENT_LOCALITY)) { + this.addBlockFlags.add(AddBlockFlag.IGNORE_CLIENT_LOCALITY); + } if (progress != null) { DFSClient.LOG.debug("Set non-null progress callback on DFSOutputStream " +"{}", src); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java index 3fac7c8c100..8f785c79155 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java @@ -649,7 +649,7 @@ public static Peer peerFromSocketAndKey( return peer; } finally { if (!success) { - IOUtilsClient.cleanup(null, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 82cdd8c5ddc..28c1e2735cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -1217,7 +1217,11 @@ private DirListingIterator(Path p, PathFilter filter, thisListing = dfs.listPaths(src, HdfsFileStatus.EMPTY_NAME, needLocation); statistics.incrementReadOps(1); - storageStatistics.incrementOpCounter(OpType.LIST_LOCATED_STATUS); + if (needLocation) { + storageStatistics.incrementOpCounter(OpType.LIST_LOCATED_STATUS); + } else { + storageStatistics.incrementOpCounter(OpType.LIST_STATUS); + } if (thisListing == null) { // the directory does not exist throw new FileNotFoundException("File " + p + " does not exist."); } @@ -2862,6 +2866,40 @@ public Void next(final FileSystem fs, final Path p) throws IOException { }.resolve(this, absF); } + /** + * Set the source path to satisfy storage policy. 
This API is non-recursive + * in nature, i.e., if the source path is a directory then all the files + * immediately under the directory would be considered for satisfying the + * policy and the sub-directories if any under this path will be skipped. + * + * @param path The source path referring to either a directory or a file. + * @throws IOException + */ + public void satisfyStoragePolicy(final Path path) throws IOException { + Path absF = fixRelativePart(path); + new FileSystemLinkResolver() { + + @Override + public Void doCall(Path p) throws IOException { + dfs.satisfyStoragePolicy(getPathName(p)); + return null; + } + + @Override + public Void next(FileSystem fs, Path p) throws IOException { + // DFS only + if (fs instanceof DistributedFileSystem) { + DistributedFileSystem myDfs = (DistributedFileSystem) fs; + myDfs.satisfyStoragePolicy(p); + return null; + } + throw new UnsupportedOperationException( + "Cannot satisfyStoragePolicy through a symlink to a " + + "non-DistributedFileSystem: " + path + " -> " + p); + } + }.resolve(this, absF); + } + /** * Get erasure coding policy information for the specified path * @@ -3205,6 +3243,17 @@ public HdfsDataOutputStreamBuilder replicate() { return this; } + /** + * Advise that the first block replica be written without regard to the + * client locality. + * + * @see CreateFlag for the details. + */ + public HdfsDataOutputStreamBuilder ignoreClientLocality() { + getFlags().add(CreateFlag.IGNORE_CLIENT_LOCALITY); + return this; + } + @VisibleForTesting @Override protected EnumSet getFlags() { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java index 1ddb42ead42..0580ed536da 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/PeerCache.java @@ -188,7 +188,7 @@ public void put(DatanodeID dnId, Peer peer) { if (peer.isClosed()) return; if (capacity <= 0) { // Cache disabled. 
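The ignoreClientLocality() builder option above maps CreateFlag.IGNORE_CLIENT_LOCALITY onto AddBlockFlag.IGNORE_CLIENT_LOCALITY (0x02), so the NameNode spreads the first block replica instead of favouring the writer's local DataNode. A minimal usage sketch in Java; the "hdfs://mycluster" URI and the output path are placeholders, not taken from this patch:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;

    public class IgnoreClientLocalityExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://mycluster"), conf)) {
          DistributedFileSystem dfs = (DistributedFileSystem) fs;
          // The builder adds CreateFlag.IGNORE_CLIENT_LOCALITY, which DFSOutputStream
          // translates into AddBlockFlag.IGNORE_CLIENT_LOCALITY on each addBlock call.
          try (FSDataOutputStream out = dfs.createFile(new Path("/tmp/spread-first-replica"))
              .ignoreClientLocality()
              .build()) {
            out.writeBytes("first replica is placed randomly in the cluster\n");
          }
        }
      }
    }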
- IOUtilsClient.cleanup(LOG, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); return; } putInternal(dnId, peer); @@ -221,7 +221,7 @@ private synchronized void evictExpired(long expiryPeriod) { Time.monotonicNow() - entry.getValue().getTime() < expiryPeriod) { break; } - IOUtilsClient.cleanup(LOG, entry.getValue().getPeer()); + IOUtilsClient.cleanupWithLogger(LOG, entry.getValue().getPeer()); iter.remove(); } } @@ -239,7 +239,7 @@ private synchronized void evictOldest() { "capacity: " + capacity); } Entry entry = iter.next(); - IOUtilsClient.cleanup(LOG, entry.getValue().getPeer()); + IOUtilsClient.cleanupWithLogger(LOG, entry.getValue().getPeer()); iter.remove(); } @@ -267,7 +267,7 @@ private void run() throws InterruptedException { @VisibleForTesting synchronized void clear() { for (Value value : multimap.values()) { - IOUtilsClient.cleanup(LOG, value.getPeer()); + IOUtilsClient.cleanupWithLogger(LOG, value.getPeer()); } multimap.clear(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java index 88044b9714d..f204f3c6232 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java @@ -532,6 +532,16 @@ public ErasureCodingPolicy getErasureCodingPolicy(final Path path) return dfs.getErasureCodingPolicy(path); } + /** + * Set the source path to the specified storage policy. + * + * @param path The source path referring to either a directory or a file. + * @throws IOException + */ + public void satisfyStoragePolicy(final Path path) throws IOException { + dfs.satisfyStoragePolicy(path); + } + /** * Get the Erasure coding policies supported. * diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java index ce4318531a3..8e592f4a54e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderFactory.java @@ -549,14 +549,14 @@ public ShortCircuitReplicaInfo createShortCircuitReplicaInfo() { // Handle an I/O error we got when using a cached socket. // These are considered less serious, because the socket may be stale. LOG.debug("{}: closing stale domain peer {}", this, peer, e); - IOUtilsClient.cleanup(LOG, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); } else { // Handle an I/O error we got when using a newly created socket. // We temporarily disable the domain socket path for a few minutes in // this case, to prevent wasting more time on it. LOG.warn(this + ": I/O error requesting file descriptors. 
" + "Disabling domain socket " + peer.getDomainSocket(), e); - IOUtilsClient.cleanup(LOG, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); clientContext.getDomainSocketFactory() .disableDomainSocketPath(pathInfo.getPath()); return null; @@ -620,7 +620,7 @@ private ShortCircuitReplicaInfo requestFileDescriptors(DomainPeer peer, return null; } finally { if (replica == null) { - IOUtilsClient.cleanup(DFSClient.LOG, fis[0], fis[1]); + IOUtilsClient.cleanupWithLogger(DFSClient.LOG, fis[0], fis[1]); } } case ERROR_UNSUPPORTED: @@ -692,7 +692,7 @@ private BlockReader getRemoteBlockReaderFromDomain() throws IOException { blockReader = getRemoteBlockReader(peer); return blockReader; } catch (IOException ioe) { - IOUtilsClient.cleanup(LOG, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); if (isSecurityException(ioe)) { LOG.trace("{}: got security exception while constructing a remote " + " block reader from the unix domain socket at {}", @@ -715,7 +715,7 @@ private BlockReader getRemoteBlockReaderFromDomain() throws IOException { } } finally { if (blockReader == null) { - IOUtilsClient.cleanup(LOG, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); } } } @@ -766,7 +766,7 @@ private BlockReader getRemoteBlockReaderFromTcp() throws IOException { } } finally { if (blockReader == null) { - IOUtilsClient.cleanup(LOG, peer); + IOUtilsClient.cleanupWithLogger(LOG, peer); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java index e1e38c6e943..e48ace6c227 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java @@ -686,7 +686,7 @@ public synchronized long skip(long n) throws IOException { @Override public synchronized void close() throws IOException { - IOUtilsClient.cleanup(LOG, dataIn, checksumIn); + IOUtilsClient.cleanupWithLogger(LOG, dataIn, checksumIn); if (slowReadBuff != null) { bufferPool.returnBuffer(slowReadBuff); slowReadBuff = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 7729e1042ac..e8c881b624c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -1742,4 +1742,18 @@ void setErasureCodingPolicy(String src, String ecPolicyName) @Idempotent BatchedEntries listOpenFiles(long prevId, EnumSet openFilesTypes, String path) throws IOException; + + /** + * Satisfy the storage policy for a file/directory. + * @param path Path of an existing file/directory. + * @throws AccessControlException If access is denied. + * @throws org.apache.hadoop.fs.UnresolvedLinkException if src + * contains a symlink. + * @throws java.io.FileNotFoundException If file/dir src is not + * found. + * @throws org.apache.hadoop.hdfs.server.namenode.SafeModeException append not + * allowed in safemode. 
+ */ + @AtMostOnce + void satisfyStoragePolicy(String path) throws IOException; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java index 9a8ad8cdb13..3dde6043468 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java @@ -34,15 +34,26 @@ private final long missingBlockGroups; private final long bytesInFutureBlockGroups; private final long pendingDeletionBlocks; + private final Long highestPriorityLowRedundancyBlocks; public ECBlockGroupStats(long lowRedundancyBlockGroups, long corruptBlockGroups, long missingBlockGroups, long bytesInFutureBlockGroups, long pendingDeletionBlocks) { + this(lowRedundancyBlockGroups, corruptBlockGroups, missingBlockGroups, + bytesInFutureBlockGroups, pendingDeletionBlocks, null); + } + + public ECBlockGroupStats(long lowRedundancyBlockGroups, + long corruptBlockGroups, long missingBlockGroups, + long bytesInFutureBlockGroups, long pendingDeletionBlocks, + Long highestPriorityLowRedundancyBlocks) { this.lowRedundancyBlockGroups = lowRedundancyBlockGroups; this.corruptBlockGroups = corruptBlockGroups; this.missingBlockGroups = missingBlockGroups; this.bytesInFutureBlockGroups = bytesInFutureBlockGroups; this.pendingDeletionBlocks = pendingDeletionBlocks; + this.highestPriorityLowRedundancyBlocks + = highestPriorityLowRedundancyBlocks; } public long getBytesInFutureBlockGroups() { @@ -65,6 +76,14 @@ public long getPendingDeletionBlocks() { return pendingDeletionBlocks; } + public boolean hasHighestPriorityLowRedundancyBlocks() { + return getHighestPriorityLowRedundancyBlocks() != null; + } + + public Long getHighestPriorityLowRedundancyBlocks() { + return highestPriorityLowRedundancyBlocks; + } + @Override public String toString() { StringBuilder statsBuilder = new StringBuilder(); @@ -76,8 +95,12 @@ public String toString() { .append(", BytesInFutureBlockGroups=").append( getBytesInFutureBlockGroups()) .append(", PendingDeletionBlocks=").append( - getPendingDeletionBlocks()) - .append("]"); + getPendingDeletionBlocks()); + if (hasHighestPriorityLowRedundancyBlocks()) { + statsBuilder.append(", HighestPriorityLowRedundancyBlocks=") + .append(getHighestPriorityLowRedundancyBlocks()); + } + statsBuilder.append("]"); return statsBuilder.toString(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java index 74efcd2b55e..6de186a1bc7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java @@ -128,6 +128,38 @@ SAFEMODE_LEAVE, SAFEMODE_ENTER, SAFEMODE_GET, SAFEMODE_FORCE_EXIT } + /** + * Storage policy satisfier service modes. + */ + public enum StoragePolicySatisfierMode { + + /** + * This mode represents that SPS service is running outside Namenode as an + * external service and can accept any SPS call request. + */ + EXTERNAL, + + /** + * This mode represents that SPS service is disabled and cannot accept any + * SPS call request. 
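Together with the DistributedFileSystem and HdfsAdmin methods added earlier, the new satisfyStoragePolicy RPC can be driven from a client roughly as sketched below. The URI, path, and policy name are placeholders, and the sketch assumes the cluster runs an external storage policy satisfier (StoragePolicySatisfierMode.EXTERNAL); in NONE mode the NameNode will not accept the request:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.client.HdfsAdmin;

    public class SatisfyStoragePolicyExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        HdfsAdmin admin = new HdfsAdmin(URI.create("hdfs://mycluster"), conf);
        Path cold = new Path("/warehouse/cold");
        // Assign a policy first (COLD is one of the built-in policies), then ask the
        // storage policy satisfier to schedule the block moves. The call is
        // non-recursive: only files directly under /warehouse/cold are considered.
        admin.setStoragePolicy(cold, "COLD");
        admin.satisfyStoragePolicy(cold);
      }
    }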
+ */ + NONE; + + private static final Map MAP = + new HashMap<>(); + + static { + for (StoragePolicySatisfierMode a : values()) { + MAP.put(a.name(), a); + } + } + + /** Convert the given String to a StoragePolicySatisfierMode. */ + public static StoragePolicySatisfierMode fromString(String s) { + return MAP.get(StringUtils.toUpperCase(s)); + } + } + public enum RollingUpgradeAction { QUERY, PREPARE, FINALIZE; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/NoECPolicySetException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/NoECPolicySetException.java new file mode 100644 index 00000000000..de3054a4d63 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/NoECPolicySetException.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.protocol; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + *Thrown when no EC policy is set explicitly on the directory. 
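Because DFSClient#unsetErasureCodingPolicy now unwraps NoECPolicySetException from the RemoteException, callers can tell "no explicit EC policy was ever set here" apart from other failures. A hedged sketch, assuming the NameNode reports the condition and using a placeholder URI and path:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.NoECPolicySetException;

    public class UnsetEcPolicyExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://mycluster"), conf)) {
          DistributedFileSystem dfs = (DistributedFileSystem) fs;
          try {
            dfs.unsetErasureCodingPolicy(new Path("/data/replicated-dir"));
          } catch (NoECPolicySetException e) {
            // The directory never had an explicit EC policy; nothing to unset.
            System.err.println("No EC policy to unset: " + e.getMessage());
          }
        }
      }
    }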
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class NoECPolicySetException extends IOException { + private static final long serialVersionUID = 1L; + + public NoECPolicySetException(String msg) { + super(msg); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java index 49aadedcdec..c2100034bcd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java @@ -35,17 +35,29 @@ private final long missingReplicationOneBlocks; private final long bytesInFutureBlocks; private final long pendingDeletionBlocks; + private final Long highestPriorityLowRedundancyBlocks; public ReplicatedBlockStats(long lowRedundancyBlocks, long corruptBlocks, long missingBlocks, long missingReplicationOneBlocks, long bytesInFutureBlocks, long pendingDeletionBlocks) { + this(lowRedundancyBlocks, corruptBlocks, missingBlocks, + missingReplicationOneBlocks, bytesInFutureBlocks, pendingDeletionBlocks, + null); + } + + public ReplicatedBlockStats(long lowRedundancyBlocks, + long corruptBlocks, long missingBlocks, + long missingReplicationOneBlocks, long bytesInFutureBlocks, + long pendingDeletionBlocks, Long highestPriorityLowRedundancyBlocks) { this.lowRedundancyBlocks = lowRedundancyBlocks; this.corruptBlocks = corruptBlocks; this.missingBlocks = missingBlocks; this.missingReplicationOneBlocks = missingReplicationOneBlocks; this.bytesInFutureBlocks = bytesInFutureBlocks; this.pendingDeletionBlocks = pendingDeletionBlocks; + this.highestPriorityLowRedundancyBlocks + = highestPriorityLowRedundancyBlocks; } public long getLowRedundancyBlocks() { @@ -72,6 +84,14 @@ public long getPendingDeletionBlocks() { return pendingDeletionBlocks; } + public boolean hasHighestPriorityLowRedundancyBlocks() { + return getHighestPriorityLowRedundancyBlocks() != null; + } + + public Long getHighestPriorityLowRedundancyBlocks(){ + return highestPriorityLowRedundancyBlocks; + } + @Override public String toString() { StringBuilder statsBuilder = new StringBuilder(); @@ -83,8 +103,12 @@ public String toString() { getMissingReplicationOneBlocks()) .append(", BytesInFutureBlocks=").append(getBytesInFutureBlocks()) .append(", PendingDeletionBlocks=").append( - getPendingDeletionBlocks()) - .append("]"); + getPendingDeletionBlocks()); + if (hasHighestPriorityLowRedundancyBlocks()) { + statsBuilder.append(", HighestPriorityLowRedundancyBlocks=").append( + getHighestPriorityLowRedundancyBlocks()); + } + statsBuilder.append("]"); return statsBuilder.toString(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index e7ae6fd6424..e4bca51259b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -188,6 +188,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdatePipelineRequestProto; import 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpgradeStatusRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpgradeStatusResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SatisfyStoragePolicyRequestProto; import org.apache.hadoop.hdfs.protocol.proto.*; import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.CreateEncryptionZoneRequestProto; import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.EncryptionZoneProto; @@ -238,6 +239,7 @@ import com.google.protobuf.ByteString; import com.google.protobuf.Message; import com.google.protobuf.ServiceException; + import org.apache.hadoop.util.concurrent.AsyncGet; /** @@ -295,6 +297,7 @@ VOID_GET_EC_CODEC_REQUEST = GetErasureCodingCodecsRequestProto .newBuilder().build(); + public ClientNamenodeProtocolTranslatorPB(ClientNamenodeProtocolPB proxy) { rpcProxy = proxy; } @@ -1944,4 +1947,14 @@ public QuotaUsage getQuotaUsage(String path) throws IOException { } } + @Override + public void satisfyStoragePolicy(String src) throws IOException { + SatisfyStoragePolicyRequestProto req = + SatisfyStoragePolicyRequestProto.newBuilder().setSrc(src).build(); + try { + rpcProxy.satisfyStoragePolicy(null, req); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java index 490ccb453b2..4a5a493bf50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java @@ -1990,6 +1990,13 @@ public static SafeModeActionProto convert(SafeModeAction a) { public static ReplicatedBlockStats convert( GetFsReplicatedBlockStatsResponseProto res) { + if (res.hasHighestPrioLowRedundancyBlocks()) { + return new ReplicatedBlockStats(res.getLowRedundancy(), + res.getCorruptBlocks(), res.getMissingBlocks(), + res.getMissingReplOneBlocks(), res.getBlocksInFuture(), + res.getPendingDeletionBlocks(), + res.getHighestPrioLowRedundancyBlocks()); + } return new ReplicatedBlockStats(res.getLowRedundancy(), res.getCorruptBlocks(), res.getMissingBlocks(), res.getMissingReplOneBlocks(), res.getBlocksInFuture(), @@ -1998,6 +2005,12 @@ public static ReplicatedBlockStats convert( public static ECBlockGroupStats convert( GetFsECBlockGroupStatsResponseProto res) { + if (res.hasHighestPrioLowRedundancyBlocks()) { + return new ECBlockGroupStats(res.getLowRedundancy(), + res.getCorruptBlocks(), res.getMissingBlocks(), + res.getBlocksInFuture(), res.getPendingDeletionBlocks(), + res.getHighestPrioLowRedundancyBlocks()); + } return new ECBlockGroupStats(res.getLowRedundancy(), res.getCorruptBlocks(), res.getMissingBlocks(), res.getBlocksInFuture(), res.getPendingDeletionBlocks()); @@ -2432,6 +2445,10 @@ public static GetFsReplicatedBlockStatsResponseProto convert( replicatedBlockStats.getBytesInFutureBlocks()); result.setPendingDeletionBlocks( replicatedBlockStats.getPendingDeletionBlocks()); + if (replicatedBlockStats.hasHighestPriorityLowRedundancyBlocks()) { + result.setHighestPrioLowRedundancyBlocks( + replicatedBlockStats.getHighestPriorityLowRedundancyBlocks()); + } return result.build(); } @@ -2447,6 +2464,10 @@ public static 
GetFsECBlockGroupStatsResponseProto convert( ecBlockGroupStats.getBytesInFutureBlockGroups()); result.setPendingDeletionBlocks( ecBlockGroupStats.getPendingDeletionBlocks()); + if (ecBlockGroupStats.hasHighestPriorityLowRedundancyBlocks()) { + result.setHighestPrioLowRedundancyBlocks( + ecBlockGroupStats.getHighestPriorityLowRedundancyBlocks()); + } return result.build(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java index e0fdb3242d8..252b70dde44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java @@ -18,14 +18,68 @@ package org.apache.hadoop.hdfs.server.namenode.ha; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.hdfs.DFSUtilClient; +import org.apache.hadoop.hdfs.HAUtilClient; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.io.retry.FailoverProxyProvider; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public abstract class AbstractNNFailoverProxyProvider implements FailoverProxyProvider { + protected static final Logger LOG = + LoggerFactory.getLogger(AbstractNNFailoverProxyProvider.class); - private AtomicBoolean fallbackToSimpleAuth; + protected Configuration conf; + protected Class xface; + protected HAProxyFactory factory; + protected UserGroupInformation ugi; + protected AtomicBoolean fallbackToSimpleAuth; + + protected AbstractNNFailoverProxyProvider() { + } + + protected AbstractNNFailoverProxyProvider(Configuration conf, URI uri, + Class xface, HAProxyFactory factory) { + this.conf = new Configuration(conf); + this.xface = xface; + this.factory = factory; + try { + this.ugi = UserGroupInformation.getCurrentUser(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + int maxRetries = this.conf.getInt( + HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_KEY, + HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_DEFAULT); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, + maxRetries); + + int maxRetriesOnSocketTimeouts = this.conf.getInt( + HdfsClientConfigKeys + .Failover.CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + HdfsClientConfigKeys + .Failover.CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT); + this.conf.setInt( + CommonConfigurationKeysPublic + .IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + maxRetriesOnSocketTimeouts); + } /** * Inquire whether logical HA URI is used for the implementation. If it is @@ -51,4 +105,100 @@ public synchronized void setFallbackToSimpleAuth( public synchronized AtomicBoolean getFallbackToSimpleAuth() { return fallbackToSimpleAuth; } + + /** + * ProxyInfo to a NameNode. Includes its address. 
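Stepping back to the ReplicatedBlockStats and ECBlockGroupStats changes above: highestPriorityLowRedundancyBlocks travels as an optional proto field, so the Java getter returns a nullable Long and callers should guard on hasHighestPriorityLowRedundancyBlocks() when the NameNode may be older. A consumer sketch; the formatting class is illustrative, and the stats object would come from, e.g., the ClientProtocol#getReplicatedBlockStats() call these converters serve:

    import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats;

    public final class BlockStatsReport {
      // Print the new counter only when the NameNode actually reported it; older
      // NameNodes leave the optional proto field unset, so the getter returns null.
      public static String format(ReplicatedBlockStats stats) {
        StringBuilder sb = new StringBuilder()
            .append("lowRedundancy=").append(stats.getLowRedundancyBlocks())
            .append(", pendingDeletion=").append(stats.getPendingDeletionBlocks());
        if (stats.hasHighestPriorityLowRedundancyBlocks()) {
          sb.append(", highestPriorityLowRedundancy=")
              .append(stats.getHighestPriorityLowRedundancyBlocks());
        }
        return sb.toString();
      }
    }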
+ */ + public static class NNProxyInfo extends ProxyInfo { + private InetSocketAddress address; + + public NNProxyInfo(InetSocketAddress address) { + super(null, address.toString()); + this.address = address; + } + + public InetSocketAddress getAddress() { + return address; + } + } + + @Override + public Class getInterface() { + return xface; + } + + /** + * Create a proxy if it has not been created yet. + */ + protected NNProxyInfo createProxyIfNeeded(NNProxyInfo pi) { + if (pi.proxy == null) { + assert pi.getAddress() != null : "Proxy address is null"; + try { + pi.proxy = factory.createProxy(conf, + pi.getAddress(), xface, ugi, false, getFallbackToSimpleAuth()); + } catch (IOException ioe) { + LOG.error("{} Failed to create RPC proxy to NameNode", + this.getClass().getSimpleName(), ioe); + throw new RuntimeException(ioe); + } + } + return pi; + } + + /** + * Get list of configured NameNode proxy addresses. + * Randomize the list if requested. + */ + protected List> getProxyAddresses(URI uri, String addressKey) { + final List> proxies = new ArrayList>(); + Map> map = + DFSUtilClient.getAddresses(conf, null, addressKey); + Map addressesInNN = map.get(uri.getHost()); + + if (addressesInNN == null || addressesInNN.size() == 0) { + throw new RuntimeException("Could not find any configured addresses " + + "for URI " + uri); + } + + Collection addressesOfNns = addressesInNN.values(); + for (InetSocketAddress address : addressesOfNns) { + proxies.add(new NNProxyInfo(address)); + } + // Randomize the list to prevent all clients pointing to the same one + boolean randomized = getRandomOrder(conf, uri); + if (randomized) { + Collections.shuffle(proxies); + } + + // The client may have a delegation token set for the logical + // URI of the cluster. Clone this token to apply to each of the + // underlying IPC addresses so that the IPC code can find it. + HAUtilClient.cloneDelegationTokenForLogicalUri(ugi, uri, addressesOfNns); + return proxies; + } + + /** + * Check whether random order is configured for failover proxy provider + * for the namenode/nameservice. + * + * @param conf Configuration + * @param nameNodeUri The URI of namenode/nameservice + * @return random order configuration + */ + public static boolean getRandomOrder( + Configuration conf, URI nameNodeUri) { + String host = nameNodeUri.getHost(); + String configKeyWithHost = HdfsClientConfigKeys.Failover.RANDOM_ORDER + + "." 
+ host; + + if (conf.get(configKeyWithHost) != null) { + return conf.getBoolean( + configKeyWithHost, + HdfsClientConfigKeys.Failover.RANDOM_ORDER_DEFAULT); + } + + return conf.getBoolean( + HdfsClientConfigKeys.Failover.RANDOM_ORDER, + HdfsClientConfigKeys.Failover.RANDOM_ORDER_DEFAULT); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index f46532ad972..92e75cee364 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -19,23 +19,11 @@ import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; import java.net.URI; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.List; -import java.util.Map; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.hdfs.DFSUtilClient; -import org.apache.hadoop.hdfs.HAUtilClient; -import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.security.UserGroupInformation; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; @@ -48,17 +36,9 @@ public class ConfiguredFailoverProxyProvider extends AbstractNNFailoverProxyProvider { - private static final Logger LOG = - LoggerFactory.getLogger(ConfiguredFailoverProxyProvider.class); - - protected final Configuration conf; - protected final List> proxies = - new ArrayList>(); - protected final UserGroupInformation ugi; - protected final Class xface; + protected final List> proxies; private int currentProxyIndex = 0; - protected final HAProxyFactory factory; public ConfiguredFailoverProxyProvider(Configuration conf, URI uri, Class xface, HAProxyFactory factory) { @@ -67,83 +47,8 @@ public ConfiguredFailoverProxyProvider(Configuration conf, URI uri, public ConfiguredFailoverProxyProvider(Configuration conf, URI uri, Class xface, HAProxyFactory factory, String addressKey) { - this.xface = xface; - this.conf = new Configuration(conf); - int maxRetries = this.conf.getInt( - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_KEY, - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_DEFAULT); - this.conf.setInt( - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, - maxRetries); - - int maxRetriesOnSocketTimeouts = this.conf.getInt( - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY, - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT); - this.conf.setInt( - CommonConfigurationKeysPublic - .IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, - maxRetriesOnSocketTimeouts); - - try { - ugi = UserGroupInformation.getCurrentUser(); - - Map> map = - DFSUtilClient.getAddresses(conf, null, addressKey); - Map addressesInNN = map.get(uri.getHost()); - - if (addressesInNN == null || addressesInNN.size() == 0) { - throw new RuntimeException("Could not find any configured addresses " + - "for URI " + uri); - } - - Collection addressesOfNns = addressesInNN.values(); - for (InetSocketAddress address 
: addressesOfNns) { - proxies.add(new AddressRpcProxyPair(address)); - } - // Randomize the list to prevent all clients pointing to the same one - boolean randomized = getRandomOrder(conf, uri); - if (randomized) { - Collections.shuffle(proxies); - } - - // The client may have a delegation token set for the logical - // URI of the cluster. Clone this token to apply to each of the - // underlying IPC addresses so that the IPC code can find it. - HAUtilClient.cloneDelegationTokenForLogicalUri(ugi, uri, addressesOfNns); - this.factory = factory; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Check whether random order is configured for failover proxy provider - * for the namenode/nameservice. - * - * @param conf Configuration - * @param nameNodeUri The URI of namenode/nameservice - * @return random order configuration - */ - private static boolean getRandomOrder( - Configuration conf, URI nameNodeUri) { - String host = nameNodeUri.getHost(); - String configKeyWithHost = HdfsClientConfigKeys.Failover.RANDOM_ORDER - + "." + host; - - if (conf.get(configKeyWithHost) != null) { - return conf.getBoolean( - configKeyWithHost, - HdfsClientConfigKeys.Failover.RANDOM_ORDER_DEFAULT); - } - - return conf.getBoolean( - HdfsClientConfigKeys.Failover.RANDOM_ORDER, - HdfsClientConfigKeys.Failover.RANDOM_ORDER_DEFAULT); - } - - @Override - public Class getInterface() { - return xface; + super(conf, uri, xface, factory); + this.proxies = getProxyAddresses(uri, addressKey); } /** @@ -151,21 +56,8 @@ private static boolean getRandomOrder( */ @Override public synchronized ProxyInfo getProxy() { - AddressRpcProxyPair current = proxies.get(currentProxyIndex); - return getProxy(current); - } - - protected ProxyInfo getProxy(AddressRpcProxyPair current) { - if (current.namenode == null) { - try { - current.namenode = factory.createProxy(conf, - current.address, xface, ugi, false, getFallbackToSimpleAuth()); - } catch (IOException e) { - LOG.error("Failed to create RPC proxy to NameNode", e); - throw new RuntimeException(e); - } - } - return new ProxyInfo(current.namenode, current.address.toString()); + NNProxyInfo current = proxies.get(currentProxyIndex); + return createProxyIfNeeded(current); } @Override @@ -177,31 +69,18 @@ synchronized void incrementProxyIndex() { currentProxyIndex = (currentProxyIndex + 1) % proxies.size(); } - /** - * A little pair object to store the address and connected RPC proxy object to - * an NN. Note that {@link AddressRpcProxyPair#namenode} may be null. - */ - protected static class AddressRpcProxyPair { - public final InetSocketAddress address; - public T namenode; - - public AddressRpcProxyPair(InetSocketAddress address) { - this.address = address; - } - } - /** * Close all the proxy objects which have been opened over the lifetime of * this proxy provider. 
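getRandomOrder(), now hoisted into AbstractNNFailoverProxyProvider and made public, checks a per-nameservice key (base key plus "." plus the URI host) before falling back to the cluster-wide default. A small sketch; it assumes the base key resolves to dfs.client.failover.random.order and uses a placeholder nameservice:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;

    public class RandomOrderCheck {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // The per-nameservice key overrides the cluster-wide default;
        // "mycluster" is a placeholder nameservice name.
        conf.setBoolean("dfs.client.failover.random.order.mycluster", true);
        boolean shuffle = AbstractNNFailoverProxyProvider.getRandomOrder(
            conf, URI.create("hdfs://mycluster"));
        System.out.println("Proxy list will be shuffled: " + shuffle);
      }
    }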
*/ @Override public synchronized void close() throws IOException { - for (AddressRpcProxyPair proxy : proxies) { - if (proxy.namenode != null) { - if (proxy.namenode instanceof Closeable) { - ((Closeable)proxy.namenode).close(); + for (ProxyInfo proxy : proxies) { + if (proxy.proxy != null) { + if (proxy.proxy instanceof Closeable) { + ((Closeable)proxy.proxy).close(); } else { - RPC.stopProxy(proxy.namenode); + RPC.stopProxy(proxy.proxy); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java index ed250a0f42e..e70374047a3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java @@ -19,15 +19,11 @@ import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; import java.net.URI; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.DFSUtilClient; -import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.security.UserGroupInformation; /** * A NNFailoverProxyProvider implementation which works on IP failover setup. @@ -47,53 +43,18 @@ */ public class IPFailoverProxyProvider extends AbstractNNFailoverProxyProvider { - private final Configuration conf; - private final Class xface; - private final URI nameNodeUri; - private final HAProxyFactory factory; - private ProxyInfo nnProxyInfo = null; + private final NNProxyInfo nnProxyInfo; public IPFailoverProxyProvider(Configuration conf, URI uri, Class xface, HAProxyFactory factory) { - this.xface = xface; - this.nameNodeUri = uri; - this.factory = factory; - - this.conf = new Configuration(conf); - int maxRetries = this.conf.getInt( - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_KEY, - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_DEFAULT); - this.conf.setInt( - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, - maxRetries); - - int maxRetriesOnSocketTimeouts = this.conf.getInt( - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY, - HdfsClientConfigKeys.Failover.CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT); - this.conf.setInt( - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, - maxRetriesOnSocketTimeouts); - } - - @Override - public Class getInterface() { - return xface; + super(conf, uri, xface, factory); + this.nnProxyInfo = new NNProxyInfo(DFSUtilClient.getNNAddress(uri)); } @Override - public synchronized ProxyInfo getProxy() { + public synchronized NNProxyInfo getProxy() { // Create a non-ha proxy if not already created. 
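For context, getProxyAddresses() resolves the NameNode list from the standard client-side HA settings. The sketch below wires up a two-NameNode nameservice programmatically; the nameservice name, hostnames, and ports are placeholders, and in practice these keys normally live in hdfs-site.xml:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HaClientConfigExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.nameservices", "mycluster");
        conf.set("dfs.ha.namenodes.mycluster", "nn1,nn2");
        conf.set("dfs.namenode.rpc-address.mycluster.nn1", "nn1.example.com:8020");
        conf.set("dfs.namenode.rpc-address.mycluster.nn2", "nn2.example.com:8020");
        conf.set("dfs.client.failover.proxy.provider.mycluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
        // The provider builds one NNProxyInfo per rpc-address above and fails over
        // between them; actual RPC proxies are created lazily by createProxyIfNeeded().
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://mycluster"), conf)) {
          System.out.println("Root status: " + fs.getFileStatus(new Path("/")));
        }
      }
    }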
- if (nnProxyInfo == null) { - try { - // Create a proxy that is not wrapped in RetryProxy - InetSocketAddress nnAddr = DFSUtilClient.getNNAddress(nameNodeUri); - nnProxyInfo = new ProxyInfo(factory.createProxy(conf, nnAddr, xface, - UserGroupInformation.getCurrentUser(), false), nnAddr.toString()); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - return nnProxyInfo; + return createProxyIfNeeded(nnProxyInfo); } /** Nothing to do for IP failover */ @@ -106,7 +67,7 @@ public void performFailover(T currentProxy) { */ @Override public synchronized void close() throws IOException { - if (nnProxyInfo == null) { + if (nnProxyInfo.proxy == null) { return; } if (nnProxyInfo.proxy instanceof Closeable) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java index 9c2d2e0ecb1..aa982d0dc34 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java @@ -880,7 +880,7 @@ public void close() { maxNonMmappedEvictableLifespanMs = 0; maxEvictableMmapedSize = 0; // Close and join cacheCleaner thread. - IOUtilsClient.cleanup(LOG, cacheCleaner); + IOUtilsClient.cleanupWithLogger(LOG, cacheCleaner); // Purge all replicas. while (true) { Object eldestKey; @@ -931,7 +931,7 @@ public void close() { LOG.error("Interrupted while waiting for CleanerThreadPool " + "to terminate", e); } - IOUtilsClient.cleanup(LOG, shmManager); + IOUtilsClient.cleanupWithLogger(LOG, shmManager); } @VisibleForTesting // ONLY for testing diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java index fd5dbfc7287..14116e2fdc1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitReplica.java @@ -244,7 +244,7 @@ void close() { suffix += " munmapped."; } } - IOUtilsClient.cleanup(LOG, dataStream, metaStream); + IOUtilsClient.cleanupWithLogger(LOG, dataStream, metaStream); if (slot != null) { cache.scheduleSlotReleaser(slot); if (LOG.isTraceEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/IOUtilsClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/IOUtilsClient.java index 71596f3835d..85e9cee748c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/IOUtilsClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/IOUtilsClient.java @@ -31,7 +31,8 @@ * @param log the log to record problems to at debug level. Can be null. * @param closeables the objects to close */ - public static void cleanup(Logger log, java.io.Closeable... closeables) { + public static void cleanupWithLogger(Logger log, + java.io.Closeable... 
closeables) { for (java.io.Closeable c : closeables) { if (c != null) { try { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java index 9bb1846fc64..a685573f8f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java @@ -133,6 +133,7 @@ static HdfsFileStatus toFileStatus(final Map json, Boolean aclBit = (Boolean) m.get("aclBit"); Boolean encBit = (Boolean) m.get("encBit"); Boolean erasureBit = (Boolean) m.get("ecBit"); + Boolean snapshotEnabledBit = (Boolean) m.get("snapshotEnabled"); EnumSet f = EnumSet.noneOf(HdfsFileStatus.Flags.class); if (aclBit != null && aclBit) { @@ -144,6 +145,9 @@ static HdfsFileStatus toFileStatus(final Map json, if (erasureBit != null && erasureBit) { f.add(HdfsFileStatus.Flags.HAS_EC); } + if (snapshotEnabledBit != null && snapshotEnabledBit) { + f.add(HdfsFileStatus.Flags.SNAPSHOT_ENABLED); + } Map ecPolicyObj = (Map) m.get("ecPolicyObj"); ErasureCodingPolicy ecPolicy = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto index 0f5ce9449c8..49ea3f3687c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto @@ -167,6 +167,7 @@ message AbandonBlockResponseProto { // void response enum AddBlockFlagProto { NO_LOCAL_WRITE = 1; // avoid writing to local node. 
+ IGNORE_CLIENT_LOCALITY = 2; // write to a random node } message AddBlockRequestProto { @@ -347,6 +348,8 @@ message GetFsReplicatedBlockStatsResponseProto { required uint64 missing_repl_one_blocks = 4; required uint64 blocks_in_future = 5; required uint64 pending_deletion_blocks = 6; + optional uint64 highest_prio_low_redundancy_blocks = 7; + } message GetFsECBlockGroupStatsRequestProto { // no input paramters @@ -358,6 +361,7 @@ message GetFsECBlockGroupStatsResponseProto { required uint64 missing_blocks = 3; required uint64 blocks_in_future = 4; required uint64 pending_deletion_blocks = 5; + optional uint64 highest_prio_low_redundancy_blocks = 6; } enum DatanodeReportTypeProto { // type of the datanode report @@ -477,7 +481,6 @@ message RollingUpgradeInfoProto { message RollingUpgradeResponseProto { optional RollingUpgradeInfoProto rollingUpgradeInfo= 1; } - message ListCorruptFileBlocksRequestProto { required string path = 1; optional string cookie = 2; @@ -827,6 +830,14 @@ message ListOpenFilesResponseProto { repeated OpenFilesTypeProto types = 3; } +message SatisfyStoragePolicyRequestProto { + required string src = 1; +} + +message SatisfyStoragePolicyResponseProto { + +} + service ClientNamenodeProtocol { rpc getBlockLocations(GetBlockLocationsRequestProto) returns(GetBlockLocationsResponseProto); @@ -1013,4 +1024,6 @@ service ClientNamenodeProtocol { returns(GetQuotaUsageResponseProto); rpc listOpenFiles(ListOpenFilesRequestProto) returns(ListOpenFilesResponseProto); + rpc satisfyStoragePolicy(SatisfyStoragePolicyRequestProto) + returns(SatisfyStoragePolicyResponseProto); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java index ce76f05fb8f..dd285d4dfcd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java @@ -199,7 +199,7 @@ public static FILE_TYPE getType(FileStatus fileStatus) { public static final String ENC_BIT_JSON = "encBit"; public static final String EC_BIT_JSON = "ecBit"; - public static final String SNAPSHOT_BIT_JSON = "seBit"; + public static final String SNAPSHOT_BIT_JSON = "snapshotEnabled"; public static final String DIRECTORY_LISTING_JSON = "DirectoryListing"; public static final String PARTIAL_LISTING_JSON = "partialListing"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java index 1d47a61af53..a3c45c7982c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java @@ -120,6 +120,9 @@ if (fileStatus.getPermission().getErasureCodedBit()) { json.put(HttpFSFileSystem.EC_BIT_JSON, true); } + if (fileStatus.isSnapshotEnabled()) { + json.put(HttpFSFileSystem.SNAPSHOT_BIT_JSON, true); + } return json; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml index e4204564c25..3e9064f4472 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/resources/httpfs-default.xml @@ -54,6 +54,13 @@ + + hadoop.http.idle_timeout.ms + 1000 + + Httpfs Server connection timeout in milliseconds. + + hadoop.http.max.threads diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java index a6dce4da108..8dabdeaa6dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java @@ -376,6 +376,35 @@ private void testListStatus() throws Exception { Assert.assertEquals(stati[0].getPath(), statl[0].getPath()); } + private void testFileStatusAttr() throws Exception { + if (!this.isLocalFS()) { + // Create a directory + Path path = new Path("/tmp/tmp-snap-test"); + DistributedFileSystem distributedFs = (DistributedFileSystem) FileSystem + .get(path.toUri(), this.getProxiedFSConf()); + distributedFs.mkdirs(path); + // Get the FileSystem instance that's being tested + FileSystem fs = this.getHttpFSFileSystem(); + // Check FileStatus + assertFalse("Snapshot should be disallowed by default", + fs.getFileStatus(path).isSnapshotEnabled()); + // Allow snapshot + distributedFs.allowSnapshot(path); + // Check FileStatus + assertTrue("Snapshot enabled bit is not set in FileStatus", + fs.getFileStatus(path).isSnapshotEnabled()); + // Disallow snapshot + distributedFs.disallowSnapshot(path); + // Check FileStatus + assertFalse("Snapshot enabled bit is not cleared in FileStatus", + fs.getFileStatus(path).isSnapshotEnabled()); + // Cleanup + fs.delete(path, true); + fs.close(); + distributedFs.close(); + } + } + private static void assertSameListing(FileSystem expected, FileSystem actual, Path p) throws IOException { // Consume all the entries from both iterators @@ -1041,7 +1070,8 @@ private void testStoragePolicy() throws Exception { SET_REPLICATION, CHECKSUM, CONTENT_SUMMARY, FILEACLS, DIRACLS, SET_XATTR, GET_XATTRS, REMOVE_XATTR, LIST_XATTRS, ENCRYPTION, LIST_STATUS_BATCH, GETTRASHROOT, STORAGEPOLICY, ERASURE_CODING, - CREATE_SNAPSHOT, RENAME_SNAPSHOT, DELETE_SNAPSHOT + CREATE_SNAPSHOT, RENAME_SNAPSHOT, DELETE_SNAPSHOT, + FILE_STATUS_ATTR } private void operation(Operation op) throws Exception { @@ -1139,6 +1169,9 @@ private void operation(Operation op) throws Exception { case DELETE_SNAPSHOT: testDeleteSnapshot(); break; + case FILE_STATUS_ATTR: + testFileStatusAttr(); + break; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml index 52d9257f951..ded1c0d4029 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml @@ -201,26 +201,36 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> - org.apache.maven.plugins - maven-antrun-plugin + org.apache.hadoop + hadoop-maven-plugins - make + cmake-compile compile - run + cmake-compile - - - - - - - - - - + ${basedir}/src + + ${project.build.directory}/native/javah + ${sun.arch.data.model} + ${require.fuse} + ${require.valgrind} + 1 + ${require.libwebhdfs} + ${require.openssl} + ${openssl.prefix} + ${openssl.lib} + ${openssl.include} + + ${project.build.directory} + + + + org.apache.maven.plugins + 
maven-antrun-plugin + native_tests test @@ -236,6 +246,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> + @@ -246,7 +257,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> - test-patch + native-clang false @@ -256,35 +267,40 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> - org.apache.maven.plugins - maven-antrun-plugin + org.apache.hadoop + hadoop-maven-plugins - make_altern + cmake-compile-clang compile - run + cmake-compile - - - - - - - - - - - - - - - - - + ${basedir}/src + + clang + clang++ + ${project.build.directory}/native/javah + ${sun.arch.data.model} + ${require.fuse} + ${require.valgrind} + 1 + ${require.libwebhdfs} + ${require.openssl} + ${openssl.prefix} + ${openssl.lib} + ${openssl.include} + + ${project.build.directory}/clang + + + + org.apache.maven.plugins + maven-antrun-plugin + - native_tests_altern + native_tests_clang test run @@ -292,26 +308,17 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> - + - + + - - clean_altern - test - run - - - - - - diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt index a3f8f2d24a5..1813ec134f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt @@ -88,6 +88,54 @@ function(link_libhdfs_test NAME LIBRARY) target_link_libraries("${NAME}_${LIBRARY}" ${LIBRARY} ${ARGN}) endfunction() + +set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) +hadoop_set_find_shared_library_without_version() +set(OPENSSL_NAME "crypto") +if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") + SET(OPENSSL_NAME "eay32") +endif() +message("CUSTOM_OPENSSL_PREFIX = ${CUSTOM_OPENSSL_PREFIX}") +find_library(OPENSSL_LIBRARY + NAMES ${OPENSSL_NAME} + PATHS ${CUSTOM_OPENSSL_PREFIX} ${CUSTOM_OPENSSL_PREFIX}/lib + ${CUSTOM_OPENSSL_PREFIX}/lib64 ${CUSTOM_OPENSSL_LIB} NO_DEFAULT_PATH) +find_library(OPENSSL_LIBRARY NAMES ${OPENSSL_NAME}) +find_path(OPENSSL_INCLUDE_DIR + NAMES openssl/evp.h + PATHS ${CUSTOM_OPENSSL_PREFIX} ${CUSTOM_OPENSSL_PREFIX}/include + ${CUSTOM_OPENSSL_INCLUDE} NO_DEFAULT_PATH) +find_path(OPENSSL_INCLUDE_DIR NAMES openssl/evp.h) +set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) +set(USABLE_OPENSSL 0) +if(OPENSSL_LIBRARY AND OPENSSL_INCLUDE_DIR) + include(CheckCSourceCompiles) + set(OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) + set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR}) + check_c_source_compiles("#include \"${OPENSSL_INCLUDE_DIR}/openssl/evp.h\"\nint main(int argc, char **argv) { return !EVP_aes_256_ctr; }" HAS_NEW_ENOUGH_OPENSSL) + set(CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES}) + if(NOT HAS_NEW_ENOUGH_OPENSSL) + message("The OpenSSL library installed at ${OPENSSL_LIBRARY} is too old. You need a version at least new enough to have EVP_aes_256_ctr.") + else() + SET(USABLE_OPENSSL 1) + endif() +endif() +if(USABLE_OPENSSL) + get_filename_component(HADOOP_OPENSSL_LIBRARY ${OPENSSL_LIBRARY} NAME) + set(OPENSSL_SOURCE_FILES + "${SRC}/crypto/OpensslCipher.c" + "${SRC}/crypto/random/OpensslSecureRandom.c") + set(REQUIRE_OPENSSL ${REQUIRE_OPENSSL}) # Stop warning about unused variable. +else() + message("Cannot find a usable OpenSSL library. 
OPENSSL_LIBRARY=${OPENSSL_LIBRARY}, OPENSSL_INCLUDE_DIR=${OPENSSL_INCLUDE_DIR}, CUSTOM_OPENSSL_LIB=${CUSTOM_OPENSSL_LIB}, CUSTOM_OPENSSL_PREFIX=${CUSTOM_OPENSSL_PREFIX}, CUSTOM_OPENSSL_INCLUDE=${CUSTOM_OPENSSL_INCLUDE}") + if(REQUIRE_OPENSSL) + message(FATAL_ERROR "Terminating build because require.openssl was specified.") + endif() + set(OPENSSL_LIBRARY "") + set(OPENSSL_INCLUDE_DIR "") + set(OPENSSL_SOURCE_FILES "") +endif() + add_subdirectory(main/native/libhdfs) add_subdirectory(main/native/libhdfs-tests) add_subdirectory(main/native/libhdfspp) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_connect.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_connect.c index 6ee4ad5130e..f08917a53c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_connect.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_connect.c @@ -192,7 +192,7 @@ int fuseConnectInit(const char *nnUri, int port) } /** - * Compare two libhdfs connections by username + * Compare two libhdfs connections by username and Kerberos ticket cache path * * @param a The first libhdfs connection * @param b The second libhdfs connection @@ -201,22 +201,26 @@ int fuseConnectInit(const char *nnUri, int port) */ static int hdfsConnCompare(const struct hdfsConn *a, const struct hdfsConn *b) { - return strcmp(a->usrname, b->usrname); + int rc = strcmp(a->usrname, b->usrname); + if (rc) return rc; + return gHdfsAuthConf == AUTH_CONF_KERBEROS && strcmp(a->kpath, b->kpath); } /** * Find a libhdfs connection by username * * @param usrname The username to look up + * @param kpath The Kerberos ticket cache file path * * @return The connection, or NULL if none could be found */ -static struct hdfsConn* hdfsConnFind(const char *usrname) +static struct hdfsConn* hdfsConnFind(const char *usrname, const char *kpath) { struct hdfsConn exemplar; memset(&exemplar, 0, sizeof(exemplar)); exemplar.usrname = (char*)usrname; + exemplar.kpath = (char*)kpath; return RB_FIND(hdfsConnTree, &gConnTree, &exemplar); } @@ -542,8 +546,13 @@ static int fuseConnect(const char *usrname, struct fuse_context *ctx, int ret; struct hdfsConn* conn; + char kpath[PATH_MAX] = { 0 }; + if (gHdfsAuthConf == AUTH_CONF_KERBEROS) { + findKerbTicketCachePath(ctx, kpath, sizeof(kpath)); + } + pthread_mutex_lock(&gConnMutex); - conn = hdfsConnFind(usrname); + conn = hdfsConnFind(usrname, kpath); if (!conn) { ret = fuseNewConnect(usrname, ctx, &conn); if (ret) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java index a5d9abd3118..dabbe00b016 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/test/TestFuseDFS.java @@ -22,8 +22,8 @@ import java.util.concurrent.atomic.*; import org.apache.log4j.Level; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.permission.*; @@ -48,7 +48,7 @@ private static Runtime r; private static String mountPoint; - private static final Log LOG = LogFactory.getLog(TestFuseDFS.class); + private static final Logger LOG = 
LoggerFactory.getLogger(TestFuseDFS.class); { GenericTestUtils.setLogLevel(LOG, Level.ALL); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/CMakeLists.txt index 2883585158d..cac1335ae25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/CMakeLists.txt @@ -74,9 +74,11 @@ endif() # introducing an abstraction layer over the sys/mman.h functions. if(NOT WIN32) build_libhdfs_test(test_libhdfs_vecsum hdfs vecsum.c) + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) if(CMAKE_SYSTEM_NAME MATCHES "Darwin") - link_libhdfs_test(test_libhdfs_vecsum hdfs pthread) + link_libhdfs_test(test_libhdfs_vecsum hdfs ${CMAKE_THREAD_LIBS_INIT}) else() - link_libhdfs_test(test_libhdfs_vecsum hdfs pthread rt) + link_libhdfs_test(test_libhdfs_vecsum hdfs ${CMAKE_THREAD_LIBS_INIT} rt) endif() endif() diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/request.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/request.cc index 9157476daad..2de26fd0eab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/request.cc +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/request.cc @@ -16,7 +16,7 @@ * limitations under the License. */ - +#include #include "request.h" #include "rpc_engine.h" #include "sasl_protocol.h" diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt index 6157902b915..59fdbf20a27 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt @@ -143,6 +143,10 @@ include_directories ( add_library(hdfspp_test_shim_static STATIC hdfs_shim.c libhdfs_wrapper.c libhdfspp_wrapper.cc ${LIBHDFSPP_BINDING_C}/hdfs.cc) add_library(hdfspp_test_static STATIC ${LIBHDFSPP_BINDING_C}/hdfs.cc) +# Add dependencies +add_dependencies(hdfspp_test_shim_static proto) +add_dependencies(hdfspp_test_static proto) + # TODO: get all of the mini dfs library bits here in one place # add_library(hdfspp_mini_cluster native_mini_dfs ${JAVA_JVM_LIBRARY} ) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java index 4ae51c62a39..27213953802 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java @@ -26,8 +26,8 @@ import java.util.List; import java.util.HashMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; @@ -61,7 +61,8 @@ * RPC program corresponding to mountd daemon. See {@link Mountd}. 
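The TestFuseDFS hunk above and the NFS gateway hunks that follow (RpcProgramMountd, AsyncDataService, OpenFileCtxCache, PrivilegedNfsGatewayStarter, WriteCtx, WriteManager, TestMountd, TestOutOfOrderWrite) all apply the same mechanical commons-logging to SLF4J migration. A minimal sketch of the pattern, using a hypothetical class name and assuming slf4j-api on the classpath:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MyNfsComponent {
  // Before: private static final Log LOG = LogFactory.getLog(MyNfsComponent.class);
  private static final Logger LOG =
      LoggerFactory.getLogger(MyNfsComponent.class);

  void report(int requests, long elapsedMs) {
    // SLF4J placeholders avoid building the message when INFO is disabled.
    LOG.info("Processed {} requests in {} ms", requests, elapsedMs);
  }
}

Existing call sites that concatenate strings keep working after the swap; moving them to the parameterized form is the usual follow-up cleanup.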
*/ public class RpcProgramMountd extends RpcProgram implements MountInterface { - private static final Log LOG = LogFactory.getLog(RpcProgramMountd.class); + private static final Logger LOG = + LoggerFactory.getLogger(RpcProgramMountd.class); public static final int PROGRAM = 100005; public static final int VERSION_1 = 1; public static final int VERSION_2 = 2; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/AsyncDataService.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/AsyncDataService.java index ee3f90aa644..cbbcccf3ca0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/AsyncDataService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/AsyncDataService.java @@ -22,8 +22,8 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This class is a thread pool to easily schedule async data operations. Current @@ -31,7 +31,7 @@ * for readahead operations too. */ public class AsyncDataService { - static final Log LOG = LogFactory.getLog(AsyncDataService.class); + static final Logger LOG = LoggerFactory.getLogger(AsyncDataService.class); // ThreadPool core pool size private static final int CORE_THREADS_PER_VOLUME = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java index e23e4905d41..5c915d26bf1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java @@ -22,8 +22,8 @@ import java.util.Map.Entry; import java.util.concurrent.ConcurrentMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration; import org.apache.hadoop.nfs.nfs3.FileHandle; @@ -39,7 +39,8 @@ * used to maintain the writing context for a single file. 
*/ class OpenFileCtxCache { - private static final Log LOG = LogFactory.getLog(OpenFileCtxCache.class); + private static final Logger LOG = + LoggerFactory.getLogger(OpenFileCtxCache.class); // Insert and delete with openFileMap are synced private final ConcurrentMap openFileMap = Maps .newConcurrentMap(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java index e495486efdc..549f5bedc3d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java @@ -22,8 +22,8 @@ import org.apache.commons.daemon.Daemon; import org.apache.commons.daemon.DaemonContext; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration; @@ -37,7 +37,8 @@ * Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=594880 */ public class PrivilegedNfsGatewayStarter implements Daemon { - static final Log LOG = LogFactory.getLog(PrivilegedNfsGatewayStarter.class); + static final Logger LOG = + LoggerFactory.getLogger(PrivilegedNfsGatewayStarter.class); private String[] args = null; private DatagramSocket registrationSocket = null; private Nfs3 nfs3Server = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java index 5d667515a61..98f3d6cfa29 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java @@ -22,8 +22,8 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.nfs.nfs3.FileHandle; import org.apache.hadoop.nfs.nfs3.Nfs3Constant.WriteStableHow; @@ -37,7 +37,7 @@ * xid and reply status. */ class WriteCtx { - public static final Log LOG = LogFactory.getLog(WriteCtx.class); + public static final Logger LOG = LoggerFactory.getLogger(WriteCtx.class); /** * In memory write data has 3 states. 
ALLOW_DUMP: not sequential write, still diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java index 0a3450d66ef..35542391bdb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java @@ -20,8 +20,8 @@ import java.io.IOException; import java.util.EnumSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.hdfs.DFSClient; @@ -51,7 +51,7 @@ * Manage the writes and responds asynchronously. */ public class WriteManager { - public static final Log LOG = LogFactory.getLog(WriteManager.class); + public static final Logger LOG = LoggerFactory.getLogger(WriteManager.class); private final NfsConfiguration config; private final IdMappingServiceProvider iug; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java index 32ed20fdcd1..fe92c9062b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java @@ -21,8 +21,8 @@ import java.io.IOException; import java.net.InetAddress; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration; @@ -35,7 +35,7 @@ public class TestMountd { - public static final Log LOG = LogFactory.getLog(TestMountd.class); + public static final Logger LOG = LoggerFactory.getLogger(TestMountd.class); @Test public void testStart() throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java index 1d152cea5c0..4e53c72bec8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java @@ -21,8 +21,8 @@ import java.nio.ByteBuffer; import java.util.Arrays; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration; import org.apache.hadoop.hdfs.nfs.nfs3.Nfs3Utils; @@ -51,7 +51,8 @@ import org.jboss.netty.channel.MessageEvent; public class TestOutOfOrderWrite { - public final static Log LOG = LogFactory.getLog(TestOutOfOrderWrite.class); + public final static Logger LOG = + LoggerFactory.getLogger(TestOutOfOrderWrite.class); static FileHandle handle = null; static Channel channel; @@ -179,4 +180,4 @@ public static void main(String[] args) throws InterruptedException { // TODO: convert to Junit test, and 
validate result automatically } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java index 9ab4e5addbc..cce4b86ce1f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java @@ -85,15 +85,6 @@ public static FederationRPCMetrics create(Configuration conf, new FederationRPCMetrics(conf, rpcServer)); } - /** - * Convert nanoseconds to milliseconds. - * @param ns Time in nanoseconds. - * @return Time in milliseconds. - */ - private static double toMs(double ns) { - return ns / 1000000; - } - /** * Reset the metrics system. */ @@ -230,7 +221,7 @@ public void addProxyTime(long time) { @Override public double getProxyAvg() { - return toMs(proxy.lastStat().mean()); + return proxy.lastStat().mean(); } @Override @@ -250,7 +241,7 @@ public void addProcessingTime(long time) { @Override public double getProcessingAvg() { - return toMs(processing.lastStat().mean()); + return processing.lastStat().mean(); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java index 2c2741e5590..15725d14ce9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCPerformanceMonitor.java @@ -35,6 +35,8 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; +import static org.apache.hadoop.util.Time.monotonicNow; + /** * Customizable RPC performance monitor. Receives events from the RPC server * and aggregates them via JMX. @@ -120,12 +122,12 @@ public void resetPerfCounters() { @Override public void startOp() { - START_TIME.set(this.getNow()); + START_TIME.set(monotonicNow()); } @Override public long proxyOp() { - PROXY_TIME.set(this.getNow()); + PROXY_TIME.set(monotonicNow()); long processingTime = getProcessingTime(); if (processingTime >= 0) { metrics.addProcessingTime(processingTime); @@ -188,13 +190,6 @@ public void routerFailureLocked() { metrics.incrRouterFailureLocked(); } - /** - * Get current time. - * @return Current time in nanoseconds. - */ - private long getNow() { - return System.nanoTime(); - } /** * Get time between we receiving the operation and sending it to the Namenode. 
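This FederationRPCPerformanceMonitor hunk and the FederationRPCMetrics hunk above have to change together: timestamps are now taken with org.apache.hadoop.util.Time.monotonicNow(), which returns milliseconds from a monotonic clock, so the nanosecond-to-millisecond conversion (toMs) disappears from the average getters. A small self-contained sketch of the resulting timing pattern; the class and method names are illustrative, not the monitor's API:

import org.apache.hadoop.util.Time;

public class OpTimer {
  // Per-thread start timestamp, in milliseconds (roughly what the monitor's
  // START_TIME/PROXY_TIME holders keep).
  private final ThreadLocal<Long> start = new ThreadLocal<>();

  public void startOp() {
    start.set(Time.monotonicNow());      // monotonic clock, already in ms
  }

  public long elapsedMs() {
    Long begin = start.get();
    // Samples recorded this way can be averaged directly; no division by
    // 1,000,000 as was needed for System.nanoTime() values.
    return (begin == null) ? -1 : Time.monotonicNow() - begin;
  }
}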
@@ -214,7 +209,7 @@ private long getProcessingTime() { */ private long getProxyTime() { if (PROXY_TIME.get() != null && PROXY_TIME.get() > 0) { - return getNow() - PROXY_TIME.get(); + return monotonicNow() - PROXY_TIME.get(); } return -1; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java index 4d22ae7dc32..e8ebf0dd8c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java @@ -320,6 +320,16 @@ public long getNumberOfMissingBlocksWithReplicationFactorOne() { return 0; } + @Override + public long getHighestPriorityLowRedundancyReplicatedBlocks() { + return 0; + } + + @Override + public long getHighestPriorityLowRedundancyECBlocks() { + return 0; + } + @Override public String getCorruptFiles() { return "N/A"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java index 3f6efd6a615..bdd75c70f5c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MountTableResolver.java @@ -17,9 +17,15 @@ */ package org.apache.hadoop.hdfs.server.federation.resolver; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMESERVICES; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DeprecatedKeys.DFS_NAMESERVICE_ID; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_DEFAULT_NAMESERVICE; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_DEFAULT_NAMESERVICE_ENABLE; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_DEFAULT_NAMESERVICE_ENABLE_DEFAULT; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE_DEFAULT; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_MOUNT_TABLE_CACHE_ENABLE; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_MOUNT_TABLE_CACHE_ENABLE_DEFAULT; import static org.apache.hadoop.hdfs.server.federation.router.FederationUtil.isParentEntry; import java.io.IOException; @@ -42,7 +48,6 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtil; @@ -92,6 +97,8 @@ /** Default nameservice when no mount matches the math. */ private String defaultNameService = ""; + /** If use default nameservice to read and write files. */ + private boolean defaultNSEnable = true; /** Synchronization for both the tree and the cache. 
*/ private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock(); @@ -123,12 +130,19 @@ public MountTableResolver(Configuration conf, Router routerService, this.stateStore = null; } - int maxCacheSize = conf.getInt( - FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE, - FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE_DEFAULT); - this.locationCache = CacheBuilder.newBuilder() - .maximumSize(maxCacheSize) - .build(); + boolean mountTableCacheEnable = conf.getBoolean( + FEDERATION_MOUNT_TABLE_CACHE_ENABLE, + FEDERATION_MOUNT_TABLE_CACHE_ENABLE_DEFAULT); + if (mountTableCacheEnable) { + int maxCacheSize = conf.getInt( + FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE, + FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE_DEFAULT); + this.locationCache = CacheBuilder.newBuilder() + .maximumSize(maxCacheSize) + .build(); + } else { + this.locationCache = null; + } registerCacheExternal(); initDefaultNameService(conf); @@ -149,15 +163,33 @@ private void registerCacheExternal() { * @param conf Configuration for this resolver. */ private void initDefaultNameService(Configuration conf) { - try { - this.defaultNameService = conf.get( - DFS_ROUTER_DEFAULT_NAMESERVICE, - DFSUtil.getNamenodeNameServiceId(conf)); - } catch (HadoopIllegalArgumentException e) { - LOG.error("Cannot find default name service, setting it to the first"); + this.defaultNameService = conf.get( + DFS_ROUTER_DEFAULT_NAMESERVICE, + DFSUtil.getNamenodeNameServiceId(conf)); + + this.defaultNSEnable = conf.getBoolean( + DFS_ROUTER_DEFAULT_NAMESERVICE_ENABLE, + DFS_ROUTER_DEFAULT_NAMESERVICE_ENABLE_DEFAULT); + + if (defaultNameService == null) { + LOG.warn( + "{} and {} is not set. Fallback to {} as the default name service.", + DFS_ROUTER_DEFAULT_NAMESERVICE, DFS_NAMESERVICE_ID, DFS_NAMESERVICES); Collection nsIds = DFSUtilClient.getNameServiceIds(conf); - this.defaultNameService = nsIds.iterator().next(); - LOG.info("Default name service: {}", this.defaultNameService); + if (nsIds.isEmpty()) { + this.defaultNameService = ""; + } else { + this.defaultNameService = nsIds.iterator().next(); + } + } + + if (this.defaultNameService.equals("")) { + this.defaultNSEnable = false; + LOG.warn("Default name service is not set."); + } else { + String enable = this.defaultNSEnable ? "enabled" : "disabled"; + LOG.info("Default name service: {}, {} to read or write", + this.defaultNameService, enable); } } @@ -227,7 +259,7 @@ public void removeEntry(final String srcPath) { */ private void invalidateLocationCache(final String path) { LOG.debug("Invalidating {} from {}", path, locationCache); - if (locationCache.size() == 0) { + if (locationCache == null || locationCache.size() == 0) { return; } @@ -347,7 +379,9 @@ public void clear() { LOG.info("Clearing all mount location caches"); writeLock.lock(); try { - this.locationCache.invalidateAll(); + if (this.locationCache != null) { + this.locationCache.invalidateAll(); + } this.tree.clear(); } finally { writeLock.unlock(); @@ -360,6 +394,9 @@ public PathLocation getDestinationForPath(final String path) verifyMountTable(); readLock.lock(); try { + if (this.locationCache == null) { + return lookupLocation(path); + } Callable meh = new Callable() { @Override public PathLocation call() throws Exception { @@ -380,13 +417,17 @@ public PathLocation call() throws Exception { * @param path Path to check/insert. * @return New remote location. 
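The MountTableResolver constructor above now only builds the Guava location cache when dfs.federation.router.mount-table.cache.enable is true, so every later cache access has to tolerate a null locationCache and fall back to a direct lookup. A condensed sketch of that null-guarded cache pattern, with String keys and values standing in for the resolver's PathLocation:

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import java.util.concurrent.ExecutionException;

public class OptionalLocationCache {
  private final Cache<String, String> cache;   // null when caching is disabled

  OptionalLocationCache(boolean cacheEnabled, int maxCacheSize) {
    this.cache = cacheEnabled
        ? CacheBuilder.newBuilder().maximumSize(maxCacheSize).<String, String>build()
        : null;
  }

  String getDestinationForPath(String path) throws ExecutionException {
    if (cache == null) {
      return lookupLocation(path);             // cache disabled: always resolve
    }
    return cache.get(path, () -> lookupLocation(path));
  }

  private String lookupLocation(String path) {
    return "resolved:" + path;                 // stand-in for the real resolution
  }
}

Invalidation and size reporting need the same guard, which is why invalidateLocationCache() and clear() in this hunk check for null before touching the cache.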
*/ - public PathLocation lookupLocation(final String path) { + public PathLocation lookupLocation(final String path) throws IOException { PathLocation ret = null; MountTable entry = findDeepest(path); if (entry != null) { ret = buildLocation(path, entry); } else { // Not found, use default location + if (!defaultNSEnable) { + throw new IOException("Cannot find locations for " + path + ", " + + "because the default nameservice is disabled to read or write"); + } RemoteLocation remoteLocation = new RemoteLocation(defaultNameService, path, path); List locations = @@ -591,7 +632,30 @@ private MountTable findDeepest(final String path) { * Get the size of the cache. * @return Size of the cache. */ - protected long getCacheSize() { - return this.locationCache.size(); + protected long getCacheSize() throws IOException{ + if (this.locationCache != null) { + return this.locationCache.size(); + } + throw new IOException("localCache is null"); + } + + @VisibleForTesting + public String getDefaultNameService() { + return defaultNameService; + } + + @VisibleForTesting + public void setDefaultNameService(String defaultNameService) { + this.defaultNameService = defaultNameService; + } + + @VisibleForTesting + public boolean isDefaultNSEnable() { + return defaultNSEnable; + } + + @VisibleForTesting + public void setDefaultNSEnable(boolean defaultNSRWEnable) { + this.defaultNSEnable = defaultNSRWEnable; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java index 0b5084574e2..9fb83e430bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java @@ -49,9 +49,6 @@ private static final Logger LOG = LoggerFactory.getLogger(ConnectionManager.class); - /** Number of parallel new connections to create. */ - protected static final int MAX_NEW_CONNECTIONS = 100; - /** Minimum amount of active connections: 50%. */ protected static final float MIN_ACTIVE_RATIO = 0.5f; @@ -77,8 +74,10 @@ private final Lock writeLock = readWriteLock.writeLock(); /** Queue for creating new connections. */ - private final BlockingQueue creatorQueue = - new ArrayBlockingQueue<>(MAX_NEW_CONNECTIONS); + private final BlockingQueue creatorQueue; + /** Max size of queue for creating new connections. */ + private final int creatorQueueMaxSize; + /** Create new connections asynchronously. */ private final ConnectionCreator creator; /** Periodic executor to remove stale connection pools. 
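The ConnectionManager fields above drop the hard-coded MAX_NEW_CONNECTIONS bound in favor of a queue sized from configuration; the constructor hunk that follows reads the new key (declared later in RBFConfigKeys with a default of 100) and builds the queue from it. A minimal sketch of that pattern, with Runnable standing in for the router's ConnectionPool element type:

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import org.apache.hadoop.conf.Configuration;

public class CreatorQueueSizing {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    int queueSize = conf.getInt(
        "dfs.federation.router.connection.creator.queue-size", 100);
    BlockingQueue<Runnable> creatorQueue = new ArrayBlockingQueue<>(queueSize);
    // offer() is non-blocking: once queueSize creations are pending it returns
    // false, which is the condition behind the "Cannot add more than {}
    // connections at the same time" error in getConnection().
    boolean accepted = creatorQueue.offer(() -> { });
    System.out.println("accepted=" + accepted + " capacity=" + queueSize);
  }
}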
*/ @@ -106,7 +105,12 @@ public ConnectionManager(Configuration config) { this.pools = new HashMap<>(); // Create connections in a thread asynchronously - this.creator = new ConnectionCreator(creatorQueue); + this.creatorQueueMaxSize = this.conf.getInt( + RBFConfigKeys.DFS_ROUTER_NAMENODE_CONNECTION_CREATOR_QUEUE_SIZE, + RBFConfigKeys.DFS_ROUTER_NAMENODE_CONNECTION_CREATOR_QUEUE_SIZE_DEFAULT + ); + this.creatorQueue = new ArrayBlockingQueue<>(this.creatorQueueMaxSize); + this.creator = new ConnectionCreator(this.creatorQueue); this.creator.setDaemon(true); // Cleanup periods @@ -213,7 +217,7 @@ public ConnectionContext getConnection(UserGroupInformation ugi, if (conn == null || !conn.isUsable()) { if (!this.creatorQueue.offer(pool)) { LOG.error("Cannot add more than {} connections at the same time", - MAX_NEW_CONNECTIONS); + this.creatorQueueMaxSize); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java index d2b2d50fdba..480b232ca42 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java @@ -185,12 +185,25 @@ public ECBlockGroupStats getECBlockGroupStats() throws IOException { long missingBlockGroups = 0; long bytesInFutureBlockGroups = 0; long pendingDeletionBlocks = 0; + long highestPriorityLowRedundancyBlocks = 0; + boolean hasHighestPriorityLowRedundancyBlocks = false; + for (ECBlockGroupStats stats : allStats.values()) { lowRedundancyBlockGroups += stats.getLowRedundancyBlockGroups(); corruptBlockGroups += stats.getCorruptBlockGroups(); missingBlockGroups += stats.getMissingBlockGroups(); bytesInFutureBlockGroups += stats.getBytesInFutureBlockGroups(); pendingDeletionBlocks += stats.getPendingDeletionBlocks(); + if (stats.hasHighestPriorityLowRedundancyBlocks()) { + hasHighestPriorityLowRedundancyBlocks = true; + highestPriorityLowRedundancyBlocks += + stats.getHighestPriorityLowRedundancyBlocks(); + } + } + if (hasHighestPriorityLowRedundancyBlocks) { + return new ECBlockGroupStats(lowRedundancyBlockGroups, corruptBlockGroups, + missingBlockGroups, bytesInFutureBlockGroups, pendingDeletionBlocks, + highestPriorityLowRedundancyBlocks); } return new ECBlockGroupStats(lowRedundancyBlockGroups, corruptBlockGroups, missingBlockGroups, bytesInFutureBlockGroups, pendingDeletionBlocks); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/IsRouterActiveServlet.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/IsRouterActiveServlet.java new file mode 100644 index 00000000000..cd84fced531 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/IsRouterActiveServlet.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.router; + +import org.apache.hadoop.http.IsActiveServlet; + +import javax.servlet.ServletContext; + +/** + * Detect if the Router is active and ready to serve requests. + */ +public class IsRouterActiveServlet extends IsActiveServlet { + + @Override + protected boolean isActive() { + final ServletContext context = getServletContext(); + final Router router = RouterHttpServer.getRouterFromContext(context); + final RouterServiceState routerState = router.getRouterState(); + + return routerState == RouterServiceState.RUNNING; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java index 413a4e1d099..846ccd1c029 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Quota.java @@ -67,6 +67,9 @@ public Quota(Router router, RouterRpcServer server) { public void setQuota(String path, long namespaceQuota, long storagespaceQuota, StorageType type) throws IOException { rpcServer.checkOperation(OperationCategory.WRITE); + if (!router.isQuotaEnabled()) { + throw new IOException("The quota system is disabled in Router."); + } // Set quota for current path and its children mount table path. 
final List locations = getQuotaRemoteLocations(path); @@ -91,6 +94,11 @@ public void setQuota(String path, long namespaceQuota, * @throws IOException */ public QuotaUsage getQuotaUsage(String path) throws IOException { + rpcServer.checkOperation(OperationCategory.READ); + if (!router.isQuotaEnabled()) { + throw new IOException("The quota system is disabled in Router."); + } + final List quotaLocs = getValidQuotaLocations(path); RemoteMethod method = new RemoteMethod("getQuotaUsage", new Class[] {String.class}, new RemoteParam()); @@ -154,6 +162,8 @@ public QuotaUsage getQuotaUsage(String path) throws IOException { private QuotaUsage aggregateQuota(Map results) { long nsCount = 0; long ssCount = 0; + long nsQuota = HdfsConstants.QUOTA_RESET; + long ssQuota = HdfsConstants.QUOTA_RESET; boolean hasQuotaUnSet = false; for (Map.Entry entry : results.entrySet()) { @@ -165,6 +175,8 @@ private QuotaUsage aggregateQuota(Map results) { if (usage.getQuota() == -1 && usage.getSpaceQuota() == -1) { hasQuotaUnSet = true; } + nsQuota = usage.getQuota(); + ssQuota = usage.getSpaceQuota(); nsCount += usage.getFileAndDirectoryCount(); ssCount += usage.getSpaceConsumed(); @@ -179,7 +191,10 @@ private QuotaUsage aggregateQuota(Map results) { QuotaUsage.Builder builder = new QuotaUsage.Builder() .fileAndDirectoryCount(nsCount).spaceConsumed(ssCount); if (hasQuotaUnSet) { - builder.quota(HdfsConstants.QUOTA_DONT_SET); + builder.quota(HdfsConstants.QUOTA_RESET) + .spaceQuota(HdfsConstants.QUOTA_RESET); + } else { + builder.quota(nsQuota).spaceQuota(ssQuota); } return builder.build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java index 363db208056..bbd4250b268 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java @@ -42,6 +42,10 @@ "dfs.federation.router."; public static final String DFS_ROUTER_DEFAULT_NAMESERVICE = FEDERATION_ROUTER_PREFIX + "default.nameserviceId"; + public static final String DFS_ROUTER_DEFAULT_NAMESERVICE_ENABLE = + FEDERATION_ROUTER_PREFIX + "default.nameservice.enable"; + public static final boolean DFS_ROUTER_DEFAULT_NAMESERVICE_ENABLE_DEFAULT = + true; public static final String DFS_ROUTER_HANDLER_COUNT_KEY = FEDERATION_ROUTER_PREFIX + "handler.count"; public static final int DFS_ROUTER_HANDLER_COUNT_DEFAULT = 10; @@ -93,6 +97,11 @@ TimeUnit.SECONDS.toMillis(5); // HDFS Router NN client + public static final String + DFS_ROUTER_NAMENODE_CONNECTION_CREATOR_QUEUE_SIZE = + FEDERATION_ROUTER_PREFIX + "connection.creator.queue-size"; + public static final int + DFS_ROUTER_NAMENODE_CONNECTION_CREATOR_QUEUE_SIZE_DEFAULT = 100; public static final String DFS_ROUTER_NAMENODE_CONNECTION_POOL_SIZE = FEDERATION_ROUTER_PREFIX + "connection.pool-size"; public static final int DFS_ROUTER_NAMENODE_CONNECTION_POOL_SIZE_DEFAULT = @@ -186,6 +195,10 @@ FEDERATION_ROUTER_PREFIX + "mount-table.max-cache-size"; /** Remove cache entries if we have more than 10k. 
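The aggregateQuota() hunk in Quota.java above changes what the Router reports when a mount point spans several subclusters: consumed counts are still summed, but the quota fields are now carried through as well, and QUOTA_RESET (-1) is reported whenever any location still has both quotas unset. A condensed sketch of that merge rule, using the literal -1 so it stands alone without HdfsConstants:

import java.util.List;
import org.apache.hadoop.fs.QuotaUsage;

public class QuotaMergeSketch {
  static QuotaUsage aggregate(List<QuotaUsage> results) {
    long nsCount = 0;
    long ssCount = 0;
    long nsQuota = -1;                        // -1 == HdfsConstants.QUOTA_RESET
    long ssQuota = -1;
    boolean hasQuotaUnset = false;
    for (QuotaUsage usage : results) {
      if (usage.getQuota() == -1 && usage.getSpaceQuota() == -1) {
        hasQuotaUnset = true;                 // some subcluster has no quota yet
      }
      nsQuota = usage.getQuota();             // last location's quota wins
      ssQuota = usage.getSpaceQuota();
      nsCount += usage.getFileAndDirectoryCount();
      ssCount += usage.getSpaceConsumed();
    }
    QuotaUsage.Builder builder = new QuotaUsage.Builder()
        .fileAndDirectoryCount(nsCount).spaceConsumed(ssCount);
    if (hasQuotaUnset) {
      builder.quota(-1).spaceQuota(-1);       // report "not set" consistently
    } else {
      builder.quota(nsQuota).spaceQuota(ssQuota);
    }
    return builder.build();
  }
}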
*/ public static final int FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE_DEFAULT = 10000; + public static final String FEDERATION_MOUNT_TABLE_CACHE_ENABLE = + FEDERATION_ROUTER_PREFIX + "mount-table.cache.enable"; + public static final boolean FEDERATION_MOUNT_TABLE_CACHE_ENABLE_DEFAULT = + true; // HDFS Router-based federation admin public static final String DFS_ROUTER_ADMIN_HANDLER_COUNT_KEY = diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java index df2a4486f9b..7e67daaef92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java @@ -665,4 +665,11 @@ RouterQuotaUpdateService getQuotaCacheUpdateService() { Collection getNamenodeHearbeatServices() { return this.namenodeHeartbeatServices; } + + /** + * Get the Router safe mode service + */ + RouterSafemodeService getSafemodeService() { + return this.safemodeService; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java index 139dfb82fe2..35097682214 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterAdminServer.java @@ -24,9 +24,11 @@ import java.net.InetSocketAddress; import java.util.Set; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.proto.RouterProtocolProtos.RouterAdminProtocolService; import org.apache.hadoop.hdfs.protocolPB.RouterAdminProtocolPB; import org.apache.hadoop.hdfs.protocolPB.RouterAdminProtocolServerSideTranslatorPB; @@ -252,8 +254,11 @@ private void synchronizeQuota(MountTable mountTable) throws IOException { if (nsQuota != HdfsConstants.QUOTA_DONT_SET || ssQuota != HdfsConstants.QUOTA_DONT_SET) { - this.router.getRpcServer().getQuotaModule().setQuota(path, nsQuota, - ssQuota, null); + HdfsFileStatus ret = this.router.getRpcServer().getFileInfo(path); + if (ret != null) { + this.router.getRpcServer().getQuotaModule().setQuota(path, nsQuota, + ssQuota, null); + } } } @@ -272,23 +277,50 @@ public GetMountTableEntriesResponse getMountTableEntries( @Override public EnterSafeModeResponse enterSafeMode(EnterSafeModeRequest request) throws IOException { - this.router.updateRouterState(RouterServiceState.SAFEMODE); - this.router.getRpcServer().setSafeMode(true); - return EnterSafeModeResponse.newInstance(verifySafeMode(true)); + boolean success = false; + RouterSafemodeService safeModeService = this.router.getSafemodeService(); + if (safeModeService != null) { + this.router.updateRouterState(RouterServiceState.SAFEMODE); + safeModeService.setManualSafeMode(true); + success = verifySafeMode(true); + if (success) { + LOG.info("STATE* Safe mode is ON.\n" + "It was turned on manually. 
" + + "Use \"hdfs dfsrouteradmin -safemode leave\" to turn" + + " safe mode off."); + } else { + LOG.error("Unable to enter safemode."); + } + } + return EnterSafeModeResponse.newInstance(success); } @Override public LeaveSafeModeResponse leaveSafeMode(LeaveSafeModeRequest request) throws IOException { - this.router.updateRouterState(RouterServiceState.RUNNING); - this.router.getRpcServer().setSafeMode(false); - return LeaveSafeModeResponse.newInstance(verifySafeMode(false)); + boolean success = false; + RouterSafemodeService safeModeService = this.router.getSafemodeService(); + if (safeModeService != null) { + this.router.updateRouterState(RouterServiceState.RUNNING); + safeModeService.setManualSafeMode(false); + success = verifySafeMode(false); + if (success) { + LOG.info("STATE* Safe mode is OFF.\n" + "It was turned off manually."); + } else { + LOG.error("Unable to leave safemode."); + } + } + return LeaveSafeModeResponse.newInstance(success); } @Override public GetSafeModeResponse getSafeMode(GetSafeModeRequest request) throws IOException { - boolean isInSafeMode = this.router.getRpcServer().isInSafeMode(); + boolean isInSafeMode = false; + RouterSafemodeService safeModeService = this.router.getSafemodeService(); + if (safeModeService != null) { + isInSafeMode = safeModeService.isInSafeMode(); + LOG.info("Safemode status retrieved successfully."); + } return GetSafeModeResponse.newInstance(isInSafeMode); } @@ -298,7 +330,8 @@ public GetSafeModeResponse getSafeMode(GetSafeModeRequest request) * @return */ private boolean verifySafeMode(boolean isInSafeMode) { - boolean serverInSafeMode = this.router.getRpcServer().isInSafeMode(); + Preconditions.checkNotNull(this.router.getSafemodeService()); + boolean serverInSafeMode = this.router.getSafemodeService().isInSafeMode(); RouterServiceState currentState = this.router.getRouterState(); return (isInSafeMode && currentState == RouterServiceState.SAFEMODE @@ -320,6 +353,11 @@ public DisableNameserviceResponse disableNameservice( boolean success = false; if (namespaceExists(nsId)) { success = getDisabledNameserviceStore().disableNameservice(nsId); + if (success) { + LOG.info("Nameservice {} disabled successfully.", nsId); + } else { + LOG.error("Unable to disable Nameservice {}", nsId); + } } else { LOG.error("Cannot disable {}, it does not exists", nsId); } @@ -353,6 +391,11 @@ public EnableNameserviceResponse enableNameservice( boolean success = false; if (disabled.contains(nsId)) { success = store.enableNameservice(nsId); + if (success) { + LOG.info("Nameservice {} enabled successfully.", nsId); + } else { + LOG.error("Unable to enable Nameservice {}", nsId); + } } else { LOG.error("Cannot enable {}, it was not disabled", nsId); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java new file mode 100644 index 00000000000..f45da3c9417 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterClientProtocol.java @@ -0,0 +1,1805 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.router; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.CryptoProtocolVersion; +import org.apache.hadoop.fs.BatchedRemoteIterator; +import org.apache.hadoop.fs.CacheFlag; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FsServerDefaults; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.QuotaUsage; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.XAttr; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.AddBlockFlag; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.inotify.EventBatchList; +import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse; +import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; +import org.apache.hadoop.hdfs.protocol.CachePoolEntry; +import org.apache.hadoop.hdfs.protocol.CachePoolInfo; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.DirectoryListing; +import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats; +import org.apache.hadoop.hdfs.protocol.EncryptionZone; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; +import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.protocol.OpenFileEntry; +import org.apache.hadoop.hdfs.protocol.OpenFilesIterator; +import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; +import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; +import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; +import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing; +import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; +import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; +import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import 
org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver; +import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamespaceInfo; +import org.apache.hadoop.hdfs.server.federation.resolver.FileSubclusterResolver; +import org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver; +import org.apache.hadoop.hdfs.server.federation.resolver.RemoteLocation; +import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; +import org.apache.hadoop.io.EnumSetWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collection; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +/** + * Module that implements all the RPC calls in {@link ClientProtocol} in the + * {@link RouterRpcServer}. + */ +public class RouterClientProtocol implements ClientProtocol { + private static final Logger LOG = + LoggerFactory.getLogger(RouterClientProtocol.class.getName()); + + private final RouterRpcServer rpcServer; + private final RouterRpcClient rpcClient; + private final FileSubclusterResolver subclusterResolver; + private final ActiveNamenodeResolver namenodeResolver; + + /** Identifier for the super user. */ + private final String superUser; + /** Identifier for the super group. */ + private final String superGroup; + /** Erasure coding calls. */ + private final ErasureCoding erasureCoding; + + RouterClientProtocol(Configuration conf, RouterRpcServer rpcServer) { + this.rpcServer = rpcServer; + this.rpcClient = rpcServer.getRPCClient(); + this.subclusterResolver = rpcServer.getSubclusterResolver(); + this.namenodeResolver = rpcServer.getNamenodeResolver(); + + // User and group for reporting + this.superUser = System.getProperty("user.name"); + this.superGroup = conf.get( + DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY, + DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT); + this.erasureCoding = new ErasureCoding(rpcServer); + } + + @Override + public Token getDelegationToken(Text renewer) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + return null; + } + + /** + * The the delegation token from each name service. + * + * @param renewer + * @return Name service -> Token. 
+ * @throws IOException + */ + public Map> + getDelegationTokens(Text renewer) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + return null; + } + + @Override + public long renewDelegationToken(Token token) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + return 0; + } + + @Override + public void cancelDelegationToken(Token token) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public LocatedBlocks getBlockLocations(String src, final long offset, + final long length) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + List locations = rpcServer.getLocationsForPath(src, false); + RemoteMethod remoteMethod = new RemoteMethod("getBlockLocations", + new Class[] {String.class, long.class, long.class}, + new RemoteParam(), offset, length); + return rpcClient.invokeSequential(locations, remoteMethod, + LocatedBlocks.class, null); + } + + @Override + public FsServerDefaults getServerDefaults() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + RemoteMethod method = new RemoteMethod("getServerDefaults"); + String ns = subclusterResolver.getDefaultNamespace(); + return (FsServerDefaults) rpcClient.invokeSingle(ns, method); + } + + @Override + public HdfsFileStatus create(String src, FsPermission masked, + String clientName, EnumSetWritable flag, + boolean createParent, short replication, long blockSize, + CryptoProtocolVersion[] supportedVersions, String ecPolicyName) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + if (createParent && isPathAll(src)) { + int index = src.lastIndexOf(Path.SEPARATOR); + String parent = src.substring(0, index); + LOG.debug("Creating {} requires creating parent {}", src, parent); + FsPermission parentPermissions = getParentPermission(masked); + boolean success = mkdirs(parent, parentPermissions, createParent); + if (!success) { + // This shouldn't happen as mkdirs returns true or exception + LOG.error("Couldn't create parents for {}", src); + } + } + + RemoteLocation createLocation = rpcServer.getCreateLocation(src); + RemoteMethod method = new RemoteMethod("create", + new Class[] {String.class, FsPermission.class, String.class, + EnumSetWritable.class, boolean.class, short.class, + long.class, CryptoProtocolVersion[].class, + String.class}, + createLocation.getDest(), masked, clientName, flag, createParent, + replication, blockSize, supportedVersions, ecPolicyName); + return (HdfsFileStatus) rpcClient.invokeSingle(createLocation, method); + } + + @Override + public LastBlockWithStatus append(String src, final String clientName, + final EnumSetWritable flag) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + List locations = rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("append", + new Class[] {String.class, String.class, EnumSetWritable.class}, + new RemoteParam(), clientName, flag); + return rpcClient.invokeSequential( + locations, method, LastBlockWithStatus.class, null); + } + + @Override + public boolean recoverLease(String src, String clientName) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("recoverLease", + new Class[] {String.class, String.class}, new RemoteParam(), + clientName); + 
Object result = rpcClient.invokeSequential( + locations, method, Boolean.class, Boolean.TRUE); + return (boolean) result; + } + + @Override + public boolean setReplication(String src, short replication) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + List locations = rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("setReplication", + new Class[] {String.class, short.class}, new RemoteParam(), + replication); + Object result = rpcClient.invokeSequential( + locations, method, Boolean.class, Boolean.TRUE); + return (boolean) result; + } + + @Override + public void setStoragePolicy(String src, String policyName) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + List locations = rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("setStoragePolicy", + new Class[] {String.class, String.class}, + new RemoteParam(), policyName); + rpcClient.invokeSequential(locations, method, null, null); + } + + @Override + public BlockStoragePolicy[] getStoragePolicies() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + RemoteMethod method = new RemoteMethod("getStoragePolicies"); + String ns = subclusterResolver.getDefaultNamespace(); + return (BlockStoragePolicy[]) rpcClient.invokeSingle(ns, method); + } + + @Override + public void setPermission(String src, FsPermission permissions) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("setPermission", + new Class[] {String.class, FsPermission.class}, + new RemoteParam(), permissions); + if (isPathAll(src)) { + rpcClient.invokeConcurrent(locations, method); + } else { + rpcClient.invokeSequential(locations, method); + } + } + + @Override + public void setOwner(String src, String username, String groupname) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("setOwner", + new Class[] {String.class, String.class, String.class}, + new RemoteParam(), username, groupname); + if (isPathAll(src)) { + rpcClient.invokeConcurrent(locations, method); + } else { + rpcClient.invokeSequential(locations, method); + } + } + + /** + * Excluded and favored nodes are not verified and will be ignored by + * placement policy if they are not in the same nameservice as the file. + */ + @Override + public LocatedBlock addBlock(String src, String clientName, + ExtendedBlock previous, DatanodeInfo[] excludedNodes, long fileId, + String[] favoredNodes, EnumSet addBlockFlags) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("addBlock", + new Class[] {String.class, String.class, ExtendedBlock.class, + DatanodeInfo[].class, long.class, String[].class, + EnumSet.class}, + new RemoteParam(), clientName, previous, excludedNodes, fileId, + favoredNodes, addBlockFlags); + // TODO verify the excludedNodes and favoredNodes are acceptable to this NN + return rpcClient.invokeSequential( + locations, method, LocatedBlock.class, null); + } + + /** + * Excluded nodes are not verified and will be ignored by placement if they + * are not in the same nameservice as the file. 
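setPermission and setOwner above, like delete and mkdirs later in this class, pick between the two RouterRpcClient fan-out modes based on whether the path is mounted into every subcluster. A small sketch of that decision, placed in the same package so the patch's RouterRpcClient and RemoteMethod types resolve; the wrapper class itself is illustrative, not part of the patch:

package org.apache.hadoop.hdfs.server.federation.router;

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hdfs.server.federation.resolver.RemoteLocation;

class FanOutSketch {
  private final RouterRpcClient rpcClient;

  FanOutSketch(RouterRpcClient rpcClient) {
    this.rpcClient = rpcClient;
  }

  void invoke(List<RemoteLocation> locations, RemoteMethod method,
      boolean pathAll) throws IOException {
    if (pathAll) {
      // The mount entry maps the path to all subclusters: apply everywhere.
      rpcClient.invokeConcurrent(locations, method);
    } else {
      // Otherwise try the resolved locations in order until one succeeds.
      rpcClient.invokeSequential(locations, method);
    }
  }
}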
+ */ + @Override + public LocatedBlock getAdditionalDatanode(final String src, final long fileId, + final ExtendedBlock blk, final DatanodeInfo[] existings, + final String[] existingStorageIDs, final DatanodeInfo[] excludes, + final int numAdditionalNodes, final String clientName) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("getAdditionalDatanode", + new Class[] {String.class, long.class, ExtendedBlock.class, + DatanodeInfo[].class, String[].class, + DatanodeInfo[].class, int.class, String.class}, + new RemoteParam(), fileId, blk, existings, existingStorageIDs, excludes, + numAdditionalNodes, clientName); + return rpcClient.invokeSequential( + locations, method, LocatedBlock.class, null); + } + + @Override + public void abandonBlock(ExtendedBlock b, long fileId, String src, + String holder) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + RemoteMethod method = new RemoteMethod("abandonBlock", + new Class[] {ExtendedBlock.class, long.class, String.class, + String.class}, + b, fileId, new RemoteParam(), holder); + rpcClient.invokeSingle(b, method); + } + + @Override + public boolean complete(String src, String clientName, ExtendedBlock last, + long fileId) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("complete", + new Class[] {String.class, String.class, ExtendedBlock.class, + long.class}, + new RemoteParam(), clientName, last, fileId); + // Complete can return true/false, so don't expect a result + return rpcClient.invokeSequential(locations, method, Boolean.class, null); + } + + @Override + public LocatedBlock updateBlockForPipeline( + ExtendedBlock block, String clientName) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + RemoteMethod method = new RemoteMethod("updateBlockForPipeline", + new Class[] {ExtendedBlock.class, String.class}, + block, clientName); + return (LocatedBlock) rpcClient.invokeSingle(block, method); + } + + /** + * Datanode are not verified to be in the same nameservice as the old block. + * TODO This may require validation. 
+   */
+  @Override
+  public void updatePipeline(String clientName, ExtendedBlock oldBlock,
+      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
+      throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.WRITE);
+
+    RemoteMethod method = new RemoteMethod("updatePipeline",
+        new Class<?>[] {String.class, ExtendedBlock.class, ExtendedBlock.class,
+            DatanodeID[].class, String[].class},
+        clientName, oldBlock, newBlock, newNodes, newStorageIDs);
+    rpcClient.invokeSingle(oldBlock, method);
+  }
+
+  @Override
+  public long getPreferredBlockSize(String src) throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.READ);
+
+    final List<RemoteLocation> locations =
+        rpcServer.getLocationsForPath(src, true);
+    RemoteMethod method = new RemoteMethod("getPreferredBlockSize",
+        new Class<?>[] {String.class}, new RemoteParam());
+    return rpcClient.invokeSequential(locations, method, Long.class, null);
+  }
+
+  @Deprecated
+  @Override
+  public boolean rename(final String src, final String dst)
+      throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.WRITE);
+
+    final List<RemoteLocation> srcLocations =
+        rpcServer.getLocationsForPath(src, true, false);
+    // srcLocations may be trimmed by getRenameDestinations()
+    final List<RemoteLocation> locs = new LinkedList<>(srcLocations);
+    RemoteParam dstParam = getRenameDestinations(locs, dst);
+    if (locs.isEmpty()) {
+      throw new IOException(
+          "Rename of " + src + " to " + dst + " is not allowed," +
+          " no eligible destination in the same namespace was found.");
+    }
+    RemoteMethod method = new RemoteMethod("rename",
+        new Class<?>[] {String.class, String.class},
+        new RemoteParam(), dstParam);
+    return rpcClient.invokeSequential(locs, method, Boolean.class,
+        Boolean.TRUE);
+  }
+
+  @Override
+  public void rename2(final String src, final String dst,
+      final Options.Rename... options) throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.WRITE);
+
+    final List<RemoteLocation> srcLocations =
+        rpcServer.getLocationsForPath(src, true, false);
+    // srcLocations may be trimmed by getRenameDestinations()
+    final List<RemoteLocation> locs = new LinkedList<>(srcLocations);
+    RemoteParam dstParam = getRenameDestinations(locs, dst);
+    if (locs.isEmpty()) {
+      throw new IOException(
+          "Rename of " + src + " to " + dst + " is not allowed," +
+          " no eligible destination in the same namespace was found.");
+    }
+    RemoteMethod method = new RemoteMethod("rename2",
+        new Class<?>[] {String.class, String.class, options.getClass()},
+        new RemoteParam(), dstParam, options);
+    rpcClient.invokeSequential(locs, method, null, null);
+  }
+
+  @Override
+  public void concat(String trg, String[] src) throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.WRITE);
+
+    // See if the src and target files are all in the same namespace
+    LocatedBlocks targetBlocks = getBlockLocations(trg, 0, 1);
+    if (targetBlocks == null) {
+      throw new IOException("Cannot locate blocks for target file - " + trg);
+    }
+    LocatedBlock lastLocatedBlock = targetBlocks.getLastLocatedBlock();
+    String targetBlockPoolId = lastLocatedBlock.getBlock().getBlockPoolId();
+    for (String source : src) {
+      LocatedBlocks sourceBlocks = getBlockLocations(source, 0, 1);
+      if (sourceBlocks == null) {
+        throw new IOException(
+            "Cannot locate blocks for source file " + source);
+      }
+      String sourceBlockPoolId =
+          sourceBlocks.getLastLocatedBlock().getBlock().getBlockPoolId();
+      if (!sourceBlockPoolId.equals(targetBlockPoolId)) {
+        throw new IOException("Cannot concatenate source file " + source +
+            " because it is located in a different namespace" +
+            " with block pool id " + sourceBlockPoolId +
+            " from the target file with block pool id " +
+            targetBlockPoolId);
+      }
+    }
+
+    // Find locations in the matching namespace.
+    final RemoteLocation targetDestination =
+        rpcServer.getLocationForPath(trg, true, targetBlockPoolId);
+    String[] sourceDestinations = new String[src.length];
+    for (int i = 0; i < src.length; i++) {
+      String sourceFile = src[i];
+      RemoteLocation location =
+          rpcServer.getLocationForPath(sourceFile, true, targetBlockPoolId);
+      sourceDestinations[i] = location.getDest();
+    }
+    // Invoke
+    RemoteMethod method = new RemoteMethod("concat",
+        new Class<?>[] {String.class, String[].class},
+        targetDestination.getDest(), sourceDestinations);
+    rpcClient.invokeSingle(targetDestination, method);
+  }
+
+  @Override
+  public boolean truncate(String src, long newLength, String clientName)
+      throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.WRITE);
+
+    final List<RemoteLocation> locations =
+        rpcServer.getLocationsForPath(src, true);
+    RemoteMethod method = new RemoteMethod("truncate",
+        new Class<?>[] {String.class, long.class, String.class},
+        new RemoteParam(), newLength, clientName);
+    return rpcClient.invokeSequential(locations, method, Boolean.class,
+        Boolean.TRUE);
+  }
+
+  @Override
+  public boolean delete(String src, boolean recursive) throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.WRITE);
+
+    final List<RemoteLocation> locations =
+        rpcServer.getLocationsForPath(src, true, false);
+    RemoteMethod method = new RemoteMethod("delete",
+        new Class<?>[] {String.class, boolean.class}, new RemoteParam(),
+        recursive);
+    if (isPathAll(src)) {
+      return rpcClient.invokeAll(locations, method);
+    } else {
+      return rpcClient.invokeSequential(locations, method,
+          Boolean.class, Boolean.TRUE);
+    }
+  }
+
+  @Override
+  public boolean mkdirs(String src, FsPermission masked, boolean createParent)
+      throws IOException {
+    rpcServer.checkOperation(NameNode.OperationCategory.WRITE);
+
+    final List<RemoteLocation> locations =
+        rpcServer.getLocationsForPath(src, true);
+    RemoteMethod method = new RemoteMethod("mkdirs",
+        new Class<?>[] {String.class, FsPermission.class, boolean.class},
+        new RemoteParam(), masked, createParent);
+
+    // Create in all locations
+    if (isPathAll(src)) {
+      return rpcClient.invokeAll(locations, method);
+    }
+
+    if (locations.size() > 1) {
+      // Check if this directory already exists
+      try {
+        HdfsFileStatus fileStatus = getFileInfo(src);
+        if (fileStatus != null) {
+          // When existing, the NN doesn't return an exception; return true
+          return true;
+        }
+      } catch (IOException ioe) {
+        // Can't query if this file exists or not.
+ LOG.error("Error requesting file info for path {} while proxing mkdirs", + src, ioe); + } + } + + RemoteLocation firstLocation = locations.get(0); + return (boolean) rpcClient.invokeSingle(firstLocation, method); + } + + @Override + public void renewLease(String clientName) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + RemoteMethod method = new RemoteMethod("renewLease", + new Class[] {String.class}, clientName); + Set nss = namenodeResolver.getNamespaces(); + rpcClient.invokeConcurrent(nss, method, false, false); + } + + @Override + public DirectoryListing getListing(String src, byte[] startAfter, + boolean needLocation) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + // Locate the dir and fetch the listing + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("getListing", + new Class[] {String.class, startAfter.getClass(), boolean.class}, + new RemoteParam(), startAfter, needLocation); + Map listings = + rpcClient.invokeConcurrent( + locations, method, false, false, DirectoryListing.class); + + Map nnListing = new TreeMap<>(); + int totalRemainingEntries = 0; + int remainingEntries = 0; + boolean namenodeListingExists = false; + if (listings != null) { + // Check the subcluster listing with the smallest name + String lastName = null; + for (Map.Entry entry : + listings.entrySet()) { + RemoteLocation location = entry.getKey(); + DirectoryListing listing = entry.getValue(); + if (listing == null) { + LOG.debug("Cannot get listing from {}", location); + } else { + totalRemainingEntries += listing.getRemainingEntries(); + HdfsFileStatus[] partialListing = listing.getPartialListing(); + int length = partialListing.length; + if (length > 0) { + HdfsFileStatus lastLocalEntry = partialListing[length-1]; + String lastLocalName = lastLocalEntry.getLocalName(); + if (lastName == null || lastName.compareTo(lastLocalName) > 0) { + lastName = lastLocalName; + } + } + } + } + + // Add existing entries + for (Object value : listings.values()) { + DirectoryListing listing = (DirectoryListing) value; + if (listing != null) { + namenodeListingExists = true; + for (HdfsFileStatus file : listing.getPartialListing()) { + String filename = file.getLocalName(); + if (totalRemainingEntries > 0 && filename.compareTo(lastName) > 0) { + // Discarding entries further than the lastName + remainingEntries++; + } else { + nnListing.put(filename, file); + } + } + remainingEntries += listing.getRemainingEntries(); + } + } + } + + // Add mount points at this level in the tree + final List children = subclusterResolver.getMountPoints(src); + if (children != null) { + // Get the dates for each mount point + Map dates = getMountPointDates(src); + + // Create virtual folder with the mount name + for (String child : children) { + long date = 0; + if (dates != null && dates.containsKey(child)) { + date = dates.get(child); + } + // TODO add number of children + HdfsFileStatus dirStatus = getMountPointStatus(child, 0, date); + + // This may overwrite existing listing entries with the mount point + // TODO don't add if already there? + nnListing.put(child, dirStatus); + } + } + + if (!namenodeListingExists && nnListing.size() == 0) { + // NN returns a null object if the directory cannot be found and has no + // listing. If we didn't retrieve any NN listing data, and there are no + // mount points here, return null. 
+ return null; + } + + // Generate combined listing + HdfsFileStatus[] combinedData = new HdfsFileStatus[nnListing.size()]; + combinedData = nnListing.values().toArray(combinedData); + return new DirectoryListing(combinedData, remainingEntries); + } + + @Override + public HdfsFileStatus getFileInfo(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("getFileInfo", + new Class[] {String.class}, new RemoteParam()); + + HdfsFileStatus ret = null; + // If it's a directory, we check in all locations + if (isPathAll(src)) { + ret = getFileInfoAll(locations, method); + } else { + // Check for file information sequentially + ret = rpcClient.invokeSequential( + locations, method, HdfsFileStatus.class, null); + } + + // If there is no real path, check mount points + if (ret == null) { + List children = subclusterResolver.getMountPoints(src); + if (children != null && !children.isEmpty()) { + Map dates = getMountPointDates(src); + long date = 0; + if (dates != null && dates.containsKey(src)) { + date = dates.get(src); + } + ret = getMountPointStatus(src, children.size(), date); + } + } + + return ret; + } + + @Override + public boolean isFileClosed(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("isFileClosed", + new Class[] {String.class}, new RemoteParam()); + return rpcClient.invokeSequential(locations, method, Boolean.class, + Boolean.TRUE); + } + + @Override + public HdfsFileStatus getFileLinkInfo(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("getFileLinkInfo", + new Class[] {String.class}, new RemoteParam()); + return rpcClient.invokeSequential(locations, method, HdfsFileStatus.class, + null); + } + + @Override + public HdfsLocatedFileStatus getLocatedFileInfo(String src, + boolean needBlockToken) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("getLocatedFileInfo", + new Class[] {String.class, boolean.class}, new RemoteParam(), + needBlockToken); + return (HdfsLocatedFileStatus) rpcClient.invokeSequential( + locations, method, HdfsFileStatus.class, null); + } + + @Override + public long[] getStats() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + RemoteMethod method = new RemoteMethod("getStats"); + Set nss = namenodeResolver.getNamespaces(); + Map results = + rpcClient.invokeConcurrent(nss, method, true, false, long[].class); + long[] combinedData = new long[STATS_ARRAY_LENGTH]; + for (long[] data : results.values()) { + for (int i = 0; i < combinedData.length && i < data.length; i++) { + if (data[i] >= 0) { + combinedData[i] += data[i]; + } + } + } + return combinedData; + } + + @Override + public DatanodeInfo[] getDatanodeReport(HdfsConstants.DatanodeReportType type) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + return rpcServer.getDatanodeReport(type, true, 0); + } + + @Override + public DatanodeStorageReport[] getDatanodeStorageReport( + HdfsConstants.DatanodeReportType type) throws IOException 
{ + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + Map dnSubcluster = + rpcServer.getDatanodeStorageReportMap(type); + + // Avoid repeating machines in multiple subclusters + Map datanodesMap = new LinkedHashMap<>(); + for (DatanodeStorageReport[] dns : dnSubcluster.values()) { + for (DatanodeStorageReport dn : dns) { + DatanodeInfo dnInfo = dn.getDatanodeInfo(); + String nodeId = dnInfo.getXferAddr(); + if (!datanodesMap.containsKey(nodeId)) { + datanodesMap.put(nodeId, dn); + } + // TODO merge somehow, right now it just takes the first one + } + } + + Collection datanodes = datanodesMap.values(); + DatanodeStorageReport[] combinedData = + new DatanodeStorageReport[datanodes.size()]; + combinedData = datanodes.toArray(combinedData); + return combinedData; + } + + @Override + public boolean setSafeMode(HdfsConstants.SafeModeAction action, + boolean isChecked) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // Set safe mode in all the name spaces + RemoteMethod method = new RemoteMethod("setSafeMode", + new Class[] {HdfsConstants.SafeModeAction.class, boolean.class}, + action, isChecked); + Set nss = namenodeResolver.getNamespaces(); + Map results = + rpcClient.invokeConcurrent( + nss, method, true, !isChecked, Boolean.class); + + // We only report true if all the name space are in safe mode + int numSafemode = 0; + for (boolean safemode : results.values()) { + if (safemode) { + numSafemode++; + } + } + return numSafemode == results.size(); + } + + @Override + public boolean restoreFailedStorage(String arg) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + RemoteMethod method = new RemoteMethod("restoreFailedStorage", + new Class[] {String.class}, arg); + final Set nss = namenodeResolver.getNamespaces(); + Map ret = + rpcClient.invokeConcurrent(nss, method, true, false, Boolean.class); + + boolean success = true; + for (boolean s : ret.values()) { + if (!s) { + success = false; + break; + } + } + return success; + } + + @Override + public boolean saveNamespace(long timeWindow, long txGap) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + RemoteMethod method = new RemoteMethod("saveNamespace", + new Class[] {Long.class, Long.class}, timeWindow, txGap); + final Set nss = namenodeResolver.getNamespaces(); + Map ret = + rpcClient.invokeConcurrent(nss, method, true, false, boolean.class); + + boolean success = true; + for (boolean s : ret.values()) { + if (!s) { + success = false; + break; + } + } + return success; + } + + @Override + public long rollEdits() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + RemoteMethod method = new RemoteMethod("rollEdits", new Class[] {}); + final Set nss = namenodeResolver.getNamespaces(); + Map ret = + rpcClient.invokeConcurrent(nss, method, true, false, long.class); + + // Return the maximum txid + long txid = 0; + for (long t : ret.values()) { + if (t > txid) { + txid = t; + } + } + return txid; + } + + @Override + public void refreshNodes() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + RemoteMethod method = new RemoteMethod("refreshNodes", new Class[] {}); + final Set nss = namenodeResolver.getNamespaces(); + rpcClient.invokeConcurrent(nss, method, true, true); + } + + @Override + public void finalizeUpgrade() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + RemoteMethod method = new 
RemoteMethod("finalizeUpgrade", + new Class[] {}); + final Set nss = namenodeResolver.getNamespaces(); + rpcClient.invokeConcurrent(nss, method, true, false); + } + + @Override + public boolean upgradeStatus() throws IOException { + String methodName = RouterRpcServer.getMethodName(); + throw new UnsupportedOperationException( + "Operation \"" + methodName + "\" is not supported"); + } + + @Override + public RollingUpgradeInfo rollingUpgrade(HdfsConstants.RollingUpgradeAction action) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + RemoteMethod method = new RemoteMethod("rollingUpgrade", + new Class[] {HdfsConstants.RollingUpgradeAction.class}, action); + final Set nss = namenodeResolver.getNamespaces(); + Map ret = + rpcClient.invokeConcurrent( + nss, method, true, false, RollingUpgradeInfo.class); + + // Return the first rolling upgrade info + RollingUpgradeInfo info = null; + for (RollingUpgradeInfo infoNs : ret.values()) { + if (info == null && infoNs != null) { + info = infoNs; + } + } + return info; + } + + @Override + public void metaSave(String filename) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + RemoteMethod method = new RemoteMethod("metaSave", + new Class[] {String.class}, filename); + final Set nss = namenodeResolver.getNamespaces(); + rpcClient.invokeConcurrent(nss, method, true, false); + } + + @Override + public CorruptFileBlocks listCorruptFileBlocks(String path, String cookie) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + final List locations = + rpcServer.getLocationsForPath(path, false); + RemoteMethod method = new RemoteMethod("listCorruptFileBlocks", + new Class[] {String.class, String.class}, + new RemoteParam(), cookie); + return rpcClient.invokeSequential( + locations, method, CorruptFileBlocks.class, null); + } + + @Override + public void setBalancerBandwidth(long bandwidth) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.UNCHECKED); + + RemoteMethod method = new RemoteMethod("setBalancerBandwidth", + new Class[] {Long.class}, bandwidth); + final Set nss = namenodeResolver.getNamespaces(); + rpcClient.invokeConcurrent(nss, method, true, false); + } + + @Override + public ContentSummary getContentSummary(String path) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + // Get the summaries from regular files + Collection summaries = new LinkedList<>(); + FileNotFoundException notFoundException = null; + try { + final List locations = + rpcServer.getLocationsForPath(path, false); + RemoteMethod method = new RemoteMethod("getContentSummary", + new Class[] {String.class}, new RemoteParam()); + Map results = + rpcClient.invokeConcurrent( + locations, method, false, false, ContentSummary.class); + summaries.addAll(results.values()); + } catch (FileNotFoundException e) { + notFoundException = e; + } + + // Add mount points at this level in the tree + final List children = subclusterResolver.getMountPoints(path); + if (children != null) { + for (String child : children) { + Path childPath = new Path(path, child); + try { + ContentSummary mountSummary = getContentSummary(childPath.toString()); + if (mountSummary != null) { + summaries.add(mountSummary); + } + } catch (Exception e) { + LOG.error("Cannot get content summary for mount {}: {}", + childPath, e.getMessage()); + } + } + } + + // Throw original exception if no original nor mount points + if (summaries.isEmpty() && 
notFoundException != null) { + throw notFoundException; + } + + return aggregateContentSummary(summaries); + } + + @Override + public void fsync(String src, long fileId, String clientName, + long lastBlockLength) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("fsync", + new Class[] {String.class, long.class, String.class, long.class }, + new RemoteParam(), fileId, clientName, lastBlockLength); + rpcClient.invokeSequential(locations, method); + } + + @Override + public void setTimes(String src, long mtime, long atime) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("setTimes", + new Class[] {String.class, long.class, long.class}, + new RemoteParam(), mtime, atime); + rpcClient.invokeSequential(locations, method); + } + + @Override + public void createSymlink(String target, String link, FsPermission dirPerms, + boolean createParent) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO Verify that the link location is in the same NS as the targets + final List targetLocations = + rpcServer.getLocationsForPath(target, true); + final List linkLocations = + rpcServer.getLocationsForPath(link, true); + RemoteLocation linkLocation = linkLocations.get(0); + RemoteMethod method = new RemoteMethod("createSymlink", + new Class[] {String.class, String.class, FsPermission.class, + boolean.class}, + new RemoteParam(), linkLocation.getDest(), dirPerms, createParent); + rpcClient.invokeSequential(targetLocations, method); + } + + @Override + public String getLinkTarget(String path) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + final List locations = + rpcServer.getLocationsForPath(path, true); + RemoteMethod method = new RemoteMethod("getLinkTarget", + new Class[] {String.class}, new RemoteParam()); + return rpcClient.invokeSequential(locations, method, String.class, null); + } + + @Override // Client Protocol + public void allowSnapshot(String snapshotRoot) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override // Client Protocol + public void disallowSnapshot(String snapshot) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public void renameSnapshot(String snapshotRoot, String snapshotOldName, + String snapshotNewName) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public SnapshottableDirectoryStatus[] getSnapshottableDirListing() + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public SnapshotDiffReport getSnapshotDiffReport(String snapshotRoot, + String earlierSnapshotName, String laterSnapshotName) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public SnapshotDiffReportListing getSnapshotDiffReportListing( + String snapshotRoot, String earlierSnapshotName, String laterSnapshotName, + byte[] startPath, int index) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public long addCacheDirective(CacheDirectiveInfo path, + EnumSet flags) 
throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + return 0; + } + + @Override + public void modifyCacheDirective(CacheDirectiveInfo directive, + EnumSet flags) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public void removeCacheDirective(long id) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public BatchedRemoteIterator.BatchedEntries listCacheDirectives( + long prevId, CacheDirectiveInfo filter) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public void addCachePool(CachePoolInfo info) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public void modifyCachePool(CachePoolInfo info) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public void removeCachePool(String cachePoolName) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public BatchedRemoteIterator.BatchedEntries listCachePools(String prevKey) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public void modifyAclEntries(String src, List aclSpec) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("modifyAclEntries", + new Class[] {String.class, List.class}, + new RemoteParam(), aclSpec); + rpcClient.invokeSequential(locations, method, null, null); + } + + @Override + public void removeAclEntries(String src, List aclSpec) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("removeAclEntries", + new Class[] {String.class, List.class}, + new RemoteParam(), aclSpec); + rpcClient.invokeSequential(locations, method, null, null); + } + + @Override + public void removeDefaultAcl(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("removeDefaultAcl", + new Class[] {String.class}, new RemoteParam()); + rpcClient.invokeSequential(locations, method); + } + + @Override + public void removeAcl(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("removeAcl", + new Class[] {String.class}, new RemoteParam()); + rpcClient.invokeSequential(locations, method); + } + + @Override + public void setAcl(String src, List aclSpec) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod( + "setAcl", new Class[] {String.class, List.class}, + new RemoteParam(), aclSpec); + rpcClient.invokeSequential(locations, method); + } + + @Override + public AclStatus 
getAclStatus(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("getAclStatus", + new Class[] {String.class}, new RemoteParam()); + return rpcClient.invokeSequential(locations, method, AclStatus.class, null); + } + + @Override + public void createEncryptionZone(String src, String keyName) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("createEncryptionZone", + new Class[] {String.class, String.class}, + new RemoteParam(), keyName); + rpcClient.invokeSequential(locations, method); + } + + @Override + public EncryptionZone getEZForPath(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("getEZForPath", + new Class[] {String.class}, new RemoteParam()); + return rpcClient.invokeSequential( + locations, method, EncryptionZone.class, null); + } + + @Override + public BatchedRemoteIterator.BatchedEntries listEncryptionZones(long prevId) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public void reencryptEncryptionZone(String zone, HdfsConstants.ReencryptAction action) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public BatchedRemoteIterator.BatchedEntries listReencryptionStatus( + long prevId) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public void setXAttr(String src, XAttr xAttr, EnumSet flag) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("setXAttr", + new Class[] {String.class, XAttr.class, EnumSet.class}, + new RemoteParam(), xAttr, flag); + rpcClient.invokeSequential(locations, method); + } + + @SuppressWarnings("unchecked") + @Override + public List getXAttrs(String src, List xAttrs) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("getXAttrs", + new Class[] {String.class, List.class}, new RemoteParam(), xAttrs); + return (List) rpcClient.invokeSequential( + locations, method, List.class, null); + } + + @SuppressWarnings("unchecked") + @Override + public List listXAttrs(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(src, false); + RemoteMethod method = new RemoteMethod("listXAttrs", + new Class[] {String.class}, new RemoteParam()); + return (List) rpcClient.invokeSequential( + locations, method, List.class, null); + } + + @Override + public void removeXAttr(String src, XAttr xAttr) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // TODO handle virtual directories 
+ final List locations = + rpcServer.getLocationsForPath(src, true); + RemoteMethod method = new RemoteMethod("removeXAttr", + new Class[] {String.class, XAttr.class}, new RemoteParam(), xAttr); + rpcClient.invokeSequential(locations, method); + } + + @Override + public void checkAccess(String path, FsAction mode) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + // TODO handle virtual directories + final List locations = + rpcServer.getLocationsForPath(path, true); + RemoteMethod method = new RemoteMethod("checkAccess", + new Class[] {String.class, FsAction.class}, + new RemoteParam(), mode); + rpcClient.invokeSequential(locations, method); + } + + @Override + public long getCurrentEditLogTxid() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ); + + RemoteMethod method = new RemoteMethod( + "getCurrentEditLogTxid", new Class[] {}); + final Set nss = namenodeResolver.getNamespaces(); + Map ret = + rpcClient.invokeConcurrent(nss, method, true, false, long.class); + + // Return the maximum txid + long txid = 0; + for (long t : ret.values()) { + if (t > txid) { + txid = t; + } + } + return txid; + } + + @Override + public EventBatchList getEditsFromTxid(long txid) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public DataEncryptionKey getDataEncryptionKey() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public String createSnapshot(String snapshotRoot, String snapshotName) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + return null; + } + + @Override + public void deleteSnapshot(String snapshotRoot, String snapshotName) + throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public void setQuota(String path, long namespaceQuota, long storagespaceQuota, + StorageType type) throws IOException { + rpcServer.getQuotaModule() + .setQuota(path, namespaceQuota, storagespaceQuota, type); + } + + @Override + public QuotaUsage getQuotaUsage(String path) throws IOException { + return rpcServer.getQuotaModule().getQuotaUsage(path); + } + + @Override + public void reportBadBlocks(LocatedBlock[] blocks) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE); + + // Block pool id -> blocks + Map> blockLocations = new HashMap<>(); + for (LocatedBlock block : blocks) { + String bpId = block.getBlock().getBlockPoolId(); + List bpBlocks = blockLocations.get(bpId); + if (bpBlocks == null) { + bpBlocks = new LinkedList<>(); + blockLocations.put(bpId, bpBlocks); + } + bpBlocks.add(block); + } + + // Invoke each block pool + for (Map.Entry> entry : blockLocations.entrySet()) { + String bpId = entry.getKey(); + List bpBlocks = entry.getValue(); + + LocatedBlock[] bpBlocksArray = + bpBlocks.toArray(new LocatedBlock[bpBlocks.size()]); + RemoteMethod method = new RemoteMethod("reportBadBlocks", + new Class[] {LocatedBlock[].class}, + new Object[] {bpBlocksArray}); + rpcClient.invokeSingleBlockPool(bpId, method); + } + } + + @Override + public void unsetStoragePolicy(String src) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + @Override + public BlockStoragePolicy getStoragePolicy(String path) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public 
ErasureCodingPolicyInfo[] getErasureCodingPolicies() + throws IOException { + return erasureCoding.getErasureCodingPolicies(); + } + + @Override + public Map getErasureCodingCodecs() throws IOException { + return erasureCoding.getErasureCodingCodecs(); + } + + @Override + public AddErasureCodingPolicyResponse[] addErasureCodingPolicies( + ErasureCodingPolicy[] policies) throws IOException { + return erasureCoding.addErasureCodingPolicies(policies); + } + + @Override + public void removeErasureCodingPolicy(String ecPolicyName) + throws IOException { + erasureCoding.removeErasureCodingPolicy(ecPolicyName); + } + + @Override + public void disableErasureCodingPolicy(String ecPolicyName) + throws IOException { + erasureCoding.disableErasureCodingPolicy(ecPolicyName); + } + + @Override + public void enableErasureCodingPolicy(String ecPolicyName) + throws IOException { + erasureCoding.enableErasureCodingPolicy(ecPolicyName); + } + + @Override + public ErasureCodingPolicy getErasureCodingPolicy(String src) + throws IOException { + return erasureCoding.getErasureCodingPolicy(src); + } + + @Override + public void setErasureCodingPolicy(String src, String ecPolicyName) + throws IOException { + erasureCoding.setErasureCodingPolicy(src, ecPolicyName); + } + + @Override + public void unsetErasureCodingPolicy(String src) throws IOException { + erasureCoding.unsetErasureCodingPolicy(src); + } + + @Override + public ECBlockGroupStats getECBlockGroupStats() throws IOException { + return erasureCoding.getECBlockGroupStats(); + } + + @Override + public ReplicatedBlockStats getReplicatedBlockStats() throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Deprecated + @Override + public BatchedRemoteIterator.BatchedEntries listOpenFiles(long prevId) + throws IOException { + return listOpenFiles(prevId, EnumSet.of(OpenFilesIterator.OpenFilesType.ALL_OPEN_FILES), + OpenFilesIterator.FILTER_PATH_DEFAULT); + } + + @Override + public BatchedRemoteIterator.BatchedEntries listOpenFiles(long prevId, + EnumSet openFilesTypes, String path) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.READ, false); + return null; + } + + @Override + public void satisfyStoragePolicy(String path) throws IOException { + rpcServer.checkOperation(NameNode.OperationCategory.WRITE, false); + } + + /** + * Determines combinations of eligible src/dst locations for a rename. A + * rename cannot change the namespace. Renames are only allowed if there is an + * eligible dst location in the same namespace as the source. + * + * @param srcLocations List of all potential source destinations where the + * path may be located. On return this list is trimmed to include + * only the paths that have corresponding destinations in the same + * namespace. + * @param dst The destination path + * @return A map of all eligible source namespaces and their corresponding + * replacement value. + * @throws IOException If the dst paths could not be determined. 
+   */
+  private RemoteParam getRenameDestinations(
+      final List<RemoteLocation> srcLocations, final String dst)
+      throws IOException {
+
+    final List<RemoteLocation> dstLocations =
+        rpcServer.getLocationsForPath(dst, true);
+    final Map<RemoteLocation, String> dstMap = new HashMap<>();
+
+    Iterator<RemoteLocation> iterator = srcLocations.iterator();
+    while (iterator.hasNext()) {
+      RemoteLocation srcLocation = iterator.next();
+      RemoteLocation eligibleDst =
+          getFirstMatchingLocation(srcLocation, dstLocations);
+      if (eligibleDst != null) {
+        // Use this dst for this source location
+        dstMap.put(srcLocation, eligibleDst.getDest());
+      } else {
+        // This src destination is not valid, remove from the source list
+        iterator.remove();
+      }
+    }
+    return new RemoteParam(dstMap);
+  }
+
+  /**
+   * Get first matching location.
+   *
+   * @param location Location we are looking for.
+   * @param locations List of locations.
+   * @return The first matching location in the list.
+   */
+  private RemoteLocation getFirstMatchingLocation(RemoteLocation location,
+      List<RemoteLocation> locations) {
+    for (RemoteLocation loc : locations) {
+      if (loc.getNameserviceId().equals(location.getNameserviceId())) {
+        // Return first matching location
+        return loc;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Aggregate content summaries for each subcluster.
+   *
+   * @param summaries Collection of individual summaries.
+   * @return Aggregated content summary.
+   */
+  private ContentSummary aggregateContentSummary(
+      Collection<ContentSummary> summaries) {
+    if (summaries.size() == 1) {
+      return summaries.iterator().next();
+    }
+
+    long length = 0;
+    long fileCount = 0;
+    long directoryCount = 0;
+    long quota = 0;
+    long spaceConsumed = 0;
+    long spaceQuota = 0;
+
+    for (ContentSummary summary : summaries) {
+      length += summary.getLength();
+      fileCount += summary.getFileCount();
+      directoryCount += summary.getDirectoryCount();
+      quota += summary.getQuota();
+      spaceConsumed += summary.getSpaceConsumed();
+      spaceQuota += summary.getSpaceQuota();
+    }
+
+    ContentSummary ret = new ContentSummary.Builder()
+        .length(length)
+        .fileCount(fileCount)
+        .directoryCount(directoryCount)
+        .quota(quota)
+        .spaceConsumed(spaceConsumed)
+        .spaceQuota(spaceQuota)
+        .build();
+    return ret;
+  }
+
+  /**
+   * Get the file info from all the locations.
+   *
+   * @param locations Locations to check.
+   * @param method The file information method to run.
+   * @return The first file info if it's a file, the directory if it's
+   *         everywhere.
+   * @throws IOException If all the locations throw an exception.
+   */
+  private HdfsFileStatus getFileInfoAll(final List<RemoteLocation> locations,
+      final RemoteMethod method) throws IOException {
+
+    // Get the file info from everybody
+    Map<RemoteLocation, HdfsFileStatus> results =
+        rpcClient.invokeConcurrent(locations, method, HdfsFileStatus.class);
+
+    // We return the first file
+    HdfsFileStatus dirStatus = null;
+    for (RemoteLocation loc : locations) {
+      HdfsFileStatus fileStatus = results.get(loc);
+      if (fileStatus != null) {
+        if (!fileStatus.isDirectory()) {
+          return fileStatus;
+        } else if (dirStatus == null) {
+          dirStatus = fileStatus;
+        }
+      }
+    }
+    return dirStatus;
+  }
+
+  /**
+   * Get the permissions for the parent of a child with given permissions.
+   * Add implicit u+wx permission for parent. This is based on
+   * @{FSDirMkdirOp#addImplicitUwx}.
+   * @param mask The permission mask of the child.
+   * @return The permission mask of the parent.
+ */ + private static FsPermission getParentPermission(final FsPermission mask) { + FsPermission ret = new FsPermission( + mask.getUserAction().or(FsAction.WRITE_EXECUTE), + mask.getGroupAction(), + mask.getOtherAction()); + return ret; + } + + /** + * Check if a path should be in all subclusters. + * + * @param path Path to check. + * @return If a path should be in all subclusters. + */ + private boolean isPathAll(final String path) { + if (subclusterResolver instanceof MountTableResolver) { + try { + MountTableResolver mountTable = (MountTableResolver)subclusterResolver; + MountTable entry = mountTable.getMountPoint(path); + if (entry != null) { + return entry.isAll(); + } + } catch (IOException e) { + LOG.error("Cannot get mount point", e); + } + } + return false; + } + + /** + * Create a new file status for a mount point. + * + * @param name Name of the mount point. + * @param childrenNum Number of children. + * @param date Map with the dates. + * @return New HDFS file status representing a mount point. + */ + private HdfsFileStatus getMountPointStatus( + String name, int childrenNum, long date) { + long modTime = date; + long accessTime = date; + FsPermission permission = FsPermission.getDirDefault(); + String owner = this.superUser; + String group = this.superGroup; + try { + // TODO support users, it should be the user for the pointed folder + UserGroupInformation ugi = RouterRpcServer.getRemoteUser(); + owner = ugi.getUserName(); + group = ugi.getPrimaryGroupName(); + } catch (IOException e) { + LOG.error("Cannot get the remote user: {}", e.getMessage()); + } + long inodeId = 0; + return new HdfsFileStatus.Builder() + .isdir(true) + .mtime(modTime) + .atime(accessTime) + .perm(permission) + .owner(owner) + .group(group) + .symlink(new byte[0]) + .path(DFSUtil.string2Bytes(name)) + .fileId(inodeId) + .children(childrenNum) + .build(); + } + + /** + * Get the modification dates for mount points. + * + * @param path Name of the path to start checking dates from. + * @return Map with the modification dates for all sub-entries. + */ + private Map getMountPointDates(String path) { + Map ret = new TreeMap<>(); + if (subclusterResolver instanceof MountTableResolver) { + try { + final List children = subclusterResolver.getMountPoints(path); + for (String child : children) { + Long modTime = getModifiedTime(ret, path, child); + ret.put(child, modTime); + } + } catch (IOException e) { + LOG.error("Cannot get mount point", e); + } + } + return ret; + } + + /** + * Get modified time for child. If the child is present in mount table it + * will return the modified time. If the child is not present but subdirs of + * this child are present then it will return latest modified subdir's time + * as modified time of the requested child. + * + * @param ret contains children and modified times. + * @param path Name of the path to start checking dates from. + * @param child child of the requested path. + * @return modified time. + */ + private long getModifiedTime(Map ret, String path, + String child) { + MountTableResolver mountTable = (MountTableResolver)subclusterResolver; + String srcPath; + if (path.equals(Path.SEPARATOR)) { + srcPath = Path.SEPARATOR + child; + } else { + srcPath = path + Path.SEPARATOR + child; + } + Long modTime = 0L; + try { + // Get mount table entry for the srcPath + MountTable entry = mountTable.getMountPoint(srcPath); + // if srcPath is not in mount table but its subdirs are in mount + // table we will display latest modified subdir date/time. 
+ if (entry == null) { + List entries = mountTable.getMounts(srcPath); + for (MountTable eachEntry : entries) { + // Get the latest date + if (ret.get(child) == null || + ret.get(child) < eachEntry.getDateModified()) { + modTime = eachEntry.getDateModified(); + } + } + } else { + modTime = entry.getDateModified(); + } + } catch (IOException e) { + LOG.error("Cannot get mount point", e); + } + return modTime; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java index 21a98717271..0cf884f15c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java @@ -27,6 +27,8 @@ import org.apache.hadoop.http.HttpServer2; import org.apache.hadoop.service.AbstractService; +import javax.servlet.ServletContext; + /** * Web interface for the {@link Router}. It exposes the Web UI and the WebHDFS * methods from {@link RouterWebHdfsMethods}. @@ -116,6 +118,9 @@ protected void serviceStop() throws Exception { private static void setupServlets( HttpServer2 httpServer, Configuration conf) { // TODO Add servlets for FSCK, etc + httpServer.addInternalServlet(IsRouterActiveServlet.SERVLET_NAME, + IsRouterActiveServlet.PATH_SPEC, + IsRouterActiveServlet.class); } public InetSocketAddress getHttpAddress() { @@ -125,4 +130,8 @@ public InetSocketAddress getHttpAddress() { public InetSocketAddress getHttpsAddress() { return this.httpsAddress; } + + public static Router getRouterFromContext(ServletContext context) { + return (Router)context.getAttribute(NAMENODE_ATTRIBUTE_KEY); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNamenodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNamenodeProtocol.java index 0433650b94b..bf0db6e7253 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNamenodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNamenodeProtocol.java @@ -184,4 +184,11 @@ public boolean isRollingUpgrade() throws IOException { rpcServer.checkOperation(OperationCategory.READ, false); return false; } + + @Override + public Long getNextSPSPath() throws IOException { + rpcServer.checkOperation(OperationCategory.READ, false); + // not supported + return null; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterPermissionChecker.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterPermissionChecker.java index 63d190c07cf..cf660d3133d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterPermissionChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterPermissionChecker.java @@ -21,8 +21,8 @@ import java.util.Arrays; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; 
+import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; @@ -35,7 +35,8 @@ * Class that helps in checking permissions in Router-based federation. */ public class RouterPermissionChecker extends FSPermissionChecker { - static final Log LOG = LogFactory.getLog(RouterPermissionChecker.class); + static final Logger LOG = + LoggerFactory.getLogger(RouterPermissionChecker.class); /** Mount table default permission. */ public static final short MOUNT_TABLE_PERMISSION_DEFAULT = 00755; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaManager.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaManager.java index 0df34fc6410..87a8724317e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaManager.java @@ -161,8 +161,8 @@ public boolean isQuotaSet(RouterQuotaUsage quota) { long ssQuota = quota.getSpaceQuota(); // once nsQuota or ssQuota was set, this mount table is quota set - if (nsQuota != HdfsConstants.QUOTA_DONT_SET - || ssQuota != HdfsConstants.QUOTA_DONT_SET) { + if (nsQuota != HdfsConstants.QUOTA_RESET + || ssQuota != HdfsConstants.QUOTA_RESET) { return true; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUpdateService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUpdateService.java index 506e2ee2c8c..4813b535e37 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUpdateService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUpdateService.java @@ -111,7 +111,7 @@ protected void periodicInvoke() { // If quota is not set in some subclusters under federation path, // set quota for this path. 
- if (currentQuotaUsage.getQuota() == HdfsConstants.QUOTA_DONT_SET) { + if (currentQuotaUsage.getQuota() == HdfsConstants.QUOTA_RESET) { try { this.rpcServer.setQuota(src, nsQuota, ssQuota, null); } catch (IOException ioe) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUsage.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUsage.java index eedd80f774d..18268aa71d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUsage.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterQuotaUsage.java @@ -96,14 +96,14 @@ public void verifyStoragespaceQuota() throws DSQuotaExceededException { public String toString() { String nsQuota = String.valueOf(getQuota()); String nsCount = String.valueOf(getFileAndDirectoryCount()); - if (getQuota() == HdfsConstants.QUOTA_DONT_SET) { + if (getQuota() == HdfsConstants.QUOTA_RESET) { nsQuota = "-"; nsCount = "-"; } String ssQuota = StringUtils.byteDesc(getSpaceQuota()); String ssCount = StringUtils.byteDesc(getSpaceConsumed()); - if (getSpaceQuota() == HdfsConstants.QUOTA_DONT_SET) { + if (getSpaceQuota() == HdfsConstants.QUOTA_RESET) { ssQuota = "-"; ssCount = "-"; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java index 3eb72414288..56ca55ff6ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java @@ -92,8 +92,8 @@ LoggerFactory.getLogger(RouterRpcClient.class); - /** Router identifier. */ - private final String routerId; + /** Router using this RPC client. */ + private final Router router; /** Interface to identify the active NN for a nameservice or blockpool ID. */ private final ActiveNamenodeResolver namenodeResolver; @@ -116,12 +116,13 @@ * Create a router RPC client to manage remote procedure calls to NNs. * * @param conf Hdfs Configuation. + * @param router A router using this RPC client. * @param resolver A NN resolver to determine the currently active NN in HA. * @param monitor Optional performance monitor. 
*/ - public RouterRpcClient(Configuration conf, String identifier, + public RouterRpcClient(Configuration conf, Router router, ActiveNamenodeResolver resolver, RouterRpcMonitor monitor) { - this.routerId = identifier; + this.router = router; this.namenodeResolver = resolver; @@ -343,7 +344,8 @@ private Object invokeMethod( if (namenodes == null || namenodes.isEmpty()) { throw new IOException("No namenodes to invoke " + method.getName() + - " with params " + Arrays.toString(params) + " from " + this.routerId); + " with params " + Arrays.toString(params) + " from " + + router.getRouterId()); } Object ret = null; @@ -1126,7 +1128,7 @@ public Object call() throws Exception { String msg = "Not enough client threads " + active + "/" + total; LOG.error(msg); throw new StandbyException( - "Router " + routerId + " is overloaded: " + msg); + "Router " + router.getRouterId() + " is overloaded: " + msg); } catch (InterruptedException ex) { LOG.error("Unexpected error while invoking API: {}", ex.getMessage()); throw new IOException( @@ -1150,7 +1152,7 @@ public Object call() throws Exception { if (namenodes == null || namenodes.isEmpty()) { throw new IOException("Cannot locate a registered namenode for " + nsId + - " from " + this.routerId); + " from " + router.getRouterId()); } return namenodes; } @@ -1171,7 +1173,7 @@ public Object call() throws Exception { if (namenodes == null || namenodes.isEmpty()) { throw new IOException("Cannot locate a registered namenode for " + bpId + - " from " + this.routerId); + " from " + router.getRouterId()); } return namenodes; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java index 716ebee9bc4..165b429cc6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java @@ -33,16 +33,12 @@ import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; -import java.util.HashMap; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CryptoProtocolVersion; @@ -54,7 +50,6 @@ import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.Options; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.QuotaUsage; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.XAttr; @@ -64,7 +59,6 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.AddBlockFlag; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.inotify.EventBatchList; import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse; @@ -93,7 +87,6 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.OpenFileEntry; -import org.apache.hadoop.hdfs.protocol.OpenFilesIterator; import 
org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType; import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; @@ -101,8 +94,8 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing; import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.NamenodeProtocolService; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol; import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; @@ -167,11 +160,6 @@ /** Configuration for the RPC server. */ private Configuration conf; - /** Identifier for the super user. */ - private final String superUser; - /** Identifier for the super group. */ - private final String superGroup; - /** Router using this RPC server. */ private final Router router; @@ -193,25 +181,22 @@ /** Interface to map global name space to HDFS subcluster name spaces. */ private final FileSubclusterResolver subclusterResolver; - /** If we are in safe mode, fail requests as if a standby NN. */ - private volatile boolean safeMode; - /** Category of the operation that a thread is executing. */ private final ThreadLocal opCategory = new ThreadLocal<>(); // Modules implementing groups of RPC calls /** Router Quota calls. */ private final Quota quotaCall; - /** Erasure coding calls. */ - private final ErasureCoding erasureCoding; /** NamenodeProtocol calls. */ private final RouterNamenodeProtocol nnProto; - + /** ClientProtocol calls. */ + private final RouterClientProtocol clientProto; /** * Construct a router RPC server. * * @param configuration HDFS Configuration. + * @param router A router using this RPC server. * @param nnResolver The NN resolver instance to determine active NNs in HA. * @param fileResolver File resolver to resolve file paths to subclusters. * @throws IOException If the RPC server could not be created. 
@@ -226,12 +211,6 @@ public RouterRpcServer(Configuration configuration, Router router, this.namenodeResolver = nnResolver; this.subclusterResolver = fileResolver; - // User and group for reporting - this.superUser = System.getProperty("user.name"); - this.superGroup = this.conf.get( - DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY, - DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT); - // RPC server settings int handlerCount = this.conf.getInt(DFS_ROUTER_HANDLER_COUNT_KEY, DFS_ROUTER_HANDLER_COUNT_DEFAULT); @@ -313,13 +292,13 @@ public RouterRpcServer(Configuration configuration, Router router, this.rpcMonitor = ReflectionUtils.newInstance(rpcMonitorClass, conf); // Create the client - this.rpcClient = new RouterRpcClient(this.conf, this.router.getRouterId(), + this.rpcClient = new RouterRpcClient(this.conf, this.router, this.namenodeResolver, this.rpcMonitor); // Initialize modules this.quotaCall = new Quota(this.router, this); - this.erasureCoding = new ErasureCoding(this); this.nnProto = new RouterNamenodeProtocol(this); + this.clientProto = new RouterClientProtocol(conf, this); } @Override @@ -373,6 +352,13 @@ public FileSubclusterResolver getSubclusterResolver() { return subclusterResolver; } + /** + * Get the active namenode resolver + */ + public ActiveNamenodeResolver getNamenodeResolver() { + return namenodeResolver; + } + /** * Get the RPC monitor and metrics. * @@ -414,7 +400,7 @@ public InetSocketAddress getRpcAddress() { * client requests. * @throws UnsupportedOperationException If the operation is not supported. */ - protected void checkOperation(OperationCategory op, boolean supported) + void checkOperation(OperationCategory op, boolean supported) throws StandbyException, UnsupportedOperationException { checkOperation(op); @@ -436,7 +422,7 @@ protected void checkOperation(OperationCategory op, boolean supported) * @throws SafeModeException If the Router is in safe mode and cannot serve * client requests. */ - protected void checkOperation(OperationCategory op) + void checkOperation(OperationCategory op) throws StandbyException { // Log the function we are currently calling. if (rpcMonitor != null) { @@ -456,7 +442,8 @@ protected void checkOperation(OperationCategory op) return; } - if (safeMode) { + RouterSafemodeService safemodeService = router.getSafemodeService(); + if (safemodeService != null && safemodeService.isInSafeMode()) { // Throw standby exception, router is not available if (rpcMonitor != null) { rpcMonitor.routerFailureSafemode(); @@ -467,77 +454,43 @@ protected void checkOperation(OperationCategory op) } /** - * In safe mode all RPC requests will fail and return a standby exception. - * The client will try another Router, similar to the client retry logic for - * HA. - * - * @param mode True if enabled, False if disabled. - */ - public void setSafeMode(boolean mode) { - this.safeMode = mode; - } - - /** - * Check if the Router is in safe mode and cannot serve RPC calls. + * Get the name of the method that is calling this function. * - * @return If the Router is in safe mode. + * @return Name of the method calling this function. 
*/ - public boolean isInSafeMode() { - return this.safeMode; + static String getMethodName() { + final StackTraceElement[] stack = Thread.currentThread().getStackTrace(); + String methodName = stack[3].getMethodName(); + return methodName; } @Override // ClientProtocol public Token getDelegationToken(Text renewer) throws IOException { - checkOperation(OperationCategory.WRITE, false); - return null; - } - - /** - * The the delegation token from each name service. - * @param renewer - * @return Name service -> Token. - * @throws IOException - */ - public Map> - getDelegationTokens(Text renewer) throws IOException { - checkOperation(OperationCategory.WRITE, false); - return null; + return clientProto.getDelegationToken(renewer); } @Override // ClientProtocol public long renewDelegationToken(Token token) throws IOException { - checkOperation(OperationCategory.WRITE, false); - return 0; + return clientProto.renewDelegationToken(token); } @Override // ClientProtocol public void cancelDelegationToken(Token token) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.cancelDelegationToken(token); } @Override // ClientProtocol public LocatedBlocks getBlockLocations(String src, final long offset, final long length) throws IOException { - checkOperation(OperationCategory.READ); - - List locations = getLocationsForPath(src, false); - RemoteMethod remoteMethod = new RemoteMethod("getBlockLocations", - new Class[] {String.class, long.class, long.class}, - new RemoteParam(), offset, length); - return (LocatedBlocks) rpcClient.invokeSequential(locations, remoteMethod, - LocatedBlocks.class, null); + return clientProto.getBlockLocations(src, offset, length); } @Override // ClientProtocol public FsServerDefaults getServerDefaults() throws IOException { - checkOperation(OperationCategory.READ); - - RemoteMethod method = new RemoteMethod("getServerDefaults"); - String ns = subclusterResolver.getDefaultNamespace(); - return (FsServerDefaults) rpcClient.invokeSingle(ns, method); + return clientProto.getServerDefaults(); } @Override // ClientProtocol @@ -546,44 +499,8 @@ public HdfsFileStatus create(String src, FsPermission masked, boolean createParent, short replication, long blockSize, CryptoProtocolVersion[] supportedVersions, String ecPolicyName) throws IOException { - checkOperation(OperationCategory.WRITE); - - if (createParent && isPathAll(src)) { - int index = src.lastIndexOf(Path.SEPARATOR); - String parent = src.substring(0, index); - LOG.debug("Creating {} requires creating parent {}", src, parent); - FsPermission parentPermissions = getParentPermission(masked); - boolean success = mkdirs(parent, parentPermissions, createParent); - if (!success) { - // This shouldn't happen as mkdirs returns true or exception - LOG.error("Couldn't create parents for {}", src); - } - } - - RemoteLocation createLocation = getCreateLocation(src); - RemoteMethod method = new RemoteMethod("create", - new Class[] {String.class, FsPermission.class, String.class, - EnumSetWritable.class, boolean.class, short.class, - long.class, CryptoProtocolVersion[].class, - String.class}, - createLocation.getDest(), masked, clientName, flag, createParent, + return clientProto.create(src, masked, clientName, flag, createParent, replication, blockSize, supportedVersions, ecPolicyName); - return (HdfsFileStatus) rpcClient.invokeSingle(createLocation, method); - } - - /** - * Get the permissions for the parent of a child with given permissions. - * Add implicit u+wx permission for parent. 
This is based on - * @{FSDirMkdirOp#addImplicitUwx}. - * @param mask The permission mask of the child. - * @return The permission mask of the parent. - */ - private static FsPermission getParentPermission(final FsPermission mask) { - FsPermission ret = new FsPermission( - mask.getUserAction().or(FsAction.WRITE_EXECUTE), - mask.getGroupAction(), - mask.getOtherAction()); - return ret; } /** @@ -594,7 +511,7 @@ private static FsPermission getParentPermission(final FsPermission mask) { * @return The remote location for this file. * @throws IOException If the file has no creation location. */ - protected RemoteLocation getCreateLocation(final String src) + RemoteLocation getCreateLocation(final String src) throws IOException { final List locations = getLocationsForPath(src, true); @@ -635,100 +552,45 @@ protected RemoteLocation getCreateLocation(final String src) return createLocation; } - // Medium @Override // ClientProtocol public LastBlockWithStatus append(String src, final String clientName, final EnumSetWritable flag) throws IOException { - checkOperation(OperationCategory.WRITE); - - List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("append", - new Class[] {String.class, String.class, EnumSetWritable.class}, - new RemoteParam(), clientName, flag); - return rpcClient.invokeSequential( - locations, method, LastBlockWithStatus.class, null); + return clientProto.append(src, clientName, flag); } - // Low @Override // ClientProtocol public boolean recoverLease(String src, String clientName) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("recoverLease", - new Class[] {String.class, String.class}, new RemoteParam(), - clientName); - Object result = rpcClient.invokeSequential( - locations, method, Boolean.class, Boolean.TRUE); - return (boolean) result; + return clientProto.recoverLease(src, clientName); } @Override // ClientProtocol public boolean setReplication(String src, short replication) throws IOException { - checkOperation(OperationCategory.WRITE); - - List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("setReplication", - new Class[] {String.class, short.class}, new RemoteParam(), - replication); - Object result = rpcClient.invokeSequential( - locations, method, Boolean.class, Boolean.TRUE); - return (boolean) result; + return clientProto.setReplication(src, replication); } - @Override + @Override // ClientProtocol public void setStoragePolicy(String src, String policyName) throws IOException { - checkOperation(OperationCategory.WRITE); - - List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("setStoragePolicy", - new Class[] {String.class, String.class}, - new RemoteParam(), policyName); - rpcClient.invokeSequential(locations, method, null, null); + clientProto.setStoragePolicy(src, policyName); } - @Override + @Override // ClientProtocol public BlockStoragePolicy[] getStoragePolicies() throws IOException { - checkOperation(OperationCategory.READ); - - RemoteMethod method = new RemoteMethod("getStoragePolicies"); - String ns = subclusterResolver.getDefaultNamespace(); - return (BlockStoragePolicy[]) rpcClient.invokeSingle(ns, method); + return clientProto.getStoragePolicies(); } @Override // ClientProtocol public void setPermission(String src, FsPermission permissions) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = 
getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("setPermission", - new Class[] {String.class, FsPermission.class}, - new RemoteParam(), permissions); - if (isPathAll(src)) { - rpcClient.invokeConcurrent(locations, method); - } else { - rpcClient.invokeSequential(locations, method); - } + clientProto.setPermission(src, permissions); } @Override // ClientProtocol public void setOwner(String src, String username, String groupname) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("setOwner", - new Class[] {String.class, String.class, String.class}, - new RemoteParam(), username, groupname); - if (isPathAll(src)) { - rpcClient.invokeConcurrent(locations, method); - } else { - rpcClient.invokeSequential(locations, method); - } + clientProto.setOwner(src, username, groupname); } /** @@ -740,18 +602,8 @@ public LocatedBlock addBlock(String src, String clientName, ExtendedBlock previous, DatanodeInfo[] excludedNodes, long fileId, String[] favoredNodes, EnumSet addBlockFlags) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("addBlock", - new Class[] {String.class, String.class, ExtendedBlock.class, - DatanodeInfo[].class, long.class, String[].class, - EnumSet.class}, - new RemoteParam(), clientName, previous, excludedNodes, fileId, - favoredNodes, addBlockFlags); - // TODO verify the excludedNodes and favoredNodes are acceptable to this NN - return (LocatedBlock) rpcClient.invokeSequential( - locations, method, LocatedBlock.class, null); + return clientProto.addBlock(src, clientName, previous, excludedNodes, + fileId, favoredNodes, addBlockFlags); } /** @@ -764,55 +616,26 @@ public LocatedBlock getAdditionalDatanode(final String src, final long fileId, final String[] existingStorageIDs, final DatanodeInfo[] excludes, final int numAdditionalNodes, final String clientName) throws IOException { - checkOperation(OperationCategory.READ); - - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("getAdditionalDatanode", - new Class[] {String.class, long.class, ExtendedBlock.class, - DatanodeInfo[].class, String[].class, - DatanodeInfo[].class, int.class, String.class}, - new RemoteParam(), fileId, blk, existings, existingStorageIDs, excludes, - numAdditionalNodes, clientName); - return (LocatedBlock) rpcClient.invokeSequential( - locations, method, LocatedBlock.class, null); + return clientProto.getAdditionalDatanode(src, fileId, blk, existings, + existingStorageIDs, excludes, numAdditionalNodes, clientName); } @Override // ClientProtocol public void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) throws IOException { - checkOperation(OperationCategory.WRITE); - - RemoteMethod method = new RemoteMethod("abandonBlock", - new Class[] {ExtendedBlock.class, long.class, String.class, - String.class}, - b, fileId, new RemoteParam(), holder); - rpcClient.invokeSingle(b, method); + clientProto.abandonBlock(b, fileId, src, holder); } @Override // ClientProtocol public boolean complete(String src, String clientName, ExtendedBlock last, long fileId) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("complete", - new Class[] {String.class, String.class, ExtendedBlock.class, - long.class}, - 
new RemoteParam(), clientName, last, fileId); - // Complete can return true/false, so don't expect a result - return ((Boolean) rpcClient.invokeSequential( - locations, method, Boolean.class, null)).booleanValue(); + return clientProto.complete(src, clientName, last, fileId); } @Override // ClientProtocol public LocatedBlock updateBlockForPipeline( ExtendedBlock block, String clientName) throws IOException { - checkOperation(OperationCategory.WRITE); - - RemoteMethod method = new RemoteMethod("updateBlockForPipeline", - new Class[] {ExtendedBlock.class, String.class}, - block, clientName); - return (LocatedBlock) rpcClient.invokeSingle(block, method); + return clientProto.updateBlockForPipeline(block, clientName); } /** @@ -823,462 +646,91 @@ public LocatedBlock updateBlockForPipeline( public void updatePipeline(String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs) throws IOException { - checkOperation(OperationCategory.WRITE); - - RemoteMethod method = new RemoteMethod("updatePipeline", - new Class[] {String.class, ExtendedBlock.class, ExtendedBlock.class, - DatanodeID[].class, String[].class}, - clientName, oldBlock, newBlock, newNodes, newStorageIDs); - rpcClient.invokeSingle(oldBlock, method); + clientProto.updatePipeline(clientName, oldBlock, newBlock, newNodes, + newStorageIDs); } @Override // ClientProtocol public long getPreferredBlockSize(String src) throws IOException { - checkOperation(OperationCategory.READ); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("getPreferredBlockSize", - new Class[] {String.class}, new RemoteParam()); - return ((Long) rpcClient.invokeSequential( - locations, method, Long.class, null)).longValue(); - } - - /** - * Determines combinations of eligible src/dst locations for a rename. A - * rename cannot change the namespace. Renames are only allowed if there is an - * eligible dst location in the same namespace as the source. - * - * @param srcLocations List of all potential source destinations where the - * path may be located. On return this list is trimmed to include - * only the paths that have corresponding destinations in the same - * namespace. - * @param dst The destination path - * @return A map of all eligible source namespaces and their corresponding - * replacement value. - * @throws IOException If the dst paths could not be determined. - */ - private RemoteParam getRenameDestinations( - final List srcLocations, final String dst) - throws IOException { - - final List dstLocations = getLocationsForPath(dst, true); - final Map dstMap = new HashMap<>(); - - Iterator iterator = srcLocations.iterator(); - while (iterator.hasNext()) { - RemoteLocation srcLocation = iterator.next(); - RemoteLocation eligibleDst = - getFirstMatchingLocation(srcLocation, dstLocations); - if (eligibleDst != null) { - // Use this dst for this source location - dstMap.put(srcLocation, eligibleDst.getDest()); - } else { - // This src destination is not valid, remove from the source list - iterator.remove(); - } - } - return new RemoteParam(dstMap); - } - - /** - * Get first matching location. - * - * @param location Location we are looking for. - * @param locations List of locations. - * @return The first matchin location in the list. 
- */ - private RemoteLocation getFirstMatchingLocation(RemoteLocation location, - List locations) { - for (RemoteLocation loc : locations) { - if (loc.getNameserviceId().equals(location.getNameserviceId())) { - // Return first matching location - return loc; - } - } - return null; + return clientProto.getPreferredBlockSize(src); } @Deprecated @Override // ClientProtocol public boolean rename(final String src, final String dst) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List srcLocations = - getLocationsForPath(src, true, false); - // srcLocations may be trimmed by getRenameDestinations() - final List locs = new LinkedList<>(srcLocations); - RemoteParam dstParam = getRenameDestinations(locs, dst); - if (locs.isEmpty()) { - throw new IOException( - "Rename of " + src + " to " + dst + " is not allowed," + - " no eligible destination in the same namespace was found."); - } - RemoteMethod method = new RemoteMethod("rename", - new Class[] {String.class, String.class}, - new RemoteParam(), dstParam); - return ((Boolean) rpcClient.invokeSequential( - locs, method, Boolean.class, Boolean.TRUE)).booleanValue(); + return clientProto.rename(src, dst); } @Override // ClientProtocol public void rename2(final String src, final String dst, final Options.Rename... options) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List srcLocations = - getLocationsForPath(src, true, false); - // srcLocations may be trimmed by getRenameDestinations() - final List locs = new LinkedList<>(srcLocations); - RemoteParam dstParam = getRenameDestinations(locs, dst); - if (locs.isEmpty()) { - throw new IOException( - "Rename of " + src + " to " + dst + " is not allowed," + - " no eligible destination in the same namespace was found."); - } - RemoteMethod method = new RemoteMethod("rename2", - new Class[] {String.class, String.class, options.getClass()}, - new RemoteParam(), dstParam, options); - rpcClient.invokeSequential(locs, method, null, null); + clientProto.rename2(src, dst, options); } @Override // ClientProtocol public void concat(String trg, String[] src) throws IOException { - checkOperation(OperationCategory.WRITE); - - // See if the src and target files are all in the same namespace - LocatedBlocks targetBlocks = getBlockLocations(trg, 0, 1); - if (targetBlocks == null) { - throw new IOException("Cannot locate blocks for target file - " + trg); - } - LocatedBlock lastLocatedBlock = targetBlocks.getLastLocatedBlock(); - String targetBlockPoolId = lastLocatedBlock.getBlock().getBlockPoolId(); - for (String source : src) { - LocatedBlocks sourceBlocks = getBlockLocations(source, 0, 1); - if (sourceBlocks == null) { - throw new IOException( - "Cannot located blocks for source file " + source); - } - String sourceBlockPoolId = - sourceBlocks.getLastLocatedBlock().getBlock().getBlockPoolId(); - if (!sourceBlockPoolId.equals(targetBlockPoolId)) { - throw new IOException("Cannot concatenate source file " + source - + " because it is located in a different namespace" - + " with block pool id " + sourceBlockPoolId - + " from the target file with block pool id " - + targetBlockPoolId); - } - } - - // Find locations in the matching namespace. 
- final RemoteLocation targetDestination = - getLocationForPath(trg, true, targetBlockPoolId); - String[] sourceDestinations = new String[src.length]; - for (int i = 0; i < src.length; i++) { - String sourceFile = src[i]; - RemoteLocation location = - getLocationForPath(sourceFile, true, targetBlockPoolId); - sourceDestinations[i] = location.getDest(); - } - // Invoke - RemoteMethod method = new RemoteMethod("concat", - new Class[] {String.class, String[].class}, - targetDestination.getDest(), sourceDestinations); - rpcClient.invokeSingle(targetDestination, method); + clientProto.concat(trg, src); } @Override // ClientProtocol public boolean truncate(String src, long newLength, String clientName) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("truncate", - new Class[] {String.class, long.class, String.class}, - new RemoteParam(), newLength, clientName); - return ((Boolean) rpcClient.invokeSequential(locations, method, - Boolean.class, Boolean.TRUE)).booleanValue(); + return clientProto.truncate(src, newLength, clientName); } @Override // ClientProtocol public boolean delete(String src, boolean recursive) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = - getLocationsForPath(src, true, false); - RemoteMethod method = new RemoteMethod("delete", - new Class[] {String.class, boolean.class}, new RemoteParam(), - recursive); - if (isPathAll(src)) { - return rpcClient.invokeAll(locations, method); - } else { - return rpcClient.invokeSequential(locations, method, - Boolean.class, Boolean.TRUE).booleanValue(); - } + return clientProto.delete(src, recursive); } @Override // ClientProtocol public boolean mkdirs(String src, FsPermission masked, boolean createParent) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("mkdirs", - new Class[] {String.class, FsPermission.class, boolean.class}, - new RemoteParam(), masked, createParent); - - // Create in all locations - if (isPathAll(src)) { - return rpcClient.invokeAll(locations, method); - } - - if (locations.size() > 1) { - // Check if this directory already exists - try { - HdfsFileStatus fileStatus = getFileInfo(src); - if (fileStatus != null) { - // When existing, the NN doesn't return an exception; return true - return true; - } - } catch (IOException ioe) { - // Can't query if this file exists or not. 
- LOG.error("Error requesting file info for path {} while proxing mkdirs", - src, ioe); - } - } - - RemoteLocation firstLocation = locations.get(0); - return ((Boolean) rpcClient.invokeSingle(firstLocation, method)) - .booleanValue(); + return clientProto.mkdirs(src, masked, createParent); } @Override // ClientProtocol public void renewLease(String clientName) throws IOException { - checkOperation(OperationCategory.WRITE); - - RemoteMethod method = new RemoteMethod("renewLease", - new Class[] {String.class}, clientName); - Set nss = namenodeResolver.getNamespaces(); - rpcClient.invokeConcurrent(nss, method, false, false); + clientProto.renewLease(clientName); } @Override // ClientProtocol public DirectoryListing getListing(String src, byte[] startAfter, boolean needLocation) throws IOException { - checkOperation(OperationCategory.READ); - - // Locate the dir and fetch the listing - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("getListing", - new Class[] {String.class, startAfter.getClass(), boolean.class}, - new RemoteParam(), startAfter, needLocation); - Map listings = - rpcClient.invokeConcurrent( - locations, method, false, false, DirectoryListing.class); - - Map nnListing = new TreeMap<>(); - int totalRemainingEntries = 0; - int remainingEntries = 0; - boolean namenodeListingExists = false; - if (listings != null) { - // Check the subcluster listing with the smallest name - String lastName = null; - for (Entry entry : - listings.entrySet()) { - RemoteLocation location = entry.getKey(); - DirectoryListing listing = entry.getValue(); - if (listing == null) { - LOG.debug("Cannot get listing from {}", location); - } else { - totalRemainingEntries += listing.getRemainingEntries(); - HdfsFileStatus[] partialListing = listing.getPartialListing(); - int length = partialListing.length; - if (length > 0) { - HdfsFileStatus lastLocalEntry = partialListing[length-1]; - String lastLocalName = lastLocalEntry.getLocalName(); - if (lastName == null || lastName.compareTo(lastLocalName) > 0) { - lastName = lastLocalName; - } - } - } - } - - // Add existing entries - for (Object value : listings.values()) { - DirectoryListing listing = (DirectoryListing) value; - if (listing != null) { - namenodeListingExists = true; - for (HdfsFileStatus file : listing.getPartialListing()) { - String filename = file.getLocalName(); - if (totalRemainingEntries > 0 && filename.compareTo(lastName) > 0) { - // Discarding entries further than the lastName - remainingEntries++; - } else { - nnListing.put(filename, file); - } - } - remainingEntries += listing.getRemainingEntries(); - } - } - } - - // Add mount points at this level in the tree - final List children = subclusterResolver.getMountPoints(src); - if (children != null) { - // Get the dates for each mount point - Map dates = getMountPointDates(src); - - // Create virtual folder with the mount name - for (String child : children) { - long date = 0; - if (dates != null && dates.containsKey(child)) { - date = dates.get(child); - } - // TODO add number of children - HdfsFileStatus dirStatus = getMountPointStatus(child, 0, date); - - // This may overwrite existing listing entries with the mount point - // TODO don't add if already there? - nnListing.put(child, dirStatus); - } - } - - if (!namenodeListingExists && nnListing.size() == 0) { - // NN returns a null object if the directory cannot be found and has no - // listing. If we didn't retrieve any NN listing data, and there are no - // mount points here, return null. 
- return null; - } - - // Generate combined listing - HdfsFileStatus[] combinedData = new HdfsFileStatus[nnListing.size()]; - combinedData = nnListing.values().toArray(combinedData); - return new DirectoryListing(combinedData, remainingEntries); + return clientProto.getListing(src, startAfter, needLocation); } @Override // ClientProtocol public HdfsFileStatus getFileInfo(String src) throws IOException { - checkOperation(OperationCategory.READ); - - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("getFileInfo", - new Class[] {String.class}, new RemoteParam()); - - HdfsFileStatus ret = null; - // If it's a directory, we check in all locations - if (isPathAll(src)) { - ret = getFileInfoAll(locations, method); - } else { - // Check for file information sequentially - ret = (HdfsFileStatus) rpcClient.invokeSequential( - locations, method, HdfsFileStatus.class, null); - } - - // If there is no real path, check mount points - if (ret == null) { - List children = subclusterResolver.getMountPoints(src); - if (children != null && !children.isEmpty()) { - Map dates = getMountPointDates(src); - long date = 0; - if (dates != null && dates.containsKey(src)) { - date = dates.get(src); - } - ret = getMountPointStatus(src, children.size(), date); - } - } - - return ret; - } - - /** - * Get the file info from all the locations. - * - * @param locations Locations to check. - * @param method The file information method to run. - * @return The first file info if it's a file, the directory if it's - * everywhere. - * @throws IOException If all the locations throw an exception. - */ - private HdfsFileStatus getFileInfoAll(final List locations, - final RemoteMethod method) throws IOException { - - // Get the file info from everybody - Map results = - rpcClient.invokeConcurrent(locations, method, HdfsFileStatus.class); - - // We return the first file - HdfsFileStatus dirStatus = null; - for (RemoteLocation loc : locations) { - HdfsFileStatus fileStatus = results.get(loc); - if (fileStatus != null) { - if (!fileStatus.isDirectory()) { - return fileStatus; - } else if (dirStatus == null) { - dirStatus = fileStatus; - } - } - } - return dirStatus; + return clientProto.getFileInfo(src); } @Override // ClientProtocol public boolean isFileClosed(String src) throws IOException { - checkOperation(OperationCategory.READ); - - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("isFileClosed", - new Class[] {String.class}, new RemoteParam()); - return ((Boolean) rpcClient.invokeSequential( - locations, method, Boolean.class, Boolean.TRUE)).booleanValue(); + return clientProto.isFileClosed(src); } @Override // ClientProtocol public HdfsFileStatus getFileLinkInfo(String src) throws IOException { - checkOperation(OperationCategory.READ); - - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("getFileLinkInfo", - new Class[] {String.class}, new RemoteParam()); - return (HdfsFileStatus) rpcClient.invokeSequential( - locations, method, HdfsFileStatus.class, null); + return clientProto.getFileLinkInfo(src); } - @Override + @Override // ClientProtocol public HdfsLocatedFileStatus getLocatedFileInfo(String src, boolean needBlockToken) throws IOException { - checkOperation(OperationCategory.READ); - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("getLocatedFileInfo", - new Class[] {String.class, boolean.class}, new RemoteParam(), - 
Boolean.valueOf(needBlockToken)); - return (HdfsLocatedFileStatus) rpcClient.invokeSequential( - locations, method, HdfsFileStatus.class, null); + return clientProto.getLocatedFileInfo(src, needBlockToken); } @Override // ClientProtocol public long[] getStats() throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - RemoteMethod method = new RemoteMethod("getStats"); - Set nss = namenodeResolver.getNamespaces(); - Map results = - rpcClient.invokeConcurrent(nss, method, true, false, long[].class); - long[] combinedData = new long[STATS_ARRAY_LENGTH]; - for (long[] data : results.values()) { - for (int i = 0; i < combinedData.length && i < data.length; i++) { - if (data[i] >= 0) { - combinedData[i] += data[i]; - } - } - } - return combinedData; + return clientProto.getStats(); } @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) throws IOException { - checkOperation(OperationCategory.UNCHECKED); - return getDatanodeReport(type, true, 0); + return clientProto.getDatanodeReport(type); } /** @@ -1327,29 +779,7 @@ public HdfsLocatedFileStatus getLocatedFileInfo(String src, @Override // ClientProtocol public DatanodeStorageReport[] getDatanodeStorageReport( DatanodeReportType type) throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - Map dnSubcluster = - getDatanodeStorageReportMap(type); - - // Avoid repeating machines in multiple subclusters - Map datanodesMap = new LinkedHashMap<>(); - for (DatanodeStorageReport[] dns : dnSubcluster.values()) { - for (DatanodeStorageReport dn : dns) { - DatanodeInfo dnInfo = dn.getDatanodeInfo(); - String nodeId = dnInfo.getXferAddr(); - if (!datanodesMap.containsKey(nodeId)) { - datanodesMap.put(nodeId, dn); - } - // TODO merge somehow, right now it just takes the first one - } - } - - Collection datanodes = datanodesMap.values(); - DatanodeStorageReport[] combinedData = - new DatanodeStorageReport[datanodes.size()]; - combinedData = datanodes.toArray(combinedData); - return combinedData; + return clientProto.getDatanodeStorageReport(type); } /** @@ -1382,741 +812,388 @@ public HdfsLocatedFileStatus getLocatedFileInfo(String src, @Override // ClientProtocol public boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException { - checkOperation(OperationCategory.WRITE); - - // Set safe mode in all the name spaces - RemoteMethod method = new RemoteMethod("setSafeMode", - new Class[] {SafeModeAction.class, boolean.class}, - action, isChecked); - Set nss = namenodeResolver.getNamespaces(); - Map results = - rpcClient.invokeConcurrent( - nss, method, true, !isChecked, Boolean.class); - - // We only report true if all the name space are in safe mode - int numSafemode = 0; - for (boolean safemode : results.values()) { - if (safemode) { - numSafemode++; - } - } - return numSafemode == results.size(); + return clientProto.setSafeMode(action, isChecked); } @Override // ClientProtocol public boolean restoreFailedStorage(String arg) throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - RemoteMethod method = new RemoteMethod("restoreFailedStorage", - new Class[] {String.class}, arg); - final Set nss = namenodeResolver.getNamespaces(); - Map ret = - rpcClient.invokeConcurrent(nss, method, true, false, Boolean.class); - - boolean success = true; - for (boolean s : ret.values()) { - if (!s) { - success = false; - break; - } - } - return success; + return clientProto.restoreFailedStorage(arg); } @Override // ClientProtocol public boolean saveNamespace(long 
timeWindow, long txGap) throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - RemoteMethod method = new RemoteMethod("saveNamespace", - new Class[] {Long.class, Long.class}, timeWindow, txGap); - final Set nss = namenodeResolver.getNamespaces(); - Map ret = - rpcClient.invokeConcurrent(nss, method, true, false, boolean.class); - - boolean success = true; - for (boolean s : ret.values()) { - if (!s) { - success = false; - break; - } - } - return success; + return clientProto.saveNamespace(timeWindow, txGap); } @Override // ClientProtocol public long rollEdits() throws IOException { - checkOperation(OperationCategory.WRITE); - - RemoteMethod method = new RemoteMethod("rollEdits", new Class[] {}); - final Set nss = namenodeResolver.getNamespaces(); - Map ret = - rpcClient.invokeConcurrent(nss, method, true, false, long.class); - - // Return the maximum txid - long txid = 0; - for (long t : ret.values()) { - if (t > txid) { - txid = t; - } - } - return txid; + return clientProto.rollEdits(); } @Override // ClientProtocol public void refreshNodes() throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - RemoteMethod method = new RemoteMethod("refreshNodes", new Class[] {}); - final Set nss = namenodeResolver.getNamespaces(); - rpcClient.invokeConcurrent(nss, method, true, true); + clientProto.refreshNodes(); } @Override // ClientProtocol public void finalizeUpgrade() throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - RemoteMethod method = new RemoteMethod("finalizeUpgrade", - new Class[] {}); - final Set nss = namenodeResolver.getNamespaces(); - rpcClient.invokeConcurrent(nss, method, true, false); + clientProto.finalizeUpgrade(); } @Override // ClientProtocol public boolean upgradeStatus() throws IOException { - String methodName = getMethodName(); - throw new UnsupportedOperationException( - "Operation \"" + methodName + "\" is not supported"); + return clientProto.upgradeStatus(); } @Override // ClientProtocol public RollingUpgradeInfo rollingUpgrade(RollingUpgradeAction action) throws IOException { - checkOperation(OperationCategory.READ); - - RemoteMethod method = new RemoteMethod("rollingUpgrade", - new Class[] {RollingUpgradeAction.class}, action); - final Set nss = namenodeResolver.getNamespaces(); - Map ret = - rpcClient.invokeConcurrent( - nss, method, true, false, RollingUpgradeInfo.class); - - // Return the first rolling upgrade info - RollingUpgradeInfo info = null; - for (RollingUpgradeInfo infoNs : ret.values()) { - if (info == null && infoNs != null) { - info = infoNs; - } - } - return info; + return clientProto.rollingUpgrade(action); } @Override // ClientProtocol public void metaSave(String filename) throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - RemoteMethod method = new RemoteMethod("metaSave", - new Class[] {String.class}, filename); - final Set nss = namenodeResolver.getNamespaces(); - rpcClient.invokeConcurrent(nss, method, true, false); + clientProto.metaSave(filename); } @Override // ClientProtocol public CorruptFileBlocks listCorruptFileBlocks(String path, String cookie) throws IOException { - checkOperation(OperationCategory.READ); - - final List locations = getLocationsForPath(path, false); - RemoteMethod method = new RemoteMethod("listCorruptFileBlocks", - new Class[] {String.class, String.class}, - new RemoteParam(), cookie); - return (CorruptFileBlocks) rpcClient.invokeSequential( - locations, method, CorruptFileBlocks.class, null); + return clientProto.listCorruptFileBlocks(path, 
cookie); } @Override // ClientProtocol public void setBalancerBandwidth(long bandwidth) throws IOException { - checkOperation(OperationCategory.UNCHECKED); - - RemoteMethod method = new RemoteMethod("setBalancerBandwidth", - new Class[] {Long.class}, bandwidth); - final Set nss = namenodeResolver.getNamespaces(); - rpcClient.invokeConcurrent(nss, method, true, false); + clientProto.setBalancerBandwidth(bandwidth); } @Override // ClientProtocol public ContentSummary getContentSummary(String path) throws IOException { - checkOperation(OperationCategory.READ); - - // Get the summaries from regular files - Collection summaries = new LinkedList<>(); - FileNotFoundException notFoundException = null; - try { - final List locations = getLocationsForPath(path, false); - RemoteMethod method = new RemoteMethod("getContentSummary", - new Class[] {String.class}, new RemoteParam()); - Map results = - rpcClient.invokeConcurrent( - locations, method, false, false, ContentSummary.class); - summaries.addAll(results.values()); - } catch (FileNotFoundException e) { - notFoundException = e; - } - - // Add mount points at this level in the tree - final List children = subclusterResolver.getMountPoints(path); - if (children != null) { - for (String child : children) { - Path childPath = new Path(path, child); - try { - ContentSummary mountSummary = getContentSummary(childPath.toString()); - if (mountSummary != null) { - summaries.add(mountSummary); - } - } catch (Exception e) { - LOG.error("Cannot get content summary for mount {}: {}", - childPath, e.getMessage()); - } - } - } - - // Throw original exception if no original nor mount points - if (summaries.isEmpty() && notFoundException != null) { - throw notFoundException; - } - - return aggregateContentSummary(summaries); - } - - /** - * Aggregate content summaries for each subcluster. - * - * @param summaries Collection of individual summaries. - * @return Aggregated content summary. 
- */ - private ContentSummary aggregateContentSummary( - Collection summaries) { - if (summaries.size() == 1) { - return summaries.iterator().next(); - } - - long length = 0; - long fileCount = 0; - long directoryCount = 0; - long quota = 0; - long spaceConsumed = 0; - long spaceQuota = 0; - - for (ContentSummary summary : summaries) { - length += summary.getLength(); - fileCount += summary.getFileCount(); - directoryCount += summary.getDirectoryCount(); - quota += summary.getQuota(); - spaceConsumed += summary.getSpaceConsumed(); - spaceQuota += summary.getSpaceQuota(); - } - - ContentSummary ret = new ContentSummary.Builder() - .length(length) - .fileCount(fileCount) - .directoryCount(directoryCount) - .quota(quota) - .spaceConsumed(spaceConsumed) - .spaceQuota(spaceQuota) - .build(); - return ret; + return clientProto.getContentSummary(path); } @Override // ClientProtocol public void fsync(String src, long fileId, String clientName, long lastBlockLength) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("fsync", - new Class[] {String.class, long.class, String.class, long.class }, - new RemoteParam(), fileId, clientName, lastBlockLength); - rpcClient.invokeSequential(locations, method); + clientProto.fsync(src, fileId, clientName, lastBlockLength); } @Override // ClientProtocol public void setTimes(String src, long mtime, long atime) throws IOException { - checkOperation(OperationCategory.WRITE); - - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("setTimes", - new Class[] {String.class, long.class, long.class}, - new RemoteParam(), mtime, atime); - rpcClient.invokeSequential(locations, method); + clientProto.setTimes(src, mtime, atime); } @Override // ClientProtocol public void createSymlink(String target, String link, FsPermission dirPerms, boolean createParent) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO Verify that the link location is in the same NS as the targets - final List targetLocations = - getLocationsForPath(target, true); - final List linkLocations = - getLocationsForPath(link, true); - RemoteLocation linkLocation = linkLocations.get(0); - RemoteMethod method = new RemoteMethod("createSymlink", - new Class[] {String.class, String.class, FsPermission.class, - boolean.class}, - new RemoteParam(), linkLocation.getDest(), dirPerms, createParent); - rpcClient.invokeSequential(targetLocations, method); + clientProto.createSymlink(target, link, dirPerms, createParent); } @Override // ClientProtocol public String getLinkTarget(String path) throws IOException { - checkOperation(OperationCategory.READ); - - final List locations = getLocationsForPath(path, true); - RemoteMethod method = new RemoteMethod("getLinkTarget", - new Class[] {String.class}, new RemoteParam()); - return (String) rpcClient.invokeSequential( - locations, method, String.class, null); + return clientProto.getLinkTarget(path); } @Override // Client Protocol public void allowSnapshot(String snapshotRoot) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.allowSnapshot(snapshotRoot); } @Override // Client Protocol public void disallowSnapshot(String snapshot) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.disallowSnapshot(snapshot); } @Override // ClientProtocol public void renameSnapshot(String snapshotRoot, String snapshotOldName, String snapshotNewName) 
throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.renameSnapshot(snapshotRoot, snapshotOldName, snapshotNewName); } @Override // Client Protocol public SnapshottableDirectoryStatus[] getSnapshottableDirListing() throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.getSnapshottableDirListing(); } @Override // ClientProtocol public SnapshotDiffReport getSnapshotDiffReport(String snapshotRoot, String earlierSnapshotName, String laterSnapshotName) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.getSnapshotDiffReport( + snapshotRoot, earlierSnapshotName, laterSnapshotName); } @Override // ClientProtocol public SnapshotDiffReportListing getSnapshotDiffReportListing( String snapshotRoot, String earlierSnapshotName, String laterSnapshotName, byte[] startPath, int index) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.getSnapshotDiffReportListing(snapshotRoot, + earlierSnapshotName, laterSnapshotName, startPath, index); } @Override // ClientProtocol public long addCacheDirective(CacheDirectiveInfo path, EnumSet flags) throws IOException { - checkOperation(OperationCategory.WRITE, false); - return 0; + return clientProto.addCacheDirective(path, flags); } @Override // ClientProtocol public void modifyCacheDirective(CacheDirectiveInfo directive, EnumSet flags) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.modifyCacheDirective(directive, flags); } @Override // ClientProtocol public void removeCacheDirective(long id) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.removeCacheDirective(id); } @Override // ClientProtocol public BatchedEntries listCacheDirectives( long prevId, CacheDirectiveInfo filter) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.listCacheDirectives(prevId, filter); } @Override // ClientProtocol public void addCachePool(CachePoolInfo info) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.addCachePool(info); } @Override // ClientProtocol public void modifyCachePool(CachePoolInfo info) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.modifyCachePool(info); } @Override // ClientProtocol public void removeCachePool(String cachePoolName) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.removeCachePool(cachePoolName); } @Override // ClientProtocol public BatchedEntries listCachePools(String prevKey) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.listCachePools(prevKey); } @Override // ClientProtocol public void modifyAclEntries(String src, List aclSpec) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("modifyAclEntries", - new Class[] {String.class, List.class}, - new RemoteParam(), aclSpec); - rpcClient.invokeSequential(locations, method, null, null); + clientProto.modifyAclEntries(src, aclSpec); } @Override // ClienProtocol public void removeAclEntries(String src, List aclSpec) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - 
RemoteMethod method = new RemoteMethod("removeAclEntries", - new Class[] {String.class, List.class}, - new RemoteParam(), aclSpec); - rpcClient.invokeSequential(locations, method, null, null); + clientProto.removeAclEntries(src, aclSpec); } @Override // ClientProtocol public void removeDefaultAcl(String src) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("removeDefaultAcl", - new Class[] {String.class}, new RemoteParam()); - rpcClient.invokeSequential(locations, method); + clientProto.removeDefaultAcl(src); } @Override // ClientProtocol public void removeAcl(String src) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("removeAcl", - new Class[] {String.class}, new RemoteParam()); - rpcClient.invokeSequential(locations, method); + clientProto.removeAcl(src); } @Override // ClientProtocol public void setAcl(String src, List aclSpec) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod( - "setAcl", new Class[] {String.class, List.class}, - new RemoteParam(), aclSpec); - rpcClient.invokeSequential(locations, method); + clientProto.setAcl(src, aclSpec); } @Override // ClientProtocol public AclStatus getAclStatus(String src) throws IOException { - checkOperation(OperationCategory.READ); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("getAclStatus", - new Class[] {String.class}, new RemoteParam()); - return (AclStatus) rpcClient.invokeSequential( - locations, method, AclStatus.class, null); + return clientProto.getAclStatus(src); } @Override // ClientProtocol public void createEncryptionZone(String src, String keyName) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("createEncryptionZone", - new Class[] {String.class, String.class}, - new RemoteParam(), keyName); - rpcClient.invokeSequential(locations, method); + clientProto.createEncryptionZone(src, keyName); } @Override // ClientProtocol public EncryptionZone getEZForPath(String src) throws IOException { - checkOperation(OperationCategory.READ); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("getEZForPath", - new Class[] {String.class}, new RemoteParam()); - return (EncryptionZone) rpcClient.invokeSequential( - locations, method, EncryptionZone.class, null); + return clientProto.getEZForPath(src); } @Override // ClientProtocol public BatchedEntries listEncryptionZones(long prevId) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.listEncryptionZones(prevId); } @Override // ClientProtocol public void reencryptEncryptionZone(String zone, ReencryptAction action) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.reencryptEncryptionZone(zone, action); } @Override // ClientProtocol public BatchedEntries listReencryptionStatus( long prevId) throws IOException { - 
checkOperation(OperationCategory.READ, false); - return null; + return clientProto.listReencryptionStatus(prevId); } @Override // ClientProtocol public void setXAttr(String src, XAttr xAttr, EnumSet flag) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("setXAttr", - new Class[] {String.class, XAttr.class, EnumSet.class}, - new RemoteParam(), xAttr, flag); - rpcClient.invokeSequential(locations, method); + clientProto.setXAttr(src, xAttr, flag); } - @SuppressWarnings("unchecked") @Override // ClientProtocol public List getXAttrs(String src, List xAttrs) throws IOException { - checkOperation(OperationCategory.READ); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("getXAttrs", - new Class[] {String.class, List.class}, new RemoteParam(), xAttrs); - return (List) rpcClient.invokeSequential( - locations, method, List.class, null); + return clientProto.getXAttrs(src, xAttrs); } - @SuppressWarnings("unchecked") @Override // ClientProtocol public List listXAttrs(String src) throws IOException { - checkOperation(OperationCategory.READ); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, false); - RemoteMethod method = new RemoteMethod("listXAttrs", - new Class[] {String.class}, new RemoteParam()); - return (List) rpcClient.invokeSequential( - locations, method, List.class, null); + return clientProto.listXAttrs(src); } @Override // ClientProtocol public void removeXAttr(String src, XAttr xAttr) throws IOException { - checkOperation(OperationCategory.WRITE); - - // TODO handle virtual directories - final List locations = getLocationsForPath(src, true); - RemoteMethod method = new RemoteMethod("removeXAttr", - new Class[] {String.class, XAttr.class}, new RemoteParam(), xAttr); - rpcClient.invokeSequential(locations, method); + clientProto.removeXAttr(src, xAttr); } @Override // ClientProtocol public void checkAccess(String path, FsAction mode) throws IOException { - checkOperation(OperationCategory.READ); - - // TODO handle virtual directories - final List locations = getLocationsForPath(path, true); - RemoteMethod method = new RemoteMethod("checkAccess", - new Class[] {String.class, FsAction.class}, - new RemoteParam(), mode); - rpcClient.invokeSequential(locations, method); + clientProto.checkAccess(path, mode); } @Override // ClientProtocol public long getCurrentEditLogTxid() throws IOException { - checkOperation(OperationCategory.READ); - - RemoteMethod method = new RemoteMethod( - "getCurrentEditLogTxid", new Class[] {}); - final Set nss = namenodeResolver.getNamespaces(); - Map ret = - rpcClient.invokeConcurrent(nss, method, true, false, long.class); - - // Return the maximum txid - long txid = 0; - for (long t : ret.values()) { - if (t > txid) { - txid = t; - } - } - return txid; + return clientProto.getCurrentEditLogTxid(); } @Override // ClientProtocol public EventBatchList getEditsFromTxid(long txid) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.getEditsFromTxid(txid); } - @Override + @Override // ClientProtocol public DataEncryptionKey getDataEncryptionKey() throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.getDataEncryptionKey(); } - @Override + @Override // ClientProtocol public String 
createSnapshot(String snapshotRoot, String snapshotName) throws IOException { - checkOperation(OperationCategory.WRITE); - return null; + return clientProto.createSnapshot(snapshotRoot, snapshotName); } - @Override + @Override // ClientProtocol public void deleteSnapshot(String snapshotRoot, String snapshotName) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.deleteSnapshot(snapshotRoot, snapshotName); } @Override // ClientProtocol public void setQuota(String path, long namespaceQuota, long storagespaceQuota, StorageType type) throws IOException { - this.quotaCall.setQuota(path, namespaceQuota, storagespaceQuota, type); + clientProto.setQuota(path, namespaceQuota, storagespaceQuota, type); } @Override // ClientProtocol public QuotaUsage getQuotaUsage(String path) throws IOException { - checkOperation(OperationCategory.READ); - return this.quotaCall.getQuotaUsage(path); + return clientProto.getQuotaUsage(path); } - @Override + @Override // ClientProtocol public void reportBadBlocks(LocatedBlock[] blocks) throws IOException { - checkOperation(OperationCategory.WRITE); - - // Block pool id -> blocks - Map> blockLocations = new HashMap<>(); - for (LocatedBlock block : blocks) { - String bpId = block.getBlock().getBlockPoolId(); - List bpBlocks = blockLocations.get(bpId); - if (bpBlocks == null) { - bpBlocks = new LinkedList<>(); - blockLocations.put(bpId, bpBlocks); - } - bpBlocks.add(block); - } - - // Invoke each block pool - for (Entry> entry : blockLocations.entrySet()) { - String bpId = entry.getKey(); - List bpBlocks = entry.getValue(); - - LocatedBlock[] bpBlocksArray = - bpBlocks.toArray(new LocatedBlock[bpBlocks.size()]); - RemoteMethod method = new RemoteMethod("reportBadBlocks", - new Class[] {LocatedBlock[].class}, - new Object[] {bpBlocksArray}); - rpcClient.invokeSingleBlockPool(bpId, method); - } + clientProto.reportBadBlocks(blocks); } - @Override + @Override // ClientProtocol public void unsetStoragePolicy(String src) throws IOException { - checkOperation(OperationCategory.WRITE, false); + clientProto.unsetStoragePolicy(src); } - @Override + @Override // ClientProtocol public BlockStoragePolicy getStoragePolicy(String path) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.getStoragePolicy(path); } @Override // ClientProtocol public ErasureCodingPolicyInfo[] getErasureCodingPolicies() throws IOException { - return erasureCoding.getErasureCodingPolicies(); + return clientProto.getErasureCodingPolicies(); } @Override // ClientProtocol public Map getErasureCodingCodecs() throws IOException { - return erasureCoding.getErasureCodingCodecs(); + return clientProto.getErasureCodingCodecs(); } @Override // ClientProtocol public AddErasureCodingPolicyResponse[] addErasureCodingPolicies( ErasureCodingPolicy[] policies) throws IOException { - return erasureCoding.addErasureCodingPolicies(policies); + return clientProto.addErasureCodingPolicies(policies); } @Override // ClientProtocol public void removeErasureCodingPolicy(String ecPolicyName) throws IOException { - erasureCoding.removeErasureCodingPolicy(ecPolicyName); + clientProto.removeErasureCodingPolicy(ecPolicyName); } @Override // ClientProtocol public void disableErasureCodingPolicy(String ecPolicyName) throws IOException { - erasureCoding.disableErasureCodingPolicy(ecPolicyName); + clientProto.disableErasureCodingPolicy(ecPolicyName); } @Override // ClientProtocol public void enableErasureCodingPolicy(String ecPolicyName) throws 
IOException { - erasureCoding.enableErasureCodingPolicy(ecPolicyName); + clientProto.enableErasureCodingPolicy(ecPolicyName); } @Override // ClientProtocol public ErasureCodingPolicy getErasureCodingPolicy(String src) throws IOException { - return erasureCoding.getErasureCodingPolicy(src); + return clientProto.getErasureCodingPolicy(src); } @Override // ClientProtocol public void setErasureCodingPolicy(String src, String ecPolicyName) throws IOException { - erasureCoding.setErasureCodingPolicy(src, ecPolicyName); + clientProto.setErasureCodingPolicy(src, ecPolicyName); } @Override // ClientProtocol public void unsetErasureCodingPolicy(String src) throws IOException { - erasureCoding.unsetErasureCodingPolicy(src); + clientProto.unsetErasureCodingPolicy(src); } - @Override + @Override // ClientProtocol public ECBlockGroupStats getECBlockGroupStats() throws IOException { - return erasureCoding.getECBlockGroupStats(); + return clientProto.getECBlockGroupStats(); } - @Override + @Override // ClientProtocol public ReplicatedBlockStats getReplicatedBlockStats() throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.getReplicatedBlockStats(); } @Deprecated - @Override + @Override // ClientProtocol public BatchedEntries listOpenFiles(long prevId) throws IOException { - return listOpenFiles(prevId, EnumSet.of(OpenFilesType.ALL_OPEN_FILES), - OpenFilesIterator.FILTER_PATH_DEFAULT); + return clientProto.listOpenFiles(prevId); } - @Override + @Override // ClientProtocol public BatchedEntries listOpenFiles(long prevId, EnumSet openFilesTypes, String path) throws IOException { - checkOperation(OperationCategory.READ, false); - return null; + return clientProto.listOpenFiles(prevId, openFilesTypes, path); + } + + @Override // ClientProtocol + public void satisfyStoragePolicy(String path) throws IOException { + clientProto.satisfyStoragePolicy(path); } @Override // NamenodeProtocol @@ -2190,6 +1267,11 @@ public boolean isRollingUpgrade() throws IOException { return nnProto.isRollingUpgrade(); } + @Override // NamenodeProtocol + public Long getNextSPSPath() throws IOException { + return nnProto.getNextSPSPath(); + } + /** * Locate the location with the matching block pool id. * @@ -2199,7 +1281,7 @@ public boolean isRollingUpgrade() throws IOException { * @return Prioritized list of locations in the federated cluster. * @throws IOException if the location for this path cannot be determined. */ - private RemoteLocation getLocationForPath( + protected RemoteLocation getLocationForPath( String path, boolean failIfLocked, String blockPoolId) throws IOException { @@ -2257,7 +1339,7 @@ private RemoteLocation getLocationForPath( this.subclusterResolver.getDestinationForPath(path); if (location == null) { throw new IOException("Cannot find locations for " + path + " in " + - this.subclusterResolver); + this.subclusterResolver.getClass().getSimpleName()); } // We may block some write operations @@ -2298,27 +1380,6 @@ private RemoteLocation getLocationForPath( } } - /** - * Check if a path should be in all subclusters. - * - * @param path Path to check. - * @return If a path should be in all subclusters. 
-   */
-  private boolean isPathAll(final String path) {
-    if (subclusterResolver instanceof MountTableResolver) {
-      try {
-        MountTableResolver mountTable = (MountTableResolver)subclusterResolver;
-        MountTable entry = mountTable.getMountPoint(path);
-        if (entry != null) {
-          return entry.isAll();
-        }
-      } catch (IOException e) {
-        LOG.error("Cannot get mount point", e);
-      }
-    }
-    return false;
-  }
-
   /**
    * Check if a path is in a read only mount point.
    *
@@ -2340,121 +1401,6 @@ private boolean isPathReadOnly(final String path) {
     return false;
   }
 
-  /**
-   * Get the modification dates for mount points.
-   *
-   * @param path Name of the path to start checking dates from.
-   * @return Map with the modification dates for all sub-entries.
-   */
-  private Map getMountPointDates(String path) {
-    Map ret = new TreeMap<>();
-    if (subclusterResolver instanceof MountTableResolver) {
-      try {
-        final List children = subclusterResolver.getMountPoints(path);
-        for (String child : children) {
-          Long modTime = getModifiedTime(ret, path, child);
-          ret.put(child, modTime);
-        }
-      } catch (IOException e) {
-        LOG.error("Cannot get mount point", e);
-      }
-    }
-    return ret;
-  }
-
-  /**
-   * Get modified time for child. If the child is present in mount table it
-   * will return the modified time. If the child is not present but subdirs of
-   * this child are present then it will return latest modified subdir's time
-   * as modified time of the requested child.
-   * @param ret contains children and modified times.
-   * @param mountTable.
-   * @param path Name of the path to start checking dates from.
-   * @param child child of the requested path.
-   * @return modified time.
-   */
-  private long getModifiedTime(Map ret, String path,
-      String child) {
-    MountTableResolver mountTable = (MountTableResolver)subclusterResolver;
-    String srcPath;
-    if (path.equals(Path.SEPARATOR)) {
-      srcPath = Path.SEPARATOR + child;
-    } else {
-      srcPath = path + Path.SEPARATOR + child;
-    }
-    Long modTime = 0L;
-    try {
-      // Get mount table entry for the srcPath
-      MountTable entry = mountTable.getMountPoint(srcPath);
-      // if srcPath is not in mount table but its subdirs are in mount
-      // table we will display latest modified subdir date/time.
-      if (entry == null) {
-        List entries = mountTable.getMounts(srcPath);
-        for (MountTable eachEntry : entries) {
-          // Get the latest date
-          if (ret.get(child) == null ||
-              ret.get(child) < eachEntry.getDateModified()) {
-            modTime = eachEntry.getDateModified();
-          }
-        }
-      } else {
-        modTime = entry.getDateModified();
-      }
-    } catch (IOException e) {
-      LOG.error("Cannot get mount point", e);
-    }
-    return modTime;
-  }
-
-  /**
-   * Create a new file status for a mount point.
-   *
-   * @param name Name of the mount point.
-   * @param childrenNum Number of children.
-   * @param date Map with the dates.
-   * @return New HDFS file status representing a mount point.
- */ - private HdfsFileStatus getMountPointStatus( - String name, int childrenNum, long date) { - long modTime = date; - long accessTime = date; - FsPermission permission = FsPermission.getDirDefault(); - String owner = this.superUser; - String group = this.superGroup; - try { - // TODO support users, it should be the user for the pointed folder - UserGroupInformation ugi = getRemoteUser(); - owner = ugi.getUserName(); - group = ugi.getPrimaryGroupName(); - } catch (IOException e) { - LOG.error("Cannot get the remote user: {}", e.getMessage()); - } - long inodeId = 0; - return new HdfsFileStatus.Builder() - .isdir(true) - .mtime(modTime) - .atime(accessTime) - .perm(permission) - .owner(owner) - .group(group) - .symlink(new byte[0]) - .path(DFSUtil.string2Bytes(name)) - .fileId(inodeId) - .children(childrenNum) - .build(); - } - - /** - * Get the name of the method that is calling this function. - * - * @return Name of the method calling this function. - */ - private static String getMethodName() { - final StackTraceElement[] stack = Thread.currentThread().getStackTrace(); - String methodName = stack[3].getMethodName(); - return methodName; - } - /** * Get the user that is invoking this operation. * diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafemodeService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafemodeService.java index 5dfb356ad5c..877e1d4927f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafemodeService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafemodeService.java @@ -42,6 +42,23 @@ /** Router to manage safe mode. */ private final Router router; + /** + * If we are in safe mode, fail requests as if a standby NN. + * Router can enter safe mode in two different ways: + * 1. upon start up: router enters this mode after service start, and will + * exit after certain time threshold; + * 2. via admin command: router enters this mode via admin command: + * dfsrouteradmin -safemode enter + * and exit after admin command: + * dfsrouteradmin -safemode leave + */ + + /** Whether Router is in safe mode */ + private volatile boolean safeMode; + + /** Whether the Router safe mode is set manually (i.e., via Router admin) */ + private volatile boolean isSafeModeSetManually; + /** Interval in ms to wait post startup before allowing RPC requests. */ private long startupInterval; /** Interval in ms after which the State Store cache is too stale. */ @@ -63,14 +80,29 @@ public RouterSafemodeService(Router router) { this.router = router; } + /** + * Return whether the current Router is in safe mode. + */ + boolean isInSafeMode() { + return this.safeMode; + } + + /** + * Set the flag to indicate that the safe mode for this Router is set manually + * via the Router admin command. + */ + void setManualSafeMode(boolean mode) { + this.safeMode = mode; + this.isSafeModeSetManually = mode; + } + /** * Enter safe mode. 
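The flags introduced above move the safe mode state into RouterSafemodeService itself: a volatile safeMode flag plus a manual-override flag set through the Router admin, exposed via isInSafeMode() and setManualSafeMode(). The RPC layer is expected to consult the service and fail requests as if it were a standby NameNode. A minimal sketch of such a check, assuming access to the Router's safemode service and the StandbyException that HA clients already handle; this is illustrative only, not the exact RouterRpcServer code:

    import org.apache.hadoop.ipc.StandbyException;

    // Illustrative helper: reject a client call while the Router is in safe mode,
    // so callers fail over and retry as they would against a standby NameNode.
    static void checkSafeMode(RouterSafemodeService safemodeService, String operation)
        throws StandbyException {
      if (safemodeService != null && safemodeService.isInSafeMode()) {
        throw new StandbyException(
            "Router is in safe mode and cannot handle " + operation + " requests");
      }
    }

The safemode tests later in this patch rely on this behavior: while isInSafeMode() reports true, client RPC calls are expected to fail with a safe mode error.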
*/ private void enter() { LOG.info("Entering safe mode"); enterSafeModeTime = now(); - RouterRpcServer rpcServer = router.getRpcServer(); - rpcServer.setSafeMode(true); + safeMode = true; router.updateRouterState(RouterServiceState.SAFEMODE); } @@ -87,8 +119,7 @@ private void leave() { } else { routerMetrics.setSafeModeTime(timeInSafemode); } - RouterRpcServer rpcServer = router.getRpcServer(); - rpcServer.setSafeMode(false); + safeMode = false; router.updateRouterState(RouterServiceState.RUNNING); } @@ -131,17 +162,16 @@ public void periodicInvoke() { this.startupInterval - delta); return; } - RouterRpcServer rpcServer = router.getRpcServer(); StateStoreService stateStore = router.getStateStore(); long cacheUpdateTime = stateStore.getCacheUpdateTime(); boolean isCacheStale = (now - cacheUpdateTime) > this.staleInterval; // Always update to indicate our cache was updated if (isCacheStale) { - if (!rpcServer.isInSafeMode()) { + if (!safeMode) { enter(); } - } else if (rpcServer.isInSafeMode()) { + } else if (safeMode && !isSafeModeSetManually) { // Cache recently updated, leave safe mode leave(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java index 53a8b82e232..92aa5843e3d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/RecordStore.java @@ -19,8 +19,8 @@ import java.lang.reflect.Constructor; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver; @@ -36,7 +36,7 @@ @InterfaceStability.Evolving public abstract class RecordStore { - private static final Log LOG = LogFactory.getLog(RecordStore.class); + private static final Logger LOG = LoggerFactory.getLogger(RecordStore.class); /** Class of the record stored in this State Store. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java index 49cdf10364c..0e2e86864ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MountTable.java @@ -153,9 +153,9 @@ public static MountTable newInstance(final String src, // Set quota for mount table RouterQuotaUsage quota = new RouterQuotaUsage.Builder() .fileAndDirectoryCount(RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT) - .quota(HdfsConstants.QUOTA_DONT_SET) + .quota(HdfsConstants.QUOTA_RESET) .spaceConsumed(RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT) - .spaceQuota(HdfsConstants.QUOTA_DONT_SET).build(); + .spaceQuota(HdfsConstants.QUOTA_RESET).build(); record.setQuota(quota); // Validate diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java index e62d0a83168..4c7622c0990 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java @@ -257,9 +257,9 @@ public void setMode(FsPermission mode) { public RouterQuotaUsage getQuota() { MountTableRecordProtoOrBuilder proto = this.translator.getProtoOrBuilder(); - long nsQuota = HdfsConstants.QUOTA_DONT_SET; + long nsQuota = HdfsConstants.QUOTA_RESET; long nsCount = RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT; - long ssQuota = HdfsConstants.QUOTA_DONT_SET; + long ssQuota = HdfsConstants.QUOTA_RESET; long ssCount = RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT; if (proto.hasQuota()) { QuotaUsageProto quotaProto = proto.getQuota(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java index b0a2062a6e6..0a681e9623a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java @@ -94,25 +94,95 @@ public RouterAdmin(Configuration conf) { * Print the usage message. 
*/ public void printUsage() { - String usage = "Federation Admin Tools:\n" - + "\t[-add " - + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " - + "-owner -group -mode ]\n" - + "\t[-update " - + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " - + "-owner -group -mode ]\n" - + "\t[-rm ]\n" - + "\t[-ls ]\n" - + "\t[-setQuota -nsQuota -ssQuota " - + "]\n" - + "\t[-clrQuota ]\n" - + "\t[-safemode enter | leave | get]\n" - + "\t[-nameservice enable | disable ]\n" - + "\t[-getDisabledNameservices]\n"; + String usage = getUsage(null); + System.out.println(usage); + } + private void printUsage(String cmd) { + String usage = getUsage(cmd); System.out.println(usage); } + private String getUsage(String cmd) { + if (cmd == null) { + String[] commands = + {"-add", "-update", "-rm", "-ls", "-setQuota", "-clrQuota", + "-safemode", "-nameservice", "-getDisabledNameservices"}; + StringBuilder usage = new StringBuilder(); + usage.append("Usage: hdfs routeradmin :\n"); + for (int i = 0; i < commands.length; i++) { + usage.append(getUsage(commands[i])); + if (i + 1 < commands.length) { + usage.append("\n"); + } + } + return usage.toString(); + } + if (cmd.equals("-add")) { + return "\t[-add " + + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " + + "-owner -group -mode ]"; + } else if (cmd.equals("-update")) { + return "\t[-update " + + " " + + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " + + "-owner -group -mode ]"; + } else if (cmd.equals("-rm")) { + return "\t[-rm ]"; + } else if (cmd.equals("-ls")) { + return "\t[-ls ]"; + } else if (cmd.equals("-setQuota")) { + return "\t[-setQuota -nsQuota -ssQuota " + + "]"; + } else if (cmd.equals("-clrQuota")) { + return "\t[-clrQuota ]"; + } else if (cmd.equals("-safemode")) { + return "\t[-safemode enter | leave | get]"; + } else if (cmd.equals("-nameservice")) { + return "\t[-nameservice enable | disable ]"; + } else if (cmd.equals("-getDisabledNameservices")) { + return "\t[-getDisabledNameservices]"; + } + return getUsage(null); + } + + /** + * Usage: validates the maximum number of arguments for a command. + * @param arg List of of command line parameters. 
+ */ + private void validateMax(String[] arg) { + if (arg[0].equals("-rm")) { + if (arg.length > 2) { + throw new IllegalArgumentException( + "Too many arguments, Max=1 argument allowed"); + } + } else if (arg[0].equals("-ls")) { + if (arg.length > 2) { + throw new IllegalArgumentException( + "Too many arguments, Max=1 argument allowed"); + } + } else if (arg[0].equals("-clrQuota")) { + if (arg.length > 2) { + throw new IllegalArgumentException( + "Too many arguments, Max=1 argument allowed"); + } + } else if (arg[0].equals("-safemode")) { + if (arg.length > 2) { + throw new IllegalArgumentException( + "Too many arguments, Max=1 argument allowed only"); + } + } else if (arg[0].equals("-nameservice")) { + if (arg.length > 3) { + throw new IllegalArgumentException( + "Too many arguments, Max=2 arguments allowed"); + } + } else if (arg[0].equals("-getDisabledNameservices")) { + if (arg.length > 1) { + throw new IllegalArgumentException("No arguments allowed"); + } + } + } + @Override public int run(String[] argv) throws Exception { if (argv.length < 1) { @@ -129,43 +199,43 @@ public int run(String[] argv) throws Exception { if ("-add".equals(cmd)) { if (argv.length < 4) { System.err.println("Not enough parameters specified for cmd " + cmd); - printUsage(); + printUsage(cmd); return exitCode; } } else if ("-update".equals(cmd)) { if (argv.length < 4) { System.err.println("Not enough parameters specified for cmd " + cmd); - printUsage(); + printUsage(cmd); return exitCode; } - } else if ("-rm".equalsIgnoreCase(cmd)) { + } else if ("-rm".equals(cmd)) { if (argv.length < 2) { System.err.println("Not enough parameters specified for cmd " + cmd); - printUsage(); + printUsage(cmd); return exitCode; } - } else if ("-setQuota".equalsIgnoreCase(cmd)) { + } else if ("-setQuota".equals(cmd)) { if (argv.length < 4) { System.err.println("Not enough parameters specified for cmd " + cmd); - printUsage(); + printUsage(cmd); return exitCode; } - } else if ("-clrQuota".equalsIgnoreCase(cmd)) { + } else if ("-clrQuota".equals(cmd)) { if (argv.length < 2) { System.err.println("Not enough parameters specified for cmd " + cmd); - printUsage(); + printUsage(cmd); return exitCode; } - } else if ("-safemode".equalsIgnoreCase(cmd)) { + } else if ("-safemode".equals(cmd)) { if (argv.length < 2) { System.err.println("Not enough parameters specified for cmd " + cmd); - printUsage(); + printUsage(cmd); return exitCode; } - } else if ("-nameservice".equalsIgnoreCase(cmd)) { + } else if ("-nameservice".equals(cmd)) { if (argv.length < 3) { System.err.println("Not enough parameters specificed for cmd " + cmd); - printUsage(); + printUsage(cmd); return exitCode; } } @@ -189,13 +259,18 @@ public int run(String[] argv) throws Exception { Exception debugException = null; exitCode = 0; try { + validateMax(argv); if ("-add".equals(cmd)) { if (addMount(argv, i)) { System.out.println("Successfully added mount point " + argv[i]); + } else { + exitCode = -1; } } else if ("-update".equals(cmd)) { if (updateMount(argv, i)) { System.out.println("Successfully updated mount point " + argv[i]); + } else { + exitCode = -1; } } else if ("-rm".equals(cmd)) { if (removeMount(argv[i])) { @@ -226,14 +301,13 @@ public int run(String[] argv) throws Exception { } else if ("-getDisabledNameservices".equals(cmd)) { getDisabledNameservices(); } else { - printUsage(); - return exitCode; + throw new IllegalArgumentException("Unknown Command: " + cmd); } } catch (IllegalArgumentException arge) { debugException = arge; exitCode = -1; 
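With the per-command usage strings and the validateMax() check above, a malformed invocation now reports the specific problem, prints only the usage of the offending command, and returns -1, which the CLI tests later in this patch assert. A small illustrative driver, assuming a running Router whose admin address is configured (RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_KEY, as the safemode test below sets it):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.tools.federation.RouterAdmin;
    import org.apache.hadoop.util.ToolRunner;

    public class RouterAdminUsageExample {
      public static void main(String[] args) throws Exception {
        RouterAdmin admin = new RouterAdmin(new Configuration());
        // One argument too many for -rm: validateMax() throws, the tool prints
        // "Too many arguments, Max=1 argument allowed" and the -rm usage line,
        // and run() returns -1.
        int exitCode = ToolRunner.run(admin, new String[] {"-rm", "/mnt", "extra"});
        System.out.println("exit code: " + exitCode); // expected: -1
      }
    }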
System.err.println(cmd.substring(1) + ": " + arge.getLocalizedMessage()); - printUsage(); + printUsage(cmd); } catch (RemoteException e) { // This is a error returned by the server. // Print out the first line of the error message, ignore the stack trace. @@ -299,6 +373,9 @@ public boolean addMount(String[] parameters, int i) throws IOException { i++; short modeValue = Short.parseShort(parameters[i], 8); mode = new FsPermission(modeValue); + } else { + printUsage("-add"); + return false; } i++; @@ -451,6 +528,9 @@ public boolean updateMount(String[] parameters, int i) throws IOException { i++; short modeValue = Short.parseShort(parameters[i], 8); mode = new FsPermission(modeValue); + } else { + printUsage("-update"); + return false; } i++; @@ -605,6 +685,9 @@ private boolean setQuota(String[] parameters, int i) throws IOException { throw new IllegalArgumentException( "Cannot parse ssQuota: " + parameters[i]); } + } else { + throw new IllegalArgumentException( + "Invalid argument : " + parameters[i]); } i++; @@ -632,8 +715,8 @@ private boolean setQuota(String[] parameters, int i) throws IOException { * @throws IOException Error clearing the mount point. */ private boolean clrQuota(String mount) throws IOException { - return updateQuota(mount, HdfsConstants.QUOTA_DONT_SET, - HdfsConstants.QUOTA_DONT_SET); + return updateQuota(mount, HdfsConstants.QUOTA_RESET, + HdfsConstants.QUOTA_RESET); } /** @@ -668,8 +751,8 @@ private boolean updateQuota(String mount, long nsQuota, long ssQuota) long nsCount = existingEntry.getQuota().getFileAndDirectoryCount(); long ssCount = existingEntry.getQuota().getSpaceConsumed(); // If nsQuota and ssQuota were unset, clear nsQuota and ssQuota. - if (nsQuota == HdfsConstants.QUOTA_DONT_SET && - ssQuota == HdfsConstants.QUOTA_DONT_SET) { + if (nsQuota == HdfsConstants.QUOTA_RESET && + ssQuota == HdfsConstants.QUOTA_RESET) { nsCount = RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT; ssCount = RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT; } else { @@ -712,6 +795,8 @@ private void manageSafeMode(String cmd) throws IOException { } else if (cmd.equals("get")) { boolean result = getSafeMode(); System.out.println("Safe Mode: " + result); + } else { + throw new IllegalArgumentException("Invalid argument: " + cmd); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml index 8806cb27de9..3f560431ac7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml @@ -31,6 +31,14 @@ + + dfs.federation.router.default.nameservice.enable + true + + The default subcluster is enabled to read and write files. + + + dfs.federation.router.rpc.enable true @@ -93,6 +101,14 @@ + + dfs.federation.router.connection.creator.queue-size + 100 + + Size of async connection creator queue. + + + dfs.federation.router.connection.pool-size 1 @@ -393,6 +409,15 @@ + + dfs.federation.router.mount-table.cache.enable + true + + Set to true to enable mount table cache (Path to Remote Location cache). + Disabling the cache is recommended when a large amount of unique paths are queried. 
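The mount table cache switch described in the property above can also be flipped programmatically, which is how the resolver test later in this patch disables it. A minimal sketch, assuming the RBFConfigKeys constants that the test imports:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver;
    import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_DEFAULT_NAMESERVICE;
    import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_MOUNT_TABLE_CACHE_ENABLE;

    // Disable the path-to-remote-location cache, the code equivalent of setting
    // dfs.federation.router.mount-table.cache.enable=false.
    Configuration conf = new Configuration();
    conf.setBoolean(FEDERATION_MOUNT_TABLE_CACHE_ENABLE, false);
    conf.setStrings(DFS_ROUTER_DEFAULT_NAMESERVICE, "ns0");
    MountTableResolver resolver = new MountTableResolver(conf);
    // Path resolution still works without the cache; getCacheSize() is expected
    // to fail because the local cache was never created.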
+ + + dfs.federation.router.quota.enable false diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md index 70c622668ba..edc99187031 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md @@ -62,7 +62,7 @@ Each Router has two roles: #### Federated interface The Router receives a client request, checks the State Store for the correct subcluster, and forwards the request to the active NameNode of that subcluster. The reply from the NameNode then flows in the opposite direction. -The Routers are stateless and can be behind a load balancer. +The Routers are stateless and can be behind a load balancer. For health checking, you can use /isActive endpoint as a health probe (e.g. http://ROUTER_HOSTNAME:ROUTER_PORT/isActive). For performance, the Router also caches remote mount table entries and the state of the subclusters. To make sure that changes have been propagated to all Routers, each Router heartbeats its state to the State Store. @@ -175,7 +175,7 @@ Deployment By default, the Router is ready to take requests and monitor the NameNode in the local machine. It needs to know the State Store endpoint by setting `dfs.federation.router.store.driver.class`. -The rest of the options are documented in [hdfs-default.xml](../hadoop-hdfs/hdfs-default.xml). +The rest of the options are documented in [hdfs-rbf-default.xml](../hadoop-hdfs-rbf/hdfs-rbf-default.xml). Once the Router is configured, it can be started: @@ -214,6 +214,7 @@ Mount table permission can be set by following command: The option mode is UNIX-style permissions for the mount table. Permissions are specified in octal, e.g. 0755. By default, this is set to 0755. +#### Quotas Router-based federation supports global quota at mount table level. Mount table entries may spread multiple subclusters and the global quota will be accounted across these subclusters. @@ -229,6 +230,31 @@ Ls command will show below information for each mount table entry: Source Destinations Owner Group Mode Quota/Usage /path ns0->/path root supergroup rwxr-xr-x [NsQuota: 50/0, SsQuota: 100 B/0 B] +#### Multiple subclusters +A mount point also supports mapping multiple subclusters. +For example, to create a mount point that stores files in subclusters `ns1` and `ns2`. + + [hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /data ns1,ns2 /data -order SPACE + +When listing `/data`, it will show all the folders and files in both subclusters. +For deciding where to create a new file/folder it uses the order parameter, it currently supports the following methods: + +* HASH: Follow consistent hashing in the first level. Deeper levels will be in the one of the parent. +* LOCAL: Try to write data in the local subcluster. +* RANDOM: Random subcluster. This is usually useful for balancing the load across. Folders are created in all subclusters. +* HASH_ALL: Follow consistent hashing at all the levels. This approach tries to balance the reads and writes evenly across subclusters. Folders are created in all subclusters. +* SPACE: Try to write data in the subcluster with the most available space. Folders are created in all subclusters. + +For the hash-based approaches, the difference is that HASH would make all the files/folders within a folder belong to the same subcluster while HASH_ALL will spread all files under a mount point. 
+For example, assuming we have a HASH mount point for `/data/hash`, files and folders under `/data/hash/folder0` will all be in the same subcluster. +On the other hand, a HASH_ALL mount point for `/data/hash_all`, will spread files under `/data/hash_all/folder0` across all the subclusters for that mount point (subfolders will be created to all subclusters). + +RANDOM can be used for reading and writing data from/into different subclusters. +The common use for this approach is to have the same data in multiple subclusters and balance the reads across subclusters. +For example, if thousands of containers need to read the same data (e.g., a library), one can use RANDOM to read the data from any of the subclusters. + +Note that consistency of the data across subclusters is not guaranteed by the Router. + ### Disabling nameservices To prevent accessing a nameservice (sublcuster), it can be disabled from the federation. @@ -290,7 +316,7 @@ Router configuration -------------------- One can add the configurations for Router-based federation to **hdfs-site.xml**. -The main options are documented in [hdfs-default.xml](../hadoop-hdfs/hdfs-default.xml). +The main options are documented in [hdfs-rbf-default.xml](../hadoop-hdfs-rbf/hdfs-rbf-default.xml). The configuration values are described in this section. ### RPC server @@ -330,6 +356,18 @@ The administration server to manage the Mount Table. | dfs.federation.router.admin-bind-host | 0.0.0.0 | The actual address the RPC admin server will bind to. | | dfs.federation.router.admin.handler.count | 1 | The number of server threads for the router to handle RPC requests from admin. | +### HTTP Server + +The HTTP Server to provide Web UI and the HDFS REST interface ([WebHDFS](../hadoop-hdfs/WebHDFS.html)) for the clients. The default URL is "`http://router_host:50071`". + +| Property | Default | Description| +|:---- |:---- |:---- | +| dfs.federation.router.http.enable | `true` | If `true`, the HTTP service to handle client requests in the router is enabled. | +| dfs.federation.router.http-address | 0.0.0.0:50071 | HTTP address that handles the web requests to the Router. | +| dfs.federation.router.http-bind-host | 0.0.0.0 | The actual address the HTTP server will bind to. | +| dfs.federation.router.https-address | 0.0.0.0:50072 | HTTPS address that handles the web requests to the Router. | +| dfs.federation.router.https-bind-host | 0.0.0.0 | The actual address the HTTPS server will bind to. | + ### State Store The connection to the State Store and the internal caching at the Router. 
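For the order policies just documented, the practical difference between HASH and HASH_ALL is which part of the path feeds the hash. A toy sketch of the idea, using a path relative to the mount point and plain modulo hashing; this is purely illustrative and not the Router's actual DestinationOrder or consistent-hashing implementation:

    import java.util.List;

    // Toy illustration of HASH vs HASH_ALL subcluster selection.
    // relativePath is the path below the mount point, e.g. "folder0/file1.txt".
    static String pickSubcluster(String relativePath, List<String> subclusters,
        boolean hashAll) {
      // HASH: only the first level is hashed, so everything under "folder0"
      // lands in the same subcluster. HASH_ALL: the full path is hashed, so
      // files are spread across all the subclusters of the mount point.
      String key = hashAll ? relativePath : relativePath.split("/", 2)[0];
      int index = (key.hashCode() & Integer.MAX_VALUE) % subclusters.size();
      return subclusters.get(index);
    }

Similarly, since the Router now exposes an HTTP server (default http://router_host:50071) and the /isActive endpoint mentioned earlier is intended as a health probe, a load-balancer check can be as small as the sketch below; the 2-second timeouts and the 2xx test are assumptions, not something this patch prescribes:

    import java.io.IOException;
    import java.net.HttpURLConnection;
    import java.net.URL;

    // Illustrative health probe against the Router web UI/REST port.
    static boolean isRouterActive(String routerHost, int httpPort) throws IOException {
      URL url = new URL("http://" + routerHost + ":" + httpPort + "/isActive");
      HttpURLConnection conn = (HttpURLConnection) url.openConnection();
      conn.setConnectTimeout(2000);
      conn.setReadTimeout(2000);
      try {
        int code = conn.getResponseCode();
        return code >= 200 && code < 300; // treat any 2xx as "active"
      } finally {
        conn.disconnect();
      }
    }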
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java index 36cce391aea..f5636ceccd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MockResolver.java @@ -129,6 +129,9 @@ public void updateActiveNamenode( // Return a copy of the list because it is updated periodically List namenodes = this.resolver.get(nameserviceId); + if (namenodes == null) { + namenodes = new ArrayList<>(); + } return Collections.unmodifiableList(new ArrayList<>(namenodes)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestInitializeMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestInitializeMountTableResolver.java new file mode 100644 index 00000000000..5db7531c4dd --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestInitializeMountTableResolver.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.resolver; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMESERVICES; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_DEFAULT_NAMESERVICE; +import static org.junit.Assert.assertEquals; + +/** + * Test {@link MountTableResolver} initialization. 
+ */ +public class TestInitializeMountTableResolver { + + @Test + public void testDefaultNameserviceIsMissing() { + Configuration conf = new Configuration(); + MountTableResolver mountTable = new MountTableResolver(conf); + assertEquals("", mountTable.getDefaultNamespace()); + } + + @Test + public void testDefaultNameserviceWithEmptyString() { + Configuration conf = new Configuration(); + conf.set(DFS_ROUTER_DEFAULT_NAMESERVICE, ""); + MountTableResolver mountTable = new MountTableResolver(conf); + assertEquals("", mountTable.getDefaultNamespace()); + } + + @Test + public void testRouterDefaultNameservice() { + Configuration conf = new Configuration(); + conf.set(DFS_ROUTER_DEFAULT_NAMESERVICE, "router_ns"); // this is priority + conf.set(DFS_NAMESERVICE_ID, "ns_id"); + conf.set(DFS_NAMESERVICES, "nss"); + MountTableResolver mountTable = new MountTableResolver(conf); + assertEquals("router_ns", mountTable.getDefaultNamespace()); + } + + @Test + public void testNameserviceID() { + Configuration conf = new Configuration(); + conf.set(DFS_NAMESERVICE_ID, "ns_id"); // this is priority + conf.set(DFS_NAMESERVICES, "nss"); + MountTableResolver mountTable = new MountTableResolver(conf); + assertEquals("ns_id", mountTable.getDefaultNamespace()); + } + + @Test + public void testSingleNameservices() { + Configuration conf = new Configuration(); + conf.set(DFS_NAMESERVICES, "ns1"); + MountTableResolver mountTable = new MountTableResolver(conf); + assertEquals("ns1", mountTable.getDefaultNamespace()); + } + + @Test + public void testMultipleNameservices() { + Configuration conf = new Configuration(); + conf.set(DFS_NAMESERVICES, "ns1,ns2"); + MountTableResolver mountTable = new MountTableResolver(conf); + assertEquals("ns1", mountTable.getDefaultNamespace()); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java index cb3b472ced0..5e3b861df23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMountTableResolver.java @@ -17,9 +17,11 @@ */ package org.apache.hadoop.hdfs.server.federation.resolver; +import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_MOUNT_TABLE_CACHE_ENABLE; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.FEDERATION_MOUNT_TABLE_MAX_CACHE_SIZE; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_DEFAULT_NAMESERVICE; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -37,6 +39,7 @@ import org.apache.hadoop.hdfs.server.federation.router.Router; import org.apache.hadoop.hdfs.server.federation.store.MountTableStore; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; +import org.apache.hadoop.test.GenericTestUtils; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; @@ -173,6 +176,31 @@ public void testDestination() throws IOException { } + @Test + public void testDefaultNameServiceEnable() throws IOException { + 
assertTrue(mountTable.isDefaultNSEnable()); + mountTable.setDefaultNameService("3"); + mountTable.removeEntry("/"); + + assertEquals("3->/unknown", + mountTable.getDestinationForPath("/unknown").toString()); + + Map map = getMountTableEntry("4", "/unknown"); + mountTable.addEntry(MountTable.newInstance("/unknown", map)); + mountTable.setDefaultNSEnable(false); + assertFalse(mountTable.isDefaultNSEnable()); + + assertEquals("4->/unknown", + mountTable.getDestinationForPath("/unknown").toString()); + try { + mountTable.getDestinationForPath("/"); + fail("The getDestinationForPath call should fail."); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "the default nameservice is disabled to read or write", ioe); + } + } + private void compareLists(List list1, String[] list2) { assertEquals(list1.size(), list2.length); for (String item : list2) { @@ -472,6 +500,35 @@ public void testUpdate() throws IOException { assertNull(entry2); } + @Test + public void testDisableLocalCache() throws IOException { + Configuration conf = new Configuration(); + // Disable mount table cache + conf.setBoolean(FEDERATION_MOUNT_TABLE_CACHE_ENABLE, false); + conf.setStrings(DFS_ROUTER_DEFAULT_NAMESERVICE, "0"); + MountTableResolver tmpMountTable = new MountTableResolver(conf); + + // Root mount point + Map map = getMountTableEntry("1", "/"); + tmpMountTable.addEntry(MountTable.newInstance("/", map)); + + // /tmp + map = getMountTableEntry("2", "/tmp"); + tmpMountTable.addEntry(MountTable.newInstance("/tmp", map)); + + // Check localCache is null + try { + tmpMountTable.getCacheSize(); + fail("getCacheSize call should fail."); + } catch (IOException e) { + GenericTestUtils.assertExceptionContains("localCache is null", e); + } + + // Check resolve path without cache + assertEquals("2->/tmp/tesfile1.txt", + tmpMountTable.getDestinationForPath("/tmp/tesfile1.txt").toString()); + } + @Test public void testCacheCleaning() throws Exception { for (int i = 0; i < 1000; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestDisableRouterQuota.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestDisableRouterQuota.java new file mode 100644 index 00000000000..2632f5936fb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestDisableRouterQuota.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.federation.router; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; + +import java.io.IOException; + +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +/** + * Test the behavior when disabling the Router quota. + */ +public class TestDisableRouterQuota { + + private static Router router; + + @BeforeClass + public static void setUp() throws Exception { + // Build and start a router + router = new Router(); + Configuration routerConf = new RouterConfigBuilder() + .quota(false) //set false to verify the quota disabled in Router + .rpc() + .build(); + router.init(routerConf); + router.setRouterId("TestRouterId"); + router.start(); + } + + @AfterClass + public static void tearDown() throws IOException { + if (router != null) { + router.stop(); + router.close(); + } + } + + @Before + public void checkDisableQuota() { + assertFalse(router.isQuotaEnabled()); + } + + @Test + public void testSetQuota() throws Exception { + long nsQuota = 1024; + long ssQuota = 1024; + + try { + Quota quotaModule = router.getRpcServer().getQuotaModule(); + quotaModule.setQuota("/test", nsQuota, ssQuota, null); + fail("The setQuota call should fail."); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "The quota system is disabled in Router.", ioe); + } + } + + @Test + public void testGetQuotaUsage() throws Exception { + try { + Quota quotaModule = router.getRpcServer().getQuotaModule(); + quotaModule.getQuotaUsage("/test"); + fail("The getQuotaUsage call should fail."); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "The quota system is disabled in Router.", ioe); + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java index f8cf009479a..db4be292fd6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java @@ -18,9 +18,11 @@ package org.apache.hadoop.hdfs.server.federation.router; import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; import java.io.IOException; import java.net.InetSocketAddress; @@ -185,4 +187,20 @@ public void testRouterRpcWithNoSubclusters() throws IOException { router.stop(); router.close(); } + + @Test + public void testRouterIDInRouterRpcClient() throws Exception { + + Router router = new Router(); + router.init(new RouterConfigBuilder(conf).rpc().build()); + router.setRouterId("Router-0"); + RemoteMethod remoteMethod = mock(RemoteMethod.class); + + intercept(IOException.class, "Router-0", + () -> router.getRpcServer().getRPCClient() + .invokeSingle("ns0", remoteMethod)); + + router.stop(); + router.close(); + } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdmin.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdmin.java index 769bfe71ed8..c834dcfd7f3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdmin.java @@ -64,6 +64,8 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.internal.util.reflection.Whitebox; /** * The administrator interface of the {@link Router} implemented by @@ -101,6 +103,12 @@ public static void globalSetUp() throws Exception { membership.registerNamenode( createNamenodeReport("ns1", "nn1", HAServiceState.ACTIVE)); stateStore.refreshCaches(true); + + RouterRpcServer spyRpcServer = + Mockito.spy(routerContext.getRouter().createRpcServer()); + Whitebox + .setInternalState(routerContext.getRouter(), "rpcServer", spyRpcServer); + Mockito.doReturn(null).when(spyRpcServer).getFileInfo(Mockito.anyString()); } @AfterClass diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java index 7e04e617fbf..d968c602fb0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java @@ -82,6 +82,7 @@ public static void globalSetUp() throws Exception { .stateStore() .admin() .rpc() + .safemode() .build(); cluster.addRouterOverrides(conf); @@ -114,6 +115,14 @@ public static void globalSetUp() throws Exception { Mockito.anyLong(), Mockito.anyLong(), Mockito.any()); Whitebox.setInternalState( routerContext.getRouter().getRpcServer(), "quotaCall", quota); + + RouterRpcServer spyRpcServer = + Mockito.spy(routerContext.getRouter().createRpcServer()); + Whitebox + .setInternalState(routerContext.getRouter(), "rpcServer", spyRpcServer); + + Mockito.doReturn(null).when(spyRpcServer).getFileInfo(Mockito.anyString()); + } @AfterClass @@ -215,6 +224,24 @@ public void testAddOrderMountTable() throws Exception { testAddOrderMountTable(DestinationOrder.HASH_ALL); } + @Test + public void testAddOrderErrorMsg() throws Exception { + DestinationOrder order = DestinationOrder.HASH; + final String mnt = "/newAdd1" + order; + final String nsId = "ns0,ns1"; + final String dest = "/changAdd"; + + String[] argv1 = new String[] {"-add", mnt, nsId, dest, "-order", + order.toString()}; + assertEquals(0, ToolRunner.run(admin, argv1)); + + // Add the order with wrong command + String[] argv = new String[] {"-add", mnt, nsId, dest, "-orde", + order.toString()}; + assertEquals(-1, ToolRunner.run(admin, argv)); + + } + private void testAddOrderMountTable(DestinationOrder order) throws Exception { final String mnt = "/" + order; @@ -258,6 +285,13 @@ public void testListMountTable() throws Exception { assertEquals(0, ToolRunner.run(admin, argv)); assertTrue(out.toString().contains(src)); + // Test with wrong number of arguments + argv = new String[] {"-ls", srcWithSlash, "check", "check2"}; + System.setErr(new PrintStream(err)); + 
ToolRunner.run(admin, argv); + assertTrue( + err.toString().contains("Too many arguments, Max=1 argument allowed")); + out.reset(); GetMountTableEntriesRequest getRequest = GetMountTableEntriesRequest .newInstance("/"); @@ -308,6 +342,13 @@ public void testRemoveMountTable() throws Exception { assertEquals(0, ToolRunner.run(admin, argv)); assertTrue(out.toString().contains( "Cannot remove mount point " + invalidPath)); + + // test wrong number of arguments + System.setErr(new PrintStream(err)); + argv = new String[] {"-rm", src, "check" }; + ToolRunner.run(admin, argv); + assertTrue(err.toString() + .contains("Too many arguments, Max=1 argument allowed")); } @Test @@ -380,7 +421,7 @@ public void testMountTablePermissions() throws Exception { argv = new String[] {"-add", "/testpath2-2", "ns0", "/testdir2-2", "-owner", TEST_USER, "-group", TEST_USER, "-mode", "0255"}; assertEquals(0, ToolRunner.run(admin, argv)); - verifyExecutionResult("/testpath2-2", false, 0, 0); + verifyExecutionResult("/testpath2-2", false, -1, 0); // set mount table entry with read and write permission argv = new String[] {"-add", "/testpath2-3", "ns0", "/testdir2-3", @@ -427,6 +468,74 @@ private void verifyExecutionResult(String mount, boolean canRead, assertEquals(rmCommandCode, ToolRunner.run(admin, argv)); } + @Test + public void testInvalidArgumentMessage() throws Exception { + String nsId = "ns0"; + String src = "/testSource"; + System.setOut(new PrintStream(out)); + String[] argv = new String[] {"-add", src, nsId}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString().contains( + "\t[-add " + + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " + + "-owner -group -mode ]")); + out.reset(); + + argv = new String[] {"-update", src, nsId}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString().contains( + "\t[-update " + + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " + + "-owner -group -mode ]")); + out.reset(); + + argv = new String[] {"-rm"}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString().contains("\t[-rm ]")); + out.reset(); + + argv = new String[] {"-setQuota", src}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString() + .contains("\t[-setQuota -nsQuota -ssQuota " + + "]")); + out.reset(); + + argv = new String[] {"-clrQuota"}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString().contains("\t[-clrQuota ]")); + out.reset(); + + argv = new String[] {"-safemode"}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString().contains("\t[-safemode enter | leave | get]")); + out.reset(); + + argv = new String[] {"-nameservice", nsId}; + assertEquals(-1, ToolRunner.run(admin, argv)); + assertTrue(out.toString() + .contains("\t[-nameservice enable | disable ]")); + out.reset(); + + argv = new String[] {"-Random"}; + assertEquals(-1, ToolRunner.run(admin, argv)); + String expected = "Usage: hdfs routeradmin :\n" + + "\t[-add " + + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " + + "-owner -group -mode ]\n" + + "\t[-update " + + " " + "[-readonly] [-order HASH|LOCAL|RANDOM|HASH_ALL] " + + "-owner -group -mode ]\n" + "\t[-rm ]\n" + + "\t[-ls ]\n" + + "\t[-setQuota -nsQuota -ssQuota " + + "]\n" + "\t[-clrQuota ]\n" + + "\t[-safemode enter | leave | get]\n" + + "\t[-nameservice enable | disable ]\n" + + "\t[-getDisabledNameservices]"; + assertTrue(out.toString(), out.toString().contains(expected)); + out.reset(); + } + @Test public void testSetAndClearQuota() throws Exception 
{ String nsId = "ns0"; @@ -446,10 +555,10 @@ public void testSetAndClearQuota() throws Exception { // verify the default quota set assertEquals(RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT, quotaUsage.getFileAndDirectoryCount()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaUsage.getQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaUsage.getQuota()); assertEquals(RouterQuotaUsage.QUOTA_USAGE_COUNT_DEFAULT, quotaUsage.getSpaceConsumed()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaUsage.getSpaceQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaUsage.getSpaceQuota()); long nsQuota = 50; long ssQuota = 100; @@ -493,23 +602,36 @@ public void testSetAndClearQuota() throws Exception { quotaUsage = mountTable.getQuota(); // verify if quota unset successfully - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaUsage.getQuota()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaUsage.getSpaceQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaUsage.getQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaUsage.getSpaceQuota()); + + // verify wrong arguments + System.setErr(new PrintStream(err)); + argv = new String[] {"-clrQuota", src, "check"}; + ToolRunner.run(admin, argv); + assertTrue(err.toString(), + err.toString().contains("Too many arguments, Max=1 argument allowed")); + + argv = new String[] {"-setQuota", src, "check", "check2"}; + err.reset(); + ToolRunner.run(admin, argv); + assertTrue(err.toString().contains("Invalid argument : check")); } @Test public void testManageSafeMode() throws Exception { // ensure the Router become RUNNING state waitState(RouterServiceState.RUNNING); - assertFalse(routerContext.getRouter().getRpcServer().isInSafeMode()); + assertFalse(routerContext.getRouter().getSafemodeService().isInSafeMode()); assertEquals(0, ToolRunner.run(admin, new String[] {"-safemode", "enter"})); // verify state assertEquals(RouterServiceState.SAFEMODE, routerContext.getRouter().getRouterState()); - assertTrue(routerContext.getRouter().getRpcServer().isInSafeMode()); + assertTrue(routerContext.getRouter().getSafemodeService().isInSafeMode()); System.setOut(new PrintStream(out)); + System.setErr(new PrintStream(err)); assertEquals(0, ToolRunner.run(admin, new String[] {"-safemode", "get"})); assertTrue(out.toString().contains("true")); @@ -519,12 +641,25 @@ public void testManageSafeMode() throws Exception { // verify state assertEquals(RouterServiceState.RUNNING, routerContext.getRouter().getRouterState()); - assertFalse(routerContext.getRouter().getRpcServer().isInSafeMode()); + assertFalse(routerContext.getRouter().getSafemodeService().isInSafeMode()); out.reset(); assertEquals(0, ToolRunner.run(admin, new String[] {"-safemode", "get"})); assertTrue(out.toString().contains("false")); + + out.reset(); + assertEquals(-1, ToolRunner.run(admin, + new String[] {"-safemode", "get", "-random", "check" })); + assertTrue(err.toString(), err.toString() + .contains("safemode: Too many arguments, Max=1 argument allowed only")); + err.reset(); + + assertEquals(-1, + ToolRunner.run(admin, new String[] {"-safemode", "check" })); + assertTrue(err.toString(), + err.toString().contains("safemode: Invalid argument: check")); + err.reset(); } @Test @@ -595,6 +730,15 @@ public void testNameserviceManager() throws Exception { new String[] {"-nameservice", "wrong", "ns0"})); assertTrue("Got error: " + err.toString(), err.toString().startsWith("nameservice: Unknown command: wrong")); + + err.reset(); + ToolRunner.run(admin, + new String[] {"-nameservice", "enable", "ns0", 
"check"}); + assertTrue( + err.toString().contains("Too many arguments, Max=2 arguments allowed")); + err.reset(); + ToolRunner.run(admin, new String[] {"-getDisabledNameservices", "check"}); + assertTrue(err.toString().contains("No arguments allowed")); } /** @@ -762,6 +906,43 @@ public void testUpdateOrderMountTable() throws Exception { testUpdateOrderMountTable(DestinationOrder.HASH_ALL); } + @Test + public void testOrderErrorMsg() throws Exception { + String nsId = "ns0"; + DestinationOrder order = DestinationOrder.HASH; + String src = "/testod" + order.toString(); + String dest = "/testUpd"; + String[] argv = new String[] {"-add", src, nsId, dest}; + assertEquals(0, ToolRunner.run(admin, argv)); + + stateStore.loadCache(MountTableStoreImpl.class, true); + GetMountTableEntriesRequest getRequest = GetMountTableEntriesRequest + .newInstance(src); + GetMountTableEntriesResponse getResponse = client.getMountTableManager() + .getMountTableEntries(getRequest); + + // Ensure mount table added successfully + MountTable mountTable = getResponse.getEntries().get(0); + assertEquals(src, mountTable.getSourcePath()); + assertEquals(nsId, mountTable.getDestinations().get(0).getNameserviceId()); + assertEquals(dest, mountTable.getDestinations().get(0).getDest()); + assertEquals(DestinationOrder.HASH, mountTable.getDestOrder()); + + argv = new String[] {"-update", src, nsId, dest, "-order", + order.toString()}; + assertEquals(0, ToolRunner.run(admin, argv)); + + // Update the order with wrong command + argv = new String[] {"-update", src + "a", nsId, dest + "a", "-orde", + order.toString()}; + assertEquals(-1, ToolRunner.run(admin, argv)); + + // Update without order argument + argv = new String[] {"-update", src, nsId, dest, order.toString()}; + assertEquals(-1, ToolRunner.run(admin, argv)); + + } + private void testUpdateOrderMountTable(DestinationOrder order) throws Exception { // Add a mount table diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java index 431b394796b..6a29446f802 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.NamenodeContext; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext; @@ -460,8 +461,10 @@ private MountTable getMountTable(String path) throws IOException { public void testQuotaSynchronization() throws IOException { long updateNsQuota = 3; long updateSsQuota = 4; + FileSystem nnFs = nnContext1.getFileSystem(); + nnFs.mkdirs(new Path("/testsync")); MountTable mountTable = MountTable.newInstance("/quotaSync", - Collections.singletonMap("ns0", "/"), Time.now(), Time.now()); + Collections.singletonMap("ns0", "/testsync"), Time.now(), Time.now()); mountTable.setQuota(new RouterQuotaUsage.Builder().quota(1) .spaceQuota(2).build()); // Add new mount table @@ -469,7 
+472,7 @@ public void testQuotaSynchronization() throws IOException { // ensure the quota is not set as updated value QuotaUsage realQuota = nnContext1.getFileSystem() - .getQuotaUsage(new Path("/")); + .getQuotaUsage(new Path("/testsync")); assertNotEquals(updateNsQuota, realQuota.getQuota()); assertNotEquals(updateSsQuota, realQuota.getSpaceQuota()); @@ -489,9 +492,26 @@ public void testQuotaSynchronization() throws IOException { // verify if the quota is updated in real path realQuota = nnContext1.getFileSystem().getQuotaUsage( - new Path("/")); + new Path("/testsync")); assertEquals(updateNsQuota, realQuota.getQuota()); assertEquals(updateSsQuota, realQuota.getSpaceQuota()); + + // Clear the quota + mountTable.setQuota(new RouterQuotaUsage.Builder() + .quota(HdfsConstants.QUOTA_RESET) + .spaceQuota(HdfsConstants.QUOTA_RESET).build()); + + updateRequest = UpdateMountTableEntryRequest + .newInstance(mountTable); + client = routerContext.getAdminClient(); + mountTableManager = client.getMountTableManager(); + mountTableManager.updateMountTableEntry(updateRequest); + + // verify if the quota is updated in real path + realQuota = nnContext1.getFileSystem().getQuotaUsage( + new Path("/testsync")); + assertEquals(HdfsConstants.QUOTA_RESET, realQuota.getQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, realQuota.getSpaceQuota()); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuotaManager.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuotaManager.java index ce3ee174757..4a1dd2e091b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuotaManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuotaManager.java @@ -81,8 +81,8 @@ public void testGetQuotaUsage() { // test case2: get quota from an no-quota set path RouterQuotaUsage.Builder quota = new RouterQuotaUsage.Builder() - .quota(HdfsConstants.QUOTA_DONT_SET) - .spaceQuota(HdfsConstants.QUOTA_DONT_SET); + .quota(HdfsConstants.QUOTA_RESET) + .spaceQuota(HdfsConstants.QUOTA_RESET); manager.put("/noQuotaSet", quota.build()); quotaGet = manager.getQuotaUsage("/noQuotaSet"); // it should return null @@ -90,36 +90,36 @@ public void testGetQuotaUsage() { // test case3: get quota from an quota-set path quota.quota(1); - quota.spaceQuota(HdfsConstants.QUOTA_DONT_SET); + quota.spaceQuota(HdfsConstants.QUOTA_RESET); manager.put("/hasQuotaSet", quota.build()); quotaGet = manager.getQuotaUsage("/hasQuotaSet"); assertEquals(1, quotaGet.getQuota()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaGet.getSpaceQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaGet.getSpaceQuota()); // test case4: get quota with an non-exist child path quotaGet = manager.getQuotaUsage("/hasQuotaSet/file"); // it will return the nearest ancestor which quota was set assertEquals(1, quotaGet.getQuota()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaGet.getSpaceQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaGet.getSpaceQuota()); // test case5: get quota with an child path which its parent // wasn't quota set - quota.quota(HdfsConstants.QUOTA_DONT_SET); - quota.spaceQuota(HdfsConstants.QUOTA_DONT_SET); + quota.quota(HdfsConstants.QUOTA_RESET); + quota.spaceQuota(HdfsConstants.QUOTA_RESET); manager.put("/hasQuotaSet/noQuotaSet", quota.build()); // here should returns the 
quota of path /hasQuotaSet // (the nearest ancestor which quota was set) quotaGet = manager.getQuotaUsage("/hasQuotaSet/noQuotaSet/file"); assertEquals(1, quotaGet.getQuota()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaGet.getSpaceQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaGet.getSpaceQuota()); // test case6: get quota with an child path which its parent was quota set quota.quota(2); - quota.spaceQuota(HdfsConstants.QUOTA_DONT_SET); + quota.spaceQuota(HdfsConstants.QUOTA_RESET); manager.put("/hasQuotaSet/hasQuotaSet", quota.build()); // here should return the quota of path /hasQuotaSet/hasQuotaSet quotaGet = manager.getQuotaUsage("/hasQuotaSet/hasQuotaSet/file"); assertEquals(2, quotaGet.getQuota()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quotaGet.getSpaceQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quotaGet.getSpaceQuota()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterSafemode.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterSafemode.java index f16ceb58f44..9c1aeb2b3f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterSafemode.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterSafemode.java @@ -28,14 +28,17 @@ import static org.junit.Assert.fail; import java.io.IOException; +import java.net.InetSocketAddress; import java.net.URISyntaxException; import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; +import org.apache.hadoop.hdfs.tools.federation.RouterAdmin; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.service.Service.STATE; import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.ToolRunner; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -60,12 +63,12 @@ public static void create() throws IOException { // 2 sec startup standby conf.setTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION, TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS); - // 1 sec cache refresh + // 200 ms cache refresh conf.setTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, - TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS); - // 2 sec post cache update before entering safemode (2 intervals) + 200, TimeUnit.MILLISECONDS); + // 1 sec post cache update before entering safemode (2 intervals) conf.setTimeDuration(DFS_ROUTER_SAFEMODE_EXPIRATION, - TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS); + TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS); conf.set(RBFConfigKeys.DFS_ROUTER_RPC_BIND_HOST_KEY, "0.0.0.0"); conf.set(RBFConfigKeys.DFS_ROUTER_RPC_ADDRESS_KEY, "127.0.0.1:0"); @@ -77,6 +80,7 @@ public static void create() throws IOException { // RPC + State Store + Safe Mode only conf = new RouterConfigBuilder(conf) .rpc() + .admin() .safemode() .stateStore() .metrics() @@ -118,7 +122,7 @@ public void testSafemodeService() throws IOException { public void testRouterExitSafemode() throws InterruptedException, IllegalStateException, IOException { - assertTrue(router.getRpcServer().isInSafeMode()); + assertTrue(router.getSafemodeService().isInSafeMode()); verifyRouter(RouterServiceState.SAFEMODE); // Wait for initial time in milliseconds @@ -129,7 +133,7 @@ public void testRouterExitSafemode() TimeUnit.SECONDS.toMillis(1), 
TimeUnit.MILLISECONDS); Thread.sleep(interval); - assertFalse(router.getRpcServer().isInSafeMode()); + assertFalse(router.getSafemodeService().isInSafeMode()); verifyRouter(RouterServiceState.RUNNING); } @@ -138,7 +142,7 @@ public void testRouterEnterSafemode() throws IllegalStateException, IOException, InterruptedException { // Verify starting state - assertTrue(router.getRpcServer().isInSafeMode()); + assertTrue(router.getSafemodeService().isInSafeMode()); verifyRouter(RouterServiceState.SAFEMODE); // We should be in safe mode for DFS_ROUTER_SAFEMODE_EXTENSION time @@ -157,7 +161,7 @@ public void testRouterEnterSafemode() Thread.sleep(interval1); // Running - assertFalse(router.getRpcServer().isInSafeMode()); + assertFalse(router.getSafemodeService().isInSafeMode()); verifyRouter(RouterServiceState.RUNNING); // Disable cache @@ -167,12 +171,12 @@ public void testRouterEnterSafemode() long interval2 = conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXPIRATION, TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) + - conf.getTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, + 2 * conf.getTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS); Thread.sleep(interval2); // Safemode - assertTrue(router.getRpcServer().isInSafeMode()); + assertTrue(router.getSafemodeService().isInSafeMode()); verifyRouter(RouterServiceState.SAFEMODE); } @@ -180,7 +184,7 @@ public void testRouterEnterSafemode() public void testRouterRpcSafeMode() throws IllegalStateException, IOException { - assertTrue(router.getRpcServer().isInSafeMode()); + assertTrue(router.getSafemodeService().isInSafeMode()); verifyRouter(RouterServiceState.SAFEMODE); // If the Router is in Safe Mode, we should get a SafeModeException @@ -194,6 +198,38 @@ public void testRouterRpcSafeMode() assertTrue("We should have thrown a safe mode exception", exception); } + @Test + public void testRouterManualSafeMode() throws Exception { + InetSocketAddress adminAddr = router.getAdminServerAddress(); + conf.setSocketAddr(RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_KEY, adminAddr); + RouterAdmin admin = new RouterAdmin(conf); + + assertTrue(router.getSafemodeService().isInSafeMode()); + verifyRouter(RouterServiceState.SAFEMODE); + + // Wait until the Router exit start up safe mode + long interval = conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION, + TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) + 300; + Thread.sleep(interval); + verifyRouter(RouterServiceState.RUNNING); + + // Now enter safe mode via Router admin command - it should work + assertEquals(0, ToolRunner.run(admin, new String[] {"-safemode", "enter"})); + verifyRouter(RouterServiceState.SAFEMODE); + + // Wait for update interval of the safe mode service, it should still in + // safe mode. 
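The new testRouterManualSafeMode case above drives the Router's safe mode through the admin tool with ToolRunner and the "-safemode enter" / "-safemode leave" arguments. Purely as a hypothetical sketch (not part of this patch) of the same enter/leave flow from an operator-side program, assuming RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_KEY already points at a live Router:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.tools.federation.RouterAdmin;
import org.apache.hadoop.util.ToolRunner;

public class RouterSafeModeToggle {
  public static void main(String[] args) throws Exception {
    // Assumes the Router admin RPC address is already set in the configuration
    // (the test above sets RBFConfigKeys.DFS_ROUTER_ADMIN_ADDRESS_KEY explicitly).
    Configuration conf = new HdfsConfiguration();
    RouterAdmin admin = new RouterAdmin(conf);

    // Enter safe mode: while in safe mode the Router rejects RPC operations
    // with a safe mode error, as testRouterRpcSafeMode demonstrates.
    int rc = ToolRunner.run(admin, new String[] {"-safemode", "enter"});
    System.out.println("safemode enter returned " + rc);

    // Leave safe mode again so the Router resumes normal service.
    rc = ToolRunner.run(admin, new String[] {"-safemode", "leave"});
    System.out.println("safemode leave returned " + rc);
  }
}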
+ interval = 2 * conf.getTimeDuration( + DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, TimeUnit.SECONDS.toMillis(1), + TimeUnit.MILLISECONDS); + Thread.sleep(interval); + verifyRouter(RouterServiceState.SAFEMODE); + + // Exit safe mode via admin command + assertEquals(0, ToolRunner.run(admin, new String[] {"-safemode", "leave"})); + verifyRouter(RouterServiceState.RUNNING); + } + private void verifyRouter(RouterServiceState status) throws IllegalStateException, IOException { assertEquals(status, router.getRouterState()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java index 3cf7c9198a9..f8be9f0a05b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hdfs.server.federation.store.driver; import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration; +import static org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.FEDERATION_STORE_ZK_PARENT_PATH; +import static org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import java.io.IOException; import java.util.concurrent.TimeUnit; @@ -29,7 +33,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys; +import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl; +import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord; +import org.apache.hadoop.hdfs.server.federation.store.records.DisabledNameservice; +import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState; +import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; +import org.apache.hadoop.hdfs.server.federation.store.records.RouterState; +import org.apache.zookeeper.CreateMode; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -42,6 +53,7 @@ private static TestingServer curatorTestingServer; private static CuratorFramework curatorFramework; + private static String baseZNode; @BeforeClass public static void setupCluster() throws Exception { @@ -61,6 +73,9 @@ public static void setupCluster() throws Exception { // Disable auto-repair of connection conf.setLong(RBFConfigKeys.FEDERATION_STORE_CONNECTION_TEST_MS, TimeUnit.HOURS.toMillis(1)); + + baseZNode = conf.get(FEDERATION_STORE_ZK_PARENT_PATH, + FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT); getStateStore(conf); } @@ -78,6 +93,44 @@ public void startup() throws IOException { removeAll(getStateStoreDriver()); } + private String generateFakeZNode( + Class recordClass) throws IOException { + String nodeName = StateStoreUtils.getRecordName(recordClass); + String primaryKey = "test"; + + if (nodeName != null) { + return baseZNode + "/" + nodeName + "/" + primaryKey; + } + return null; + } + + private void testGetNullRecord(StateStoreDriver 
driver) throws Exception { + testGetNullRecord(driver, MembershipState.class); + testGetNullRecord(driver, MountTable.class); + testGetNullRecord(driver, RouterState.class); + testGetNullRecord(driver, DisabledNameservice.class); + } + + private void testGetNullRecord( + StateStoreDriver driver, Class recordClass) throws Exception { + driver.removeAll(recordClass); + + String znode = generateFakeZNode(recordClass); + assertNull(curatorFramework.checkExists().forPath(znode)); + + curatorFramework.create().withMode(CreateMode.PERSISTENT) + .withACL(null).forPath(znode, null); + assertNotNull(curatorFramework.checkExists().forPath(znode)); + + driver.get(recordClass); + assertNull(curatorFramework.checkExists().forPath(znode)); + } + + @Test + public void testGetNullRecord() throws Exception { + testGetNullRecord(getStateStoreDriver()); + } + @Test public void testInsert() throws IllegalArgumentException, IllegalAccessException, IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMountTable.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMountTable.java index 43cf1766009..055527384eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMountTable.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMountTable.java @@ -84,9 +84,9 @@ public void testGetterSetter() throws IOException { RouterQuotaUsage quota = record.getQuota(); assertEquals(0, quota.getFileAndDirectoryCount()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quota.getQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quota.getQuota()); assertEquals(0, quota.getSpaceConsumed()); - assertEquals(HdfsConstants.QUOTA_DONT_SET, quota.getSpaceQuota()); + assertEquals(HdfsConstants.QUOTA_RESET, quota.getSpaceQuota()); MountTable record2 = MountTable.newInstance(SRC, DST_MAP, DATE_CREATED, DATE_MOD); diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.1.1.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.1.1.xml new file mode 100644 index 00000000000..80c49469f44 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.1.1.xml @@ -0,0 +1,676 @@ + + + + + + + + + + + A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem}. This is loosely modelled after +Google's GFS.

+
+
+The most important difference is that unlike GFS, Hadoop DFS files
+have strictly one writer at any one time. Bytes are always appended
+to the end of the writer's stream. There is no notion of "record appends"
+or "mutations" that are then checked or reordered. Writers simply emit
+a byte stream. That byte stream is guaranteed to be stored in the
+order written.
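The package description above (part of the generated jdiff report) summarizes the single-writer, append-only model. As an illustrative sketch against the public FileSystem API, and not part of the generated file, a write followed by an append looks roughly like this (the path and strings are made up):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SingleWriterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();   // fs.defaultFS should point at an HDFS cluster
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("/tmp/single-writer-demo.txt");   // hypothetical path

    // Only one writer may hold the lease on the file at a time.
    try (FSDataOutputStream out = fs.create(file, true)) {
      out.writeBytes("first bytes, stored in the order written\n");
    }

    // New bytes can only be added at the end of the existing stream.
    try (FSDataOutputStream out = fs.append(file)) {
      out.writeBytes("appended bytes\n");
    }
  }
}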

]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This method must return as quickly as possible, since it's called + in a critical section of the NameNode's operation. + + @param succeeded Whether authorization succeeded. + @param userName Name of the user executing the request. + @param addr Remote address of the request. + @param cmd The requested command. + @param src Path of affected source file. + @param dst Path of affected destination file (if any). + @param stat File information for operations that change the file's + metadata (permissions, owner, times, etc).]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index fcd5ae1940f..065c175de44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -580,6 +580,23 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> - + + java9 + + [9,) + + + + + maven-compiler-plugin + + + org/apache/hadoop/hdfs/TestDFSClientFailover.java + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index 38be348bbdf..94426a561fb 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -62,7 +62,8 @@ function hadoop_usage hadoop_add_subcommand "portmap" daemon "run a portmap service" hadoop_add_subcommand "secondarynamenode" daemon "run the DFS secondary namenode" hadoop_add_subcommand "snapshotDiff" client "diff two snapshots of a directory or diff the current directory contents with a snapshot" - hadoop_add_subcommand "storagepolicies" admin "list/get/set block storage policies" + hadoop_add_subcommand "storagepolicies" admin "list/get/set/satisfyStoragePolicy block storage policies" + hadoop_add_subcommand "sps" daemon "run external storagepolicysatisfier" hadoop_add_subcommand "version" client "print the version" hadoop_add_subcommand "zkfc" daemon "run the ZK Failover Controller daemon" hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" false @@ -201,6 +202,10 @@ function hdfscmd_case storagepolicies) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.StoragePolicyAdmin ;; + sps) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.sps.ExternalStoragePolicySatisfier + ;; version) HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo ;; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index cc902b00772..a7e7b9bf870 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ 
-23,10 +23,12 @@ import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.net.DFSNetworkTopology; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault; import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.RamDiskReplicaLruTracker; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReservedSpaceCalculator; +import org.apache.hadoop.hdfs.web.URLConnectionFactory; import org.apache.hadoop.http.HttpConfig; /** @@ -96,6 +98,8 @@ public static final int DFS_PROVIDED_ALIASMAP_INMEMORY_BATCH_SIZE_DEFAULT = 500; public static final String DFS_PROVIDED_ALIASMAP_INMEMORY_ENABLED = "dfs.provided.aliasmap.inmemory.enabled"; public static final boolean DFS_PROVIDED_ALIASMAP_INMEMORY_ENABLED_DEFAULT = false; + public static final String DFS_PROVIDED_ALIASMAP_INMEMORY_SERVER_LOG = "dfs.provided.aliasmap.inmemory.server.log"; + public static final boolean DFS_PROVIDED_ALIASMAP_INMEMORY_SERVER_LOG_DEFAULT = false; public static final String DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY = HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY; @@ -241,6 +245,9 @@ public static final int DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_DEFAULT = 1; + public static final String DFS_NAMENODE_MAX_CORRUPT_FILE_BLOCKS_RETURNED_KEY = "dfs.namenode.max-corrupt-file-blocks-returned"; + public static final int DFS_NAMENODE_MAX_CORRUPT_FILE_BLOCKS_RETURNED_DEFAULT = 100; + public static final String DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY = HdfsClientConfigKeys.DeprecatedKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY; public static final int DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT = 2; @@ -383,11 +390,19 @@ public static final String DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS = "dfs.namenode.path.based.cache.refresh.interval.ms"; public static final long DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT = 30000L; + public static final String DFS_NAMENODE_CACHING_ENABLED_KEY = + "dfs.namenode.caching.enabled"; + public static final boolean DFS_NAMENODE_CACHING_ENABLED_DEFAULT = true; /** Pending period of block deletion since NameNode startup */ public static final String DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_KEY = "dfs.namenode.startup.delay.block.deletion.sec"; public static final long DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_DEFAULT = 0L; + /** Block deletion increment. 
*/ + public static final String DFS_NAMENODE_BLOCK_DELETION_INCREMENT_KEY = + "dfs.namenode.block.deletion.increment"; + public static final int DFS_NAMENODE_BLOCK_DELETION_INCREMENT_DEFAULT = 1000; + public static final String DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES = HdfsClientConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES; public static final boolean DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT = @@ -607,6 +622,49 @@ public static final String DFS_MOVER_MAX_NO_MOVE_INTERVAL_KEY = "dfs.mover.max-no-move-interval"; public static final int DFS_MOVER_MAX_NO_MOVE_INTERVAL_DEFAULT = 60*1000; // One minute + // StoragePolicySatisfier (SPS) related configurations + public static final String DFS_STORAGE_POLICY_SATISFIER_MODE_KEY = + "dfs.storage.policy.satisfier.mode"; + public static final String DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT = + StoragePolicySatisfierMode.NONE.toString(); + public static final String DFS_STORAGE_POLICY_SATISFIER_QUEUE_LIMIT_KEY = + "dfs.storage.policy.satisfier.queue.limit"; + public static final int DFS_STORAGE_POLICY_SATISFIER_QUEUE_LIMIT_DEFAULT = + 1000; + public static final String DFS_SPS_WORK_MULTIPLIER_PER_ITERATION = + "dfs.storage.policy.satisfier.work.multiplier.per.iteration"; + public static final int DFS_SPS_WORK_MULTIPLIER_PER_ITERATION_DEFAULT = + 1; + public static final String DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY = + "dfs.storage.policy.satisfier.recheck.timeout.millis"; + public static final int DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_DEFAULT = + 1 * 60 * 1000; + public static final String DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY = + "dfs.storage.policy.satisfier.self.retry.timeout.millis"; + public static final int DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_DEFAULT = + 5 * 60 * 1000; + public static final String DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_KEY = + "dfs.storage.policy.satisfier.retry.max.attempts"; + public static final int DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_DEFAULT = + 3; + public static final String DFS_SPS_MAX_OUTSTANDING_PATHS_KEY = + "dfs.storage.policy.satisfier.max.outstanding.paths"; + public static final int DFS_SPS_MAX_OUTSTANDING_PATHS_DEFAULT = 10000; + // SPS datanode cache config, defaulting to 5mins. + public static final String DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS = + "dfs.storage.policy.satisfier.datanode.cache.refresh.interval.ms"; + public static final long DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS_DEFAULT = + 300000L; + + // SPS keytab configurations, by default it is disabled. 
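The constants above introduce the StoragePolicySatisfier (SPS) configuration keys; the keytab and principal keys continue directly below. As a hedged sketch only, not something this patch contains, the keys could be wired into a Configuration for an external satisfier roughly as follows (all values are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode;

public class ExternalSpsConfSketch {
  public static Configuration build() {
    Configuration conf = new HdfsConfiguration();
    // Run the satisfier outside the NameNode; the "hdfs sps" daemon subcommand
    // is added to bin/hdfs earlier in this patch.
    conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY,
        StoragePolicySatisfierMode.EXTERNAL.toString());
    // Queue and retry tuning; these just restate the defaults defined above.
    conf.setInt(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_QUEUE_LIMIT_KEY, 1000);
    conf.setInt(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_KEY, 3);
    // Kerberos settings for the external daemon (placeholder keytab and principal).
    conf.set(DFSConfigKeys.DFS_SPS_KEYTAB_FILE_KEY, "/etc/security/keytabs/sps.keytab");
    conf.set(DFSConfigKeys.DFS_SPS_KERBEROS_PRINCIPAL_KEY, "sps/_HOST@EXAMPLE.COM");
    return conf;
  }
}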
+ public static final String DFS_SPS_ADDRESS_KEY = + "dfs.storage.policy.satisfier.address"; + public static final String DFS_SPS_ADDRESS_DEFAULT= "0.0.0.0:0"; + public static final String DFS_SPS_KEYTAB_FILE_KEY = + "dfs.storage.policy.satisfier.keytab.file"; + public static final String DFS_SPS_KERBEROS_PRINCIPAL_KEY = + "dfs.storage.policy.satisfier.kerberos.principal"; + public static final String DFS_DATANODE_ADDRESS_KEY = "dfs.datanode.address"; public static final int DFS_DATANODE_DEFAULT_PORT = 9866; public static final String DFS_DATANODE_ADDRESS_DEFAULT = "0.0.0.0:" + DFS_DATANODE_DEFAULT_PORT; @@ -1028,6 +1086,8 @@ public static final String DFS_QJOURNAL_GET_JOURNAL_STATE_TIMEOUT_KEY = "dfs.qjournal.get-journal-state.timeout.ms"; public static final String DFS_QJOURNAL_NEW_EPOCH_TIMEOUT_KEY = "dfs.qjournal.new-epoch.timeout.ms"; public static final String DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_KEY = "dfs.qjournal.write-txns.timeout.ms"; + public static final String DFS_QJOURNAL_HTTP_OPEN_TIMEOUT_KEY = "dfs.qjournal.http.open.timeout.ms"; + public static final String DFS_QJOURNAL_HTTP_READ_TIMEOUT_KEY = "dfs.qjournal.http.read.timeout.ms"; public static final int DFS_QJOURNAL_START_SEGMENT_TIMEOUT_DEFAULT = 20000; public static final int DFS_QJOURNAL_PREPARE_RECOVERY_TIMEOUT_DEFAULT = 120000; public static final int DFS_QJOURNAL_ACCEPT_RECOVERY_TIMEOUT_DEFAULT = 120000; @@ -1036,6 +1096,8 @@ public static final int DFS_QJOURNAL_GET_JOURNAL_STATE_TIMEOUT_DEFAULT = 120000; public static final int DFS_QJOURNAL_NEW_EPOCH_TIMEOUT_DEFAULT = 120000; public static final int DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_DEFAULT = 20000; + public static final int DFS_QJOURNAL_HTTP_OPEN_TIMEOUT_DEFAULT = URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT; + public static final int DFS_QJOURNAL_HTTP_READ_TIMEOUT_DEFAULT = URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT; public static final String DFS_MAX_NUM_BLOCKS_TO_LOG_KEY = "dfs.namenode.max-num-blocks-to-log"; public static final long DFS_MAX_NUM_BLOCKS_TO_LOG_DEFAULT = 1000l; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index f7cd32b5585..6dd366f6504 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -64,8 +64,8 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -99,7 +99,8 @@ @InterfaceAudience.Private public class DFSUtil { - public static final Log LOG = LogFactory.getLog(DFSUtil.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(DFSUtil.class.getName()); private DFSUtil() { /* Hidden constructor */ } @@ -1455,7 +1456,7 @@ public static int getReplWorkMultiplier(Configuration conf) { "It should be a positive, non-zero integer value."); return blocksReplWorkMultiplier; } - + /** * Get SPNEGO keytab Key from configuration * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsDtFetcher.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsDtFetcher.java index 02aa4b98f31..4fcc319e8c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsDtFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsDtFetcher.java @@ -21,8 +21,8 @@ import java.io.IOException; import java.net.URI; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -40,7 +40,8 @@ * compilation units. Resolution of fetcher impl will be done at runtime. */ public class HdfsDtFetcher implements DtFetcher { - private static final Log LOG = LogFactory.getLog(HdfsDtFetcher.class); + private static final Logger LOG = + LoggerFactory.getLogger(HdfsDtFetcher.class); private static final String SERVICE_NAME = HdfsConstants.HDFS_URI_SCHEME; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index b63d26b85ae..bb555ef2592 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -25,8 +25,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo; @@ -73,7 +73,8 @@ @InterfaceAudience.Private public class NameNodeProxies { - private static final Log LOG = LogFactory.getLog(NameNodeProxies.class); + private static final Logger LOG = + LoggerFactory.getLogger(NameNodeProxies.class); /** * Creates the namenode proxy with the passed protocol. This will handle diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SWebHdfsDtFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SWebHdfsDtFetcher.java index 46f9b007699..18dd720f621 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SWebHdfsDtFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SWebHdfsDtFetcher.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.web.WebHdfsConstants; import org.apache.hadoop.io.Text; @@ -28,7 +28,8 @@ * DtFetcher for SWebHdfsFileSystem using the base class HdfsDtFetcher impl. 
*/ public class SWebHdfsDtFetcher extends HdfsDtFetcher { - private static final Log LOG = LogFactory.getLog(SWebHdfsDtFetcher.class); + private static final Logger LOG = + LoggerFactory.getLogger(SWebHdfsDtFetcher.class); private static final String SERVICE_NAME = WebHdfsConstants.SWEBHDFS_SCHEME; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/WebHdfsDtFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/WebHdfsDtFetcher.java index c2bb8522e84..e8ef5d734e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/WebHdfsDtFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/WebHdfsDtFetcher.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.web.WebHdfsConstants; import org.apache.hadoop.io.Text; @@ -28,7 +28,8 @@ * DtFetcher for WebHdfsFileSystem using the base class HdfsDtFetcher impl. */ public class WebHdfsDtFetcher extends HdfsDtFetcher { - private static final Log LOG = LogFactory.getLog(WebHdfsDtFetcher.class); + private static final Logger LOG = + LoggerFactory.getLogger(WebHdfsDtFetcher.class); private static final String SERVICE_NAME = WebHdfsConstants.WEBHDFS_SCHEME; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java index 5425bd5af5a..5d881d09385 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java @@ -21,14 +21,14 @@ import java.io.IOException; import java.net.SocketTimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.net.unix.DomainSocket; @InterfaceAudience.Private public class DomainPeerServer implements PeerServer { - static final Log LOG = LogFactory.getLog(DomainPeerServer.class); + static final Logger LOG = LoggerFactory.getLogger(DomainPeerServer.class); private final DomainSocket sock; DomainPeerServer(DomainSocket sock) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java index 40d2b330def..9fc6692b4aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java @@ -23,8 +23,8 @@ import java.net.SocketTimeoutException; import java.nio.channels.ServerSocketChannel; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources; @@ -32,7 +32,7 @@ @InterfaceAudience.Private public class TcpPeerServer implements PeerServer { - static final Log LOG = LogFactory.getLog(TcpPeerServer.class); + static final Logger LOG = 
LoggerFactory.getLogger(TcpPeerServer.class); private final ServerSocket serverSocket; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index ac46d5291ee..e51529e2b1b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -225,6 +225,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdateBlockForPipelineResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdatePipelineRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdatePipelineResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SatisfyStoragePolicyRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SatisfyStoragePolicyResponseProto; import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.CreateEncryptionZoneResponseProto; import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.CreateEncryptionZoneRequestProto; import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.GetEZForPathResponseProto; @@ -253,7 +255,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ErasureCodingProtos.UnsetErasureCodingPolicyResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ErasureCodingProtos.GetErasureCodingCodecsRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ErasureCodingProtos.GetErasureCodingCodecsResponseProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos; +import org.apache.hadoop.hdfs.protocol.proto.*; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockStoragePolicyProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeIDProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeInfoProto; @@ -407,6 +409,10 @@ private static final CheckAccessResponseProto VOID_CHECKACCESS_RESPONSE = CheckAccessResponseProto.getDefaultInstance(); + private static final SatisfyStoragePolicyResponseProto + VOID_SATISFYSTORAGEPOLICY_RESPONSE = SatisfyStoragePolicyResponseProto + .getDefaultInstance(); + /** * Constructor * @@ -1886,4 +1892,16 @@ public ListOpenFilesResponseProto listOpenFiles(RpcController controller, throw new ServiceException(e); } } + + @Override + public SatisfyStoragePolicyResponseProto satisfyStoragePolicy( + RpcController controller, + SatisfyStoragePolicyRequestProto request) throws ServiceException { + try { + server.satisfyStoragePolicy(request.getSrc()); + } catch (IOException e) { + throw new ServiceException(e); + } + return VOID_SATISFYSTORAGEPOLICY_RESPONSE; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java index 9cc45168835..e4125dc9e41 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java @@ 
-138,7 +138,8 @@ public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration, VolumeFailureSummary volumeFailureSummary, boolean requestFullBlockReportLease, @Nonnull SlowPeerReports slowPeers, - @Nonnull SlowDiskReports slowDisks) throws IOException { + @Nonnull SlowDiskReports slowDisks) + throws IOException { HeartbeatRequestProto.Builder builder = HeartbeatRequestProto.newBuilder() .setRegistration(PBHelper.convert(registration)) .setXmitsInProgress(xmitsInProgress).setXceiverCount(xceiverCount) @@ -161,6 +162,7 @@ public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration, if (slowDisks.haveSlowDisks()) { builder.addAllSlowDisks(PBHelper.convertSlowDiskInfo(slowDisks)); } + HeartbeatResponseProto resp; try { resp = rpcProxy.sendHeartbeat(NULL_CONTROLLER, builder.build()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InMemoryAliasMapProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InMemoryAliasMapProtocolClientSideTranslatorPB.java index 2025c16d1c3..d9e984b45c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InMemoryAliasMapProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/InMemoryAliasMapProtocolClientSideTranslatorPB.java @@ -167,6 +167,9 @@ public InMemoryAliasMapProtocolClientSideTranslatorPB( public Optional read(@Nonnull Block block) throws IOException { + if (block == null) { + throw new IOException("Block cannot be null"); + } ReadRequestProto request = ReadRequestProto .newBuilder() @@ -191,6 +194,9 @@ public InMemoryAliasMapProtocolClientSideTranslatorPB( public void write(@Nonnull Block block, @Nonnull ProvidedStorageLocation providedStorageLocation) throws IOException { + if (block == null || providedStorageLocation == null) { + throw new IOException("Provided block and location cannot be null"); + } WriteRequestProto request = WriteRequestProto .newBuilder() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java index 90c2c492800..d9367fb2263 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java @@ -35,6 +35,8 @@ import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetEditLogManifestResponseProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetNextSPSPathRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetNextSPSPathResponseProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdResponseProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.IsRollingUpgradeRequestProto; @@ -257,4 +259,20 @@ public IsRollingUpgradeResponseProto isRollingUpgrade( return 
IsRollingUpgradeResponseProto.newBuilder() .setIsRollingUpgrade(isRollingUpgrade).build(); } + + @Override + public GetNextSPSPathResponseProto getNextSPSPath( + RpcController controller, GetNextSPSPathRequestProto request) + throws ServiceException { + try { + Long nextSPSPath = impl.getNextSPSPath(); + if (nextSPSPath == null) { + return GetNextSPSPathResponseProto.newBuilder().build(); + } + return GetNextSPSPathResponseProto.newBuilder().setSpsPath(nextSPSPath) + .build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java index 632f8b7d741..3bd5986de62 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java @@ -33,6 +33,8 @@ import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetBlocksRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetEditLogManifestRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetNextSPSPathRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetNextSPSPathResponseProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.IsRollingUpgradeRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.IsRollingUpgradeResponseProto; @@ -263,4 +265,17 @@ public boolean isRollingUpgrade() throws IOException { throw ProtobufHelper.getRemoteException(e); } } + + @Override + public Long getNextSPSPath() throws IOException { + GetNextSPSPathRequestProto req = + GetNextSPSPathRequestProto.newBuilder().build(); + try { + GetNextSPSPathResponseProto nextSPSPath = + rpcProxy.getNextSPSPath(NULL_CONTROLLER, req); + return nextSPSPath.hasSpsPath() ? nextSPSPath.getSpsPath() : null; + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java index d2b48ccec53..26337237119 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java @@ -89,8 +89,9 @@ AsyncLogger createLogger(Configuration conf, NamespaceInfo nsInfo, /** * Format the log directory. * @param nsInfo the namespace info to format with + * @param force the force option to format */ - public ListenableFuture format(NamespaceInfo nsInfo); + public ListenableFuture format(NamespaceInfo nsInfo, boolean force); /** * @return whether or not the remote node has any valid data. 
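Many of the surrounding hunks repeat the same mechanical change: dropping commons-logging in favour of slf4j, and mapping the now-missing LOG.fatal(...) and IOUtils.cleanup(Log, ...) calls onto LOG.error(...) and IOUtils.cleanupWithLogger(...). A minimal sketch of the pattern (the class name is illustrative, not from the patch):

import java.io.Closeable;
import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggingMigrationSketch {
  // Before: private static final Log LOG = LogFactory.getLog(...);
  private static final Logger LOG =
      LoggerFactory.getLogger(LoggingMigrationSketch.class);

  void closeQuietly(Closeable resource) {
    // IOUtils.cleanup(Log, Closeable...) has no slf4j overload,
    // so the patch switches callers to cleanupWithLogger.
    IOUtils.cleanupWithLogger(LOG, resource);
  }

  void reportFatal(String path) {
    // slf4j's Logger has no fatal(); the patch downgrades those calls to error().
    LOG.error("Error reported on file " + path + "... exiting", new Exception());
  }
}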
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java index d46c2cf790f..6302b2ac307 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java @@ -23,8 +23,8 @@ import java.util.Map; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto; @@ -46,7 +46,7 @@ * {@link QuorumCall} instances. */ class AsyncLoggerSet { - static final Log LOG = LogFactory.getLog(AsyncLoggerSet.class); + static final Logger LOG = LoggerFactory.getLogger(AsyncLoggerSet.class); private final List loggers; @@ -299,12 +299,12 @@ void appendReport(StringBuilder sb) { return QuorumCall.create(calls); } - QuorumCall format(NamespaceInfo nsInfo) { + QuorumCall format(NamespaceInfo nsInfo, boolean force) { Map> calls = Maps.newHashMap(); for (AsyncLogger logger : loggers) { ListenableFuture future = - logger.format(nsInfo); + logger.format(nsInfo, force); calls.put(logger, future); } return QuorumCall.create(calls); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java index 30367357b82..4fca1bb0ad0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java @@ -502,11 +502,12 @@ private synchronized void unreserveQueueSpace(int size) { } @Override - public ListenableFuture format(final NamespaceInfo nsInfo) { + public ListenableFuture format(final NamespaceInfo nsInfo, + final boolean force) { return singleThreadExecutor.submit(new Callable() { @Override public Void call() throws Exception { - getProxy().format(journalId, nameServiceId, nsInfo); + getProxy().format(journalId, nameServiceId, nsInfo, force); return null; } }); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java index 7a70a3de334..ba2b20a7bbc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java @@ -30,8 +30,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -65,7 
+65,7 @@ */ @InterfaceAudience.Private public class QuorumJournalManager implements JournalManager { - static final Log LOG = LogFactory.getLog(QuorumJournalManager.class); + static final Logger LOG = LoggerFactory.getLogger(QuorumJournalManager.class); // Timeouts for which the QJM will wait for each of the following actions. private final int startSegmentTimeoutMs; @@ -124,8 +124,6 @@ public QuorumJournalManager(Configuration conf, this.nsInfo = nsInfo; this.nameServiceId = nameServiceId; this.loggers = new AsyncLoggerSet(createLoggers(loggerFactory)); - this.connectionFactory = URLConnectionFactory - .newDefaultURLConnectionFactory(conf); // Configure timeouts. this.startSegmentTimeoutMs = conf.getInt( @@ -156,6 +154,15 @@ public QuorumJournalManager(Configuration conf, .DFS_QJM_OPERATIONS_TIMEOUT, DFSConfigKeys.DFS_QJM_OPERATIONS_TIMEOUT_DEFAULT, TimeUnit .MILLISECONDS); + + int connectTimeoutMs = conf.getInt( + DFSConfigKeys.DFS_QJOURNAL_HTTP_OPEN_TIMEOUT_KEY, + DFSConfigKeys.DFS_QJOURNAL_HTTP_OPEN_TIMEOUT_DEFAULT); + int readTimeoutMs = conf.getInt( + DFSConfigKeys.DFS_QJOURNAL_HTTP_READ_TIMEOUT_KEY, + DFSConfigKeys.DFS_QJOURNAL_HTTP_READ_TIMEOUT_DEFAULT); + this.connectionFactory = URLConnectionFactory + .newDefaultURLConnectionFactory(connectTimeoutMs, readTimeoutMs, conf); } protected List createLoggers( @@ -213,8 +220,8 @@ public static void checkJournalId(String jid) { } @Override - public void format(NamespaceInfo nsInfo) throws IOException { - QuorumCall call = loggers.format(nsInfo); + public void format(NamespaceInfo nsInfo, boolean force) throws IOException { + QuorumCall call = loggers.format(nsInfo, force); try { call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs, "format"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java index 5558bd54721..8dad26104c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java @@ -68,7 +68,7 @@ GetJournalStateResponseProto getJournalState(String journalId, * Format the underlying storage for the given namespace. */ void format(String journalId, String nameServiceId, - NamespaceInfo nsInfo) throws IOException; + NamespaceInfo nsInfo, boolean force) throws IOException; /** * Begin a new epoch. See the HDFS-3077 design doc for details. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java index 865d2969220..2ad19da0960 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java @@ -147,7 +147,7 @@ public FormatResponseProto format(RpcController controller, try { impl.format(request.getJid().getIdentifier(), request.hasNameServiceId() ? 
request.getNameServiceId() : null, - PBHelper.convert(request.getNsInfo())); + PBHelper.convert(request.getNsInfo()), request.getForce()); return FormatResponseProto.getDefaultInstance(); } catch (IOException ioe) { throw new ServiceException(ioe); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java index d7cd7b55811..42d35f57fb5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java @@ -136,11 +136,13 @@ private JournalIdProto convertJournalId(String jid) { @Override public void format(String jid, String nameServiceId, - NamespaceInfo nsInfo) throws IOException { + NamespaceInfo nsInfo, + boolean force) throws IOException { try { FormatRequestProto.Builder req = FormatRequestProto.newBuilder() .setJid(convertJournalId(jid)) - .setNsInfo(PBHelper.convert(nsInfo)); + .setNsInfo(PBHelper.convert(nsInfo)) + .setForce(force); if(nameServiceId != null) { req.setNameServiceId(nameServiceId); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java index 64ac11ca234..81b3f8c1a1f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java @@ -31,9 +31,9 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.apache.commons.lang3.StringEscapeUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.commons.text.StringEscapeUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -67,7 +67,8 @@ public class GetJournalEditServlet extends HttpServlet { private static final long serialVersionUID = -4635891628211723009L; - private static final Log LOG = LogFactory.getLog(GetJournalEditServlet.class); + private static final Logger LOG = + LoggerFactory.getLogger(GetJournalEditServlet.class); static final String STORAGEINFO_PARAM = "storageInfo"; static final String JOURNAL_ID_PARAM = "jid"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java index 6bf4903b1ec..612fd3d19f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java @@ -204,10 +204,10 @@ private static void purgeMatching(File dir, List patterns, } } - void format(NamespaceInfo nsInfo) throws IOException { + void format(NamespaceInfo nsInfo, boolean force) throws IOException { unlockAll(); try { - sd.analyzeStorage(StartupOption.FORMAT, this, true); + sd.analyzeStorage(StartupOption.FORMAT, 
this, !force); } finally { sd.unlock(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java index 8f25d260b64..39afabc0792 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java @@ -32,8 +32,8 @@ import java.util.concurrent.TimeUnit; import org.apache.commons.lang3.Range; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.qjournal.protocol.JournalNotFormattedException; @@ -79,7 +79,7 @@ * the same JVM. */ public class Journal implements Closeable { - static final Log LOG = LogFactory.getLog(Journal.class); + static final Logger LOG = LoggerFactory.getLogger(Journal.class); // Current writing state @@ -227,13 +227,13 @@ private synchronized EditLogFile scanStorageForLatestEdits() throws IOException /** * Format the local storage with the given namespace. */ - void format(NamespaceInfo nsInfo) throws IOException { + void format(NamespaceInfo nsInfo, boolean force) throws IOException { Preconditions.checkState(nsInfo.getNamespaceID() != 0, "can't format with uninitialized namespace info: %s", nsInfo); LOG.info("Formatting journal id : " + journalId + " with namespace info: " + - nsInfo); - storage.format(nsInfo); + nsInfo + " and force: " + force); + storage.format(nsInfo, force); refreshCachedData(); } @@ -1045,7 +1045,7 @@ private void persistPaxosData(long segmentTxId, public synchronized void doPreUpgrade() throws IOException { // Do not hold file lock on committedTxnId, because the containing // directory will be renamed. It will be reopened lazily on next access. - IOUtils.cleanup(LOG, committedTxnId); + IOUtils.cleanupWithLogger(LOG, committedTxnId); storage.getJournalManager().doPreUpgrade(); } @@ -1087,7 +1087,7 @@ public synchronized void doUpgrade(StorageInfo sInfo) throws IOException { lastWriterEpoch.set(prevLastWriterEpoch.get()); committedTxnId.set(prevCommittedTxnId.get()); } finally { - IOUtils.cleanup(LOG, prevCommittedTxnId); + IOUtils.cleanupWithLogger(LOG, prevCommittedTxnId); } } @@ -1109,7 +1109,7 @@ public Boolean canRollBack(StorageInfo storage, StorageInfo prevStorage, public synchronized void doRollback() throws IOException { // Do not hold file lock on committedTxnId, because the containing // directory will be renamed. It will be reopened lazily on next access. 
- IOUtils.cleanup(LOG, committedTxnId); + IOUtils.cleanupWithLogger(LOG, committedTxnId); storage.getJournalManager().doRollback(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java index 11a5c0451e5..3df69f1448a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java @@ -21,8 +21,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; @@ -68,7 +68,7 @@ */ @InterfaceAudience.Private public class JournalNode implements Tool, Configurable, JournalNodeMXBean { - public static final Log LOG = LogFactory.getLog(JournalNode.class); + public static final Logger LOG = LoggerFactory.getLogger(JournalNode.class); private Configuration conf; private JournalNodeRpcServer rpcServer; private JournalNodeHttpServer httpServer; @@ -285,7 +285,7 @@ public void stop(int rc) { } for (Journal j : journalsById.values()) { - IOUtils.cleanup(LOG, j); + IOUtils.cleanupWithLogger(LOG, j); } DefaultMetricsSystem.shutdown(); @@ -403,7 +403,7 @@ private void registerJNMXBean() { private class ErrorReporter implements StorageErrorReporter { @Override public void reportErrorOnFile(File f) { - LOG.fatal("Error reported on file " + f + "... exiting", + LOG.error("Error reported on file " + f + "... 
exiting", new Exception()); stop(1); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java index b1a3c9665d7..bfa9a222e4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java @@ -19,7 +19,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.BlockingService; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -61,7 +61,7 @@ @VisibleForTesting public class JournalNodeRpcServer implements QJournalProtocol, InterQJournalProtocol { - private static final Log LOG = JournalNode.LOG; + private static final Logger LOG = JournalNode.LOG; private static final int HANDLER_COUNT = 5; private final JournalNode jn; private Server server; @@ -176,9 +176,10 @@ public NewEpochResponseProto newEpoch(String journalId, @Override public void format(String journalId, String nameServiceId, - NamespaceInfo nsInfo) + NamespaceInfo nsInfo, + boolean force) throws IOException { - jn.getOrCreateJournal(journalId, nameServiceId).format(nsInfo); + jn.getOrCreateJournal(journalId, nameServiceId).format(nsInfo, force); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java index 3d0cf34e595..21fbbe41d86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java @@ -29,8 +29,8 @@ import java.util.Map; import org.apache.commons.lang3.ArrayUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -58,7 +58,8 @@ @InterfaceAudience.Private public class BlockTokenSecretManager extends SecretManager { - public static final Log LOG = LogFactory.getLog(BlockTokenSecretManager.class); + public static final Logger LOG = + LoggerFactory.getLogger(BlockTokenSecretManager.class); public static final Token DUMMY_TOKEN = new Token(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index 3547c96a80f..ca0e6434597 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -27,8 +27,8 @@ import java.util.List; import java.util.Map.Entry; -import org.apache.commons.logging.Log; -import 
org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; @@ -62,8 +62,8 @@ public class DelegationTokenSecretManager extends AbstractDelegationTokenSecretManager { - private static final Log LOG = LogFactory - .getLog(DelegationTokenSecretManager.class); + private static final Logger LOG = LoggerFactory + .getLogger(DelegationTokenSecretManager.class); private final FSNamesystem namesystem; private final SerializerCompat serializerCompat = new SerializerCompat(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java index 1d06f132854..5c56736be43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/aliasmap/InMemoryLevelDBAliasMapServer.java @@ -39,6 +39,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROVIDED_ALIASMAP_INMEMORY_RPC_ADDRESS; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROVIDED_ALIASMAP_INMEMORY_RPC_ADDRESS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROVIDED_ALIASMAP_INMEMORY_RPC_BIND_HOST; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROVIDED_ALIASMAP_INMEMORY_SERVER_LOG; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROVIDED_ALIASMAP_INMEMORY_SERVER_LOG_DEFAULT; import static org.apache.hadoop.hdfs.DFSUtil.getBindAddress; import static org.apache.hadoop.hdfs.protocol.proto.AliasMapProtocolProtos.*; import static org.apache.hadoop.hdfs.server.aliasmap.InMemoryAliasMap.CheckedFunction2; @@ -87,13 +89,17 @@ public void start() throws IOException { DFS_PROVIDED_ALIASMAP_INMEMORY_RPC_ADDRESS_DEFAULT, DFS_PROVIDED_ALIASMAP_INMEMORY_RPC_BIND_HOST); + boolean setVerbose = conf.getBoolean( + DFS_PROVIDED_ALIASMAP_INMEMORY_SERVER_LOG, + DFS_PROVIDED_ALIASMAP_INMEMORY_SERVER_LOG_DEFAULT); + aliasMapServer = new RPC.Builder(conf) .setProtocol(AliasMapProtocolPB.class) .setInstance(aliasMapProtocolService) .setBindAddress(rpcAddress.getHostName()) .setPort(rpcAddress.getPort()) .setNumHandlers(1) - .setVerbose(true) + .setVerbose(setVerbose) .build(); LOG.info("Starting InMemoryLevelDBAliasMapServer on {}", rpcAddress); @@ -144,11 +150,15 @@ public Configuration getConf() { public void close() { LOG.info("Stopping InMemoryLevelDBAliasMapServer"); try { - aliasMap.close(); + if (aliasMap != null) { + aliasMap.close(); + } } catch (IOException e) { LOG.error(e.getMessage()); } - aliasMapServer.stop(); + if (aliasMapServer != null) { + aliasMapServer.stop(); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index 426c7ab0749..a58e391b016 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -36,8 +36,8 @@ import java.util.Set; import java.util.concurrent.TimeUnit; -import 
org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -172,7 +172,7 @@ @InterfaceAudience.Private public class Balancer { - static final Log LOG = LogFactory.getLog(Balancer.class); + static final Logger LOG = LoggerFactory.getLogger(Balancer.class); static final Path BALANCER_ID_PATH = new Path("/system/balancer.id"); @@ -724,7 +724,7 @@ static int run(Collection namenodes, final BalancerParameters p, } } finally { for(NameNodeConnector nnc : connectors) { - IOUtils.cleanup(LOG, nnc); + IOUtils.cleanupWithLogger(LOG, nnc); } } return ExitStatus.SUCCESS.getExitCode(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java index 060c013e37d..8a71417befd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java @@ -44,8 +44,8 @@ import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -88,7 +88,7 @@ /** Dispatching block replica moves between datanodes. */ @InterfaceAudience.Private public class Dispatcher { - static final Log LOG = LogFactory.getLog(Dispatcher.class); + static final Logger LOG = LoggerFactory.getLogger(Dispatcher.class); /** * the period of time to delay the usage of a DataNode after hitting diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index b0dd7790850..114167ca444 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -31,8 +31,8 @@ import java.util.concurrent.atomic.AtomicLong; import com.google.common.base.Preconditions; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -62,11 +62,13 @@ */ @InterfaceAudience.Private public class NameNodeConnector implements Closeable { - private static final Log LOG = LogFactory.getLog(NameNodeConnector.class); + private static final Logger LOG = + LoggerFactory.getLogger(NameNodeConnector.class); public static final int DEFAULT_MAX_IDLE_ITERATIONS = 5; private static boolean write2IdFile = true; - + private static boolean checkOtherInstanceRunning = true; + /** Create {@link NameNodeConnector} for the given namenodes. 
*/ public static List newNameNodeConnectors( Collection namenodes, String name, Path idPath, Configuration conf, @@ -101,6 +103,11 @@ public static void setWrite2IdFile(boolean write2IdFile) { NameNodeConnector.write2IdFile = write2IdFile; } + @VisibleForTesting + public static void checkOtherInstanceRunning(boolean toCheck) { + NameNodeConnector.checkOtherInstanceRunning = toCheck; + } + private final URI nameNodeUri; private final String blockpoolID; @@ -111,7 +118,7 @@ public static void setWrite2IdFile(boolean write2IdFile) { private final DistributedFileSystem fs; private final Path idPath; - private final OutputStream out; + private OutputStream out; private final List targetPaths; private final AtomicLong bytesMoved = new AtomicLong(); @@ -141,10 +148,12 @@ public NameNodeConnector(String name, URI nameNodeUri, Path idPath, this.keyManager = new KeyManager(blockpoolID, namenode, defaults.getEncryptDataTransfer(), conf); // if it is for test, we do not create the id file - out = checkAndMarkRunning(); - if (out == null) { - // Exit if there is another one running. - throw new IOException("Another " + name + " is running."); + if (checkOtherInstanceRunning) { + out = checkAndMarkRunning(); + if (out == null) { + // Exit if there is another one running. + throw new IOException("Another " + name + " is running."); + } } } @@ -269,6 +278,14 @@ private OutputStream checkAndMarkRunning() throws IOException { } } + /** + * Returns fallbackToSimpleAuth. This will be true or false during calls to + * indicate if a secure client falls back to simple auth. + */ + public AtomicBoolean getFallbackToSimpleAuth() { + return fallbackToSimpleAuth; + } + @Override public void close() { keyManager.close(); @@ -277,13 +294,19 @@ public void close() { IOUtils.closeStream(out); if (fs != null) { try { - fs.delete(idPath, true); + if (checkOtherInstanceRunning) { + fs.delete(idPath, true); + } } catch(IOException ioe) { LOG.warn("Failed to delete " + idPath, ioe); } } } + public NamenodeProtocol getNNProtocolConnection() { + return this.namenode; + } + @Override public String toString() { return getClass().getSimpleName() + "[namenodeUri=" + nameNodeUri diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java index 8435b46e7a0..8f76e8bbcf4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/AvailableSpaceBlockPlacementPolicy.java @@ -25,8 +25,8 @@ import java.util.Random; import com.google.common.base.Preconditions; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -39,8 +39,8 @@ */ public class AvailableSpaceBlockPlacementPolicy extends BlockPlacementPolicyDefault { - private static final Log LOG = LogFactory - .getLog(AvailableSpaceBlockPlacementPolicy.class); + private static final Logger LOG = LoggerFactory + .getLogger(AvailableSpaceBlockPlacementPolicy.class); private static final Random RAND = new Random(); private int balancedPreference = (int) (100 * 
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java index 111ade10bc3..d160f61fc8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java @@ -52,7 +52,7 @@ /** * Block collection ID. */ - private long bcId; + private volatile long bcId; /** For implementing {@link LightWeightGSet.LinkedElement} interface. */ private LightWeightGSet.LinkedElement nextLinkedElement; @@ -262,6 +262,10 @@ public boolean isComplete() { return getBlockUCState().equals(BlockUCState.COMPLETE); } + public boolean isUnderRecovery() { + return getBlockUCState().equals(BlockUCState.UNDER_RECOVERY); + } + public final boolean isCompleteOrCommitted() { final BlockUCState state = getBlockUCState(); return state.equals(BlockUCState.COMPLETE) || diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java index 790cd77de97..8bc63c1214d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java @@ -244,6 +244,10 @@ final boolean hasNoStorage() { return true; } + /** + * This class contains datanode storage information and block index in the + * block group. + */ public static class StorageAndBlockIndex { private final DatanodeStorageInfo storage; private final byte blockIndex; @@ -253,10 +257,16 @@ final boolean hasNoStorage() { this.blockIndex = blockIndex; } + /** + * @return storage in the datanode. + */ public DatanodeStorageInfo getStorage() { return storage; } + /** + * @return block index in the block group. 
+ */ public byte getBlockIndex() { return blockIndex; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 72ea1c06926..675221a1ec5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -47,6 +47,7 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; + import javax.management.ObjectName; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -68,6 +69,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock; @@ -91,6 +93,7 @@ import org.apache.hadoop.hdfs.server.namenode.Namesystem; import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfyManager; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlockReportContext; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; @@ -425,6 +428,12 @@ public long getTotalECBlockGroups() { private final BlockIdManager blockIdManager; + /** + * For satisfying block storage policies. Instantiates if sps is enabled + * internally or externally. + */ + private StoragePolicySatisfyManager spsManager; + /** Minimum live replicas needed for the datanode to be transitioned * from ENTERING_MAINTENANCE to IN_MAINTENANCE. */ @@ -464,6 +473,8 @@ public BlockManager(final Namesystem namesystem, boolean haEnabled, DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_DEFAULT) * 1000L); + createSPSManager(conf); + blockTokenSecretManager = createBlockTokenSecretManager(conf); providedStorageMap = new ProvidedStorageMap(namesystem, this, conf); @@ -691,6 +702,9 @@ public void activate(Configuration conf, long blockTotal) { } public void close() { + if (getSPSManager() != null) { + getSPSManager().stop(); + } bmSafeMode.close(); try { redundancyThread.interrupt(); @@ -971,6 +985,10 @@ public boolean commitOrCompleteLastBlock(BlockCollection bc, return false; // no blocks in file yet if(lastBlock.isComplete()) return false; // already completed (e.g. 
by syncBlock) + if(lastBlock.isUnderRecovery()) { + throw new IOException("Commit or complete block " + commitBlock + + ", whereas it is under recovery."); + } final boolean committed = commitBlock(lastBlock, commitBlock); if (committed && lastBlock.isStriped()) { @@ -4153,6 +4171,10 @@ void processExtraRedundancyBlocksOnInService( int numExtraRedundancy = 0; while(it.hasNext()) { final BlockInfo block = it.next(); + if (block.isDeleted()) { + //Orphan block, will be handled eventually, skip + continue; + } int expectedReplication = this.getExpectedRedundancyNum(block); NumberReplicas num = countNodes(block); if (shouldProcessExtraRedundancy(num, expectedReplication)) { @@ -4428,6 +4450,14 @@ public long getMissingReplOneBlocksCount() { return this.neededReconstruction.getCorruptReplicationOneBlockSize(); } + public long getHighestPriorityReplicatedBlockCount(){ + return this.neededReconstruction.getHighestPriorityReplicatedBlockCount(); + } + + public long getHighestPriorityECBlockCount(){ + return this.neededReconstruction.getHighestPriorityECBlockCount(); + } + public BlockInfo addBlockCollection(BlockInfo block, BlockCollection bc) { return blocksMap.addBlockCollection(block, bc); @@ -4972,4 +5002,62 @@ public void setBlockRecoveryTimeout(long blockRecoveryTimeout) { public ProvidedStorageMap getProvidedStorageMap() { return providedStorageMap; } + + /** + * Create SPS manager instance. It manages the user invoked sps paths and does + * the movement. + * + * @param conf + * configuration + * @return true if the instance is successfully created, false otherwise. + */ + private boolean createSPSManager(final Configuration conf) { + return createSPSManager(conf, null); + } + + /** + * Create SPS manager instance. It manages the user invoked sps paths and does + * the movement. + * + * @param conf + * configuration + * @param spsMode + * satisfier mode + * @return true if the instance is successfully created, false otherwise. + */ + public boolean createSPSManager(final Configuration conf, + final String spsMode) { + // sps manager manages the user invoked sps paths and does the movement. + // StoragePolicySatisfier(SPS) configs + boolean storagePolicyEnabled = conf.getBoolean( + DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY, + DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT); + String modeVal = spsMode; + if (org.apache.commons.lang3.StringUtils.isBlank(modeVal)) { + modeVal = conf.get(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT); + } + StoragePolicySatisfierMode mode = StoragePolicySatisfierMode + .fromString(modeVal); + if (!storagePolicyEnabled || mode == StoragePolicySatisfierMode.NONE) { + LOG.info("Storage policy satisfier is disabled"); + return false; + } + spsManager = new StoragePolicySatisfyManager(conf, namesystem); + return true; + } + + /** + * Nullify SPS manager as this feature is disabled fully. + */ + public void disableSPS() { + spsManager = null; + } + + /** + * @return sps manager. 
+ */ + public StoragePolicySatisfyManager getSPSManager() { + return spsManager; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java index 8de17ef5c37..5a981e96af6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java @@ -418,8 +418,10 @@ boolean leaveSafeMode(boolean force) { } /** - * Increment number of safe blocks if current block has reached minimal - * replication. + * Increment number of safe blocks if the current block is contiguous + * and it has reached minimal replication or + * if the current block is striped and the number of its actual data blocks + * reaches the number of data units specified by the erasure coding policy. * If safe mode is not currently on, this is a no-op. * @param storageNum current number of replicas or number of internal blocks * of a striped block group @@ -433,9 +435,9 @@ synchronized void incrementSafeBlockCount(int storageNum, return; } - final int safe = storedBlock.isStriped() ? + final int safeNumberOfNodes = storedBlock.isStriped() ? ((BlockInfoStriped)storedBlock).getRealDataBlockNum() : safeReplication; - if (storageNum == safe) { + if (storageNum == safeNumberOfNodes) { this.blockSafe++; // Report startup progress only if we haven't completed startup yet. @@ -453,8 +455,10 @@ synchronized void incrementSafeBlockCount(int storageNum, } /** - * Decrement number of safe blocks if current block has fallen below minimal - * replication. + * Decrement number of safe blocks if the current block is contiguous + * and it has just fallen below minimal replication or + * if the current block is striped and its actual data blocks has just fallen + * below the number of data units specified by erasure coding policy. * If safe mode is not currently on, this is a no-op. */ synchronized void decrementSafeBlockCount(BlockInfo b) { @@ -463,9 +467,11 @@ synchronized void decrementSafeBlockCount(BlockInfo b) { return; } + final int safeNumberOfNodes = b.isStriped() ? 
+ ((BlockInfoStriped)b).getRealDataBlockNum() : safeReplication; BlockInfo storedBlock = blockManager.getStoredBlock(b); if (storedBlock.isComplete() && - blockManager.countNodes(b).liveReplicas() == safeReplication - 1) { + blockManager.countNodes(b).liveReplicas() == safeNumberOfNodes - 1) { this.blockSafe--; assert blockSafe >= 0; checkSafeMode(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index c94232fbcb1..d396845d488 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -72,11 +72,11 @@ protected StringBuilder initialValue() { .withInitial(() -> new HashMap()); private enum NodeNotChosenReason { - NOT_IN_SERVICE("the node isn't in service"), + NOT_IN_SERVICE("the node is not in service"), NODE_STALE("the node is stale"), NODE_TOO_BUSY("the node is too busy"), TOO_MANY_NODES_ON_RACK("the rack has too many chosen nodes"), - NOT_ENOUGH_STORAGE_SPACE("no enough storage space to place the block"); + NOT_ENOUGH_STORAGE_SPACE("not enough storage space to place the block"); private final String text; @@ -280,7 +280,9 @@ protected void chooseFavouredNodes(String src, int numOfReplicas, if (avoidLocalNode) { results = new ArrayList<>(chosenStorage); Set excludedNodeCopy = new HashSet<>(excludedNodes); - excludedNodeCopy.add(writer); + if (writer != null) { + excludedNodeCopy.add(writer); + } localNode = chooseTarget(numOfReplicas, writer, excludedNodeCopy, blocksize, maxNodesPerRack, results, avoidStaleNodes, storagePolicy, @@ -910,6 +912,24 @@ private static void logNodeIsNotChosen(DatanodeDescriptor node, reasonMap.put(reason, base + 1); } + /** + * Determine if a datanode should be chosen based on current workload. + * + * @param node The target datanode + * @return Return true if the datanode should be excluded, otherwise false + */ + boolean excludeNodeByLoad(DatanodeDescriptor node){ + final double maxLoad = considerLoadFactor * + stats.getInServiceXceiverAverage(); + final int nodeLoad = node.getXceiverCount(); + if ((nodeLoad > maxLoad) && (maxLoad > 0)) { + logNodeIsNotChosen(node, NodeNotChosenReason.NODE_TOO_BUSY, + "(load: " + nodeLoad + " > " + maxLoad + ")"); + return true; + } + return false; + } + /** * Determine if a datanode is good for placing block. * @@ -921,7 +941,7 @@ private static void logNodeIsNotChosen(DatanodeDescriptor node, * @param results A list containing currently chosen nodes. Used to check if * too many nodes has been chosen in the target rack. 
* @param avoidStaleNodes Whether or not to avoid choosing stale nodes - * @return Reture true if the datanode is good candidate, otherwise false + * @return Return true if the datanode is good candidate, otherwise false */ boolean isGoodDatanode(DatanodeDescriptor node, int maxTargetPerRack, boolean considerLoad, @@ -941,13 +961,8 @@ boolean isGoodDatanode(DatanodeDescriptor node, } // check the communication traffic of the target machine - if (considerLoad) { - final double maxLoad = considerLoadFactor * - stats.getInServiceXceiverAverage(); - final int nodeLoad = node.getXceiverCount(); - if (nodeLoad > maxLoad) { - logNodeIsNotChosen(node, NodeNotChosenReason.NODE_TOO_BUSY, - "(load: " + nodeLoad + " > " + maxLoad + ")"); + if(considerLoad){ + if(excludeNodeByLoad(node)){ return false; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index 16ffb4346d5..12b5c33f6c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -1066,4 +1066,3 @@ public boolean hasStorageType(StorageType type) { return false; } } - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 9ebc693a235..430c0d44a7f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -24,17 +24,19 @@ import com.google.common.base.Preconditions; import com.google.common.net.InetAddresses; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.net.DFSNetworkTopology; import org.apache.hadoop.hdfs.protocol.*; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTargetPair; @@ -71,7 +73,7 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class DatanodeManager { - static final Log LOG = LogFactory.getLog(DatanodeManager.class); + static final Logger LOG = LoggerFactory.getLogger(DatanodeManager.class); private final Namesystem namesystem; private final BlockManager blockManager; @@ -1540,7 +1542,7 @@ private void setDatanodeDead(DatanodeDescriptor node) { } private BlockRecoveryCommand getBlockRecoveryCommand(String blockPoolId, - DatanodeDescriptor nodeinfo) { + DatanodeDescriptor nodeinfo) throws 
IOException { BlockInfo[] blocks = nodeinfo.getLeaseRecoveryCommand(Integer.MAX_VALUE); if (blocks == null) { return null; @@ -1548,7 +1550,10 @@ private BlockRecoveryCommand getBlockRecoveryCommand(String blockPoolId, BlockRecoveryCommand brCommand = new BlockRecoveryCommand(blocks.length); for (BlockInfo b : blocks) { BlockUnderConstructionFeature uc = b.getUnderConstructionFeature(); - assert uc != null; + if(uc == null) { + throw new IOException("Recovery block " + b + + "where it is not under construction."); + } final DatanodeStorageInfo[] storages = uc.getExpectedStorageLocations(); // Skip stale nodes during recovery final List recoveryLocations = @@ -1676,7 +1681,6 @@ private void addCacheCommands(String blockPoolId, DatanodeDescriptor nodeinfo, (double) (totalReplicateBlocks * maxTransfers) / totalBlocks); int numECTasks = (int) Math.ceil( (double) (totalECBlocks * maxTransfers) / totalBlocks); - if (LOG.isDebugEnabled()) { LOG.debug("Pending replication tasks: " + numReplicationTasks + " erasure-coded tasks: " + numECTasks); @@ -1966,5 +1970,26 @@ public String getSlowDisksReport() { return slowDiskTracker != null ? slowDiskTracker.getSlowDiskReportAsJsonString() : null; } + + /** + * Generates datanode reports for the given report type. + * + * @param type + * type of the datanode report + * @return array of DatanodeStorageReports + */ + public DatanodeStorageReport[] getDatanodeStorageReport( + DatanodeReportType type) { + final List datanodes = getDatanodeListForReport(type); + + DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes + .size()]; + for (int i = 0; i < reports.length; i++) { + final DatanodeDescriptor d = datanodes.get(i); + reports[i] = new DatanodeStorageReport( + new DatanodeInfoBuilder().setFrom(d).build(), d.getStorageReports()); + } + return reports; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java index 59f907fe085..b7bf6747b16 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -50,7 +50,8 @@ * resolutions are only done during the loading time to minimize the latency. 
*/ public class HostFileManager extends HostConfigManager { - private static final Log LOG = LogFactory.getLog(HostFileManager.class); + private static final Logger LOG = + LoggerFactory.getLogger(HostFileManager.class); private Configuration conf; private HostSet includes = new HostSet(); private HostSet excludes = new HostSet(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java index e3f228d2947..40ea98053fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java @@ -92,6 +92,10 @@ private final LongAdder corruptReplicationOneBlocks = new LongAdder(); private final LongAdder lowRedundancyECBlockGroups = new LongAdder(); private final LongAdder corruptECBlockGroups = new LongAdder(); + private final LongAdder highestPriorityLowRedundancyReplicatedBlocks + = new LongAdder(); + private final LongAdder highestPriorityLowRedundancyECBlocks + = new LongAdder(); /** Create an object. */ LowRedundancyBlocks() { @@ -162,6 +166,18 @@ long getCorruptReplicationOneBlocks() { return corruptReplicationOneBlocks.longValue(); } + /** Return the number of under replicated blocks + * with the highest priority to recover */ + long getHighestPriorityReplicatedBlockCount() { + return highestPriorityLowRedundancyReplicatedBlocks.longValue(); + } + + /** Return the number of under replicated EC blocks + * with the highest priority to recover */ + long getHighestPriorityECBlockCount() { + return highestPriorityLowRedundancyECBlocks.longValue(); + } + /** * Return low redundancy striped blocks excluding corrupt blocks. 
*/ @@ -300,6 +316,9 @@ private void incrementBlockStat(BlockInfo blockInfo, int priLevel, if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { corruptECBlockGroups.increment(); } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyECBlocks.increment(); + } } else { lowRedundancyBlocks.increment(); if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { @@ -308,6 +327,9 @@ private void incrementBlockStat(BlockInfo blockInfo, int priLevel, corruptReplicationOneBlocks.increment(); } } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyReplicatedBlocks.increment(); + } } } @@ -380,6 +402,9 @@ private void decrementBlockStat(BlockInfo blockInfo, int priLevel, if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { corruptECBlockGroups.decrement(); } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyECBlocks.decrement(); + } } else { lowRedundancyBlocks.decrement(); if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { @@ -391,6 +416,9 @@ private void decrementBlockStat(BlockInfo blockInfo, int priLevel, "should be non-negative"; } } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyReplicatedBlocks.decrement(); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java index c3098f3cff0..c6e2263af92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java @@ -24,6 +24,7 @@ import java.util.regex.Pattern; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.datanode.DataNodeLayoutVersion; @@ -364,6 +365,10 @@ public void write(DataOutput out) throws IOException { String XATTR_ERASURECODING_POLICY = "system.hdfs.erasurecoding.policy"; + String XATTR_SATISFY_STORAGE_POLICY = "user.hdfs.sps"; + + Path MOVER_ID_PATH = new Path("/system/mover.id"); + long BLOCK_GROUP_INDEX_MASK = 15; byte MAX_BLOCKS_IN_GROUP = 16; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index 637c679b9ff..498a09351c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.common; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; @@ -53,7 +53,7 @@ public static final String CURRENT_CONF = "current.conf"; public static final String DELEGATION_PARAMETER_NAME = DelegationParam.NAME; public static final String NAMENODE_ADDRESS = "nnaddr"; - private static final Log LOG = LogFactory.getLog(JspHelper.class); + private static final Logger LOG = LoggerFactory.getLogger(JspHelper.class); /** Private constructor 
for preventing creating JspHelper object. */ private JspHelper() {} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java index 40c048c5a3e..051e2d2c525 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/MetricsLoggerTask.java @@ -32,8 +32,9 @@ import javax.management.ObjectName; import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.log4j.Appender; import org.apache.log4j.AsyncAppender; @@ -43,7 +44,8 @@ */ public class MetricsLoggerTask implements Runnable { - public static final Log LOG = LogFactory.getLog(MetricsLoggerTask.class); + public static final Logger LOG = + LoggerFactory.getLogger(MetricsLoggerTask.class); private static ObjectName objectName = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java index 5dee16a9294..4e30e50414e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java @@ -38,8 +38,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -57,7 +57,8 @@ @InterfaceAudience.Private public final class Util { - private final static Log LOG = LogFactory.getLog(Util.class.getName()); + private final static Logger LOG = + LoggerFactory.getLogger(Util.class.getName()); public final static String FILE_LENGTH = "File-Length"; public final static String CONTENT_LENGTH = "Content-Length"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockDispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockDispatcher.java new file mode 100644 index 00000000000..f87fcaef054 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockDispatcher.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.common.sps; + +import static org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.Socket; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.datatransfer.BlockPinningException; +import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil; +import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; +import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; +import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataEncryptionKeyFactory; +import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferClient; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status; +import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.server.protocol.BlockStorageMovementCommand.BlockMovingInfo; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.token.Token; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Dispatching block replica moves between datanodes to satisfy the storage + * policy. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class BlockDispatcher { + private static final Logger LOG = LoggerFactory + .getLogger(BlockDispatcher.class); + + private final boolean connectToDnViaHostname; + private final int socketTimeout; + private final int ioFileBufferSize; + + /** + * Construct block dispatcher details. + * + * @param sockTimeout + * soTimeout + * @param ioFileBuffSize + * file io buffer size + * @param connectToDatanodeViaHostname + * true represents connect via hostname, false otw + */ + public BlockDispatcher(int sockTimeout, int ioFileBuffSize, + boolean connectToDatanodeViaHostname) { + this.socketTimeout = sockTimeout; + this.ioFileBufferSize = ioFileBuffSize; + this.connectToDnViaHostname = connectToDatanodeViaHostname; + } + + /** + * Moves the given block replica to the given target node and wait for the + * response. 
+ * + * @param blkMovingInfo + * block to storage info + * @param saslClient + * SASL for DataTransferProtocol on behalf of a client + * @param eb + * extended block info + * @param sock + * target node's socket + * @param km + * for creation of an encryption key + * @param accessToken + * connection block access token + * @return status of the block movement + */ + public BlockMovementStatus moveBlock(BlockMovingInfo blkMovingInfo, + SaslDataTransferClient saslClient, ExtendedBlock eb, Socket sock, + DataEncryptionKeyFactory km, Token accessToken) { + LOG.info("Start moving block:{} from src:{} to destin:{} to satisfy " + + "storageType, sourceStoragetype:{} and destinStoragetype:{}", + blkMovingInfo.getBlock(), blkMovingInfo.getSource(), + blkMovingInfo.getTarget(), blkMovingInfo.getSourceStorageType(), + blkMovingInfo.getTargetStorageType()); + DataOutputStream out = null; + DataInputStream in = null; + try { + NetUtils.connect(sock, + NetUtils.createSocketAddr( + blkMovingInfo.getTarget().getXferAddr(connectToDnViaHostname)), + socketTimeout); + // Set read timeout so that it doesn't hang forever against + // unresponsive nodes. Datanode normally sends IN_PROGRESS response + // twice within the client read timeout period (every 30 seconds by + // default). Here, we make it give up after "socketTimeout * 5" period + // of no response. + sock.setSoTimeout(socketTimeout * 5); + sock.setKeepAlive(true); + OutputStream unbufOut = sock.getOutputStream(); + InputStream unbufIn = sock.getInputStream(); + LOG.debug("Connecting to datanode {}", blkMovingInfo.getTarget()); + + IOStreamPair saslStreams = saslClient.socketSend(sock, unbufOut, + unbufIn, km, accessToken, blkMovingInfo.getTarget()); + unbufOut = saslStreams.out; + unbufIn = saslStreams.in; + out = new DataOutputStream( + new BufferedOutputStream(unbufOut, ioFileBufferSize)); + in = new DataInputStream( + new BufferedInputStream(unbufIn, ioFileBufferSize)); + sendRequest(out, eb, accessToken, blkMovingInfo.getSource(), + blkMovingInfo.getTargetStorageType()); + receiveResponse(in); + + LOG.info( + "Successfully moved block:{} from src:{} to destin:{} for" + + " satisfying storageType:{}", + blkMovingInfo.getBlock(), blkMovingInfo.getSource(), + blkMovingInfo.getTarget(), blkMovingInfo.getTargetStorageType()); + return BlockMovementStatus.DN_BLK_STORAGE_MOVEMENT_SUCCESS; + } catch (BlockPinningException e) { + // Pinned block won't be able to move to a different node. So, its not + // required to do retries, just marked as SUCCESS. + LOG.debug("Pinned block can't be moved, so skipping block:{}", + blkMovingInfo.getBlock(), e); + return BlockMovementStatus.DN_BLK_STORAGE_MOVEMENT_SUCCESS; + } catch (IOException e) { + // TODO: handle failure retries + LOG.warn( + "Failed to move block:{} from src:{} to destin:{} to satisfy " + + "storageType:{}", + blkMovingInfo.getBlock(), blkMovingInfo.getSource(), + blkMovingInfo.getTarget(), blkMovingInfo.getTargetStorageType(), e); + return BlockMovementStatus.DN_BLK_STORAGE_MOVEMENT_FAILURE; + } finally { + IOUtils.closeStream(out); + IOUtils.closeStream(in); + IOUtils.closeSocket(sock); + } + } + + /** Send a reportedBlock replace request to the output stream. 
*/ + private static void sendRequest(DataOutputStream out, ExtendedBlock eb, + Token accessToken, DatanodeInfo source, + StorageType targetStorageType) throws IOException { + new Sender(out).replaceBlock(eb, targetStorageType, accessToken, + source.getDatanodeUuid(), source, null); + } + + /** Receive a reportedBlock copy response from the input stream. */ + private static void receiveResponse(DataInputStream in) throws IOException { + BlockOpResponseProto response = BlockOpResponseProto + .parseFrom(vintPrefixed(in)); + while (response.getStatus() == Status.IN_PROGRESS) { + // read intermediate responses + response = BlockOpResponseProto.parseFrom(vintPrefixed(in)); + } + String logInfo = "reportedBlock move is failed"; + DataTransferProtoUtil.checkBlockOpStatus(response, logInfo); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockMovementAttemptFinished.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockMovementAttemptFinished.java new file mode 100644 index 00000000000..29c5e9ce881 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockMovementAttemptFinished.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.common.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; + +/** + * This class represents status from a block movement task. This will have the + * information of the task which was successful or failed due to errors. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class BlockMovementAttemptFinished { + private final Block block; + private final DatanodeInfo src; + private final DatanodeInfo target; + private final StorageType targetType; + private final BlockMovementStatus status; + + /** + * Construct movement attempt finished info. + * + * @param block + * block + * @param src + * src datanode + * @param target + * target datanode + * @param targetType + * target storage type + * @param status + * movement status + */ + public BlockMovementAttemptFinished(Block block, DatanodeInfo src, + DatanodeInfo target, StorageType targetType, BlockMovementStatus status) { + this.block = block; + this.src = src; + this.target = target; + this.targetType = targetType; + this.status = status; + } + + /** + * @return details of the block, which attempted to move from src to target + * node. 
+ */ + public Block getBlock() { + return block; + } + + /** + * @return the target datanode where it moved the block. + */ + public DatanodeInfo getTargetDatanode() { + return target; + } + + /** + * @return target storage type. + */ + public StorageType getTargetType() { + return targetType; + } + + /** + * @return block movement status code. + */ + public BlockMovementStatus getStatus() { + return status; + } + + @Override + public String toString() { + return new StringBuilder().append("Block movement attempt finished(\n ") + .append(" block : ").append(block).append(" src node: ").append(src) + .append(" target node: ").append(target).append(" target type: ") + .append(targetType).append(" movement status: ") + .append(status).append(")").toString(); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockMovementStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockMovementStatus.java new file mode 100644 index 00000000000..f70d84f73c4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockMovementStatus.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.common.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Block movement status code. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public enum BlockMovementStatus { + /** Success. */ + DN_BLK_STORAGE_MOVEMENT_SUCCESS(0), + /** + * Failure due to generation time stamp mismatches or network errors + * or no available space. + */ + DN_BLK_STORAGE_MOVEMENT_FAILURE(-1); + + // TODO: need to support different type of failures. Failure due to network + // errors, block pinned, no space available etc. + + private final int code; + + BlockMovementStatus(int code) { + this.code = code; + } + + /** + * @return the status code. + */ + int getStatusCode() { + return code; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockStorageMovementTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockStorageMovementTracker.java new file mode 100644 index 00000000000..4ee415e0b08 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlockStorageMovementTracker.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.common.sps; + +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is used to track the completion of block movement future tasks. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class BlockStorageMovementTracker implements Runnable { + private static final Logger LOG = LoggerFactory + .getLogger(BlockStorageMovementTracker.class); + private final CompletionService + moverCompletionService; + private final BlocksMovementsStatusHandler blksMovementsStatusHandler; + + private volatile boolean running = true; + + /** + * BlockStorageMovementTracker constructor. + * + * @param moverCompletionService + * completion service. + * @param handler + * blocks movements status handler + */ + public BlockStorageMovementTracker( + CompletionService moverCompletionService, + BlocksMovementsStatusHandler handler) { + this.moverCompletionService = moverCompletionService; + this.blksMovementsStatusHandler = handler; + } + + @Override + public void run() { + while (running) { + try { + Future future = moverCompletionService + .take(); + if (future != null) { + BlockMovementAttemptFinished result = future.get(); + LOG.debug("Completed block movement. {}", result); + if (running && blksMovementsStatusHandler != null) { + // handle completed block movement. + blksMovementsStatusHandler.handle(result); + } + } + } catch (InterruptedException e) { + if (running) { + LOG.error("Exception while moving block replica to target storage" + + " type", e); + } + } catch (ExecutionException e) { + // TODO: Do we need failure retries and implement the same if required. + LOG.error("Exception while moving block replica to target storage type", + e); + } + } + } + + /** + * Sets running flag to false. + */ + public void stopTracking() { + running = false; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlocksMovementsStatusHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlocksMovementsStatusHandler.java new file mode 100644 index 00000000000..ab67424fc1c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/BlocksMovementsStatusHandler.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.common.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Blocks movements status handler, which can be used to collect details of the + * completed block movements. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface BlocksMovementsStatusHandler { + + /** + * Collect all the storage movement attempt finished blocks. + * + * @param moveAttemptFinishedBlk + * storage movement attempt finished block + */ + void handle(BlockMovementAttemptFinished moveAttemptFinishedBlk); +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/package-info.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/package-info.java new file mode 100644 index 00000000000..fcffbe952d8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/sps/package-info.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides commonly used classes for the block movement. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.hdfs.server.common.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index a94d2df4315..8f7a1861d24 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -511,6 +511,7 @@ HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) outliersReportDue && dn.getDiskMetrics() != null ? 
SlowDiskReports.create(dn.getDiskMetrics().getDiskOutliersStats()) : SlowDiskReports.EMPTY_REPORT; + HeartbeatResponse response = bpNamenode.sendHeartbeat(bpRegistration, reports, dn.getFSDataset().getCacheCapacity(), @@ -527,6 +528,7 @@ HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) // If the report was due and successfully sent, schedule the next one. scheduler.scheduleNextOutlierReport(); } + return response; } @@ -682,7 +684,8 @@ private void offerService() throws Exception { } } } - if (ibrManager.sendImmediately() || sendHeartbeat) { + if (!dn.areIBRDisabledForTests() && + (ibrManager.sendImmediately()|| sendHeartbeat)) { ibrManager.sendIBRs(bpNamenode, bpRegistration, bpos.getBlockPoolId()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java index 94835e22081..34f6c33003f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java @@ -275,7 +275,9 @@ void syncBlock(List syncList) throws IOException { } // recover() guarantees syncList will have at least one replica with RWR // or better state. - assert minLength != Long.MAX_VALUE : "wrong minLength"; + if (minLength == Long.MAX_VALUE) { + throw new IOException("Incorrect block size"); + } newBlock.setNumBytes(minLength); break; case RUR: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java index 268007f0532..bff47fa6ba6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java @@ -682,16 +682,17 @@ private void readChecksum(byte[] buf, final int checksumOffset, + " at offset " + offset + " for block " + block, e); ris.closeChecksumStream(); if (corruptChecksumOk) { - if (checksumOffset < checksumLen) { + if (checksumLen > 0) { // Just fill the array with zeros. - Arrays.fill(buf, checksumOffset, checksumLen, (byte) 0); + Arrays.fill(buf, checksumOffset, checksumOffset + checksumLen, + (byte) 0); } } else { throw e; } } } - + /** * Compute checksum for chunks and verify the checksum that is read from * the metadata file is correct. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 7df92f6083c..c9803958ad1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -293,6 +293,8 @@ " and rolling upgrades."; static final int CURRENT_BLOCK_FORMAT_VERSION = 1; + public static final int MAX_VOLUME_FAILURE_TOLERATED_LIMIT = -1; + public static final String MAX_VOLUME_FAILURES_TOLERATED_MSG = "should be greater than -1"; /** A list of property that are reconfigurable at runtime. 
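The readChecksum() hunk above fixes two related problems: Arrays.fill() takes a half-open [fromIndex, toIndex) range, so passing checksumLen as the end index zeroed at most the bytes in [checksumOffset, checksumLen) rather than checksumLen bytes starting at checksumOffset, and the old guard (checksumOffset < checksumLen) skipped the fill entirely once the offset passed the checksum length. A stand-alone demonstration of the range semantics (the class name is made up):

    import java.util.Arrays;

    public class FillRangeDemo {
      public static void main(String[] args) {
        byte[] buf = new byte[10];
        int checksumOffset = 2;
        int checksumLen = 4;

        // Old call: checksumLen is treated as the exclusive end index, so only
        // indexes 2 and 3 are written.
        Arrays.fill(buf, checksumOffset, checksumLen, (byte) 1);

        // Fixed call: zeroes exactly checksumLen bytes starting at checksumOffset,
        // i.e. indexes 2 through 5.
        Arrays.fill(buf, checksumOffset, checksumOffset + checksumLen, (byte) 2);

        System.out.println(Arrays.toString(buf));  // [0, 0, 2, 2, 2, 2, 0, 0, 0, 0]
      }
    }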
*/ private static final List RECONFIGURABLE_PROPERTIES = @@ -329,6 +331,7 @@ public static InetSocketAddress createSocketAddr(String target) { ThreadGroup threadGroup = null; private DNConf dnConf; private volatile boolean heartbeatsDisabledForTests = false; + private volatile boolean ibrDisabledForTests = false; private volatile boolean cacheReportsDisabledForTests = false; private DataStorage storage = null; @@ -1332,6 +1335,15 @@ boolean areHeartbeatsDisabledForTests() { } @VisibleForTesting + void setIBRDisabledForTest(boolean disabled) { + this.ibrDisabledForTests = disabled; + } + + @VisibleForTesting + boolean areIBRDisabledForTests() { + return this.ibrDisabledForTests; + } + void setCacheReportsDisabledForTest(boolean disabled) { this.cacheReportsDisabledForTests = disabled; } @@ -1389,10 +1401,11 @@ void startDataNode(List dataDirectories, int volFailuresTolerated = dnConf.getVolFailuresTolerated(); int volsConfigured = dnConf.getVolsConfigured(); - if (volFailuresTolerated < 0 || volFailuresTolerated >= volsConfigured) { + if (volFailuresTolerated < MAX_VOLUME_FAILURE_TOLERATED_LIMIT + || volFailuresTolerated >= volsConfigured) { throw new DiskErrorException("Invalid value configured for " + "dfs.datanode.failed.volumes.tolerated - " + volFailuresTolerated - + ". Value configured is either less than 0 or >= " + + ". Value configured is either greater than -1 or >= " + "to the number of configured volumes (" + volsConfigured + ")."); } @@ -1973,7 +1986,7 @@ public void shutdown() { } } } - + List bposArray = (this.blockPoolManager == null) ? new ArrayList() : this.blockPoolManager.getAllNamenodeThreads(); @@ -3139,6 +3152,11 @@ public String getDiskBalancerStatus() { } } + @Override + public boolean isSecurityEnabled() { + return UserGroupInformation.isSecurityEnabled(); + } + public void refreshNamenodes(Configuration conf) throws IOException { blockPoolManager.refreshNamenodes(conf); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java index b5f0cd0a5d3..9d11e148847 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java @@ -146,4 +146,11 @@ * @return list of slow disks */ String getSlowDisks(); + + /** + * Gets if security is enabled. + * + * @return true, if security is enabled. 
+ */ + boolean isSecurityEnabled(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index 39665e3e954..99584d9f3c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -39,8 +39,8 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.lang3.time.FastDateFormat; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.AutoCloseableLock; @@ -60,7 +60,8 @@ */ @InterfaceAudience.Private public class DirectoryScanner implements Runnable { - private static final Log LOG = LogFactory.getLog(DirectoryScanner.class); + private static final Logger LOG = + LoggerFactory.getLogger(DirectoryScanner.class); private static final int MILLIS_PER_SECOND = 1000; private static final String START_MESSAGE = "Periodic Directory Tree Verification scan" @@ -450,7 +451,7 @@ private void scan() { if (d < blockpoolReport.length) { // There may be multiple on-disk records for the same block, don't increment // the memory record pointer if so. - ScanInfo nextInfo = blockpoolReport[Math.min(d, blockpoolReport.length - 1)]; + ScanInfo nextInfo = blockpoolReport[d]; if (nextInfo.getBlockId() != info.getBlockId()) { ++m; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java index 83ee5f67530..2da3b1e8f98 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ProfilingFileIoEvents.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.datanode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -37,7 +37,8 @@ */ @InterfaceAudience.Private class ProfilingFileIoEvents { - static final Log LOG = LogFactory.getLog(ProfilingFileIoEvents.class); + static final Logger LOG = + LoggerFactory.getLogger(ProfilingFileIoEvents.class); private final boolean isEnabled; private final int sampleRangeMax; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java index ea9e72ce443..3df83cfbad4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ShortCircuitRegistry.java @@ -32,8 +32,8 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; 
-import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.hdfs.ExtendedBlockId; @@ -81,7 +81,8 @@ * The counterpart of this class on the client is {@link DfsClientShmManager}. */ public class ShortCircuitRegistry { - public static final Log LOG = LogFactory.getLog(ShortCircuitRegistry.class); + public static final Logger LOG = + LoggerFactory.getLogger(ShortCircuitRegistry.class); private static final int SHM_LENGTH = 8192; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java index 3889e2317c2..30602c0f29c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/DatasetVolumeChecker.java @@ -28,6 +28,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; @@ -153,10 +154,11 @@ public DatasetVolumeChecker(Configuration conf, Timer timer) lastAllVolumesCheck = timer.monotonicNow() - minDiskCheckGapMs; - if (maxVolumeFailuresTolerated < 0) { + if (maxVolumeFailuresTolerated < DataNode.MAX_VOLUME_FAILURE_TOLERATED_LIMIT) { throw new DiskErrorException("Invalid value configured for " + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - " - + maxVolumeFailuresTolerated + " (should be non-negative)"); + + maxVolumeFailuresTolerated + " " + + DataNode.MAX_VOLUME_FAILURES_TOLERATED_MSG); } delegateChecker = new ThrottledAsyncChecker<>( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java index 81575e2e9d9..dabaa832141 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/StorageLocationChecker.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.StorageLocation; import org.apache.hadoop.hdfs.server.datanode.StorageLocation.CheckContext; import org.apache.hadoop.util.DiskChecker.DiskErrorException; @@ -105,10 +106,11 @@ public StorageLocationChecker(Configuration conf, Timer timer) DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT); - if (maxVolumeFailuresTolerated < 0) { + if (maxVolumeFailuresTolerated < DataNode.MAX_VOLUME_FAILURE_TOLERATED_LIMIT) { throw new DiskErrorException("Invalid value configured for " + DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY + " - " - + 
maxVolumeFailuresTolerated + " (should be non-negative)"); + + maxVolumeFailuresTolerated + " " + + DataNode.MAX_VOLUME_FAILURES_TOLERATED_MSG); } this.timer = timer; @@ -213,12 +215,22 @@ public StorageLocationChecker(Configuration conf, Timer timer) } } - if (failedLocations.size() > maxVolumeFailuresTolerated) { - throw new DiskErrorException("Too many failed volumes - " - + "current valid volumes: " + goodLocations.size() - + ", volumes configured: " + dataDirs.size() - + ", volumes failed: " + failedLocations.size() - + ", volume failures tolerated: " + maxVolumeFailuresTolerated); + if (maxVolumeFailuresTolerated == DataNode.MAX_VOLUME_FAILURE_TOLERATED_LIMIT) { + if (dataDirs.size() == failedLocations.size()) { + throw new DiskErrorException("Too many failed volumes - " + + "current valid volumes: " + goodLocations.size() + + ", volumes configured: " + dataDirs.size() + + ", volumes failed: " + failedLocations.size() + + ", volume failures tolerated: " + maxVolumeFailuresTolerated); + } + } else { + if (failedLocations.size() > maxVolumeFailuresTolerated) { + throw new DiskErrorException("Too many failed volumes - " + + "current valid volumes: " + goodLocations.size() + + ", volumes configured: " + dataDirs.size() + + ", volumes failed: " + failedLocations.size() + + ", volume failures tolerated: " + maxVolumeFailuresTolerated); + } } if (goodLocations.size() == 0) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/AvailableSpaceVolumeChoosingPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/AvailableSpaceVolumeChoosingPolicy.java index efe222f6ed7..67a66fd0ff2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/AvailableSpaceVolumeChoosingPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/AvailableSpaceVolumeChoosingPolicy.java @@ -27,8 +27,8 @@ import java.util.List; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -46,7 +46,8 @@ public class AvailableSpaceVolumeChoosingPolicy implements VolumeChoosingPolicy, Configurable { - private static final Log LOG = LogFactory.getLog(AvailableSpaceVolumeChoosingPolicy.class); + private static final Logger LOG = + LoggerFactory.getLogger(AvailableSpaceVolumeChoosingPolicy.class); private Object[] syncLocks; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/RoundRobinVolumeChoosingPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/RoundRobinVolumeChoosingPolicy.java index b9bcf1ff270..2d924c02362 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/RoundRobinVolumeChoosingPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/RoundRobinVolumeChoosingPolicy.java @@ -20,8 +20,8 @@ import java.io.IOException; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.StorageType; import 
org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; @@ -31,7 +31,8 @@ */ public class RoundRobinVolumeChoosingPolicy implements VolumeChoosingPolicy { - public static final Log LOG = LogFactory.getLog(RoundRobinVolumeChoosingPolicy.class); + public static final Logger LOG = + LoggerFactory.getLogger(RoundRobinVolumeChoosingPolicy.class); // curVolumes stores the RR counters of each storage type. // The ordinal of storage type in org.apache.hadoop.fs.StorageType diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/VolumeChoosingPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/VolumeChoosingPolicy.java index 8cbc0587b02..e9fa37bdea4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/VolumeChoosingPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/VolumeChoosingPolicy.java @@ -32,7 +32,7 @@ * Choose a volume to place a replica, * given a list of volumes and the replica size sought for storage. * - * The implementations of this interface must be thread-safe. + * The caller should synchronize access to the list of volumes. * * @param volumes - a list of available volumes. * @param replicaSize - the size of the replica for which a volume is sought. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java index 3f9de78d1cc..2adfb6bb52b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java @@ -32,8 +32,8 @@ import java.util.Scanner; import java.util.concurrent.atomic.AtomicLong; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CachingGetSpaceUsed; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -71,7 +71,7 @@ * This class is synchronized by {@link FsVolumeImpl}. 
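The VolumeChoosingPolicy javadoc above now puts synchronization on the caller instead of requiring thread-safe policy implementations. The sketch below shows that pattern with a deliberately non-thread-safe round-robin picker; the classes and lock here are hypothetical stand-ins, not Hadoop's RoundRobinVolumeChoosingPolicy or FsVolumeSpi.

    import java.util.List;

    public class CallerSyncDemo {
      // Deliberately not thread-safe on its own, like a simple round-robin policy.
      static class RoundRobinPicker {
        private int next;
        <T> T pick(List<T> items) {
          T item = items.get(next % items.size());
          next++;
          return item;
        }
      }

      private static final Object VOLUMES_LOCK = new Object();
      private static final RoundRobinPicker PICKER = new RoundRobinPicker();

      // The caller guards both the picker state and the volume list, as the new
      // contract requires.
      static <T> T pickSafely(List<T> volumes) {
        synchronized (VOLUMES_LOCK) {
          return PICKER.pick(volumes);
        }
      }

      public static void main(String[] args) {
        List<String> volumes = List.of("/data1", "/data2", "/data3");
        System.out.println(pickSafely(volumes));  // /data1
        System.out.println(pickSafely(volumes));  // /data2
      }
    }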
*/ class BlockPoolSlice { - static final Log LOG = LogFactory.getLog(BlockPoolSlice.class); + static final Logger LOG = LoggerFactory.getLogger(BlockPoolSlice.class); private final String bpid; private final FsVolumeImpl volume; // volume to which this BlockPool belongs to @@ -764,7 +764,7 @@ void shutdown(BlockListAsLongs blocksListToPersist) { } if (dfsUsage instanceof CachingGetSpaceUsed) { - IOUtils.cleanup(LOG, ((CachingGetSpaceUsed) dfsUsage)); + IOUtils.cleanupWithLogger(LOG, ((CachingGetSpaceUsed) dfsUsage)); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java index 4929b5e94ee..81213a033f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java @@ -30,8 +30,8 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DatanodeUtil; @@ -59,7 +59,8 @@ * They should be combined. */ class FsDatasetAsyncDiskService { - public static final Log LOG = LogFactory.getLog(FsDatasetAsyncDiskService.class); + public static final Logger LOG = + LoggerFactory.getLogger(FsDatasetAsyncDiskService.class); // ThreadPool core pool size private static final int CORE_THREADS_PER_VOLUME = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 89c278a34c4..27196c2772f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -237,6 +237,7 @@ public LengthInputStream getMetaDataInputStream(ExtendedBlock b) final FsDatasetCache cacheManager; private final Configuration conf; private final int volFailuresTolerated; + private final int volsConfigured; private volatile boolean fsRunning; final ReplicaMap volumeMap; @@ -285,15 +286,32 @@ public LengthInputStream getMetaDataInputStream(ExtendedBlock b) List volumeFailureInfos = getInitialVolumeFailureInfos( dataLocations, storage); - int volsConfigured = datanode.getDnConf().getVolsConfigured(); + volsConfigured = datanode.getDnConf().getVolsConfigured(); int volsFailed = volumeFailureInfos.size(); - if (volsFailed > volFailuresTolerated) { - throw new DiskErrorException("Too many failed volumes - " - + "current valid volumes: " + storage.getNumStorageDirs() - + ", volumes configured: " + volsConfigured - + ", volumes failed: " + volsFailed - + ", volume failures tolerated: " + volFailuresTolerated); + if (volFailuresTolerated < DataNode.MAX_VOLUME_FAILURE_TOLERATED_LIMIT + || volFailuresTolerated >= volsConfigured) { + throw new DiskErrorException("Invalid value configured for " + + 
"dfs.datanode.failed.volumes.tolerated - " + volFailuresTolerated + + ". Value configured is either less than maxVolumeFailureLimit or greater than " + + "to the number of configured volumes (" + volsConfigured + ")."); + } + if (volFailuresTolerated == DataNode.MAX_VOLUME_FAILURE_TOLERATED_LIMIT) { + if (volsConfigured == volsFailed) { + throw new DiskErrorException( + "Too many failed volumes - " + "current valid volumes: " + + storage.getNumStorageDirs() + ", volumes configured: " + + volsConfigured + ", volumes failed: " + volsFailed + + ", volume failures tolerated: " + volFailuresTolerated); + } + } else { + if (volsFailed > volFailuresTolerated) { + throw new DiskErrorException( + "Too many failed volumes - " + "current valid volumes: " + + storage.getNumStorageDirs() + ", volumes configured: " + + volsConfigured + ", volumes failed: " + volsFailed + + ", volume failures tolerated: " + volFailuresTolerated); + } } storageMap = new ConcurrentHashMap(); @@ -597,7 +615,13 @@ public long getBlockPoolUsed(String bpid) throws IOException { */ @Override // FsDatasetSpi public boolean hasEnoughResource() { - return getNumFailedVolumes() <= volFailuresTolerated; + if (volFailuresTolerated == DataNode.MAX_VOLUME_FAILURE_TOLERATED_LIMIT) { + // If volFailuresTolerated configured maxVolumeFailureLimit then minimum + // one volume is required. + return volumes.getVolumes().size() >= 1; + } else { + return getNumFailedVolumes() <= volFailuresTolerated; + } } /** @@ -1373,6 +1397,9 @@ public ReplicaHandler createRbw( datanode.getMetrics().incrRamDiskBlocksWrite(); } catch (DiskOutOfSpaceException de) { // Ignore the exception since we just fall back to persistent storage. + LOG.warn("Insufficient space for placing the block on a transient " + + "volume, fall back to persistent storage: " + + de.getMessage()); } finally { if (ref == null) { cacheManager.release(b.getNumBytes()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java index d6969c42e6d..a77faf2cec8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -45,7 +45,8 @@ * They should be combined. 
*/ class RamDiskAsyncLazyPersistService { - public static final Log LOG = LogFactory.getLog(RamDiskAsyncLazyPersistService.class); + public static final Logger LOG = + LoggerFactory.getLogger(RamDiskAsyncLazyPersistService.class); // ThreadPool core pool size private static final int CORE_THREADS_PER_VOLUME = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java index 335ed703abd..07e520117f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskReplicaTracker.java @@ -20,8 +20,8 @@ import com.google.common.base.Preconditions; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -36,7 +36,8 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public abstract class RamDiskReplicaTracker { - static final Log LOG = LogFactory.getLog(RamDiskReplicaTracker.class); + static final Logger LOG = + LoggerFactory.getLogger(RamDiskReplicaTracker.class); FsDatasetImpl fsDataset; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java index 4349c26b053..1c7850608ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java @@ -41,8 +41,8 @@ import io.netty.handler.ssl.SslHandler; import io.netty.handler.stream.ChunkedWriteHandler; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -87,7 +87,7 @@ private final RestCsrfPreventionFilter restCsrfPreventionFilter; private InetSocketAddress httpAddress; private InetSocketAddress httpsAddress; - static final Log LOG = LogFactory.getLog(DatanodeHttpServer.class); + static final Logger LOG = LoggerFactory.getLogger(DatanodeHttpServer.class); // HttpServer threads are only used for the web UI and basic servlets, so // set them to the minimum possible diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java index 4958bb59202..be29eaf58a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java @@ -30,7 +30,7 @@ import io.netty.handler.codec.http.HttpResponseStatus; import 
io.netty.util.ReferenceCountUtil; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.security.http.RestCsrfPreventionFilter; @@ -46,7 +46,7 @@ final class RestCsrfPreventionFilterHandler extends SimpleChannelInboundHandler { - private static final Log LOG = DatanodeHttpServer.LOG; + private static final Logger LOG = DatanodeHttpServer.LOG; private final RestCsrfPreventionFilter restCsrfPreventionFilter; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java index 9d659f1f577..9a2e0b71a44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java @@ -34,7 +34,7 @@ import io.netty.handler.codec.http.HttpRequest; import io.netty.handler.codec.http.HttpRequestEncoder; import io.netty.handler.codec.http.HttpResponseEncoder; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import java.net.InetSocketAddress; @@ -53,7 +53,7 @@ private String uri; private Channel proxiedChannel; private final InetSocketAddress host; - static final Log LOG = DatanodeHttpServer.LOG; + static final Logger LOG = DatanodeHttpServer.LOG; SimpleHttpProxyHandler(InetSocketAddress host) { this.host = host; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java index fa6f676dd32..366f47f2963 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/DataNodeUGIProvider.java @@ -13,8 +13,8 @@ */ package org.apache.hadoop.hdfs.server.datanode.web.webhdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; @@ -43,7 +43,7 @@ private final ParameterParser params; @VisibleForTesting static Cache ugiCache; - public static final Log LOG = LogFactory.getLog(Client.class); + public static final Logger LOG = LoggerFactory.getLogger(Client.class); DataNodeUGIProvider(ParameterParser params) { this.params = params; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java index dce1f02ceaa..b56b0d09ac2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java @@ -23,7 +23,7 @@ import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.DefaultFullHttpResponse; import 
io.netty.handler.codec.http.HttpResponseStatus; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.hdfs.web.JsonUtil; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.StandbyException; @@ -43,7 +43,7 @@ import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.APPLICATION_JSON_UTF8; class ExceptionHandler { - static Log LOG = WebHdfsHandler.LOG; + private static final Logger LOG = WebHdfsHandler.LOG; static DefaultFullHttpResponse exceptionCaught(Throwable cause) { Exception e = cause instanceof Exception ? (Exception) cause : new Exception(cause); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java index b5654ab8bde..3d928ff3eed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java @@ -23,7 +23,7 @@ import io.netty.handler.codec.http.DefaultHttpResponse; import io.netty.handler.codec.http.HttpContent; import io.netty.handler.codec.http.LastHttpContent; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.io.IOUtils; @@ -37,7 +37,7 @@ private final DFSClient client; private final OutputStream out; private final DefaultHttpResponse response; - private static final Log LOG = WebHdfsHandler.LOG; + private static final Logger LOG = WebHdfsHandler.LOG; HdfsWriter(DFSClient client, OutputStream out, DefaultHttpResponse response) { this.client = client; @@ -82,8 +82,8 @@ public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { } private void releaseDfsResources() { - IOUtils.cleanup(LOG, out); - IOUtils.cleanup(LOG, client); + IOUtils.cleanupWithLogger(LOG, out); + IOUtils.cleanupWithLogger(LOG, client); } private void releaseDfsResourcesAndThrow() throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java index 9a4b670f1b6..b01e3c983a1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java @@ -29,8 +29,8 @@ import io.netty.handler.codec.http.HttpRequest; import io.netty.handler.codec.http.QueryStringDecoder; import io.netty.handler.stream.ChunkedStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; @@ -87,8 +87,8 @@ import static org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier.HDFS_DELEGATION_KIND; public class WebHdfsHandler extends SimpleChannelInboundHandler { - static final Log LOG = LogFactory.getLog(WebHdfsHandler.class); - static final Log REQLOG = LogFactory.getLog("datanode.webhdfs"); + static final Logger LOG = LoggerFactory.getLogger(WebHdfsHandler.class); + static final 
Logger REQLOG = LoggerFactory.getLogger("datanode.webhdfs"); public static final String WEBHDFS_PREFIX = WebHdfsFileSystem.PATH_PREFIX; public static final int WEBHDFS_PREFIX_LENGTH = WEBHDFS_PREFIX.length(); public static final String APPLICATION_OCTET_STREAM = @@ -295,7 +295,7 @@ private void onGetFileChecksum(ChannelHandlerContext ctx) throws IOException { dfsclient.close(); dfsclient = null; } finally { - IOUtils.cleanup(LOG, dfsclient); + IOUtils.cleanupWithLogger(LOG, dfsclient); } final byte[] js = JsonUtil.toJsonString(checksum).getBytes(StandardCharsets.UTF_8); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java index 968a5a77f08..eddef336902 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java @@ -27,7 +27,7 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.text.StrBuilder; +import org.apache.commons.text.TextStringBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -491,7 +491,7 @@ protected int getDefaultTop() { /** * Put output line to log and string buffer. * */ - protected void recordOutput(final StrBuilder result, + protected void recordOutput(final TextStringBuilder result, final String outputLine) { LOG.info(outputLine); result.appendln(outputLine); @@ -501,7 +501,7 @@ protected void recordOutput(final StrBuilder result, * Parse top number of nodes to be processed. * @return top number of nodes to be processed. 
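The disk balancer commands below replace org.apache.commons.lang3.text.StrBuilder, deprecated in commons-lang3, with org.apache.commons.text.TextStringBuilder, which keeps the same builder-style API, so calls such as appendln() carry over unchanged. A minimal usage sketch, assuming the commons-text artifact is on the classpath (the class name and strings are illustrative):

    import org.apache.commons.text.TextStringBuilder;

    public class ReportBufferDemo {
      public static void main(String[] args) {
        TextStringBuilder result = new TextStringBuilder();
        result.appendln("Processing report command");
        result.appendln("volumes configured: 4, volumes failed: 1");
        System.out.print(result.toString());
      }
    }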
*/ - protected int parseTopNodes(final CommandLine cmd, final StrBuilder result) + protected int parseTopNodes(final CommandLine cmd, final TextStringBuilder result) throws IllegalArgumentException { String outputLine = ""; int nodes = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java index 90cc0c48009..dab95595120 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/PlanCommand.java @@ -23,7 +23,7 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.text.StrBuilder; +import org.apache.commons.text.TextStringBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -89,7 +89,7 @@ public PlanCommand(Configuration conf, final PrintStream ps) { */ @Override public void execute(CommandLine cmd) throws Exception { - StrBuilder result = new StrBuilder(); + TextStringBuilder result = new TextStringBuilder(); String outputLine = ""; LOG.debug("Processing Plan Command."); Preconditions.checkState(cmd.hasOption(DiskBalancerCLI.PLAN)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java index 5f4e0f716f0..4f75aff1a51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/ReportCommand.java @@ -25,7 +25,7 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.text.StrBuilder; +import org.apache.commons.text.TextStringBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.diskbalancer.DiskBalancerException; import org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerDataNode; @@ -67,7 +67,7 @@ public ReportCommand(Configuration conf, final PrintStream ps) { @Override public void execute(CommandLine cmd) throws Exception { - StrBuilder result = new StrBuilder(); + TextStringBuilder result = new TextStringBuilder(); String outputLine = "Processing report command"; recordOutput(result, outputLine); @@ -99,7 +99,7 @@ public void execute(CommandLine cmd) throws Exception { getPrintStream().println(result.toString()); } - private void handleTopReport(final CommandLine cmd, final StrBuilder result, + private void handleTopReport(final CommandLine cmd, final TextStringBuilder result, final String nodeFormat) throws IllegalArgumentException { Collections.sort(getCluster().getNodes(), Collections.reverseOrder()); @@ -131,7 +131,7 @@ private void handleTopReport(final CommandLine cmd, final StrBuilder result, } } - private void handleNodeReport(final CommandLine cmd, StrBuilder result, + private void handleNodeReport(final CommandLine cmd, TextStringBuilder result, final String nodeFormat, final String volumeFormat) throws 
Exception { String outputLine = ""; /* @@ -175,7 +175,7 @@ private void handleNodeReport(final CommandLine cmd, StrBuilder result, /** * Put node report lines to string buffer. */ - private void recordNodeReport(StrBuilder result, DiskBalancerDataNode dbdn, + private void recordNodeReport(TextStringBuilder result, DiskBalancerDataNode dbdn, final String nodeFormat, final String volumeFormat) throws Exception { final String trueStr = "True"; final String falseStr = "False"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java index a9fd7f0e22a..e43b83e39ce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java @@ -21,9 +21,10 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import com.google.common.base.Preconditions; import org.apache.hadoop.hdfs.web.JsonUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; @@ -35,6 +36,9 @@ private static final ObjectReader READER = new ObjectMapper().readerFor(DiskBalancerVolume.class); + private static final Logger LOG = + LoggerFactory.getLogger(DiskBalancerVolume.class); + private String path; private long capacity; private String storageType; @@ -269,10 +273,13 @@ public long getUsed() { * @param dfsUsedSpace - dfsUsedSpace for this volume. */ public void setUsed(long dfsUsedSpace) { - Preconditions.checkArgument(dfsUsedSpace < this.getCapacity(), - "DiskBalancerVolume.setUsed: dfsUsedSpace(%s) < capacity(%s)", - dfsUsedSpace, getCapacity()); - this.used = dfsUsedSpace; + if (dfsUsedSpace > this.getCapacity()) { + LOG.warn("Volume usage ("+dfsUsedSpace+") is greater than capacity ("+ + this.getCapacity()+"). 
Setting volume usage to the capacity"); + this.used = this.getCapacity(); + } else { + this.used = dfsUsedSpace; + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java index c5d14d25dae..80df71195d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java @@ -22,8 +22,8 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.cli.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; @@ -41,11 +41,13 @@ import org.apache.hadoop.hdfs.server.balancer.Matcher; import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.namenode.ErasureCodingPolicyManager; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.security.SecurityUtil; @@ -68,9 +70,7 @@ @InterfaceAudience.Private public class Mover { - static final Log LOG = LogFactory.getLog(Mover.class); - - static final Path MOVER_ID_PATH = new Path("/system/mover.id"); + static final Logger LOG = LoggerFactory.getLogger(Mover.class); private static class StorageMap { private final StorageGroupMap sources @@ -645,7 +645,7 @@ static int run(Map> namenodes, Configuration conf) List connectors = Collections.emptyList(); try { connectors = NameNodeConnector.newNameNodeConnectors(namenodes, - Mover.class.getSimpleName(), MOVER_ID_PATH, conf, + Mover.class.getSimpleName(), HdfsServerConstants.MOVER_ID_PATH, conf, NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS); while (connectors.size() > 0) { @@ -655,10 +655,11 @@ static int run(Map> namenodes, Configuration conf) NameNodeConnector nnc = iter.next(); final Mover m = new Mover(nnc, conf, retryCount, excludedPinnedBlocks); + final ExitStatus r = m.run(); if (r == ExitStatus.SUCCESS) { - IOUtils.cleanup(LOG, nnc); + IOUtils.cleanupWithLogger(LOG, nnc); iter.remove(); } else if (r != ExitStatus.IN_PROGRESS) { if (r == ExitStatus.NO_MOVE_PROGRESS) { @@ -681,7 +682,7 @@ static int run(Map> namenodes, Configuration conf) return ExitStatus.SUCCESS.getExitCode(); } finally { for (NameNodeConnector nnc : connectors) { - IOUtils.cleanup(LOG, nnc); + IOUtils.cleanupWithLogger(LOG, nnc); } } } @@ -719,7 +720,7 @@ private static Options buildCliOptions() { } } } finally { - IOUtils.cleanup(LOG, reader); + IOUtils.cleanupWithLogger(LOG, reader); } return list.toArray(new String[list.size()]); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java index 82aa214bf74..2c5b23b07c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclEntryStatusFormat.java @@ -38,7 +38,8 @@ * [1:3) -- the type of the entry (AclEntryType)
* [3:6) -- the permission of the entry (FsAction)
* [6:7) -- A flag to indicate whether Named entry or not
- * [7:32) -- the name of the entry, which is an ID that points to a
+ * [7:8) -- Reserved
+ * [8:32) -- the name of the entry, which is an ID that points to a
* string in the StringTableSection.
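The revised layout narrows the name-ID field from 25 to 24 bits to free a reserved bit at position 7. As a hypothetical illustration using plain shifts rather than Hadoop's LongBitFormat, with bit 0 taken as the low-order bit of the packed integer:

    // Field widths follow the ranges documented above; the method name is made up.
    static int packAclEntry(int scope, int type, int perm, int named, int nameId) {
      return (scope & 0x1)               // [0:1)
          | (type & 0x3) << 1            // [1:3)
          | (perm & 0x7) << 3            // [3:6)
          | (named & 0x1) << 6           // [6:7)
          // bit 7 is the newly reserved bit, left as zero
          | (nameId & 0xFFFFFF) << 8;    // [8:32) name ID, now limited to 24 bits
    }
    // Unpacking the name ID is a single unsigned shift: int nameId = entry >>> 8;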
*/ public enum AclEntryStatusFormat { @@ -47,7 +48,8 @@ TYPE(SCOPE.BITS, 2), PERMISSION(TYPE.BITS, 3), NAMED_ENTRY_CHECK(PERMISSION.BITS, 1), - NAME(NAMED_ENTRY_CHECK.BITS, 25); + RESERVED(NAMED_ENTRY_CHECK.BITS, 1), + NAME(RESERVED.BITS, 24); private final LongBitFormat BITS; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java index 97d47597429..6d546f81c4f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclFeature.java @@ -72,6 +72,11 @@ public boolean equals(Object o) { return Arrays.equals(entries, ((AclFeature) o).entries); } + @Override + public String toString() { + return "AclFeature : " + Integer.toHexString(hashCode()) + " Size of entries : " + entries.length; + } + @Override public int hashCode() { return Arrays.hashCode(entries); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java index e1ddfb909cf..eac91bf4832 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java @@ -42,7 +42,7 @@ } @Override - public void format(NamespaceInfo nsInfo) { + public void format(NamespaceInfo nsInfo, boolean force) { // format() should only get called at startup, before any BNs // can register with the NN. throw new UnsupportedOperationException( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index ab026f01d0c..8a2949243b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -25,6 +25,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_DEFAULT; import java.io.DataInput; import java.io.DataOutputStream; @@ -171,6 +173,21 @@ private final SerializerCompat serializerCompat = new SerializerCompat(); + /** + * Whether caching is enabled. + * + * If caching is disabled, we will not process cache reports or store + * information about what is cached where. We also do not start the + * CacheReplicationMonitor thread. This will save resources, but provide + * less functionality. + * + * Even when caching is disabled, we still store path-based cache + * information. This information is stored in the edit log and fsimage. We + * don't want to lose it just because a configuration setting was turned off. 
+ * However, we will not act on this information if caching is disabled. + */ + private final boolean enabled; + /** * The CacheReplicationMonitor. */ @@ -194,6 +211,8 @@ public PersistState(CacheManagerSection section, this.namesystem = namesystem; this.blockManager = blockManager; this.nextDirectiveId = 1; + this.enabled = conf.getBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, + DFS_NAMENODE_CACHING_ENABLED_DEFAULT); this.maxListCachePoolsResponses = conf.getInt( DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES, DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES_DEFAULT); @@ -211,10 +230,13 @@ public PersistState(CacheManagerSection section, DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT); cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT; } - this.cachedBlocks = new LightWeightGSet( + this.cachedBlocks = enabled ? new LightWeightGSet( LightWeightGSet.computeCapacity(cachedBlocksPercent, - "cachedBlocks")); + "cachedBlocks")) : new LightWeightGSet<>(0); + } + public boolean isEnabled() { + return enabled; } /** @@ -229,6 +251,12 @@ void clear() { } public void startMonitorThread() { + if (!isEnabled()) { + LOG.info("Not starting CacheReplicationMonitor as name-node caching" + + " is disabled."); + return; + } + crmLock.lock(); try { if (this.monitor == null) { @@ -242,6 +270,10 @@ public void startMonitorThread() { } public void stopMonitorThread() { + if (!isEnabled()) { + return; + } + crmLock.lock(); try { if (this.monitor != null) { @@ -945,6 +977,12 @@ private void setCachedLocations(LocatedBlock block) { public final void processCacheReport(final DatanodeID datanodeID, final List blockIds) throws IOException { + if (!enabled) { + LOG.debug("Ignoring cache report from {} because {} = false. " + + "number of blocks: {}", datanodeID, + DFS_NAMENODE_CACHING_ENABLED_KEY, blockIds.size()); + return; + } namesystem.writeLock(); final long startTime = Time.monotonicNow(); final long endTime; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java index a2613d999da..20b1d25434a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java @@ -21,8 +21,6 @@ import javax.annotation.Nonnull; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java index aeee87dc80c..186bc3d7276 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java @@ -19,8 +19,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -30,7 +30,8 @@ @InterfaceAudience.Private public class 
CheckpointConf { - private static final Log LOG = LogFactory.getLog(CheckpointConf.class); + private static final Logger LOG = + LoggerFactory.getLogger(CheckpointConf.class); /** How often to checkpoint regardless of number of txns */ private final long checkpointPeriod; // in seconds diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index 318acfbef46..14749d01ab8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -28,8 +28,8 @@ import java.net.URL; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; @@ -56,8 +56,8 @@ * (1) time or (2) the size of the edits file. */ class Checkpointer extends Daemon { - public static final Log LOG = - LogFactory.getLog(Checkpointer.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(Checkpointer.class.getName()); private final BackupNode backupNode; volatile boolean shouldRun; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java index c81f82cde32..14548e9e0dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java @@ -22,8 +22,8 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.security.AccessControlException; @@ -48,8 +48,8 @@ private int sleepNanoSec = 0; public static final String REPLICATED = "Replicated"; - public static final Log LOG = LogFactory - .getLog(ContentSummaryComputationContext.class); + public static final Logger LOG = LoggerFactory + .getLogger(ContentSummaryComputationContext.class); private FSPermissionChecker pc; /** @@ -191,6 +191,8 @@ public String getErasureCodingPolicyName(INode inode) { .getEnabledPolicyByName(ecPolicyName) .getName(); } + } else if (inode.getParent() != null) { + return getErasureCodingPolicyName(inode.getParent()); } } catch (IOException ioe) { LOG.warn("Encountered error getting ec policy for " diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java index 2fb369c9504..e3b8e425ba6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java @@ -21,8 +21,8 @@ import javax.servlet.http.HttpServletRequest; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.security.UserGroupInformation; @@ -34,7 +34,8 @@ /** For java.io.Serializable */ private static final long serialVersionUID = 1L; - static final Log LOG = LogFactory.getLog(DfsServlet.class.getCanonicalName()); + static final Logger LOG = + LoggerFactory.getLogger(DfsServlet.class.getCanonicalName()); protected UserGroupInformation getUGI(HttpServletRequest request, Configuration conf) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java index 14d5b5464ed..43fc949dc8e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java @@ -21,8 +21,8 @@ import java.net.InetSocketAddress; import java.util.Arrays; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.server.common.Storage; @@ -43,7 +43,8 @@ * int, int, byte[]) */ class EditLogBackupOutputStream extends EditLogOutputStream { - private static final Log LOG = LogFactory.getLog(EditLogFileOutputStream.class); + private static final Logger LOG = + LoggerFactory.getLogger(EditLogFileOutputStream.class); static final int DEFAULT_BUFFER_SIZE = 256; private final JournalProtocol backupNode; // RPC proxy to backup node diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index 36c2232d3b8..95a305e735a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -29,8 +29,8 @@ import java.net.URL; import java.security.PrivilegedExceptionAction; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.LayoutFlags; @@ -71,7 +71,7 @@ private FSEditLogOp.Reader reader = null; private FSEditLogLoader.PositionTrackingInputStream tracker = null; private DataInputStream dataIn = null; - static final Log LOG = LogFactory.getLog(EditLogInputStream.class); + static final Logger LOG = LoggerFactory.getLogger(EditLogInputStream.class); /** * Open an EditLogInputStream for the given file. 
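Editor's note, not part of the patch: many hunks above and below follow one mechanical pattern, replacing commons-logging (Log/LogFactory) with slf4j (Logger/LoggerFactory) and switching IOUtils.cleanup(...) to IOUtils.cleanupWithLogger(...), which takes an org.slf4j.Logger. A minimal sketch of the migrated shape, using an illustrative class name rather than one from the patch:

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Illustrative only: shows the logging migration applied across these files. */
class ExampleStreamHolder implements Closeable {
  // Before: private static final Log LOG = LogFactory.getLog(ExampleStreamHolder.class);
  private static final Logger LOG =
      LoggerFactory.getLogger(ExampleStreamHolder.class);

  private FileInputStream in;

  ExampleStreamHolder(String path) throws IOException {
    in = new FileInputStream(path);
    LOG.debug("opened {}", path); // slf4j parameterized logging, no string concatenation
  }

  @Override
  public void close() {
    // Before: IOUtils.cleanup(LOG, in); that overload expects a commons-logging Log.
    IOUtils.cleanupWithLogger(LOG, in); // closes quietly, logging any failure to the slf4j Logger
    in = null;
  }
}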
@@ -161,7 +161,7 @@ private void init(boolean verifyLayoutVersion) state = State.OPEN; } finally { if (reader == null) { - IOUtils.cleanup(LOG, dataIn, tracker, bin, fStream); + IOUtils.cleanupWithLogger(LOG, dataIn, tracker, bin, fStream); state = State.CLOSED; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java index 830814c70eb..9f06ce9d5fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java @@ -26,8 +26,8 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -42,7 +42,8 @@ */ @InterfaceAudience.Private public class EditLogFileOutputStream extends EditLogOutputStream { - private static final Log LOG = LogFactory.getLog(EditLogFileOutputStream.class); + private static final Logger LOG = + LoggerFactory.getLogger(EditLogFileOutputStream.class); public static final int MIN_PREALLOCATION_LENGTH = 1024 * 1024; private File file; @@ -161,7 +162,7 @@ public void close() throws IOException { fp.close(); fp = null; } finally { - IOUtils.cleanup(LOG, fc, fp); + IOUtils.cleanupWithLogger(LOG, fc, fp); doubleBuf = null; fc = null; fp = null; @@ -174,7 +175,7 @@ public void abort() throws IOException { if (fp == null) { return; } - IOUtils.cleanup(LOG, fp); + IOUtils.cleanupWithLogger(LOG, fp); fp = null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java index 4e1dab069b0..93f35f76e3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditsDoubleBuffer.java @@ -24,8 +24,8 @@ import java.util.Arrays; import org.apache.commons.codec.binary.Hex; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.Writer; @@ -43,7 +43,8 @@ */ @InterfaceAudience.Private public class EditsDoubleBuffer { - protected static final Log LOG = LogFactory.getLog(EditsDoubleBuffer.class); + protected static final Logger LOG = + LoggerFactory.getLogger(EditsDoubleBuffer.class); private TxnBuffer bufCurrent; // current buffer for writing private TxnBuffer bufReady; // buffer ready for flushing diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java index 3a310dae871..e7de05b00d0 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java @@ -356,7 +356,7 @@ public synchronized void removePolicy(String name) { /** * Disable an erasure coding policy by policyName. */ - public synchronized void disablePolicy(String name) { + public synchronized boolean disablePolicy(String name) { ErasureCodingPolicyInfo info = policiesByName.get(name); if (info == null) { throw new HadoopIllegalArgumentException("The policy name " + @@ -367,27 +367,32 @@ public synchronized void disablePolicy(String name) { enabledPoliciesByName.remove(name); enabledPolicies = enabledPoliciesByName.values().toArray(new ErasureCodingPolicy[0]); + info.setState(ErasureCodingPolicyState.DISABLED); + LOG.info("Disable the erasure coding policy " + name); + return true; } - info.setState(ErasureCodingPolicyState.DISABLED); - LOG.info("Disable the erasure coding policy " + name); + return false; } /** * Enable an erasure coding policy by policyName. */ - public synchronized void enablePolicy(String name) { + public synchronized boolean enablePolicy(String name) { final ErasureCodingPolicyInfo info = policiesByName.get(name); if (info == null) { throw new HadoopIllegalArgumentException("The policy name " + name + " does not exist"); } - + if (enabledPoliciesByName.containsKey(name)) { + return false; + } final ErasureCodingPolicy ecPolicy = info.getPolicy(); enabledPoliciesByName.put(name, ecPolicy); info.setState(ErasureCodingPolicyState.ENABLED); enabledPolicies = enabledPoliciesByName.values().toArray(new ErasureCodingPolicy[0]); LOG.info("Enable the erasure coding policy " + name); + return true; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java index 769c13757b2..b0bc5e40ebe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; +import org.apache.hadoop.hdfs.protocol.NoECPolicySetException; import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.WritableUtils; @@ -206,6 +207,9 @@ static FileStatus unsetErasureCodingPolicy(final FSNamesystem fsn, } if (xAttrs != null) { fsn.getEditLog().logRemoveXAttrs(src, xAttrs, logRetryCache); + } else { + throw new NoECPolicySetException( + "No erasure coding policy explicitly set on " + src); } return fsd.getAuditFileInfo(iip); } @@ -253,11 +257,16 @@ static void removeErasureCodingPolicy(final FSNamesystem fsn, * rebuilding * @throws IOException */ - static void enableErasureCodingPolicy(final FSNamesystem fsn, + static boolean enableErasureCodingPolicy(final FSNamesystem fsn, String ecPolicyName, final boolean logRetryCache) throws IOException { Preconditions.checkNotNull(ecPolicyName); - fsn.getErasureCodingPolicyManager().enablePolicy(ecPolicyName); - fsn.getEditLog().logEnableErasureCodingPolicy(ecPolicyName, logRetryCache); + boolean success = + 
fsn.getErasureCodingPolicyManager().enablePolicy(ecPolicyName); + if (success) { + fsn.getEditLog().logEnableErasureCodingPolicy(ecPolicyName, + logRetryCache); + } + return success; } /** @@ -269,11 +278,16 @@ static void enableErasureCodingPolicy(final FSNamesystem fsn, * rebuilding * @throws IOException */ - static void disableErasureCodingPolicy(final FSNamesystem fsn, + static boolean disableErasureCodingPolicy(final FSNamesystem fsn, String ecPolicyName, final boolean logRetryCache) throws IOException { Preconditions.checkNotNull(ecPolicyName); - fsn.getErasureCodingPolicyManager().disablePolicy(ecPolicyName); - fsn.getEditLog().logDisableErasureCodingPolicy(ecPolicyName, logRetryCache); + boolean success = + fsn.getErasureCodingPolicyManager().disablePolicy(ecPolicyName); + if (success) { + fsn.getEditLog().logDisableErasureCodingPolicy(ecPolicyName, + logRetryCache); + } + return success; } private static List removeErasureCodingPolicyXAttr( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java index 45bb6b41047..2f0a0fc2984 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java @@ -110,10 +110,7 @@ static INodesInPath createAncestorDirectories( * Create all ancestor directories and return the parent inodes. * * @param fsd FSDirectory - * @param existing The INodesInPath instance containing all the existing - * ancestral INodes - * @param children The relative path from the parent towards children, - * starting with "/" + * @param iip inodes in path to the fs directory * @param perm the permission of the directory. Note that all ancestors * created along the path has implicit {@code u+wx} permissions. * @param inheritPerms if the ancestor directories should inherit permissions diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSatisfyStoragePolicyOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSatisfyStoragePolicyOp.java new file mode 100644 index 00000000000..3f873d7eea5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirSatisfyStoragePolicyOp.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY; + +import java.io.IOException; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.XAttr; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hdfs.XAttrHelper; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfyManager; + +import com.google.common.collect.Lists; + +/** + * Helper class to perform storage policy satisfier related operations. + */ +final class FSDirSatisfyStoragePolicyOp { + + /** + * Private constructor for preventing FSDirSatisfyStoragePolicyOp object + * creation. Static-only class. + */ + private FSDirSatisfyStoragePolicyOp() { + } + + /** + * Satisfy storage policy function which will add the entry to SPS call queue + * and will perform satisfaction async way. + * + * @param fsd + * fs directory + * @param bm + * block manager + * @param src + * source path + * @param logRetryCache + * whether to record RPC ids in editlog for retry cache rebuilding + * @return file status info + * @throws IOException + */ + static FileStatus satisfyStoragePolicy(FSDirectory fsd, BlockManager bm, + String src, boolean logRetryCache) throws IOException { + + assert fsd.getFSNamesystem().hasWriteLock(); + FSPermissionChecker pc = fsd.getPermissionChecker(); + INodesInPath iip; + fsd.writeLock(); + try { + + // check operation permission. + iip = fsd.resolvePath(pc, src, DirOp.WRITE); + if (fsd.isPermissionEnabled()) { + fsd.checkPathAccess(pc, iip, FsAction.WRITE); + } + INode inode = FSDirectory.resolveLastINode(iip); + if (inode.isFile() && inode.asFile().numBlocks() == 0) { + if (NameNode.LOG.isInfoEnabled()) { + NameNode.LOG.info( + "Skipping satisfy storage policy on path:{} as " + + "this file doesn't have any blocks!", + inode.getFullPathName()); + } + } else if (inodeHasSatisfyXAttr(inode)) { + NameNode.LOG + .warn("Cannot request to call satisfy storage policy on path: " + + inode.getFullPathName() + + ", as this file/dir was already called for satisfying " + + "storage policy."); + } else { + XAttr satisfyXAttr = XAttrHelper + .buildXAttr(XATTR_SATISFY_STORAGE_POLICY); + List xAttrs = Arrays.asList(satisfyXAttr); + List existingXAttrs = XAttrStorage.readINodeXAttrs(inode); + List newXAttrs = FSDirXAttrOp.setINodeXAttrs(fsd, existingXAttrs, + xAttrs, EnumSet.of(XAttrSetFlag.CREATE)); + XAttrStorage.updateINodeXAttrs(inode, newXAttrs, + iip.getLatestSnapshotId()); + fsd.getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); + + // Adding directory in the pending queue, so FileInodeIdCollector + // process directory child in batch and recursively + StoragePolicySatisfyManager spsManager = + fsd.getBlockManager().getSPSManager(); + if (spsManager != null) { + spsManager.addPathId(inode.getId()); + } + } + } finally { + fsd.writeUnlock(); + } + return fsd.getAuditFileInfo(iip); + } + + static boolean unprotectedSatisfyStoragePolicy(INode inode, FSDirectory fsd) { + if (inode.isFile() && inode.asFile().numBlocks() == 0) { + return false; + } else { + // Adding directory in the pending queue, so FileInodeIdCollector process + // directory child in batch and recursively + StoragePolicySatisfyManager spsManager = + 
fsd.getBlockManager().getSPSManager(); + if (spsManager != null) { + spsManager.addPathId(inode.getId()); + } + return true; + } + } + + private static boolean inodeHasSatisfyXAttr(INode inode) { + final XAttrFeature f = inode.getXAttrFeature(); + if (inode.isFile() && f != null + && f.getXAttr(XATTR_SATISFY_STORAGE_POLICY) != null) { + return true; + } + return false; + } + + static void removeSPSXattr(FSDirectory fsd, INode inode, XAttr spsXAttr) + throws IOException { + try { + fsd.writeLock(); + List existingXAttrs = XAttrStorage.readINodeXAttrs(inode); + existingXAttrs.remove(spsXAttr); + XAttrStorage.updateINodeXAttrs(inode, existingXAttrs, INodesInPath + .fromINode(inode).getLatestSnapshotId()); + List xAttrs = Lists.newArrayListWithCapacity(1); + xAttrs.add(spsXAttr); + fsd.getEditLog().logRemoveXAttrs(inode.getFullPathName(), xAttrs, false); + } finally { + fsd.writeUnlock(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java index 03c349c3dee..2875708b72d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java @@ -269,19 +269,27 @@ static LocatedBlock storeAllocatedBlock(FSNamesystem fsn, String src, BlockManager bm, String src, DatanodeInfo[] excludedNodes, String[] favoredNodes, EnumSet flags, ValidateAddBlockResult r) throws IOException { - Node clientNode = bm.getDatanodeManager() - .getDatanodeByHost(r.clientMachine); - if (clientNode == null) { - clientNode = getClientNode(bm, r.clientMachine); - } + Node clientNode = null; - Set excludedNodesSet = null; - if (excludedNodes != null) { - excludedNodesSet = new HashSet<>(excludedNodes.length); - Collections.addAll(excludedNodesSet, excludedNodes); + boolean ignoreClientLocality = (flags != null + && flags.contains(AddBlockFlag.IGNORE_CLIENT_LOCALITY)); + + // If client locality is ignored, clientNode remains 'null' to indicate + if (!ignoreClientLocality) { + clientNode = bm.getDatanodeManager().getDatanodeByHost(r.clientMachine); + if (clientNode == null) { + clientNode = getClientNode(bm, r.clientMachine); + } } - List favoredNodesList = (favoredNodes == null) ? null - : Arrays.asList(favoredNodes); + + Set excludedNodesSet = + (excludedNodes == null) ? new HashSet<>() + : new HashSet<>(Arrays.asList(excludedNodes)); + + List favoredNodesList = + (favoredNodes == null) ? Collections.emptyList() + : Arrays.asList(favoredNodes); + // choose targets for the new block to be allocated. 
return bm.chooseTarget4NewBlock(src, r.numTargets, clientNode, excludedNodesSet, r.blockSize, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java index 9e95f90d866..1cb414d6859 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirXAttrOp.java @@ -42,6 +42,7 @@ import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY; class FSDirXAttrOp { private static final XAttr KEYID_XATTR = @@ -294,6 +295,12 @@ static INode unprotectedSetXAttrs( } } + // Add inode id to movement queue if xattrs contain satisfy xattr. + if (XATTR_SATISFY_STORAGE_POLICY.equals(xaName)) { + FSDirSatisfyStoragePolicyOp.unprotectedSatisfyStoragePolicy(inode, fsd); + continue; + } + if (!isFile && SECURITY_XATTR_UNREADABLE_BY_SUPERUSER.equals(xaName)) { throw new IOException("Can only set '" + SECURITY_XATTR_UNREADABLE_BY_SUPERUSER + "' on a file."); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 1b1448252ca..2a976d22f4a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo.UpdatedReplicationInfo; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfyManager; import org.apache.hadoop.hdfs.util.ByteArray; import org.apache.hadoop.hdfs.util.EnumCounters; import org.apache.hadoop.hdfs.util.ReadOnlyList; @@ -91,6 +92,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY; import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID; /** @@ -1400,10 +1402,27 @@ public final void addToInodeMap(INode inode) { if (!inode.isSymlink()) { final XAttrFeature xaf = inode.getXAttrFeature(); addEncryptionZone((INodeWithAdditionalFields) inode, xaf); + StoragePolicySatisfyManager spsManager = + namesystem.getBlockManager().getSPSManager(); + if (spsManager != null && spsManager.isEnabled()) { + addStoragePolicySatisfier((INodeWithAdditionalFields) inode, xaf); + } } } } + private void addStoragePolicySatisfier(INodeWithAdditionalFields inode, + XAttrFeature xaf) { + if (xaf == null) { + return; + } + XAttr xattr = xaf.getXAttr(XATTR_SATISFY_STORAGE_POLICY); + if (xattr == null) { + return; + } + 
FSDirSatisfyStoragePolicyOp.unprotectedSatisfyStoragePolicy(inode, this); + } + private void addEncryptionZone(INodeWithAdditionalFields inode, XAttrFeature xaf) { if (xaf == null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index d6fb212c7d9..547ad577c38 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -417,13 +417,14 @@ synchronized void close() { * File-based journals are skipped, since they are formatted by the * Storage format code. */ - synchronized void formatNonFileJournals(NamespaceInfo nsInfo) throws IOException { + synchronized void formatNonFileJournals(NamespaceInfo nsInfo, boolean force) + throws IOException { Preconditions.checkState(state == State.BETWEEN_LOG_SEGMENTS, "Bad state: %s", state); for (JournalManager jm : journalSet.getJournalManagers()) { if (!(jm instanceof FileJournalManager)) { - jm.format(nsInfo); + jm.format(nsInfo, force); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java index 5990c2273fd..2b47398f40c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java @@ -24,10 +24,12 @@ import java.util.List; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.util.ExitUtil; @@ -35,7 +37,7 @@ import com.google.common.base.Preconditions; class FSEditLogAsync extends FSEditLog implements Runnable { - static final Log LOG = LogFactory.getLog(FSEditLog.class); + static final Logger LOG = LoggerFactory.getLogger(FSEditLog.class); // use separate mutex to avoid possible deadlock when stopping the thread. private final Object syncThreadLock = new Object(); @@ -145,15 +147,68 @@ public boolean logEdit() { edit.logSyncWait(); } + // draining permits is intended to provide a high priority reservation. + // however, release of outstanding permits must be postponed until + // drained permits are restored to avoid starvation. logic has some races + // but is good enough to serve its purpose. 
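Editor's note, not part of the patch: the comment above describes the anonymous Semaphore subclass declared immediately after this note in the FSEditLogAsync hunk. The core idea is a priority reservation: the thread handling persistent queue overflow drains every permit so new callers block on acquire(), and releases made while draining are deferred so the reservation is not eroded. A small standalone driver using a plain java.util.concurrent.Semaphore (assumed behavior, not the patch's exact class) shows the basic drainPermits()/release(int) reservation:

import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

/** Sketch of the drain-based reservation described in the comment above. */
public class PriorityReservationDemo {
  public static void main(String[] args) throws InterruptedException {
    final Semaphore throttle = new Semaphore(8);

    // Background writers contend on the throttle before queueing their work.
    for (int i = 0; i < 4; i++) {
      final int id = i;
      new Thread(() -> {
        try {
          throttle.acquire();
          try {
            TimeUnit.MILLISECONDS.sleep(50); // pretend to enqueue an edit
          } finally {
            throttle.release();
          }
          System.out.println("writer " + id + " done");
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        }
      }).start();
    }

    TimeUnit.MILLISECONDS.sleep(10);
    // High-priority caller: take every remaining permit so no new writer enters,
    // do its work, then hand the reservation back with a single release(int).
    int drained = throttle.drainPermits();
    System.out.println("reserved " + drained + " permits");
    TimeUnit.MILLISECONDS.sleep(100); // privileged section
    throttle.release(drained);
  }
}

With a plain Semaphore, a writer that releases while the drain is in progress would let another writer slip in; the patch's subclass avoids that by buffering release() calls until release(int) restores the drained permits, which is the race the comment acknowledges as "good enough".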
+ private Semaphore overflowMutex = new Semaphore(8){ + private AtomicBoolean draining = new AtomicBoolean(); + private AtomicInteger pendingReleases = new AtomicInteger(); + @Override + public int drainPermits() { + draining.set(true); + return super.drainPermits(); + } + // while draining, count the releases until release(int) + private void tryRelease(int permits) { + pendingReleases.getAndAdd(permits); + if (!draining.get()) { + super.release(pendingReleases.getAndSet(0)); + } + } + @Override + public void release() { + tryRelease(1); + } + @Override + public void release(int permits) { + draining.set(false); + tryRelease(permits); + } + }; + private void enqueueEdit(Edit edit) { if (LOG.isDebugEnabled()) { LOG.debug("logEdit " + edit); } try { - if (!editPendingQ.offer(edit, 1, TimeUnit.SECONDS)) { + // not checking for overflow yet to avoid penalizing performance of + // the common case. if there is persistent overflow, a mutex will be + // use to throttle contention on the queue. + if (!editPendingQ.offer(edit)) { Preconditions.checkState( isSyncThreadAlive(), "sync thread is not alive"); - editPendingQ.put(edit); + if (Thread.holdsLock(this)) { + // if queue is full, synchronized caller must immediately relinquish + // the monitor before re-offering to avoid deadlock with sync thread + // which needs the monitor to write transactions. + int permits = overflowMutex.drainPermits(); + try { + do { + this.wait(1000); // will be notified by next logSync. + } while (!editPendingQ.offer(edit)); + } finally { + overflowMutex.release(permits); + } + } else { + // mutex will throttle contention during persistent overflow. + overflowMutex.acquire(); + try { + editPendingQ.put(edit); + } finally { + overflowMutex.release(); + } + } } } catch (Throwable t) { // should never happen! 
failure to enqueue an edit is fatal @@ -203,7 +258,7 @@ public void run() { private void terminate(Throwable t) { String message = "Exception while edit logging: "+t.getMessage(); - LOG.fatal(message, t); + LOG.error(message, t); ExitUtil.terminate(1, message); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 82e35bd353e..f3b6b843976 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -28,8 +28,8 @@ import java.util.EnumSet; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileSystem; @@ -121,7 +121,8 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class FSEditLogLoader { - static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName()); + static final Logger LOG = + LoggerFactory.getLogger(FSEditLogLoader.class.getName()); static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec private final FSNamesystem fsNamesys; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 5cfc0176f1d..3d347d929b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -37,8 +37,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -83,7 +83,8 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class FSImage implements Closeable { - public static final Log LOG = LogFactory.getLog(FSImage.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(FSImage.class.getName()); protected FSEditLog editLog = null; private boolean isUpgradeFinalized = false; @@ -160,7 +161,8 @@ protected FSImage(Configuration conf, archivalManager = new NNStorageRetentionManager(conf, storage, editLog); } - void format(FSNamesystem fsn, String clusterId) throws IOException { + void format(FSNamesystem fsn, String clusterId, boolean force) + throws IOException { long fileCount = fsn.getFilesTotal(); // Expect 1 file, which is the root inode Preconditions.checkState(fileCount == 1, @@ -171,7 +173,7 @@ void format(FSNamesystem fsn, String clusterId) throws IOException { ns.clusterID = clusterId; storage.format(ns); - editLog.formatNonFileJournals(ns); + editLog.formatNonFileJournals(ns, force); saveFSImageInAllDirs(fsn, 0); } @@ -1135,7 +1137,7 @@ public synchronized void saveNamespace(FSNamesystem source, NameNodeFile nnf, getStorage().updateNameDirSize(); if (exitAfterSave.get()) 
{ - LOG.fatal("NameNode process will exit now... The saved FsImage " + + LOG.error("NameNode process will exit now... The saved FsImage " + nnf + " is potentially corrupted."); ExitUtil.terminate(-1); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 83f9c9386c3..ae2a037146f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -37,7 +37,7 @@ import java.util.Map; import java.util.TreeMap; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -181,7 +181,7 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class FSImageFormat { - private static final Log LOG = FSImage.LOG; + private static final Logger LOG = FSImage.LOG; // Static-only class private FSImageFormat() {} @@ -231,7 +231,7 @@ public void load(File file, boolean requireSameLayoutVersion) loader.load(file); } } finally { - IOUtils.cleanup(LOG, is); + IOUtils.cleanupWithLogger(LOG, is); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index 5e60038ce7a..1571d57af33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -26,8 +26,8 @@ import java.util.Iterator; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.permission.AclEntry; @@ -104,7 +104,8 @@ XAttr.NameSpace.values(); - private static final Log LOG = LogFactory.getLog(FSImageFormatPBINode.class); + private static final Logger LOG = + LoggerFactory.getLogger(FSImageFormatPBINode.class); public final static class Loader { public static PermissionStatus loadPermission(long id, @@ -439,6 +440,8 @@ private void loadRootINode(INodeSection.INode p) { } public final static class Saver { + private long numImageErrors; + private static long buildPermissionStatus(INodeAttributes n, final SaverContext.DeduplicationMap stringMap) { long userId = stringMap.getId(n.getUserName()); @@ -563,11 +566,13 @@ private static long buildPermissionStatus(INodeAttributes n, this.summary = summary; this.context = parent.getContext(); this.fsn = context.getSourceNamesystem(); + this.numImageErrors = 0; } void serializeINodeDirectorySection(OutputStream out) throws IOException { - Iterator iter = fsn.getFSDirectory() - .getINodeMap().getMapIterator(); + FSDirectory dir = fsn.getFSDirectory(); + Iterator iter = dir.getINodeMap() + .getMapIterator(); final ArrayList refList = parent.getSaverContext() .getRefList(); int i = 0; @@ -583,6 +588,17 @@ void serializeINodeDirectorySection(OutputStream out) throws IOException { 
INodeDirectorySection.DirEntry.Builder b = INodeDirectorySection. DirEntry.newBuilder().setParent(n.getId()); for (INode inode : children) { + // Error if the child inode doesn't exist in inodeMap + if (dir.getInode(inode.getId()) == null) { + FSImage.LOG.error( + "FSImageFormatPBINode#serializeINodeDirectorySection: " + + "Dangling child pointer found. Missing INode in " + + "inodeMap: id=" + inode.getId() + + "; path=" + inode.getFullPathName() + + "; parent=" + (inode.getParent() == null ? "null" : + inode.getParent().getFullPathName())); + ++numImageErrors; + } if (!inode.isReference()) { b.addChildren(inode.getId()); } else { @@ -711,6 +727,15 @@ private void save(OutputStream out, INodeSymlink n) throws IOException { .setId(n.getId()) .setName(ByteString.copyFrom(n.getLocalNameBytes())); } + + /** + * Number of non-fatal errors detected while writing the + * INodeSection and INodeDirectorySection sections. + * @return the number of non-fatal errors detected. + */ + public long getNumImageErrors() { + return numImageErrors; + } } private FSImageFormatPBINode() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java index 4ac20adc426..9752733c1c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -474,13 +474,15 @@ private static void saveFileSummary(OutputStream out, FileSummary summary) out.write(lengthBytes); } - private void saveInodes(FileSummary.Builder summary) throws IOException { + private long saveInodes(FileSummary.Builder summary) throws IOException { FSImageFormatPBINode.Saver saver = new FSImageFormatPBINode.Saver(this, summary); saver.serializeINodeSection(sectionOutputStream); saver.serializeINodeDirectorySection(sectionOutputStream); saver.serializeFilesUCSection(sectionOutputStream); + + return saver.getNumImageErrors(); } /** @@ -543,8 +545,9 @@ private long saveInternal(FileOutputStream fout, step = new Step(StepType.INODES, filePath); prog.beginStep(Phase.SAVING_CHECKPOINT, step); - saveInodes(b); - long numErrors = saveSnapshots(b); + // Count number of non-fatal errors when saving inodes and snapshots. 
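Editor's note, not part of the patch: the hunks above make image saving tolerant of dangling child pointers by counting them in numImageErrors instead of aborting, and saveInodes() now returns that count so the caller can add it to the snapshot-section errors right after the comment above. A toy sketch of the count-and-continue pattern, with made-up names:

import java.util.List;
import java.util.Map;

/** Toy version of the non-fatal error counting used while writing image sections. */
class SectionSaverSketch {
  private long numErrors; // inconsistencies observed but tolerated during serialization

  long saveChildren(Map<Long, String> inodeMap, List<Long> childIds) {
    for (long id : childIds) {
      if (!inodeMap.containsKey(id)) {
        // Dangling pointer: record it, skip the entry, keep writing the image.
        numErrors++;
        continue;
      }
      // ... serialize the child entry here ...
    }
    return numErrors; // the caller aggregates counts across sections
  }
}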
+ long numErrors = saveInodes(b); + numErrors += saveSnapshots(b); prog.endStep(Phase.SAVING_CHECKPOINT, step); step = new Step(StepType.DELEGATION_TOKENS, filePath); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java index 64693852a54..1d97ace7523 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java @@ -29,8 +29,8 @@ import java.util.List; import java.util.Set; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileUtil; @@ -51,8 +51,8 @@ @InterfaceAudience.Private @InterfaceStability.Unstable class FSImagePreTransactionalStorageInspector extends FSImageStorageInspector { - private static final Log LOG = - LogFactory.getLog(FSImagePreTransactionalStorageInspector.class); + private static final Logger LOG = + LoggerFactory.getLogger(FSImagePreTransactionalStorageInspector.class); /* Flag if there is at least one storage dir that doesn't contain the newest * fstime */ @@ -136,7 +136,7 @@ static long readCheckpointTime(StorageDirectory sd) throws IOException { in.close(); in = null; } finally { - IOUtils.cleanup(LOG, in); + IOUtils.cleanupWithLogger(LOG, in); } } return timeStamp; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java index a0465a5bebd..b04007513c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java @@ -28,8 +28,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; @@ -39,7 +39,7 @@ import com.google.common.collect.Lists; class FSImageTransactionalStorageInspector extends FSImageStorageInspector { - public static final Log LOG = LogFactory.getLog( + public static final Logger LOG = LoggerFactory.getLogger( FSImageTransactionalStorageInspector.class); private boolean needToSave = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index f94f6d072bd..1caf7c20093 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import static org.apache.commons.lang3.StringEscapeUtils.escapeJava; +import static org.apache.commons.text.StringEscapeUtils.escapeJava; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_DEFAULT; @@ -89,7 +89,9 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT; + import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY; import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; @@ -305,9 +307,9 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.VersionInfo; +import org.apache.log4j.Logger; import org.apache.log4j.Appender; import org.apache.log4j.AsyncAppender; -import org.apache.log4j.Logger; import org.eclipse.jetty.util.ajax.JSON; import com.google.common.annotations.VisibleForTesting; @@ -425,13 +427,13 @@ private void logAuditEvent(boolean succeeded, public static final Log auditLog = LogFactory.getLog( FSNamesystem.class.getName() + ".audit"); - static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100; - static int BLOCK_DELETION_INCREMENT = 1000; + private final int maxCorruptFileBlocksReturn; private final boolean isPermissionEnabled; private final UserGroupInformation fsOwner; private final String supergroup; private final boolean standbyShouldCheckpoint; private final int snapshotDiffReportLimit; + private final int blockDeletionIncrement; /** Interval between each check of lease to release. */ private final long leaseRecheckIntervalMs; @@ -831,6 +833,10 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { DFSConfigKeys.DFS_NAMENODE_FILE_CLOSE_NUM_COMMITTED_ALLOWED_KEY, DFSConfigKeys.DFS_NAMENODE_FILE_CLOSE_NUM_COMMITTED_ALLOWED_DEFAULT); + this.maxCorruptFileBlocksReturn = conf.getInt( + DFSConfigKeys.DFS_NAMENODE_MAX_CORRUPT_FILE_BLOCKS_RETURNED_KEY, + DFSConfigKeys.DFS_NAMENODE_MAX_CORRUPT_FILE_BLOCKS_RETURNED_DEFAULT); + this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf); this.standbyShouldCheckpoint = conf.getBoolean( @@ -903,6 +909,13 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { DFSConfigKeys.DFS_NAMENODE_LIST_OPENFILES_NUM_RESPONSES + " must be a positive integer." 
); + + this.blockDeletionIncrement = conf.getInt( + DFSConfigKeys.DFS_NAMENODE_BLOCK_DELETION_INCREMENT_KEY, + DFSConfigKeys.DFS_NAMENODE_BLOCK_DELETION_INCREMENT_DEFAULT); + Preconditions.checkArgument(blockDeletionIncrement > 0, + DFSConfigKeys.DFS_NAMENODE_BLOCK_DELETION_INCREMENT_KEY + + " must be a positive integer."); } catch(IOException e) { LOG.error(getClass().getSimpleName() + " initialization failed.", e); close(); @@ -1072,8 +1085,8 @@ private void loadFSImage(StartupOption startOpt) throws IOException { // format before starting up if requested if (startOpt == StartupOption.FORMAT) { - - fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id + // reuse current id + fsImage.format(this, fsImage.getStorage().determineClusterId(), false); startOpt = StartupOption.REGULAR; } @@ -1283,6 +1296,9 @@ void startActiveServices() throws IOException { FSDirEncryptionZoneOp.warmUpEdekCache(edekCacheLoader, dir, edekCacheLoaderDelay, edekCacheLoaderInterval); } + if (blockManager.getSPSManager() != null) { + blockManager.getSPSManager().start(); + } } finally { startingActiveService = false; blockManager.checkSafeMode(); @@ -1312,6 +1328,9 @@ void stopActiveServices() { LOG.info("Stopping services started for active state"); writeLock(); try { + if (blockManager != null && blockManager.getSPSManager() != null) { + blockManager.getSPSManager().stop(); + } stopSecretManager(); leaseManager.stopMonitor(); if (nnrmthread != null) { @@ -2221,6 +2240,56 @@ void setStoragePolicy(String src, String policyName) throws IOException { logAuditEvent(true, operationName, src, null, auditStat); } + /** + * Satisfy the storage policy for a file or a directory. + * + * @param src file/directory path + */ + void satisfyStoragePolicy(String src, boolean logRetryCache) + throws IOException { + final String operationName = "satisfyStoragePolicy"; + FileStatus auditStat; + validateStoragePolicySatisfy(); + checkOperation(OperationCategory.WRITE); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); + checkNameNodeSafeMode("Cannot satisfy storage policy for " + src); + auditStat = FSDirSatisfyStoragePolicyOp.satisfyStoragePolicy( + dir, blockManager, src, logRetryCache); + } catch (AccessControlException e) { + logAuditEvent(false, operationName, src); + throw e; + } finally { + writeUnlock(operationName); + } + getEditLog().logSync(); + logAuditEvent(true, operationName, src, null, auditStat); + } + + private void validateStoragePolicySatisfy() + throws UnsupportedActionException, IOException { + // make sure storage policy is enabled, otherwise + // there is no need to satisfy storage policy. + if (!dir.isStoragePolicyEnabled()) { + throw new IOException(String.format( + "Failed to satisfy storage policy since %s is set to false.", + DFS_STORAGE_POLICY_ENABLED_KEY)); + } + // checks sps status + boolean disabled = (blockManager.getSPSManager() == null); + if (disabled) { + throw new UnsupportedActionException( + "Cannot request to satisfy storage policy " + + "when storage policy satisfier feature has been disabled" + + " by admin. Seek for an admin help to enable it " + + "or use Mover tool."); + } + // checks SPS Q has many outstanding requests. It will throw IOException if + // the limit exceeds. + blockManager.getSPSManager().verifyOutstandingPathQLimit(); + } + /** * unset storage policy set for a given file or a directory. 
* @@ -3032,7 +3101,7 @@ void removeBlocks(BlocksMapUpdateInfo blocks) { while (iter.hasNext()) { writeLock(); try { - for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) { + for (int i = 0; i < blockDeletionIncrement && iter.hasNext(); i++) { blockManager.removeBlock(iter.next()); } } finally { @@ -3517,7 +3586,7 @@ void finalizeINodeFileUnderConstruction(String src, INodeFile pendingFile, BlockInfo getStoredBlock(Block block) { return blockManager.getStoredBlock(block); } - + @Override public boolean isInSnapshot(long blockCollectionID) { assert hasReadLock(); @@ -3856,7 +3925,8 @@ HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, VolumeFailureSummary volumeFailureSummary, boolean requestFullBlockReportLease, @Nonnull SlowPeerReports slowPeers, - @Nonnull SlowDiskReports slowDisks) throws IOException { + @Nonnull SlowDiskReports slowDisks) + throws IOException { readLock(); try { //get datanode commands @@ -3870,6 +3940,7 @@ HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, if (requestFullBlockReportLease) { blockReportLeaseId = blockManager.requestBlockReportLeaseId(nodeReg); } + //create ha status final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( haContext.getState().getServiceState(), @@ -4057,7 +4128,7 @@ private void clearCorruptLazyPersistFiles() while (it.hasNext()) { Block b = it.next(); BlockInfo blockInfo = blockManager.getStoredBlock(b); - if (blockInfo == null) { + if (blockInfo == null || blockInfo.isDeleted()) { LOG.info("Cannot find block info for block " + b); } else { BlockCollection bc = getBlockCollection(blockInfo); @@ -4214,7 +4285,8 @@ ReplicatedBlockStats getReplicatedBlockStats() { return new ReplicatedBlockStats(getLowRedundancyReplicatedBlocks(), getCorruptReplicatedBlocks(), getMissingReplicatedBlocks(), getMissingReplicationOneBlocks(), getBytesInFutureReplicatedBlocks(), - getPendingDeletionReplicatedBlocks()); + getPendingDeletionReplicatedBlocks(), + getHighestPriorityLowRedundancyReplicatedBlocks()); } /** @@ -4226,7 +4298,8 @@ ReplicatedBlockStats getReplicatedBlockStats() { ECBlockGroupStats getECBlockGroupStats() { return new ECBlockGroupStats(getLowRedundancyECBlockGroups(), getCorruptECBlockGroups(), getMissingECBlockGroups(), - getBytesInFutureECBlockGroups(), getPendingDeletionECBlocks()); + getBytesInFutureECBlockGroups(), getPendingDeletionECBlocks(), + getHighestPriorityLowRedundancyECBlocks()); } @Override // FSNamesystemMBean @@ -4371,15 +4444,7 @@ int getNumberOfDatanodes(DatanodeReportType type) { try { checkOperation(OperationCategory.UNCHECKED); final DatanodeManager dm = getBlockManager().getDatanodeManager(); - final List datanodes = dm.getDatanodeListForReport(type); - - reports = new DatanodeStorageReport[datanodes.size()]; - for (int i = 0; i < reports.length; i++) { - final DatanodeDescriptor d = datanodes.get(i); - reports[i] = new DatanodeStorageReport( - new DatanodeInfoBuilder().setFrom(d).build(), - d.getStorageReports()); - } + reports = dm.getDatanodeStorageReport(type); } finally { readUnlock("getDatanodeStorageReport"); } @@ -4833,6 +4898,20 @@ public long getMissingReplicationOneBlocks() { return blockManager.getMissingReplicationOneBlocks(); } + @Override // ReplicatedBlocksMBean + @Metric({"HighestPriorityLowRedundancyReplicatedBlocks", "Number of " + + "replicated blocks which have the highest risk of loss."}) + public long getHighestPriorityLowRedundancyReplicatedBlocks() { + return blockManager.getHighestPriorityReplicatedBlockCount(); + } + + @Override // 
ReplicatedBlocksMBean + @Metric({"HighestPriorityLowRedundancyECBlocks", "Number of erasure coded " + + "blocks which have the highest risk of loss."}) + public long getHighestPriorityLowRedundancyECBlocks() { + return blockManager.getHighestPriorityECBlockCount(); + } + @Override // ReplicatedBlocksMBean @Metric({"BytesInFutureReplicatedBlocks", "Total bytes in replicated " + "blocks with future generation stamp"}) @@ -5508,7 +5587,7 @@ public String toString() { if (src.startsWith(path)){ corruptFiles.add(new CorruptFileBlockInfo(src, blk)); count++; - if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED) + if (count >= maxCorruptFileBlocksReturn) break; } } @@ -7466,9 +7545,10 @@ void setErasureCodingPolicy(final String srcArg, final String ecPolicyName, ErasureCodingPolicy[] policies, final boolean logRetryCache) throws IOException { final String operationName = "addErasureCodingPolicies"; - String addECPolicyName = ""; + List addECPolicyNames = new ArrayList<>(policies.length); checkOperation(OperationCategory.WRITE); - List responses = new ArrayList<>(); + List responses = + new ArrayList<>(policies.length); boolean success = false; writeLock(); try { @@ -7479,7 +7559,7 @@ void setErasureCodingPolicy(final String srcArg, final String ecPolicyName, ErasureCodingPolicy newPolicy = FSDirErasureCodingOp.addErasureCodingPolicy(this, policy, logRetryCache); - addECPolicyName = newPolicy.getName(); + addECPolicyNames.add(newPolicy.getName()); responses.add(new AddErasureCodingPolicyResponse(newPolicy)); } catch (HadoopIllegalArgumentException e) { responses.add(new AddErasureCodingPolicyResponse(policy, e)); @@ -7492,7 +7572,8 @@ void setErasureCodingPolicy(final String srcArg, final String ecPolicyName, if (success) { getEditLog().logSync(); } - logAuditEvent(success, operationName, addECPolicyName, null, null); + logAuditEvent(success, operationName, addECPolicyNames.toString(), + null, null); } } @@ -7530,29 +7611,31 @@ void removeErasureCodingPolicy(String ecPolicyName, * @param ecPolicyName the name of the policy to be enabled * @param logRetryCache whether to record RPC ids in editlog for retry cache * rebuilding + * @return * @throws IOException */ - void enableErasureCodingPolicy(String ecPolicyName, + boolean enableErasureCodingPolicy(String ecPolicyName, final boolean logRetryCache) throws IOException { final String operationName = "enableErasureCodingPolicy"; checkOperation(OperationCategory.WRITE); boolean success = false; - LOG.info("Enable the erasure coding policy " + ecPolicyName); writeLock(); try { checkOperation(OperationCategory.WRITE); checkNameNodeSafeMode("Cannot enable erasure coding policy " + ecPolicyName); - FSDirErasureCodingOp.enableErasureCodingPolicy(this, ecPolicyName, - logRetryCache); - success = true; + success = FSDirErasureCodingOp.enableErasureCodingPolicy(this, + ecPolicyName, logRetryCache); + } catch (AccessControlException ace) { + logAuditEvent(false, operationName, ecPolicyName, null, null); } finally { writeUnlock(operationName); if (success) { getEditLog().logSync(); + logAuditEvent(success, operationName, ecPolicyName, null, null); } - logAuditEvent(success, operationName, ecPolicyName, null, null); } + return success; } /** @@ -7562,7 +7645,7 @@ void enableErasureCodingPolicy(String ecPolicyName, * rebuilding * @throws IOException */ - void disableErasureCodingPolicy(String ecPolicyName, + boolean disableErasureCodingPolicy(String ecPolicyName, final boolean logRetryCache) throws IOException { final String operationName = 
"disableErasureCodingPolicy"; checkOperation(OperationCategory.WRITE); @@ -7573,16 +7656,18 @@ void disableErasureCodingPolicy(String ecPolicyName, checkOperation(OperationCategory.WRITE); checkNameNodeSafeMode("Cannot disable erasure coding policy " + ecPolicyName); - FSDirErasureCodingOp.disableErasureCodingPolicy(this, ecPolicyName, - logRetryCache); - success = true; + success = FSDirErasureCodingOp.disableErasureCodingPolicy(this, + ecPolicyName, logRetryCache); + } catch (AccessControlException ace) { + logAuditEvent(false, operationName, ecPolicyName, null, null); } finally { writeUnlock(operationName); if (success) { getEditLog().logSync(); + logAuditEvent(success, operationName, ecPolicyName, null, null); } - logAuditEvent(success, operationName, ecPolicyName, null, null); } + return success; } /** @@ -7761,6 +7846,29 @@ void removeXAttr(String src, XAttr xAttr, boolean logRetryCache) logAuditEvent(true, operationName, src, null, auditStat); } + @Override + public void removeXattr(long id, String xattrName) throws IOException { + writeLock(); + try { + final INode inode = dir.getInode(id); + if (inode == null) { + return; + } + final XAttrFeature xaf = inode.getXAttrFeature(); + if (xaf == null) { + return; + } + final XAttr spsXAttr = xaf.getXAttr(xattrName); + + if (spsXAttr != null) { + FSDirSatisfyStoragePolicyOp.removeSPSXattr(dir, inode, spsXAttr); + } + } finally { + writeUnlock("removeXAttr"); + } + getEditLog().logSync(); + } + void checkAccess(String src, FsAction mode) throws IOException { final String operationName = "checkAccess"; checkOperation(OperationCategory.READ); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java index 354b4e364aa..146869d5b9e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java @@ -24,8 +24,8 @@ import java.util.Stack; import com.google.common.base.Preconditions; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FSExceptionMessages; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.permission.AclEntryScope; @@ -47,7 +47,7 @@ * Some of the helper methods are guarded by {@link FSNamesystem#readLock()}. */ public class FSPermissionChecker implements AccessControlEnforcer { - static final Log LOG = LogFactory.getLog(UserGroupInformation.class); + static final Logger LOG = LoggerFactory.getLogger(UserGroupInformation.class); private static String getPath(byte[][] components, int start, int end) { return DFSUtil.byteArray2PathString(components, start, end - start + 1); @@ -409,7 +409,7 @@ private boolean hasPermission(INodeAttributes inode, FsAction access) { } final FsPermission mode = inode.getFsPermission(); final AclFeature aclFeature = inode.getAclFeature(); - if (aclFeature != null) { + if (aclFeature != null && aclFeature.getEntriesSize() > 0) { // It's possible that the inode has a default ACL but no access ACL. 
int firstEntry = aclFeature.getEntryAt(0); if (AclEntryStatusFormat.getScope(firstEntry) == AclEntryScope.ACCESS) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java index ff77029a6a8..2acbda4005b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java @@ -289,7 +289,7 @@ protected abstract boolean processFileInode(INode inode, /** * Check whether current batch can be submitted for the processing. * - * @return true if batch size meets meet the condition, otherwise false. + * @return true if batch size meets the condition, otherwise false. */ protected abstract boolean shouldSubmitCurrentBatch(); @@ -310,7 +310,7 @@ protected abstract boolean processFileInode(INode inode, * @throws IOException * @throws InterruptedException */ - protected abstract void submitCurrentBatch(long startId) + protected abstract void submitCurrentBatch(Long startId) throws IOException, InterruptedException; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index c71c09ad4d8..d08a64497fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; @@ -60,7 +60,8 @@ */ @InterfaceAudience.Private public class FileJournalManager implements JournalManager { - private static final Log LOG = LogFactory.getLog(FileJournalManager.class); + private static final Logger LOG = + LoggerFactory.getLogger(FileJournalManager.class); private final Configuration conf; private final StorageDirectory sd; @@ -100,7 +101,7 @@ public FileJournalManager(Configuration conf, StorageDirectory sd, public void close() throws IOException {} @Override - public void format(NamespaceInfo ns) throws IOException { + public void format(NamespaceInfo ns, boolean force) throws IOException { // Formatting file journals is done by the StorageDirectory // format code, since they may share their directory with // checkpoints, etc. 
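The logging changes in FSPermissionChecker and FileJournalManager above follow a pattern that recurs throughout these hunks: the commons-logging Log/LogFactory pair is swapped for SLF4J's Logger/LoggerFactory, and calls to LOG.fatal(...) become LOG.error(...), since SLF4J has no FATAL level. A minimal sketch of the target idiom is shown below; the class and method names are illustrative only and are not part of the patch.

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    // Hypothetical class, for illustration of the SLF4J idiom only.
    public class LoggingExample {
      private static final Logger LOG =
          LoggerFactory.getLogger(LoggingExample.class);

      void reportFailure(String journal, Throwable t) {
        // Parameterized messages defer string building until the level is enabled,
        // and a trailing Throwable is logged together with its stack trace.
        LOG.error("Error: required journal {} failed", journal, t);
      }
    }
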
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java index 207d97726b1..2123f4ea993 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java @@ -25,8 +25,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.Path; @@ -56,7 +56,7 @@ */ @InterfaceAudience.Private public abstract class INode implements INodeAttributes, Diff.Element { - public static final Log LOG = LogFactory.getLog(INode.class); + public static final Logger LOG = LoggerFactory.getLogger(INode.class); /** parent is either an {@link INodeDirectory} or an {@link INodeReference}.*/ private INode parent = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java index 9adcc3eb63c..84d99e483ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java @@ -35,8 +35,8 @@ implements LinkedElement { enum PermissionStatusFormat { MODE(null, 16), - GROUP(MODE.BITS, 25), - USER(GROUP.BITS, 23); + GROUP(MODE.BITS, 24), + USER(GROUP.BITS, 24); final LongBitFormat BITS; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java index 50ead610c60..04fb50e3513 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java @@ -19,8 +19,8 @@ import java.util.Arrays; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; @@ -36,7 +36,7 @@ * Contains INodes information resolved from a given path. 
*/ public class INodesInPath { - public static final Log LOG = LogFactory.getLog(INodesInPath.class); + public static final Logger LOG = LoggerFactory.getLogger(INodesInPath.class); /** * @return true if path component is {@link HdfsConstants#DOT_SNAPSHOT_DIR} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java index 7a26df9ef03..9028b362e04 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java @@ -35,8 +35,8 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.security.SecurityUtil; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtil; @@ -72,7 +72,7 @@ private static final long serialVersionUID = -7669068179452648952L; - private static final Log LOG = LogFactory.getLog(ImageServlet.class); + private static final Logger LOG = LoggerFactory.getLogger(ImageServlet.class); public final static String CONTENT_DISPOSITION = "Content-Disposition"; public final static String HADOOP_IMAGE_EDITS_HEADER = "X-Image-Edits-Name"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/IsNameNodeActiveServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/IsNameNodeActiveServlet.java new file mode 100644 index 00000000000..1bd3f7f38fa --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/IsNameNodeActiveServlet.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.http.IsActiveServlet; + +/** + * Used by Load Balancers to find the active NameNode. 
+ */ +public class IsNameNodeActiveServlet extends IsActiveServlet { + + @Override + protected boolean isActive() { + NameNode namenode = NameNodeHttpServer.getNameNodeFromContext( + getServletContext()); + return namenode.isActiveState(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java index ae1bc3b7db7..d6d20945bc1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java @@ -43,7 +43,7 @@ * Format the underlying storage, removing any previously * stored data. */ - void format(NamespaceInfo ns) throws IOException; + void format(NamespaceInfo ns, boolean force) throws IOException; /** * Begin writing to a new segment of the log stream, which starts at diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java index e7f2adb5bd6..7be7073c5f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -30,8 +30,8 @@ import java.util.SortedSet; import java.util.concurrent.CopyOnWriteArrayList; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.StorageInfo; @@ -54,7 +54,7 @@ @InterfaceAudience.Private public class JournalSet implements JournalManager { - static final Log LOG = LogFactory.getLog(FSEditLog.class); + static final Logger LOG = LoggerFactory.getLogger(FSEditLog.class); // we want local logs to be ordered earlier in the collection, and true // is considered larger than false, so reverse the comparator @@ -188,7 +188,7 @@ public boolean isShared() { } @Override - public void format(NamespaceInfo nsInfo) throws IOException { + public void format(NamespaceInfo nsInfo, boolean force) throws IOException { // The operation is done by FSEditLog itself throw new UnsupportedOperationException(); } @@ -387,7 +387,7 @@ private void mapJournalsAndReportErrors( if (jas.isRequired()) { final String msg = "Error: " + status + " failed for required journal (" + jas + ")"; - LOG.fatal(msg, t); + LOG.error(msg, t); // If we fail on *any* of the required journals, then we must not // continue on any of the other journals. Abort them to ensure that // retry behavior doesn't allow them to keep going in any way. 
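JournalManager.format(...) now takes a boolean force flag alongside the NamespaceInfo: FileJournalManager accepts it but still delegates on-disk formatting to the StorageDirectory code, and JournalSet continues to reject the call outright. The sketch below shows one way a journal backend could interpret the flag, under the assumption that force is meant to permit re-formatting storage that already holds data without an interactive confirmation; the class and field names are illustrative and not part of the patch.

    import java.io.IOException;

    // Hypothetical backend sketch; not an actual JournalManager implementation.
    class ExampleJournalStorage {
      private boolean hasExistingData = true;

      void format(boolean force) throws IOException {
        if (hasExistingData && !force) {
          // Without force, refuse to clobber a journal that is already formatted.
          throw new IOException(
              "Journal storage is not empty; use the force option to re-format");
        }
        // Erase any previous segments and write fresh storage metadata here.
        hasExistingData = false;
      }
    }
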
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java index 8831b49adb0..dad5779b68e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/MetaRecoveryContext.java @@ -22,14 +22,15 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** Context data for an ongoing NameNode metadata recovery process. */ @InterfaceAudience.Private @InterfaceStability.Evolving public final class MetaRecoveryContext { - public static final Log LOG = LogFactory.getLog(MetaRecoveryContext.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(MetaRecoveryContext.class.getName()); public final static int FORCE_NONE = 0; public final static int FORCE_FIRST_CHOICE = 1; public final static int FORCE_ALL = 2; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java index 2a83541d38a..fc54dfcfd31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java @@ -28,8 +28,8 @@ import java.util.List; import java.util.TreeSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile; @@ -56,7 +56,7 @@ private final int numCheckpointsToRetain; private final long numExtraEditsToRetain; private final int maxExtraEditsSegmentsToRetain; - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( NNStorageRetentionManager.class); private final NNStorage storage; private final StoragePurger purger; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java index b3fff749faa..9cca97ab74b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java @@ -27,8 +27,8 @@ import java.nio.file.attribute.BasicFileAttributes; import java.util.Collections; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; @@ -38,7 +38,8 @@ public abstract class NNUpgradeUtil { - private static final Log LOG = 
LogFactory.getLog(NNUpgradeUtil.class); + private static final Logger LOG = + LoggerFactory.getLogger(NNUpgradeUtil.class); /** * Return true if this storage dir can roll back to the previous storage diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameCache.java index cf4f9a739da..4b8b797551f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameCache.java @@ -20,8 +20,8 @@ import java.util.HashMap; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Caches frequently used names to facilitate reuse. @@ -62,7 +62,7 @@ int get() { } } - static final Log LOG = LogFactory.getLog(NameCache.class.getName()); + static final Logger LOG = LoggerFactory.getLogger(NameCache.class.getName()); /** indicates initialization is in progress */ private boolean initialized = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 8ad576757fd..a8034da85e4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; import org.apache.hadoop.hdfs.server.aliasmap.InMemoryAliasMap; import org.apache.hadoop.hdfs.server.aliasmap.InMemoryLevelDBAliasMapServer; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; @@ -160,6 +161,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.FS_PROTECTED_DIRECTORIES; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY; import static org.apache.hadoop.util.ExitUtil.terminate; import static org.apache.hadoop.util.ToolRunner.confirmPrompt; import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_BACKOFF_ENABLE; @@ -293,7 +295,8 @@ DFS_HEARTBEAT_INTERVAL_KEY, DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, FS_PROTECTED_DIRECTORIES, - HADOOP_CALLER_CONTEXT_ENABLED_KEY)); + HADOOP_CALLER_CONTEXT_ENABLED_KEY, + DFS_STORAGE_POLICY_SATISFIER_MODE_KEY)); private static final String USAGE = "Usage: hdfs namenode [" + StartupOption.BACKUP.getName() + "] | \n\t[" @@ -1156,7 +1159,7 @@ private static boolean format(Configuration conf, boolean force, return true; // aborted } - fsImage.format(fsn, clusterId); + fsImage.format(fsn, clusterId, force); } catch (IOException ioe) { LOG.warn("Encountered exception during format: ", ioe); fsImage.close(); @@ -1259,7 +1262,7 @@ private static boolean initializeSharedEdits(Configuration conf, // actually want to save a checkpoint - just prime the dirs with // the existing namespace info 
newSharedStorage.format(nsInfo); - sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo); + sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo, force); // Need to make sure the edit log segments are in good shape to initialize // the shared edits dir. @@ -1757,7 +1760,6 @@ synchronized void transitionToStandby() synchronized HAServiceStatus getServiceStatus() throws ServiceFailedException, AccessControlException { - namesystem.checkSuperuserPrivilege(); if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } @@ -2040,6 +2042,8 @@ protected String reconfigurePropertyImpl(String property, String newVal) return reconfCallerContextEnabled(newVal); } else if (property.equals(ipcClientRPCBackoffEnable)) { return reconfigureIPCBackoffEnabled(newVal); + } else if (property.equals(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY)) { + return reconfigureSPSModeEvent(newVal, property); } else { throw new ReconfigurationException(property, newVal, getConf().get( property)); @@ -2123,6 +2127,47 @@ String reconfigureIPCBackoffEnabled(String newVal) { return Boolean.toString(clientBackoffEnabled); } + String reconfigureSPSModeEvent(String newVal, String property) + throws ReconfigurationException { + if (newVal == null + || StoragePolicySatisfierMode.fromString(newVal) == null) { + throw new ReconfigurationException(property, newVal, + getConf().get(property), + new HadoopIllegalArgumentException( + "For enabling or disabling storage policy satisfier, must " + + "pass either internal/external/none string value only")); + } + + if (!isActiveState()) { + throw new ReconfigurationException(property, newVal, + getConf().get(property), + new HadoopIllegalArgumentException( + "Enabling or disabling storage policy satisfier service on " + + state + " NameNode is not allowed")); + } + StoragePolicySatisfierMode mode = StoragePolicySatisfierMode + .fromString(newVal); + if (mode == StoragePolicySatisfierMode.NONE) { + // disabling sps service + if (namesystem.getBlockManager().getSPSManager() != null) { + namesystem.getBlockManager().getSPSManager().changeModeEvent(mode); + namesystem.getBlockManager().disableSPS(); + } + } else { + // enabling sps service + boolean spsCreated = (namesystem.getBlockManager() + .getSPSManager() != null); + if (!spsCreated) { + spsCreated = namesystem.getBlockManager().createSPSManager(getConf(), + newVal); + } + if (spsCreated) { + namesystem.getBlockManager().getSPSManager().changeModeEvent(mode); + } + } + return newVal; + } + @Override // ReconfigurableBase protected Configuration getNewConf() { return new HdfsConfiguration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java index 861afae5c74..ae9c7feca70 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java @@ -294,6 +294,9 @@ private static void setupServlets(HttpServer2 httpServer, Configuration conf) { true); httpServer.addInternalServlet("imagetransfer", ImageServlet.PATH_SPEC, ImageServlet.class, true); + httpServer.addInternalServlet(IsNameNodeActiveServlet.SERVLET_NAME, + IsNameNodeActiveServlet.PATH_SPEC, + IsNameNodeActiveServlet.class); } static FSImage getFsImageFromContext(ServletContext context) { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java index e4ed3a90a4f..5c7bbbb4515 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java @@ -162,6 +162,24 @@ */ public long getNumberOfMissingBlocksWithReplicationFactorOne(); + /** + * Gets the total number of replicated low redundancy blocks on the cluster + * with the highest risk of loss. + * + * @return the total number of low redundancy blocks on the cluster + * with the highest risk of loss. + */ + public long getHighestPriorityLowRedundancyReplicatedBlocks(); + + /** + * Gets the total number of erasure coded low redundancy blocks on the cluster + * with the highest risk of loss + * + * @return the total number of low redundancy blocks on the cluster + * with the highest risk of loss + */ + public long getHighestPriorityLowRedundancyECBlocks(); + /** * Gets the total number of snapshottable dirs in the system. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java index 7b1dbc6f2a4..898f57e4b07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java @@ -25,8 +25,8 @@ import java.util.HashMap; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.DF; @@ -47,7 +47,8 @@ */ @InterfaceAudience.Private public class NameNodeResourceChecker { - private static final Log LOG = LogFactory.getLog(NameNodeResourceChecker.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(NameNodeResourceChecker.class.getName()); // Space (in bytes) reserved per volume. 
private final long duReserved; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index c5b9d5a053f..ec5ce9d174f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -28,7 +28,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_HANDLER_COUNT_KEY; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.MAX_PATH_DEPTH; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.MAX_PATH_LENGTH; - import static org.apache.hadoop.util.Time.now; import java.io.FileNotFoundException; @@ -111,6 +110,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.ReencryptAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; @@ -155,6 +155,7 @@ import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfyManager; import org.apache.hadoop.hdfs.server.protocol.BlockReportContext; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; @@ -1404,6 +1405,23 @@ public QuotaUsage getQuotaUsage(String path) throws IOException { return namesystem.getQuotaUsage(path); } + @Override // ClientProtocol + public void satisfyStoragePolicy(String src) throws IOException { + checkNNStartup(); + namesystem.checkOperation(OperationCategory.WRITE); + CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); + if (cacheEntry != null && cacheEntry.isSuccess()) { + return; // Return previous response + } + boolean success = false; + try { + namesystem.satisfyStoragePolicy(src, cacheEntry != null); + success = true; + } finally { + RetryCache.setState(cacheEntry, success); + } + } + @Override // ClientProtocol public void setQuota(String path, long namespaceQuota, long storagespaceQuota, StorageType type) @@ -1498,7 +1516,8 @@ public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg, int failedVolumes, VolumeFailureSummary volumeFailureSummary, boolean requestFullBlockReportLease, @Nonnull SlowPeerReports slowPeers, - @Nonnull SlowDiskReports slowDisks) throws IOException { + @Nonnull SlowDiskReports slowDisks) + throws IOException { checkNNStartup(); verifyRequest(nodeReg); return namesystem.handleHeartbeat(nodeReg, report, @@ -2458,8 +2477,8 @@ public void enableErasureCodingPolicy(String ecPolicyName) } boolean success = false; try { - namesystem.enableErasureCodingPolicy(ecPolicyName, cacheEntry != null); - success = true; + success = namesystem.enableErasureCodingPolicy(ecPolicyName, + cacheEntry != null); } finally { RetryCache.setState(cacheEntry, success); } @@ -2476,8 +2495,8 @@ public void disableErasureCodingPolicy(String ecPolicyName) } boolean success = 
false; try { - namesystem.disableErasureCodingPolicy(ecPolicyName, cacheEntry != null); - success = true; + success = namesystem.disableErasureCodingPolicy(ecPolicyName, + cacheEntry != null); } finally { RetryCache.setState(cacheEntry, success); } @@ -2512,4 +2531,28 @@ public ReconfigurationTaskStatus getReconfigurationStatus() namesystem.logAuditEvent(true, operationName, null); return result; } + + @Override + public Long getNextSPSPath() throws IOException { + checkNNStartup(); + String operationName = "getNextSPSPath"; + namesystem.checkSuperuserPrivilege(operationName); + if (nn.isStandbyState()) { + throw new StandbyException("Not supported by Standby Namenode."); + } + // Check that SPS is enabled externally + StoragePolicySatisfyManager spsMgr = + namesystem.getBlockManager().getSPSManager(); + StoragePolicySatisfierMode spsMode = (spsMgr != null ? spsMgr.getMode() + : StoragePolicySatisfierMode.NONE); + if (spsMode != StoragePolicySatisfierMode.EXTERNAL) { + if (LOG.isDebugEnabled()) { + LOG.debug("SPS service mode is {}, so external SPS service is " + + "not allowed to fetch the path Ids", spsMode); + } + throw new IOException("SPS service mode is " + spsMode + ", so " + + "external SPS service is not allowed to fetch the path Ids"); + } + return namesystem.getBlockManager().getSPSManager().getNextPathId(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 5e7bab5dfa8..5d664cbb339 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -36,8 +36,8 @@ import java.util.concurrent.ThreadLocalRandom; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -112,7 +112,8 @@ */ @InterfaceAudience.Private public class NamenodeFsck implements DataEncryptionKeyFactory { - public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(NameNode.class.getName()); // return string marking fsck status public static final String CORRUPT_STATUS = "is CORRUPT"; @@ -264,12 +265,13 @@ public void blockIdCK(String blockId) { return; } + namenode.getNamesystem().readLock(); try { //get blockInfo Block block = new Block(Block.getBlockId(blockId)); //find which file this block belongs to BlockInfo blockInfo = blockManager.getStoredBlock(block); - if(blockInfo == null) { + if (blockInfo == null || blockInfo.isDeleted()) { out.println("Block "+ blockId +" " + NONEXISTENT_STATUS); LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS); return; @@ -329,6 +331,8 @@ public void blockIdCK(String blockId) { out.println(e.getMessage()); out.print("\n\n" + errMsg); LOG.warn("Error in looking up block", e); + } finally { + namenode.getNamesystem().readUnlock("fsck"); } } @@ -355,7 +359,7 @@ public void fsck() { blockIdCK(blk); sb.append(blk + "\n"); } - LOG.info(sb); + LOG.info("{}", sb.toString()); namenode.getNamesystem().logFsckEvent("/", remoteAddress); out.flush(); return; diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java index e07376bc9ef..2a525870064 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import java.io.IOException; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; @@ -45,4 +47,12 @@ * middle of the starting active services. */ boolean inTransitionToActive(); + + /** + * Remove xAttr from the inode. + * @param id + * @param xattrName + * @throws IOException + */ + void removeXattr(long id, String xattrName) throws IOException; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java index a73206b31ef..6c42c82dc7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java @@ -21,8 +21,8 @@ import java.util.Arrays; import java.util.Collection; import java.util.Comparator; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.io.IOUtils; @@ -37,7 +37,7 @@ * different subset of the available edits. 
*/ class RedundantEditLogInputStream extends EditLogInputStream { - public static final Log LOG = LogFactory.getLog( + public static final Logger LOG = LoggerFactory.getLogger( RedundantEditLogInputStream.class.getName()); private int curIdx; private long prevTxId; @@ -152,7 +152,7 @@ public long getLastTxId() { @Override public void close() throws IOException { - IOUtils.cleanup(LOG, streams); + IOUtils.cleanupWithLogger(LOG, streams); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java index 5b52c825700..a8acccdd964 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ThreadFactoryBuilder; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; @@ -616,7 +617,9 @@ protected void checkPauseForTesting() while (shouldPauseForTesting) { LOG.info("Sleeping in the re-encrypt handler for unit test."); synchronized (reencryptionHandler) { - reencryptionHandler.wait(30000); + if (shouldPauseForTesting) { + reencryptionHandler.wait(30000); + } } LOG.info("Continuing re-encrypt handler after pausing."); } @@ -699,7 +702,7 @@ protected void checkINodeReady(long zoneId) throws IOException { * @throws InterruptedException */ @Override - protected void submitCurrentBatch(final long zoneId) throws IOException, + protected void submitCurrentBatch(final Long zoneId) throws IOException, InterruptedException { if (currentBatch.isEmpty()) { return; @@ -711,10 +714,10 @@ protected void submitCurrentBatch(final long zoneId) throws IOException, zst = new ZoneSubmissionTracker(); submissions.put(zoneId, zst); } + Future future = batchService.submit(new EDEKReencryptCallable(zoneId, + currentBatch, reencryptionHandler)); + zst.addTask(future); } - Future future = batchService.submit(new EDEKReencryptCallable(zoneId, - currentBatch, reencryptionHandler)); - zst.addTask(future); LOG.info("Submitted batch (start:{}, size:{}) of zone {} to re-encrypt.", currentBatch.getFirstFilePath(), currentBatch.size(), zoneId); currentBatch = new ReencryptionBatch(reencryptBatchSize); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java index a5923a7836c..15cfa9278f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java @@ -383,32 +383,34 @@ private void processTaskEntries(final String zoneNodePath, final LinkedList tasks = tracker.getTasks(); final List xAttrs = Lists.newArrayListWithCapacity(1); ListIterator iter = tasks.listIterator(); - while (iter.hasNext()) { - Future curr = iter.next(); - if (curr.isCancelled()) { - break; - } - if (!curr.isDone() || !curr.get().processed) { - // still has earlier 
tasks not completed, skip here. - break; - } - ReencryptionTask task = curr.get(); - LOG.debug("Updating re-encryption checkpoint with completed task." - + " last: {} size:{}.", task.lastFile, task.batch.size()); - assert zoneId == task.zoneId; - try { - final XAttr xattr = FSDirEncryptionZoneOp - .updateReencryptionProgress(dir, zoneNode, status, task.lastFile, - task.numFilesUpdated, task.numFailures); - xAttrs.clear(); - xAttrs.add(xattr); - } catch (IOException ie) { - LOG.warn("Failed to update re-encrypted progress to xattr for zone {}", - zonePath, ie); - ++task.numFailures; + synchronized (handler) { + while (iter.hasNext()) { + Future curr = iter.next(); + if (curr.isCancelled()) { + break; + } + if (!curr.isDone() || !curr.get().processed) { + // still has earlier tasks not completed, skip here. + break; + } + ReencryptionTask task = curr.get(); + LOG.debug("Updating re-encryption checkpoint with completed task." + + " last: {} size:{}.", task.lastFile, task.batch.size()); + assert zoneId == task.zoneId; + try { + final XAttr xattr = FSDirEncryptionZoneOp + .updateReencryptionProgress(dir, zoneNode, status, task.lastFile, + task.numFilesUpdated, task.numFailures); + xAttrs.clear(); + xAttrs.add(xattr); + } catch (IOException ie) { + LOG.warn("Failed to update re-encrypted progress to xattr" + + " for zone {}", zonePath, ie); + ++task.numFailures; + } + ++tracker.numCheckpointed; + iter.remove(); } - ++tracker.numCheckpointed; - iter.remove(); } if (tracker.isCompleted()) { LOG.debug("Removed re-encryption tracker for zone {} because it completed" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index ff83e3493c6..1423b308012 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -38,8 +38,8 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -103,8 +103,8 @@ static{ HdfsConfiguration.init(); } - public static final Log LOG = - LogFactory.getLog(SecondaryNameNode.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(SecondaryNameNode.class.getName()); private final long starttime = Time.now(); private volatile long lastCheckpointTime = 0; @@ -367,12 +367,12 @@ public void doWork() { // Prevent a huge number of edits from being created due to // unrecoverable conditions and endless retries. 
if (checkpointImage.getMergeErrorCount() > maxRetries) { - LOG.fatal("Merging failed " + + LOG.error("Merging failed " + checkpointImage.getMergeErrorCount() + " times."); terminate(1); } } catch (Throwable e) { - LOG.fatal("Throwable Exception in doCheckpoint", e); + LOG.error("Throwable Exception in doCheckpoint", e); e.printStackTrace(); terminate(1, e); } @@ -676,7 +676,7 @@ boolean shouldCheckpointBasedOnCount() throws IOException { public static void main(String[] argv) throws Exception { CommandLineOpts opts = SecondaryNameNode.parseArgs(argv); if (opts == null) { - LOG.fatal("Failed to parse options"); + LOG.error("Failed to parse options"); terminate(1); } else if (opts.shouldPrintHelp()) { opts.usage(); @@ -703,7 +703,7 @@ public static void main(String[] argv) throws Exception { secondary.join(); } } catch (Throwable e) { - LOG.fatal("Failed to start secondary namenode", e); + LOG.error("Failed to start secondary namenode", e); terminate(1); } } @@ -722,6 +722,11 @@ public String getHostAndPort() { return NetUtils.getHostPortString(nameNodeAddr); } + @Override + public boolean isSecurityEnabled() { + return UserGroupInformation.isSecurityEnabled(); + } + @Override // SecondaryNameNodeInfoMXBean public long getStartTime() { return starttime; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNodeInfoMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNodeInfoMXBean.java index 785c5ee3103..a042dc24f69 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNodeInfoMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNodeInfoMXBean.java @@ -31,6 +31,13 @@ */ public String getHostAndPort(); + /** + * Gets if security is enabled. + * + * @return true, if security is enabled. 
+ */ + boolean isSecurityEnabled(); + /** * @return the timestamp of when the SNN starts */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java index 9fb0c624c64..449a1aa62ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java @@ -103,7 +103,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse resp) json.writeEndArray(); json.writeEndObject(); } finally { - IOUtils.cleanup(LOG, json); + IOUtils.cleanupWithLogger(LOG, json); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java index 771a43ef6f9..14ce00098f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java @@ -32,8 +32,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; @@ -95,7 +95,8 @@ public static TransferResult getResultForCode(int code){ @VisibleForTesting static int timeout = 0; - private static final Log LOG = LogFactory.getLog(TransferFsImage.class); + private static final Logger LOG = + LoggerFactory.getLogger(TransferFsImage.class); public static void downloadMostRecentImageToDirectory(URL infoServer, File dir) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java index 7e704d0226c..f9f06db8faf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrFormat.java @@ -27,25 +27,56 @@ import com.google.common.base.Preconditions; import com.google.common.primitives.Ints; +import org.apache.hadoop.hdfs.util.LongBitFormat; /** * Class to pack XAttrs into byte[].
* For each XAttr:
* The first 4 bytes represents XAttr namespace and name
* [0:3) - XAttr namespace
- * [3:32) - The name of the entry, which is an ID that points to a
+ * [3:8) - Reserved
+ * [8:32) - The name of the entry, which is an ID that points to a
* string in map
* The following two bytes represents the length of XAttr value
* The remaining bytes is the XAttr value
*/ class XAttrFormat { - private static final int XATTR_NAMESPACE_MASK = (1 << 3) - 1; - private static final int XATTR_NAMESPACE_OFFSET = 29; - private static final int XATTR_NAME_MASK = (1 << 29) - 1; - private static final int XATTR_NAME_ID_MAX = 1 << 29; + private enum XAttrStatusFormat { + + NAMESPACE(null, 3), + RESERVED(NAMESPACE.BITS, 5), + NAME(RESERVED.BITS, 24); + + private final LongBitFormat BITS; + + XAttrStatusFormat(LongBitFormat previous, int length) { + BITS = new LongBitFormat(name(), previous, length, 0); + } + + static XAttr.NameSpace getNamespace(int xattrStatus) { + int ordinal = (int) NAMESPACE.BITS.retrieve(xattrStatus); + return XAttr.NameSpace.values()[ordinal]; + } + + static String getName(int xattrStatus) { + int id = (int) NAME.BITS.retrieve(xattrStatus); + return XAttrStorage.getName(id); + } + + static int toInt(XAttr.NameSpace namespace, String name) { + long xattrStatusInt = 0; + + xattrStatusInt = NAMESPACE.BITS + .combine(namespace.ordinal(), xattrStatusInt); + int nid = XAttrStorage.getNameSerialNumber(name); + xattrStatusInt = NAME.BITS + .combine(nid, xattrStatusInt); + + return (int) xattrStatusInt; + } + } + private static final int XATTR_VALUE_LEN_MAX = 1 << 16; - private static final XAttr.NameSpace[] XATTR_NAMESPACE_VALUES = - XAttr.NameSpace.values(); /** * Unpack byte[] to XAttrs. @@ -64,10 +95,8 @@ int v = Ints.fromBytes(attrs[i], attrs[i + 1], attrs[i + 2], attrs[i + 3]); i += 4; - int ns = (v >> XATTR_NAMESPACE_OFFSET) & XATTR_NAMESPACE_MASK; - int nid = v & XATTR_NAME_MASK; - builder.setNameSpace(XATTR_NAMESPACE_VALUES[ns]); - builder.setName(XAttrStorage.getName(nid)); + builder.setNameSpace(XAttrStatusFormat.getNamespace(v)); + builder.setName(XAttrStatusFormat.getName(v)); int vlen = ((0xff & attrs[i]) << 8) | (0xff & attrs[i + 1]); i += 2; if (vlen > 0) { @@ -100,10 +129,8 @@ static XAttr getXAttr(byte[] attrs, String prefixedName) { int v = Ints.fromBytes(attrs[i], attrs[i + 1], attrs[i + 2], attrs[i + 3]); i += 4; - int ns = (v >> XATTR_NAMESPACE_OFFSET) & XATTR_NAMESPACE_MASK; - int nid = v & XATTR_NAME_MASK; - XAttr.NameSpace namespace = XATTR_NAMESPACE_VALUES[ns]; - String name = XAttrStorage.getName(nid); + XAttr.NameSpace namespace = XAttrStatusFormat.getNamespace(v); + String name = XAttrStatusFormat.getName(v); int vlen = ((0xff & attrs[i]) << 8) | (0xff & attrs[i + 1]); i += 2; if (xAttr.getNameSpace() == namespace && @@ -134,15 +161,7 @@ static XAttr getXAttr(byte[] attrs, String prefixedName) { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { for (XAttr a : xAttrs) { - int nsOrd = a.getNameSpace().ordinal(); - Preconditions.checkArgument(nsOrd < 8, "Too many namespaces."); - int nid = XAttrStorage.getNameSerialNumber(a.getName()); - Preconditions.checkArgument(nid < XATTR_NAME_ID_MAX, - "Too large serial number of the xattr name"); - - // big-endian - int v = ((nsOrd & XATTR_NAMESPACE_MASK) << XATTR_NAMESPACE_OFFSET) - | (nid & XATTR_NAME_MASK); + int v = XAttrStatusFormat.toInt(a.getNameSpace(), a.getName()); out.write(Ints.toByteArray(v)); int vlen = a.getValue() == null ? 
0 : a.getValue().length; Preconditions.checkArgument(vlen < XATTR_VALUE_LEN_MAX, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java index 4d6716f04fd..aed90a5300b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java @@ -31,8 +31,8 @@ import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configurable; @@ -76,7 +76,8 @@ */ @InterfaceAudience.Private public class BootstrapStandby implements Tool, Configurable { - private static final Log LOG = LogFactory.getLog(BootstrapStandby.class); + private static final Logger LOG = + LoggerFactory.getLogger(BootstrapStandby.class); private String nsId; private String nnId; private List remoteNNs; @@ -182,14 +183,14 @@ private int doRun() throws IOException { } if (nsInfo == null) { - LOG.fatal( + LOG.error( "Unable to fetch namespace information from any remote NN. Possible NameNodes: " + remoteNNs); return ERR_CODE_FAILED_CONNECT; } if (!checkLayoutVersion(nsInfo)) { - LOG.fatal("Layout version on remote node (" + nsInfo.getLayoutVersion() + LOG.error("Layout version on remote node (" + nsInfo.getLayoutVersion() + ") does not match " + "this node's layout version (" + HdfsServerConstants.NAMENODE_LAYOUT_VERSION + ")"); return ERR_CODE_INVALID_VERSION; @@ -382,7 +383,7 @@ private boolean checkLogsAvailableForRead(FSImage image, long imageTxId, "Please copy these logs into the shared edits storage " + "or call saveNamespace on the active node.\n" + "Error: " + e.getLocalizedMessage(); - LOG.fatal(msg, e); + LOG.error(msg, e); return false; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 2003f94f5f8..4ba2aa3bf80 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -36,8 +36,8 @@ import com.google.common.collect.Iterators; import com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -73,7 +73,7 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class EditLogTailer { - public static final Log LOG = LogFactory.getLog(EditLogTailer.class); + public static final Logger LOG = LoggerFactory.getLogger(EditLogTailer.class); /** * StandbyNode will hold namesystem lock to apply at most this many journal @@ -234,7 +234,6 @@ public void start() { } public void stop() throws IOException { - rollEditsRpcExecutor.shutdown(); 
tailerThread.setShouldRun(false); tailerThread.interrupt(); try { @@ -242,6 +241,8 @@ public void stop() throws IOException { } catch (InterruptedException e) { LOG.warn("Edit log tailer thread exited with an exception"); throw new IOException(e); + } finally { + rollEditsRpcExecutor.shutdown(); } } @@ -470,7 +471,7 @@ private void doWork() { // interrupter should have already set shouldRun to false continue; } catch (Throwable t) { - LOG.fatal("Unknown error encountered while tailing edits. " + + LOG.error("Unknown error encountered while tailing edits. " + "Shutting down standby NN.", t); terminate(1, t); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockMoveTaskHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockMoveTaskHandler.java new file mode 100644 index 00000000000..1b11d01397c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockMoveTaskHandler.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.server.protocol.BlockStorageMovementCommand.BlockMovingInfo; + +/** + * Interface for implementing different ways of block moving approaches. One can + * connect directly to DN and request block move, and other can talk NN to + * schedule via heart-beats. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface BlockMoveTaskHandler { + + /** + * This is an interface method to handle the move tasks. BlockMovingInfo must + * contain the required info to move the block, that source location, + * destination location and storage types. + */ + void submitMoveTask(BlockMovingInfo blkMovingInfo) throws IOException; + +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockMovementListener.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockMovementListener.java new file mode 100644 index 00000000000..36473f3e2ab --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockMovementListener.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.protocol.Block; + +/** + * Interface for notifying about block movement attempt completion. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface BlockMovementListener { + + /** + * This method used to notify to the SPS about block movement attempt + * finished. Then SPS will re-check whether it needs retry or not. + * + * @param moveAttemptFinishedBlks + * -list of movement attempt finished blocks + */ + void notifyMovementTriedBlocks(Block[] moveAttemptFinishedBlks); +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementAttemptedItems.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementAttemptedItems.java new file mode 100644 index 00000000000..df4f0dddb49 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementAttemptedItems.java @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY; +import static org.apache.hadoop.util.Time.monotonicNow; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier.AttemptedItemInfo; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier.StorageTypeNodePair; +import org.apache.hadoop.util.Daemon; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * A monitor class for checking whether block storage movements attempt + * completed or not. If this receives block storage movement attempt + * status(either success or failure) from DN then it will just remove the + * entries from tracking. If there is no DN reports about movement attempt + * finished for a longer time period, then such items will retries automatically + * after timeout. The default timeout would be 5 minutes. + */ +public class BlockStorageMovementAttemptedItems { + private static final Logger LOG = + LoggerFactory.getLogger(BlockStorageMovementAttemptedItems.class); + + /** + * A map holds the items which are already taken for blocks movements + * processing and sent to DNs. + */ + private final List storageMovementAttemptedItems; + private Map> scheduledBlkLocs; + // Maintains separate Queue to keep the movement finished blocks. This Q + // is used to update the storageMovementAttemptedItems list asynchronously. + private final BlockingQueue movementFinishedBlocks; + private volatile boolean monitorRunning = true; + private Daemon timerThread = null; + private final Context context; + // + // It might take anywhere between 5 to 10 minutes before + // a request is timed out. + // + private long selfRetryTimeout = 5 * 60 * 1000; + + // + // It might take anywhere between 1 to 2 minutes before + // a request is timed out. 
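// Illustrative sketch, not part of this patch: the two timeout knobs read by the
// constructor below can be tuned through DFSConfigKeys on the Configuration handed
// to the SPS service. The literal values here are examples only.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

class SpsTimeoutTuningSketch {
  static Configuration withShorterSpsTimeouts(Configuration conf) {
    // Lower the report recheck interval; the constructor takes the minimum of this
    // value and its built-in 1 minute default.
    conf.setLong(
        DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY,
        30 * 1000L);
    // Retry an unreported attempted item after 2 minutes instead of 5.
    conf.setLong(
        DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY,
        2 * 60 * 1000L);
    return conf;
  }
}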
+ // + private long minCheckTimeout = 1 * 60 * 1000; // minimum value + private BlockStorageMovementNeeded blockStorageMovementNeeded; + private final SPSService service; + + public BlockStorageMovementAttemptedItems(SPSService service, + BlockStorageMovementNeeded unsatisfiedStorageMovementFiles, + Context context) { + this.service = service; + long recheckTimeout = this.service.getConf().getLong( + DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY, + DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_DEFAULT); + if (recheckTimeout > 0) { + this.minCheckTimeout = Math.min(minCheckTimeout, recheckTimeout); + } + + this.selfRetryTimeout = this.service.getConf().getLong( + DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY, + DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_DEFAULT); + this.blockStorageMovementNeeded = unsatisfiedStorageMovementFiles; + storageMovementAttemptedItems = new ArrayList<>(); + scheduledBlkLocs = new HashMap<>(); + movementFinishedBlocks = new LinkedBlockingQueue<>(); + this.context = context; + } + + /** + * Add item to block storage movement attempted items map which holds the + * tracking/blockCollection id versus time stamp. + * + * @param startPathId + * - start satisfier path identifier + * @param fileId + * - file identifier + * @param monotonicNow + * - time now + * @param assignedBlocks + * - assigned blocks for block movement + * @param retryCount + * - retry count + */ + public void add(long startPathId, long fileId, long monotonicNow, + Map> assignedBlocks, int retryCount) { + AttemptedItemInfo itemInfo = new AttemptedItemInfo(startPathId, fileId, + monotonicNow, assignedBlocks.keySet(), retryCount); + synchronized (storageMovementAttemptedItems) { + storageMovementAttemptedItems.add(itemInfo); + } + synchronized (scheduledBlkLocs) { + scheduledBlkLocs.putAll(assignedBlocks); + } + } + + /** + * Notify the storage movement attempt finished block. + * + * @param reportedDn + * reported datanode + * @param type + * storage type + * @param reportedBlock + * reported block + */ + public void notifyReportedBlock(DatanodeInfo reportedDn, StorageType type, + Block reportedBlock) { + synchronized (scheduledBlkLocs) { + if (scheduledBlkLocs.size() <= 0) { + return; + } + matchesReportedBlock(reportedDn, type, reportedBlock); + } + } + + private void matchesReportedBlock(DatanodeInfo reportedDn, StorageType type, + Block reportedBlock) { + Set blkLocs = scheduledBlkLocs.get(reportedBlock); + if (blkLocs == null) { + return; // unknown block, simply skip. + } + + for (StorageTypeNodePair dn : blkLocs) { + boolean foundDn = dn.getDatanodeInfo().compareTo(reportedDn) == 0 ? true + : false; + boolean foundType = dn.getStorageType().equals(type); + if (foundDn && foundType) { + blkLocs.remove(dn); + Block[] mFinishedBlocks = new Block[1]; + mFinishedBlocks[0] = reportedBlock; + context.notifyMovementTriedBlocks(mFinishedBlocks); + // All the block locations has reported. + if (blkLocs.size() <= 0) { + movementFinishedBlocks.add(reportedBlock); + scheduledBlkLocs.remove(reportedBlock); // clean-up reported block + } + return; // found + } + } + if (LOG.isDebugEnabled()) { + LOG.debug("Reported block:{} not found in attempted blocks. Datanode:{}" + + ", StorageType:{}", reportedBlock, reportedDn, type); + } + } + + /** + * Starts the monitor thread. 
+ */ + public synchronized void start() { + monitorRunning = true; + timerThread = new Daemon(new BlocksStorageMovementAttemptMonitor()); + timerThread.setName("BlocksStorageMovementAttemptMonitor"); + timerThread.start(); + } + + /** + * Sets running flag to false. Also, this will interrupt monitor thread and + * clear all the queued up tasks. + */ + public synchronized void stop() { + monitorRunning = false; + if (timerThread != null) { + timerThread.interrupt(); + } + this.clearQueues(); + } + + /** + * Timed wait to stop monitor thread. + */ + synchronized void stopGracefully() { + if (timerThread == null) { + return; + } + if (monitorRunning) { + stop(); + } + try { + timerThread.join(3000); + } catch (InterruptedException ie) { + } + } + + /** + * A monitor class for checking block storage movement attempt status and long + * waiting items periodically. + */ + private class BlocksStorageMovementAttemptMonitor implements Runnable { + @Override + public void run() { + while (monitorRunning) { + try { + blockStorageMovementReportedItemsCheck(); + blocksStorageMovementUnReportedItemsCheck(); + Thread.sleep(minCheckTimeout); + } catch (InterruptedException ie) { + LOG.info("BlocksStorageMovementAttemptMonitor thread " + + "is interrupted.", ie); + } catch (IOException ie) { + LOG.warn("BlocksStorageMovementAttemptMonitor thread " + + "received exception and exiting.", ie); + } + } + } + } + + @VisibleForTesting + void blocksStorageMovementUnReportedItemsCheck() { + synchronized (storageMovementAttemptedItems) { + Iterator iter = storageMovementAttemptedItems + .iterator(); + long now = monotonicNow(); + while (iter.hasNext()) { + AttemptedItemInfo itemInfo = iter.next(); + if (now > itemInfo.getLastAttemptedOrReportedTime() + + selfRetryTimeout) { + long file = itemInfo.getFile(); + ItemInfo candidate = new ItemInfo(itemInfo.getStartPath(), file, + itemInfo.getRetryCount() + 1); + blockStorageMovementNeeded.add(candidate); + iter.remove(); + LOG.info("TrackID: {} becomes timed out and moved to needed " + + "retries queue for next iteration.", file); + } + } + } + } + + @VisibleForTesting + void blockStorageMovementReportedItemsCheck() throws IOException { + // Removes all available blocks from this queue and process it. 
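// A minimal, self-contained sketch of the drain-and-process pattern used by the
// reported-items check below: reporting threads add finished blocks to a
// BlockingQueue and the monitor drains whatever is available in one non-blocking
// call. The names here are placeholders, not Hadoop APIs.
import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

class DrainToPatternSketch {
  private final BlockingQueue<Long> finished = new LinkedBlockingQueue<>();

  void reportFinished(long blockId) {
    finished.add(blockId);       // called from the reporting side
  }

  int processFinishedBatch() {
    Collection<Long> batch = new ArrayList<>();
    finished.drainTo(batch);     // non-blocking: takes only what is queued right now
    // ...match each drained block against the attempted-items list here...
    return batch.size();
  }
}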
+ Collection finishedBlks = new ArrayList<>(); + movementFinishedBlocks.drainTo(finishedBlks); + + // Update attempted items list + for (Block blk : finishedBlks) { + synchronized (storageMovementAttemptedItems) { + Iterator iterator = storageMovementAttemptedItems + .iterator(); + while (iterator.hasNext()) { + AttemptedItemInfo attemptedItemInfo = iterator.next(); + attemptedItemInfo.getBlocks().remove(blk); + if (attemptedItemInfo.getBlocks().isEmpty()) { + blockStorageMovementNeeded.add(new ItemInfo( + attemptedItemInfo.getStartPath(), attemptedItemInfo.getFile(), + attemptedItemInfo.getRetryCount() + 1)); + iterator.remove(); + } + } + } + } + } + + @VisibleForTesting + public int getMovementFinishedBlocksCount() { + return movementFinishedBlocks.size(); + } + + @VisibleForTesting + public int getAttemptedItemsCount() { + synchronized (storageMovementAttemptedItems) { + return storageMovementAttemptedItems.size(); + } + } + + public void clearQueues() { + movementFinishedBlocks.clear(); + synchronized (storageMovementAttemptedItems) { + storageMovementAttemptedItems.clear(); + } + synchronized (scheduledBlkLocs) { + scheduledBlkLocs.clear(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementNeeded.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementNeeded.java new file mode 100644 index 00000000000..02b9cffe4fe --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/BlockStorageMovementNeeded.java @@ -0,0 +1,346 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.io.IOException; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.util.Daemon; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * A Class to track the block collection IDs (Inode's ID) for which physical + * storage movement needed as per the Namespace and StorageReports from DN. + * It scan the pending directories for which storage movement is required and + * schedule the block collection IDs for movement. It track the info of + * scheduled items and remove the SPS xAttr from the file/Directory once + * movement is success. 
+ */ +@InterfaceAudience.Private +public class BlockStorageMovementNeeded { + + public static final Logger LOG = + LoggerFactory.getLogger(BlockStorageMovementNeeded.class); + + private final Queue storageMovementNeeded = + new LinkedList(); + + /** + * Map of startPath and number of child's. Number of child's indicate the + * number of files pending to satisfy the policy. + */ + private final Map pendingWorkForDirectory = + new HashMap<>(); + + private final Context ctxt; + + private Daemon pathIdCollector; + + private SPSPathIdProcessor pathIDProcessor; + + // Amount of time to cache the SUCCESS status of path before turning it to + // NOT_AVAILABLE. + private static long statusClearanceElapsedTimeMs = 300000; + + public BlockStorageMovementNeeded(Context context) { + this.ctxt = context; + pathIDProcessor = new SPSPathIdProcessor(); + } + + /** + * Add the candidate to tracking list for which storage movement + * expected if necessary. + * + * @param trackInfo + * - track info for satisfy the policy + */ + public synchronized void add(ItemInfo trackInfo) { + if (trackInfo != null) { + storageMovementNeeded.add(trackInfo); + } + } + + /** + * Add the itemInfo list to tracking list for which storage movement expected + * if necessary. + * + * @param startPath + * - start path + * @param itemInfoList + * - List of child in the directory + * @param scanCompleted + * -Indicates whether the start id directory has no more elements to + * scan. + */ + @VisibleForTesting + public synchronized void addAll(long startPath, List itemInfoList, + boolean scanCompleted) { + storageMovementNeeded.addAll(itemInfoList); + updatePendingDirScanStats(startPath, itemInfoList.size(), scanCompleted); + } + + /** + * Add the itemInfo to tracking list for which storage movement expected if + * necessary. + * + * @param itemInfoList + * - List of child in the directory + * @param scanCompleted + * -Indicates whether the ItemInfo start id directory has no more + * elements to scan. + */ + @VisibleForTesting + public synchronized void add(ItemInfo itemInfo, boolean scanCompleted) { + storageMovementNeeded.add(itemInfo); + // This represents sps start id is file, so no need to update pending dir + // stats. + if (itemInfo.getStartPath() == itemInfo.getFile()) { + return; + } + updatePendingDirScanStats(itemInfo.getStartPath(), 1, scanCompleted); + } + + private void updatePendingDirScanStats(long startPath, int numScannedFiles, + boolean scanCompleted) { + DirPendingWorkInfo pendingWork = pendingWorkForDirectory.get(startPath); + if (pendingWork == null) { + pendingWork = new DirPendingWorkInfo(); + pendingWorkForDirectory.put(startPath, pendingWork); + } + pendingWork.addPendingWorkCount(numScannedFiles); + if (scanCompleted) { + pendingWork.markScanCompleted(); + } + } + + /** + * Gets the satisfier files for which block storage movements check necessary + * and make the movement if required. + * + * @return satisfier files + */ + public synchronized ItemInfo get() { + return storageMovementNeeded.poll(); + } + + /** + * Returns queue size. + */ + public synchronized int size() { + return storageMovementNeeded.size(); + } + + public synchronized void clearAll() { + storageMovementNeeded.clear(); + pendingWorkForDirectory.clear(); + } + + /** + * Decrease the pending child count for directory once one file blocks moved + * successfully. Remove the SPS xAttr if pending child count is zero. 
+ */ + public synchronized void removeItemTrackInfo(ItemInfo trackInfo, + boolean isSuccess) throws IOException { + if (trackInfo.isDir()) { + // If track is part of some start inode then reduce the pending + // directory work count. + long startId = trackInfo.getStartPath(); + if (!ctxt.isFileExist(startId)) { + // directory deleted just remove it. + this.pendingWorkForDirectory.remove(startId); + } else { + DirPendingWorkInfo pendingWork = pendingWorkForDirectory.get(startId); + if (pendingWork != null) { + pendingWork.decrementPendingWorkCount(); + if (pendingWork.isDirWorkDone()) { + ctxt.removeSPSHint(startId); + pendingWorkForDirectory.remove(startId); + } + } + } + } else { + // Remove xAttr if trackID doesn't exist in + // storageMovementAttemptedItems or file policy satisfied. + ctxt.removeSPSHint(trackInfo.getFile()); + } + } + + /** + * Clean all the movements in spsDirsToBeTraveresed/storageMovementNeeded + * and notify to clean up required resources. + * @throws IOException + */ + public synchronized void clearQueuesWithNotification() { + // Remove xAttr from directories + Long trackId; + while ((trackId = ctxt.getNextSPSPath()) != null) { + try { + // Remove xAttr for file + ctxt.removeSPSHint(trackId); + } catch (IOException ie) { + LOG.warn("Failed to remove SPS xattr for track id " + trackId, ie); + } + } + + // File's directly added to storageMovementNeeded, So try to remove + // xAttr for file + ItemInfo itemInfo; + while ((itemInfo = get()) != null) { + try { + // Remove xAttr for file + if (!itemInfo.isDir()) { + ctxt.removeSPSHint(itemInfo.getFile()); + } + } catch (IOException ie) { + LOG.warn( + "Failed to remove SPS xattr for track id " + + itemInfo.getFile(), ie); + } + } + this.clearAll(); + } + + /** + * Take dir tack ID from the spsDirsToBeTraveresed queue and collect child + * ID's to process for satisfy the policy. + */ + private class SPSPathIdProcessor implements Runnable { + + @Override + public void run() { + LOG.info("Starting SPSPathIdProcessor!."); + Long startINode = null; + while (ctxt.isRunning()) { + try { + if (!ctxt.isInSafeMode()) { + if (startINode == null) { + startINode = ctxt.getNextSPSPath(); + } // else same id will be retried + if (startINode == null) { + // Waiting for SPS path + Thread.sleep(3000); + } else { + ctxt.scanAndCollectFiles(startINode); + // check if directory was empty and no child added to queue + DirPendingWorkInfo dirPendingWorkInfo = + pendingWorkForDirectory.get(startINode); + if (dirPendingWorkInfo != null + && dirPendingWorkInfo.isDirWorkDone()) { + ctxt.removeSPSHint(startINode); + pendingWorkForDirectory.remove(startINode); + } + } + startINode = null; // Current inode successfully scanned. + } + } catch (Throwable t) { + String reClass = t.getClass().getName(); + if (InterruptedException.class.getName().equals(reClass)) { + LOG.info("SPSPathIdProcessor thread is interrupted. Stopping.."); + break; + } + LOG.warn("Exception while scanning file inodes to satisfy the policy", + t); + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + LOG.info("Interrupted while waiting in SPSPathIdProcessor", t); + break; + } + } + } + } + } + + /** + * Info for directory recursive scan. + */ + public static class DirPendingWorkInfo { + + private int pendingWorkCount = 0; + private boolean fullyScanned = false; + + /** + * Increment the pending work count for directory. 
+ */ + public synchronized void addPendingWorkCount(int count) { + this.pendingWorkCount = this.pendingWorkCount + count; + } + + /** + * Decrement the pending work count for directory one track info is + * completed. + */ + public synchronized void decrementPendingWorkCount() { + this.pendingWorkCount--; + } + + /** + * Return true if all the pending work is done and directory fully + * scanned, otherwise false. + */ + public synchronized boolean isDirWorkDone() { + return (pendingWorkCount <= 0 && fullyScanned); + } + + /** + * Mark directory scan is completed. + */ + public synchronized void markScanCompleted() { + this.fullyScanned = true; + } + } + + public void activate() { + pathIdCollector = new Daemon(pathIDProcessor); + pathIdCollector.setName("SPSPathIdProcessor"); + pathIdCollector.start(); + } + + public void close() { + if (pathIdCollector != null) { + pathIdCollector.interrupt(); + } + } + + @VisibleForTesting + public static void setStatusClearanceElapsedTimeMs( + long statusClearanceElapsedTimeMs) { + BlockStorageMovementNeeded.statusClearanceElapsedTimeMs = + statusClearanceElapsedTimeMs; + } + + @VisibleForTesting + public static long getStatusClearanceElapsedTimeMs() { + return statusClearanceElapsedTimeMs; + } + + public void markScanCompletedForDir(long inode) { + DirPendingWorkInfo pendingWork = pendingWorkForDirectory.get(inode); + if (pendingWork != null) { + pendingWork.markScanCompleted(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/Context.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/Context.java new file mode 100644 index 00000000000..b27294c27f5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/Context.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier.DatanodeMap; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; +import org.apache.hadoop.hdfs.server.protocol.BlockStorageMovementCommand.BlockMovingInfo; +import org.apache.hadoop.net.NetworkTopology; + +/** + * An interface for the communication between SPS and Namenode module. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface Context { + + /** + * Returns true if the SPS is running, false otherwise. + */ + boolean isRunning(); + + /** + * Returns true if the Namenode in safe mode, false otherwise. + */ + boolean isInSafeMode(); + + /** + * Gets the network topology. + * + * @param datanodeMap + * target datanodes + * + * @return network topology + */ + NetworkTopology getNetworkTopology(DatanodeMap datanodeMap); + + /** + * Returns true if the give file exists in the Namespace. + * + * @param filePath + * - file info + * @return true if the given file exists, false otherwise. + */ + boolean isFileExist(long filePath); + + /** + * Gets the storage policy details for the given policy ID. + * + * @param policyId + * - Storage policy ID + * @return the detailed policy object + */ + BlockStoragePolicy getStoragePolicy(byte policyId); + + /** + * Remove the hint which was added to track SPS call. + * + * @param spsPath + * - user invoked satisfier path + * @throws IOException + */ + void removeSPSHint(long spsPath) throws IOException; + + /** + * Gets the number of live datanodes in the cluster. + * + * @return number of live datanodes + */ + int getNumLiveDataNodes(); + + /** + * Get the file info for a specific file. + * + * @param file + * file path + * @return file status metadata information + */ + HdfsFileStatus getFileInfo(long file) throws IOException; + + /** + * Returns all the live datanodes and its storage details. + * + * @throws IOException + */ + DatanodeStorageReport[] getLiveDatanodeStorageReport() + throws IOException; + + /** + * @return next SPS path info to process. + */ + Long getNextSPSPath(); + + /** + * Do scan and collects the files under that directory and adds to the given + * BlockStorageMovementNeeded. + * + * @param filePath + * file path + */ + void scanAndCollectFiles(long filePath) + throws IOException, InterruptedException; + + /** + * Handles the block move tasks. BlockMovingInfo must contain the required + * info to move the block, that source location, destination location and + * storage types. + */ + void submitMoveTask(BlockMovingInfo blkMovingInfo) throws IOException; + + /** + * This can be used to notify to the SPS about block movement attempt + * finished. Then SPS will re-check whether it needs retry or not. + * + * @param moveAttemptFinishedBlks + * list of movement attempt finished blocks + */ + void notifyMovementTriedBlocks(Block[] moveAttemptFinishedBlks); +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/DatanodeCacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/DatanodeCacheManager.java new file mode 100644 index 00000000000..d4e514b8132 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/DatanodeCacheManager.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier.DatanodeMap; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; +import org.apache.hadoop.hdfs.server.protocol.StorageReport; +import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The Datanode cache Manager handles caching of {@link DatanodeStorageReport}. + * + * This class is instantiated by StoragePolicySatisifer. It maintains the array + * of datanode storage reports. It has a configurable refresh interval and + * periodically refresh the datanode cache by fetching latest + * {@link Context#getLiveDatanodeStorageReport()} once it reaches refresh + * interval. + */ +@InterfaceAudience.Private +public class DatanodeCacheManager { + private static final Logger LOG = LoggerFactory + .getLogger(DatanodeCacheManager.class); + + private final DatanodeMap datanodeMap; + private NetworkTopology cluster; + + /** + * Interval between scans in milliseconds. + */ + private final long refreshIntervalMs; + + private long lastAccessedTime; + + public DatanodeCacheManager(Configuration conf) { + refreshIntervalMs = conf.getLong( + DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS, + DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS_DEFAULT); + + LOG.info("DatanodeCacheManager refresh interval is {} milliseconds", + refreshIntervalMs); + datanodeMap = new DatanodeMap(); + } + + /** + * Returns the live datanodes and its storage details, which has available + * space (> 0) to schedule block moves. This will return array of datanodes + * from its local cache. It has a configurable refresh interval in millis and + * periodically refresh the datanode cache by fetching latest + * {@link Context#getLiveDatanodeStorageReport()} once it elapsed refresh + * interval. + * + * @throws IOException + */ + public DatanodeMap getLiveDatanodeStorageReport( + Context spsContext) throws IOException { + long now = Time.monotonicNow(); + long elapsedTimeMs = now - lastAccessedTime; + boolean refreshNeeded = elapsedTimeMs >= refreshIntervalMs; + lastAccessedTime = now; + if (refreshNeeded) { + if (LOG.isDebugEnabled()) { + LOG.debug("elapsedTimeMs > refreshIntervalMs : {} > {}," + + " so refreshing cache", elapsedTimeMs, refreshIntervalMs); + } + datanodeMap.reset(); // clear all previously cached items. + + // Fetch live datanodes from namenode and prepare DatanodeMap. 
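// Hedged sketch of the time-gated refresh idea this cache uses: re-fetch an
// expensive snapshot only when the configured interval has elapsed, otherwise serve
// the cached copy. The generic Supplier-based shape below is illustrative and not
// the Hadoop implementation.
import java.util.function.Supplier;

class TimeGatedCacheSketch<T> {
  private final long refreshIntervalMs;
  private final Supplier<T> loader;
  private long lastLoadedAtMs;
  private T cached;

  TimeGatedCacheSketch(long refreshIntervalMs, Supplier<T> loader) {
    this.refreshIntervalMs = refreshIntervalMs;
    this.loader = loader;
  }

  synchronized T get(long nowMs) {
    if (cached == null || nowMs - lastLoadedAtMs >= refreshIntervalMs) {
      cached = loader.get();     // e.g. pull fresh datanode storage reports
      lastLoadedAtMs = nowMs;
    }
    return cached;
  }
}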
+ DatanodeStorageReport[] liveDns = spsContext + .getLiveDatanodeStorageReport(); + for (DatanodeStorageReport storage : liveDns) { + StorageReport[] storageReports = storage.getStorageReports(); + List storageTypes = new ArrayList<>(); + List remainingSizeList = new ArrayList<>(); + for (StorageReport t : storageReports) { + if (t.getRemaining() > 0) { + storageTypes.add(t.getStorage().getStorageType()); + remainingSizeList.add(t.getRemaining()); + } + } + datanodeMap.addTarget(storage.getDatanodeInfo(), storageTypes, + remainingSizeList); + } + if (LOG.isDebugEnabled()) { + LOG.debug("LIVE datanodes: {}", datanodeMap); + } + // get network topology + cluster = spsContext.getNetworkTopology(datanodeMap); + } + return datanodeMap; + } + + NetworkTopology getCluster() { + return cluster; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/FileCollector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/FileCollector.java new file mode 100644 index 00000000000..fa8b31b5492 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/FileCollector.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * An interface for scanning the directory recursively and collect files + * under the given directory. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface FileCollector { + + /** + * This method can be used to scan and collects the files under that + * directory and adds to the given BlockStorageMovementNeeded. + * + * @param path + * - file path id + */ + void scanAndCollectFiles(long path) + throws IOException, InterruptedException; +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/ItemInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/ItemInfo.java new file mode 100644 index 00000000000..949e3fcdc26 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/ItemInfo.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * ItemInfo is a file info object for which need to satisfy the policy. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class ItemInfo { + private long startPathId; + private long fileId; + private int retryCount; + + public ItemInfo(long startPathId, long fileId) { + this.startPathId = startPathId; + this.fileId = fileId; + // set 0 when item is getting added first time in queue. + this.retryCount = 0; + } + + public ItemInfo(final long startPathId, final long fileId, + final int retryCount) { + this.startPathId = startPathId; + this.fileId = fileId; + this.retryCount = retryCount; + } + + /** + * Returns the start path of the current file. This indicates that SPS + * was invoked on this path. + */ + public long getStartPath() { + return startPathId; + } + + /** + * Returns the file for which needs to satisfy the policy. + */ + public long getFile() { + return fileId; + } + + /** + * Returns true if the tracking path is a directory, false otherwise. + */ + public boolean isDir() { + return !(startPathId == fileId); + } + + /** + * Get the attempted retry count of the block for satisfy the policy. + */ + public int getRetryCount() { + return retryCount; + } + + /** + * Increments the retry count. + */ + public void increRetryCount() { + this.retryCount++; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/SPSService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/SPSService.java new file mode 100644 index 00000000000..a83d32a3474 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/SPSService.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; + +/** + * An interface for SPSService, which exposes life cycle and processing APIs. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface SPSService { + + /** + * Initializes the helper services. + * + * @param ctxt + * - context is an helper service to provide communication channel + * between NN and SPS + */ + void init(Context ctxt); + + /** + * Starts the SPS service. Make sure to initialize the helper services before + * invoking this method. + * + * @param spsMode sps service mode + */ + void start(StoragePolicySatisfierMode spsMode); + + /** + * Stops the SPS service gracefully. Timed wait to stop storage policy + * satisfier daemon threads. + */ + void stopGracefully(); + + /** + * Stops the SPS service. + * + * @param forceStop + * true represents to clear all the sps path's hint, false otherwise. + */ + void stop(boolean forceStop); + + /** + * Check whether StoragePolicySatisfier is running. + * + * @return true if running + */ + boolean isRunning(); + + /** + * Adds the Item information(file etc) to processing queue. + * + * @param itemInfo + * file info object for which need to satisfy the policy + */ + void addFileToProcess(ItemInfo itemInfo, boolean scanCompleted); + + /** + * Adds all the Item information(file etc) to processing queue. + * + * @param startPathId + * - directoryId/fileId, on which SPS was called. + * @param itemInfoList + * - list of item infos + * @param scanCompleted + * - whether the scanning of directory fully done with itemInfoList + */ + void addAllFilesToProcess(long startPathId, List itemInfoList, + boolean scanCompleted); + + /** + * @return current processing queue size. + */ + int processingQueueSize(); + + /** + * @return the configuration. + */ + Configuration getConf(); + + /** + * Marks the scanning of directory if finished. + * + * @param spsPath + * - satisfier path id + */ + void markScanCompletedForPath(long spsPath); + + /** + * Given node is reporting that it received a certain movement attempt + * finished block. + * + * @param dnInfo + * - reported datanode + * @param storageType + * - storage type + * @param block + * - block that is attempted to move + */ + void notifyStorageMovementAttemptFinishedBlk(DatanodeInfo dnInfo, + StorageType storageType, Block block); +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfier.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfier.java new file mode 100644 index 00000000000..4c04b466789 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfier.java @@ -0,0 +1,1226 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import static org.apache.hadoop.util.Time.monotonicNow; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock; +import org.apache.hadoop.hdfs.server.balancer.Matcher; +import org.apache.hadoop.hdfs.server.namenode.ErasureCodingPolicyManager; +import org.apache.hadoop.hdfs.server.protocol.BlockStorageMovementCommand.BlockMovingInfo; +import org.apache.hadoop.hdfs.util.StripedBlockUtil; +import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +/** + * Setting storagePolicy on a file after the file write will only update the new + * storage policy type in Namespace, but physical block storage movement will + * not happen until user runs "Mover Tool" explicitly for such files. The + * StoragePolicySatisfier Daemon thread implemented for addressing the case + * where users may want to physically move the blocks by a dedicated daemon (can + * run inside Namenode or stand alone) instead of running mover tool explicitly. + * Just calling client API to satisfyStoragePolicy on a file/dir will + * automatically trigger to move its physical storage locations as expected in + * asynchronous manner. Here SPS will pick the file blocks which are expecting + * to change its storages, then it will build the mapping of source block + * location and expected storage type and location to move. After that this + * class will also prepare requests to send to Datanode for processing the + * physical block movements. 
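// Hedged usage sketch of the client-side flow described in the class javadoc below:
// set a storage policy (metadata only) and then ask SPS to satisfy it, instead of
// running the Mover tool. Assumes an already-initialized DistributedFileSystem; the
// path and policy name are examples only.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

class SatisfyStoragePolicySketch {
  static void archiveDirectory(DistributedFileSystem dfs) throws IOException {
    Path dir = new Path("/archive/2018");
    dfs.setStoragePolicy(dir, "COLD");   // updates only the namespace policy id
    dfs.satisfyStoragePolicy(dir);       // queues the path for asynchronous SPS moves
  }
}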
+ */ +@InterfaceAudience.Private +public class StoragePolicySatisfier implements SPSService, Runnable { + public static final Logger LOG = + LoggerFactory.getLogger(StoragePolicySatisfier.class); + private Daemon storagePolicySatisfierThread; + private BlockStorageMovementNeeded storageMovementNeeded; + private BlockStorageMovementAttemptedItems storageMovementsMonitor; + private volatile boolean isRunning = false; + private int spsWorkMultiplier; + private long blockCount = 0L; + private int blockMovementMaxRetry; + private Context ctxt; + private final Configuration conf; + private DatanodeCacheManager dnCacheMgr; + + public StoragePolicySatisfier(Configuration conf) { + this.conf = conf; + } + + /** + * Represents the collective analysis status for all blocks. + */ + private static class BlocksMovingAnalysis { + + enum Status { + // Represents that, the analysis skipped due to some conditions. A such + // condition is if block collection is in incomplete state. + ANALYSIS_SKIPPED_FOR_RETRY, + // Represents that few or all blocks found respective target to do + // the storage movement. + BLOCKS_TARGETS_PAIRED, + // Represents that none of the blocks found respective target to do + // the storage movement. + NO_BLOCKS_TARGETS_PAIRED, + // Represents that, none of the blocks found for block storage movements. + BLOCKS_ALREADY_SATISFIED, + // Represents that, the analysis skipped due to some conditions. + // Example conditions are if no blocks really exists in block collection + // or + // if analysis is not required on ec files with unsuitable storage + // policies + BLOCKS_TARGET_PAIRING_SKIPPED, + // Represents that, All the reported blocks are satisfied the policy but + // some of the blocks are low redundant. + FEW_LOW_REDUNDANCY_BLOCKS, + // Represents that, movement failures due to unexpected errors. + BLOCKS_FAILED_TO_MOVE + } + + private Status status = null; + private Map> assignedBlocks = null; + + BlocksMovingAnalysis(Status status, + Map> assignedBlocks) { + this.status = status; + this.assignedBlocks = assignedBlocks; + } + } + + public void init(final Context context) { + this.ctxt = context; + this.storageMovementNeeded = new BlockStorageMovementNeeded(context); + this.storageMovementsMonitor = new BlockStorageMovementAttemptedItems( + this, storageMovementNeeded, context); + this.spsWorkMultiplier = getSPSWorkMultiplier(getConf()); + this.blockMovementMaxRetry = getConf().getInt( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_KEY, + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_DEFAULT); + } + + /** + * Start storage policy satisfier demon thread. Also start block storage + * movements monitor for retry the attempts if needed. 
+ */ + @Override + public synchronized void start(StoragePolicySatisfierMode serviceMode) { + if (serviceMode == StoragePolicySatisfierMode.NONE) { + LOG.error("Can't start StoragePolicySatisfier for the given mode:{}", + serviceMode); + return; + } + LOG.info("Starting {} StoragePolicySatisfier.", + StringUtils.toLowerCase(serviceMode.toString())); + isRunning = true; + storagePolicySatisfierThread = new Daemon(this); + storagePolicySatisfierThread.setName("StoragePolicySatisfier"); + storagePolicySatisfierThread.start(); + this.storageMovementsMonitor.start(); + this.storageMovementNeeded.activate(); + dnCacheMgr = new DatanodeCacheManager(conf); + } + + @Override + public synchronized void stop(boolean forceStop) { + isRunning = false; + if (storagePolicySatisfierThread == null) { + return; + } + + storageMovementNeeded.close(); + + storagePolicySatisfierThread.interrupt(); + this.storageMovementsMonitor.stop(); + if (forceStop) { + storageMovementNeeded.clearQueuesWithNotification(); + } else { + LOG.info("Stopping StoragePolicySatisfier."); + } + } + + @Override + public synchronized void stopGracefully() { + if (isRunning) { + stop(false); + } + + if (this.storageMovementsMonitor != null) { + this.storageMovementsMonitor.stopGracefully(); + } + + if (storagePolicySatisfierThread != null) { + try { + storagePolicySatisfierThread.join(3000); + } catch (InterruptedException ie) { + if (LOG.isDebugEnabled()) { + LOG.debug("Interrupted Exception while waiting to join sps thread," + + " ignoring it", ie); + } + } + } + } + + @Override + public boolean isRunning() { + return isRunning; + } + + @Override + public void run() { + while (isRunning) { + // Check if dependent service is running + if (!ctxt.isRunning()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Upstream service is down, skipping the sps work."); + } + continue; + } + ItemInfo itemInfo = null; + try { + boolean retryItem = false; + if (!ctxt.isInSafeMode()) { + itemInfo = storageMovementNeeded.get(); + if (itemInfo != null) { + if(itemInfo.getRetryCount() >= blockMovementMaxRetry){ + LOG.info("Failed to satisfy the policy after " + + blockMovementMaxRetry + " retries. Removing inode " + + itemInfo.getFile() + " from the queue"); + storageMovementNeeded.removeItemTrackInfo(itemInfo, false); + continue; + } + long trackId = itemInfo.getFile(); + BlocksMovingAnalysis status = null; + BlockStoragePolicy existingStoragePolicy; + // TODO: presently, context internally acquire the lock + // and returns the result. Need to discuss to move the lock outside? + HdfsFileStatus fileStatus = ctxt.getFileInfo(trackId); + // Check path existence. + if (fileStatus == null || fileStatus.isDir()) { + // File doesn't exists (maybe got deleted) or its a directory, + // just remove trackId from the queue + storageMovementNeeded.removeItemTrackInfo(itemInfo, true); + } else { + byte existingStoragePolicyID = fileStatus.getStoragePolicy(); + existingStoragePolicy = ctxt + .getStoragePolicy(existingStoragePolicyID); + + HdfsLocatedFileStatus file = (HdfsLocatedFileStatus) fileStatus; + status = analyseBlocksStorageMovementsAndAssignToDN(file, + existingStoragePolicy); + switch (status.status) { + // Just add to monitor, so it will be retried after timeout + case ANALYSIS_SKIPPED_FOR_RETRY: + // Just add to monitor, so it will be tracked for report and + // be removed on storage movement attempt finished report. + case BLOCKS_TARGETS_PAIRED: + if (LOG.isDebugEnabled()) { + LOG.debug("Block analysis status:{} for the file id:{}." 
+ + " Adding to attempt monitor queue for the storage " + + "movement attempt finished report", + status.status, fileStatus.getFileId()); + } + this.storageMovementsMonitor.add(itemInfo.getStartPath(), + itemInfo.getFile(), monotonicNow(), status.assignedBlocks, + itemInfo.getRetryCount()); + break; + case NO_BLOCKS_TARGETS_PAIRED: + if (LOG.isDebugEnabled()) { + LOG.debug("Adding trackID:{} for the file id:{} back to" + + " retry queue as none of the blocks found its eligible" + + " targets.", trackId, fileStatus.getFileId()); + } + retryItem = true; + break; + case FEW_LOW_REDUNDANCY_BLOCKS: + if (LOG.isDebugEnabled()) { + LOG.debug("Adding trackID:{} for the file id:{} back to " + + "retry queue as some of the blocks are low redundant.", + trackId, fileStatus.getFileId()); + } + retryItem = true; + break; + case BLOCKS_FAILED_TO_MOVE: + if (LOG.isDebugEnabled()) { + LOG.debug("Adding trackID:{} for the file id:{} back to " + + "retry queue as some of the blocks movement failed.", + trackId, fileStatus.getFileId()); + } + retryItem = true; + break; + // Just clean Xattrs + case BLOCKS_TARGET_PAIRING_SKIPPED: + case BLOCKS_ALREADY_SATISFIED: + default: + LOG.info("Block analysis status:{} for the file id:{}." + + " So, Cleaning up the Xattrs.", status.status, + fileStatus.getFileId()); + storageMovementNeeded.removeItemTrackInfo(itemInfo, true); + break; + } + } + } + } else { + LOG.info("Namenode is in safemode. It will retry again."); + Thread.sleep(3000); + } + int numLiveDn = ctxt.getNumLiveDataNodes(); + if (storageMovementNeeded.size() == 0 + || blockCount > (numLiveDn * spsWorkMultiplier)) { + Thread.sleep(3000); + blockCount = 0L; + } + if (retryItem) { + this.storageMovementNeeded.add(itemInfo); + } + } catch (IOException e) { + LOG.error("Exception during StoragePolicySatisfier execution - " + + "will continue next cycle", e); + // Since it could not finish this item in previous iteration due to IOE, + // just try again. + this.storageMovementNeeded.add(itemInfo); + } catch (Throwable t) { + synchronized (this) { + if (isRunning) { + isRunning = false; + if (t instanceof InterruptedException) { + LOG.info("Stopping StoragePolicySatisfier.", t); + } else { + LOG.error("StoragePolicySatisfier thread received " + + "runtime exception.", t); + } + // Stopping monitor thread and clearing queues as well + this.clearQueues(); + this.storageMovementsMonitor.stopGracefully(); + } + } + } + } + } + + private BlocksMovingAnalysis analyseBlocksStorageMovementsAndAssignToDN( + HdfsLocatedFileStatus fileInfo, + BlockStoragePolicy existingStoragePolicy) throws IOException { + BlocksMovingAnalysis.Status status = + BlocksMovingAnalysis.Status.BLOCKS_ALREADY_SATISFIED; + final ErasureCodingPolicy ecPolicy = fileInfo.getErasureCodingPolicy(); + final LocatedBlocks locatedBlocks = fileInfo.getLocatedBlocks(); + final boolean lastBlkComplete = locatedBlocks.isLastBlockComplete(); + if (!lastBlkComplete) { + // Postpone, currently file is under construction + LOG.info("File: {} is under construction. So, postpone" + + " this to the next retry iteration", fileInfo.getFileId()); + return new BlocksMovingAnalysis( + BlocksMovingAnalysis.Status.ANALYSIS_SKIPPED_FOR_RETRY, + new HashMap<>()); + } + + List blocks = locatedBlocks.getLocatedBlocks(); + if (blocks.size() == 0) { + LOG.info("File: {} is not having any blocks." 
+ + " So, skipping the analysis.", fileInfo.getFileId()); + return new BlocksMovingAnalysis( + BlocksMovingAnalysis.Status.BLOCKS_TARGET_PAIRING_SKIPPED, + new HashMap<>()); + } + List blockMovingInfos = new ArrayList(); + boolean hasLowRedundancyBlocks = false; + int replication = fileInfo.getReplication(); + DatanodeMap liveDns = dnCacheMgr.getLiveDatanodeStorageReport(ctxt); + for (int i = 0; i < blocks.size(); i++) { + LocatedBlock blockInfo = blocks.get(i); + + // Block is considered as low redundancy when the block locations array + // length is less than expected replication factor. If any of the block is + // low redundant, then hasLowRedundancyBlocks will be marked as true. + hasLowRedundancyBlocks |= isLowRedundancyBlock(blockInfo, replication, + ecPolicy); + + List expectedStorageTypes; + if (blockInfo.isStriped()) { + if (ErasureCodingPolicyManager + .checkStoragePolicySuitableForECStripedMode( + existingStoragePolicy.getId())) { + expectedStorageTypes = existingStoragePolicy + .chooseStorageTypes((short) blockInfo.getLocations().length); + } else { + // Currently we support only limited policies (HOT, COLD, ALLSSD) + // for EC striped mode files. SPS will ignore to move the blocks if + // the storage policy is not in EC Striped mode supported policies + LOG.warn("The storage policy " + existingStoragePolicy.getName() + + " is not suitable for Striped EC files. " + + "So, ignoring to move the blocks"); + return new BlocksMovingAnalysis( + BlocksMovingAnalysis.Status.BLOCKS_TARGET_PAIRING_SKIPPED, + new HashMap<>()); + } + } else { + expectedStorageTypes = existingStoragePolicy + .chooseStorageTypes(fileInfo.getReplication()); + } + + List existing = new LinkedList( + Arrays.asList(blockInfo.getStorageTypes())); + if (!removeOverlapBetweenStorageTypes(expectedStorageTypes, + existing, true)) { + boolean blocksPaired = computeBlockMovingInfos(blockMovingInfos, + blockInfo, expectedStorageTypes, existing, blockInfo.getLocations(), + liveDns, ecPolicy); + if (blocksPaired) { + status = BlocksMovingAnalysis.Status.BLOCKS_TARGETS_PAIRED; + } else if (status != + BlocksMovingAnalysis.Status.BLOCKS_TARGETS_PAIRED) { + // Check if the previous block was successfully paired. Here the + // status will set to NO_BLOCKS_TARGETS_PAIRED only when none of the + // blocks of a file found its eligible targets to satisfy the storage + // policy. + status = BlocksMovingAnalysis.Status.NO_BLOCKS_TARGETS_PAIRED; + } + } + } + + // If there is no block paired and few blocks are low redundant, so marking + // the status as FEW_LOW_REDUNDANCY_BLOCKS. + if (hasLowRedundancyBlocks + && status != BlocksMovingAnalysis.Status.BLOCKS_TARGETS_PAIRED) { + status = BlocksMovingAnalysis.Status.FEW_LOW_REDUNDANCY_BLOCKS; + } + Map> assignedBlocks = new HashMap<>(); + for (BlockMovingInfo blkMovingInfo : blockMovingInfos) { + // Check for at least one block storage movement has been chosen + try { + ctxt.submitMoveTask(blkMovingInfo); + LOG.debug("BlockMovingInfo: {}", blkMovingInfo); + StorageTypeNodePair nodeStorage = new StorageTypeNodePair( + blkMovingInfo.getTargetStorageType(), blkMovingInfo.getTarget()); + Set nodesWithStorage = assignedBlocks + .get(blkMovingInfo.getBlock()); + if (nodesWithStorage == null) { + nodesWithStorage = new HashSet<>(); + assignedBlocks.put(blkMovingInfo.getBlock(), nodesWithStorage); + } + nodesWithStorage.add(nodeStorage); + blockCount++; + } catch (IOException e) { + LOG.warn("Exception while scheduling movement task", e); + // failed to move the block. 
+ status = BlocksMovingAnalysis.Status.BLOCKS_FAILED_TO_MOVE; + } + } + return new BlocksMovingAnalysis(status, assignedBlocks); + } + + /** + * The given block is considered as low redundancy when the block locations + * length is less than expected replication factor. For EC blocks, redundancy + * is the summation of data + parity blocks. + * + * @param blockInfo + * block + * @param replication + * replication factor of the given file block + * @param ecPolicy + * erasure coding policy of the given file block + * @return true if the given block is low redundant. + */ + private boolean isLowRedundancyBlock(LocatedBlock blockInfo, int replication, + ErasureCodingPolicy ecPolicy) { + boolean hasLowRedundancyBlock = false; + if (blockInfo.isStriped()) { + // For EC blocks, redundancy is the summation of data + parity blocks. + replication = ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits(); + } + // block is considered as low redundancy when the block locations length is + // less than expected replication factor. + hasLowRedundancyBlock = blockInfo.getLocations().length < replication ? true + : false; + return hasLowRedundancyBlock; + } + + /** + * Compute the list of block moving information corresponding to the given + * blockId. This will check that each block location of the given block is + * satisfying the expected storage policy. If block location is not satisfied + * the policy then find out the target node with the expected storage type to + * satisfy the storage policy. + * + * @param blockMovingInfos + * - list of block source and target node pair + * @param blockInfo + * - block details + * @param expectedStorageTypes + * - list of expected storage type to satisfy the storage policy + * @param existing + * - list to get existing storage types + * @param storages + * - available storages + * @param liveDns + * - live datanodes which can be used as targets + * @param ecPolicy + * - ec policy of sps invoked file + * @return false if some of the block locations failed to find target node to + * satisfy the storage policy, true otherwise + */ + private boolean computeBlockMovingInfos( + List blockMovingInfos, LocatedBlock blockInfo, + List expectedStorageTypes, List existing, + DatanodeInfo[] storages, DatanodeMap liveDns, + ErasureCodingPolicy ecPolicy) { + boolean foundMatchingTargetNodesForBlock = true; + if (!removeOverlapBetweenStorageTypes(expectedStorageTypes, + existing, true)) { + List sourceWithStorageMap = + new ArrayList(); + List existingBlockStorages = new ArrayList( + Arrays.asList(storages)); + + // Add existing storages into exclude nodes to avoid choosing this as + // remote target later. + List excludeNodes = new ArrayList<>(existingBlockStorages); + + // if expected type exists in source node already, local movement would be + // possible, so lets find such sources first. + Iterator iterator = existingBlockStorages.iterator(); + while (iterator.hasNext()) { + DatanodeInfoWithStorage dnInfo = (DatanodeInfoWithStorage) iterator + .next(); + if (checkSourceAndTargetTypeExists(dnInfo, existing, + expectedStorageTypes, liveDns)) { + sourceWithStorageMap + .add(new StorageTypeNodePair(dnInfo.getStorageType(), dnInfo)); + iterator.remove(); + existing.remove(dnInfo.getStorageType()); + } + } + + // Let's find sources for existing types left. 
+ for (StorageType existingType : existing) { + iterator = existingBlockStorages.iterator(); + while (iterator.hasNext()) { + DatanodeInfoWithStorage dnStorageInfo = + (DatanodeInfoWithStorage) iterator.next(); + StorageType storageType = dnStorageInfo.getStorageType(); + if (storageType == existingType) { + iterator.remove(); + sourceWithStorageMap.add(new StorageTypeNodePair(storageType, + dnStorageInfo)); + break; + } + } + } + + EnumMap> targetDns = + findTargetsForExpectedStorageTypes(expectedStorageTypes, liveDns); + + foundMatchingTargetNodesForBlock |= findSourceAndTargetToMove( + blockMovingInfos, blockInfo, sourceWithStorageMap, + expectedStorageTypes, targetDns, + ecPolicy, excludeNodes); + } + return foundMatchingTargetNodesForBlock; + } + + /** + * Find the good target node for each source node for which block storages was + * misplaced. + * + * @param blockMovingInfos + * - list of block source and target node pair + * @param blockInfo + * - Block + * @param sourceWithStorageList + * - Source Datanode with storages list + * @param expectedTypes + * - Expecting storages to move + * @param targetDns + * - Available DNs for expected storage types + * @param ecPolicy + * - erasure coding policy of sps invoked file + * @param excludeNodes + * - existing source nodes, which has replica copy + * @return false if some of the block locations failed to find target node to + * satisfy the storage policy + */ + private boolean findSourceAndTargetToMove( + List blockMovingInfos, LocatedBlock blockInfo, + List sourceWithStorageList, + List expectedTypes, + EnumMap> targetDns, + ErasureCodingPolicy ecPolicy, List excludeNodes) { + boolean foundMatchingTargetNodesForBlock = true; + + // Looping over all the source node locations and choose the target + // storage within same node if possible. This is done separately to + // avoid choosing a target which already has this block. + for (int i = 0; i < sourceWithStorageList.size(); i++) { + StorageTypeNodePair existingTypeNodePair = sourceWithStorageList.get(i); + + // Check whether the block replica is already placed in the expected + // storage type in this source datanode. + if (!expectedTypes.contains(existingTypeNodePair.storageType)) { + StorageTypeNodePair chosenTarget = chooseTargetTypeInSameNode(blockInfo, + existingTypeNodePair.dn, targetDns, expectedTypes); + if (chosenTarget != null) { + if (blockInfo.isStriped()) { + buildStripedBlockMovingInfos(blockInfo, existingTypeNodePair.dn, + existingTypeNodePair.storageType, chosenTarget.dn, + chosenTarget.storageType, blockMovingInfos, + ecPolicy); + } else { + buildContinuousBlockMovingInfos(blockInfo, existingTypeNodePair.dn, + existingTypeNodePair.storageType, chosenTarget.dn, + chosenTarget.storageType, blockMovingInfos); + } + expectedTypes.remove(chosenTarget.storageType); + } + } + } + // If all the sources and targets are paired within same node, then simply + // return. + if (expectedTypes.size() <= 0) { + return foundMatchingTargetNodesForBlock; + } + // Looping over all the source node locations. Choose a remote target + // storage node if it was not found out within same node. + for (int i = 0; i < sourceWithStorageList.size(); i++) { + StorageTypeNodePair existingTypeNodePair = sourceWithStorageList.get(i); + StorageTypeNodePair chosenTarget = null; + // Chosen the target storage within same datanode. So just skipping this + // source node. 
+ if (checkIfAlreadyChosen(blockMovingInfos, existingTypeNodePair.dn)) { + continue; + } + if (chosenTarget == null && dnCacheMgr.getCluster().isNodeGroupAware()) { + chosenTarget = chooseTarget(blockInfo, existingTypeNodePair.dn, + expectedTypes, Matcher.SAME_NODE_GROUP, targetDns, + excludeNodes); + } + + // Then, match nodes on the same rack + if (chosenTarget == null) { + chosenTarget = + chooseTarget(blockInfo, existingTypeNodePair.dn, expectedTypes, + Matcher.SAME_RACK, targetDns, excludeNodes); + } + + if (chosenTarget == null) { + chosenTarget = + chooseTarget(blockInfo, existingTypeNodePair.dn, expectedTypes, + Matcher.ANY_OTHER, targetDns, excludeNodes); + } + if (null != chosenTarget) { + if (blockInfo.isStriped()) { + buildStripedBlockMovingInfos(blockInfo, existingTypeNodePair.dn, + existingTypeNodePair.storageType, chosenTarget.dn, + chosenTarget.storageType, blockMovingInfos, ecPolicy); + } else { + buildContinuousBlockMovingInfos(blockInfo, existingTypeNodePair.dn, + existingTypeNodePair.storageType, chosenTarget.dn, + chosenTarget.storageType, blockMovingInfos); + } + + expectedTypes.remove(chosenTarget.storageType); + excludeNodes.add(chosenTarget.dn); + } else { + LOG.warn( + "Failed to choose target datanode for the required" + + " storage types {}, block:{}, existing storage type:{}", + expectedTypes, blockInfo, existingTypeNodePair.storageType); + } + } + + if (expectedTypes.size() > 0) { + foundMatchingTargetNodesForBlock = false; + } + + return foundMatchingTargetNodesForBlock; + } + + private boolean checkIfAlreadyChosen(List blockMovingInfos, + DatanodeInfo dn) { + for (BlockMovingInfo blockMovingInfo : blockMovingInfos) { + if (blockMovingInfo.getSource().equals(dn)) { + return true; + } + } + return false; + } + + private void buildContinuousBlockMovingInfos(LocatedBlock blockInfo, + DatanodeInfo sourceNode, StorageType sourceStorageType, + DatanodeInfo targetNode, StorageType targetStorageType, + List blkMovingInfos) { + Block blk = ExtendedBlock.getLocalBlock(blockInfo.getBlock()); + BlockMovingInfo blkMovingInfo = new BlockMovingInfo(blk, sourceNode, + targetNode, sourceStorageType, targetStorageType); + blkMovingInfos.add(blkMovingInfo); + } + + private void buildStripedBlockMovingInfos(LocatedBlock blockInfo, + DatanodeInfo sourceNode, StorageType sourceStorageType, + DatanodeInfo targetNode, StorageType targetStorageType, + List blkMovingInfos, ErasureCodingPolicy ecPolicy) { + // For a striped block, it needs to construct internal block at the given + // index of a block group. Here it is iterating over all the block indices + // and construct internal blocks which can be then considered for block + // movement. + LocatedStripedBlock sBlockInfo = (LocatedStripedBlock) blockInfo; + byte[] indices = sBlockInfo.getBlockIndices(); + DatanodeInfo[] locations = sBlockInfo.getLocations(); + for (int i = 0; i < indices.length; i++) { + byte blkIndex = indices[i]; + if (blkIndex >= 0) { + // pick block movement only for the given source node. 
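The remote-target search above widens its scope step by step: same node group when the topology is node-group aware, then same rack, then any other node. A compact sketch of that cascade under simplified assumptions (Node, Scope and chooseTarget below are illustrative, not the Matcher/NetworkTopology API; Java 16+):

import java.util.List;
import java.util.Optional;
import java.util.function.BiPredicate;

public class TargetFallbackSketch {
  enum Scope { SAME_NODE_GROUP, SAME_RACK, ANY_OTHER }

  record Node(String name, String rack, String nodeGroup) {}

  /** Try scopes in order of increasing distance and return the first match. */
  static Optional<Node> chooseTarget(Node source, List<Node> candidates,
      boolean nodeGroupAware) {
    List<Scope> order = nodeGroupAware
        ? List.of(Scope.SAME_NODE_GROUP, Scope.SAME_RACK, Scope.ANY_OTHER)
        : List.of(Scope.SAME_RACK, Scope.ANY_OTHER);
    for (Scope scope : order) {
      BiPredicate<Node, Node> match = switch (scope) {
        case SAME_NODE_GROUP -> (s, t) -> s.nodeGroup().equals(t.nodeGroup());
        case SAME_RACK -> (s, t) -> s.rack().equals(t.rack());
        case ANY_OTHER -> (s, t) -> true;
      };
      for (Node t : candidates) {
        if (!t.equals(source) && match.test(source, t)) {
          return Optional.of(t);
        }
      }
    }
    return Optional.empty();
  }

  public static void main(String[] args) {
    Node src = new Node("dn1", "/r1", "/r1/g1");
    List<Node> cands = List.of(new Node("dn2", "/r2", "/r2/g1"),
        new Node("dn3", "/r1", "/r1/g2"));
    System.out.println(chooseTarget(src, cands, true)); // picks same-rack dn3
  }
}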
+ if (sourceNode.equals(locations[i])) { + // construct internal block + ExtendedBlock extBlock = sBlockInfo.getBlock(); + long numBytes = StripedBlockUtil.getInternalBlockLength( + extBlock.getNumBytes(), ecPolicy, blkIndex); + Block blk = new Block(ExtendedBlock.getLocalBlock(extBlock)); + long blkId = blk.getBlockId() + blkIndex; + blk.setBlockId(blkId); + blk.setNumBytes(numBytes); + BlockMovingInfo blkMovingInfo = new BlockMovingInfo(blk, sourceNode, + targetNode, sourceStorageType, targetStorageType); + blkMovingInfos.add(blkMovingInfo); + } + } + } + } + + /** + * Choose the target storage within same datanode if possible. + * + * @param blockInfo + * - block info + * @param source + * - source datanode + * @param targetDns + * - set of target datanodes with its respective storage type + * @param targetTypes + * - list of target storage types + */ + private StorageTypeNodePair chooseTargetTypeInSameNode(LocatedBlock blockInfo, + DatanodeInfo source, + EnumMap> targetDns, + List targetTypes) { + for (StorageType t : targetTypes) { + List targetNodeStorages = + targetDns.get(t); + if (targetNodeStorages == null) { + continue; + } + for (DatanodeWithStorage.StorageDetails targetNode : targetNodeStorages) { + if (targetNode.getDatanodeInfo().equals(source)) { + // Good target with enough space to write the given block size. + if (targetNode.hasSpaceForScheduling(blockInfo.getBlockSize())) { + targetNode.incScheduledSize(blockInfo.getBlockSize()); + return new StorageTypeNodePair(t, source); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Datanode:{} storage type:{} doesn't have sufficient " + + "space:{} to move the target block size:{}", + source, t, targetNode, blockInfo.getBlockSize()); + } + } + } + } + return null; + } + + private StorageTypeNodePair chooseTarget(LocatedBlock block, + DatanodeInfo source, List targetTypes, Matcher matcher, + EnumMap> + locsForExpectedStorageTypes, List excludeNodes) { + for (StorageType t : targetTypes) { + List nodesWithStorages = + locsForExpectedStorageTypes.get(t); + if (nodesWithStorages == null || nodesWithStorages.isEmpty()) { + continue; // no target nodes with the required storage type. + } + Collections.shuffle(nodesWithStorages); + for (DatanodeWithStorage.StorageDetails targetNode : nodesWithStorages) { + DatanodeInfo target = targetNode.getDatanodeInfo(); + if (!excludeNodes.contains(target) + && matcher.match(dnCacheMgr.getCluster(), source, target)) { + // Good target with enough space to write the given block size. + if (targetNode.hasSpaceForScheduling(block.getBlockSize())) { + targetNode.incScheduledSize(block.getBlockSize()); + return new StorageTypeNodePair(t, target); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Datanode:{} storage type:{} doesn't have sufficient " + + "space:{} to move the target block size:{}", + target, t, targetNode, block.getBlockSize()); + } + } + } + } + return null; + } + + /** + * Keeps datanode with its respective storage type. 
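chooseTarget above shuffles the candidate storages for a type, skips excluded nodes, and reserves space on the first candidate that can hold the block. A self-contained sketch of that shuffle-exclude-reserve pattern (Candidate and pick are made-up names and the capacities are arbitrary):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;

public class CandidateShuffleSketch {
  /** A candidate storage with mutable reserved-bytes bookkeeping. */
  static final class Candidate {
    final String node;
    final long capacity;
    long reserved;
    Candidate(String node, long capacity) { this.node = node; this.capacity = capacity; }
    boolean hasSpaceFor(long size) { return capacity - reserved > size; }
    void reserve(long size) { reserved += size; }
  }

  /** Shuffle candidates, skip excluded nodes, reserve space on the first fit. */
  static Candidate pick(List<Candidate> candidates, Set<String> excluded, long blockSize) {
    List<Candidate> shuffled = new ArrayList<>(candidates);
    Collections.shuffle(shuffled);
    for (Candidate c : shuffled) {
      if (!excluded.contains(c.node) && c.hasSpaceFor(blockSize)) {
        c.reserve(blockSize);
        return c;
      }
    }
    return null;
  }

  public static void main(String[] args) {
    List<Candidate> cands = List.of(
        new Candidate("dn1", 1L << 30), new Candidate("dn2", 1L << 20));
    Candidate chosen = pick(cands, Set.of("dn1"), 128L << 20);
    System.out.println(chosen == null ? "no target" : chosen.node); // "no target": dn2 is too small
  }
}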
+ */ + static final class StorageTypeNodePair { + private final StorageType storageType; + private final DatanodeInfo dn; + + StorageTypeNodePair(StorageType storageType, DatanodeInfo dn) { + this.storageType = storageType; + this.dn = dn; + } + + public DatanodeInfo getDatanodeInfo() { + return dn; + } + + public StorageType getStorageType() { + return storageType; + } + + @Override + public String toString() { + return new StringBuilder().append("StorageTypeNodePair(\n ") + .append("DatanodeInfo: ").append(dn).append(", StorageType: ") + .append(storageType).toString(); + } + } + + private EnumMap> + findTargetsForExpectedStorageTypes(List expected, + DatanodeMap liveDns) { + EnumMap> targetsMap = + new EnumMap>( + StorageType.class); + + for (StorageType storageType : expected) { + List nodes = liveDns.getTarget(storageType); + if (nodes == null) { + return targetsMap; + } + List listNodes = targetsMap + .get(storageType); + if (listNodes == null) { + listNodes = new ArrayList<>(); + targetsMap.put(storageType, listNodes); + } + + for (DatanodeWithStorage n : nodes) { + final DatanodeWithStorage.StorageDetails node = getMaxRemaining(n, + storageType); + if (node != null) { + listNodes.add(node); + } + } + } + return targetsMap; + } + + private static DatanodeWithStorage.StorageDetails getMaxRemaining( + DatanodeWithStorage node, StorageType storageType) { + long max = 0L; + DatanodeWithStorage.StorageDetails nodeInfo = null; + List storages = node + .getNodesWithStorages(storageType); + for (DatanodeWithStorage.StorageDetails n : storages) { + if (n.availableSizeToMove() > max) { + max = n.availableSizeToMove(); + nodeInfo = n; + } + } + return nodeInfo; + } + + private boolean checkSourceAndTargetTypeExists(DatanodeInfo dn, + List existingStorageTypes, + List expectedStorageTypes, DatanodeMap liveDns) { + boolean isExpectedTypeAvailable = false; + boolean isExistingTypeAvailable = false; + for (DatanodeWithStorage liveDn : liveDns.getTargets()) { + if (dn.equals(liveDn.datanode)) { + for (StorageType eachType : liveDn.getStorageTypes()) { + if (existingStorageTypes.contains(eachType)) { + isExistingTypeAvailable = true; + } + if (expectedStorageTypes.contains(eachType)) { + isExpectedTypeAvailable = true; + } + if (isExistingTypeAvailable && isExpectedTypeAvailable) { + return true; + } + } + } + } + return isExistingTypeAvailable && isExpectedTypeAvailable; + } + + /** + * Maintains storage type map with the available datanodes in the cluster. + */ + public static class DatanodeMap { + private final EnumMap> targetsMap = + new EnumMap>(StorageType.class); + + private List targets = new ArrayList<>(); + + /** + * Build datanode map with the available storage types. 
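getMaxRemaining above picks, for a given storage type on a datanode, the volume with the largest budget still available for scheduling. The same selection can be written over a simplified volume record; StorageVolume here is an illustrative stand-in, not StorageDetails:

import java.util.Comparator;
import java.util.List;
import java.util.Optional;

public class MaxRemainingSketch {
  record StorageVolume(String id, long maxSizeToMove, long scheduled) {
    long available() { return maxSizeToMove - scheduled; }
  }

  /** Pick the volume with the largest remaining budget, as getMaxRemaining does. */
  static Optional<StorageVolume> maxRemaining(List<StorageVolume> volumes) {
    return volumes.stream()
        .filter(v -> v.available() > 0)
        .max(Comparator.comparingLong(StorageVolume::available));
  }

  public static void main(String[] args) {
    List<StorageVolume> vols = List.of(
        new StorageVolume("disk1", 100, 90),
        new StorageVolume("disk2", 80, 10));
    System.out.println(maxRemaining(vols).map(StorageVolume::id).orElse("none")); // disk2
  }
}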
+ * + * @param node + * datanode + * @param storageTypes + * list of available storage types in the given datanode + * @param maxSize2Move + * available space which can be used for scheduling block move + */ + void addTarget(DatanodeInfo node, List storageTypes, + List maxSize2Move) { + DatanodeWithStorage nodeStorage = new DatanodeWithStorage(node); + targets.add(nodeStorage); + for (int i = 0; i < storageTypes.size(); i++) { + StorageType type = storageTypes.get(i); + List nodeStorages = targetsMap.get(type); + nodeStorage.addStorageType(type, maxSize2Move.get(i)); + if (nodeStorages == null) { + nodeStorages = new LinkedList<>(); + targetsMap.put(type, nodeStorages); + } + nodeStorages.add(nodeStorage); + } + } + + List getTarget(StorageType storageType) { + return targetsMap.get(storageType); + } + + public List getTargets() { + return targets; + } + + void reset() { + targetsMap.clear(); + } + } + + /** + * Keeps datanode with its respective set of supported storage types. It holds + * the available space in each volumes and will be used while pairing the + * target datanodes. + */ + public static final class DatanodeWithStorage { + private final EnumMap> storageMap = + new EnumMap>(StorageType.class); + private final DatanodeInfo datanode; + + private DatanodeWithStorage(DatanodeInfo datanode) { + this.datanode = datanode; + } + + public DatanodeInfo getDatanodeInfo() { + return datanode; + } + + Set getStorageTypes() { + return storageMap.keySet(); + } + + private void addStorageType(StorageType t, long maxSize2Move) { + List nodesWithStorages = getNodesWithStorages(t); + if (nodesWithStorages == null) { + nodesWithStorages = new LinkedList(); + storageMap.put(t, nodesWithStorages); + } + nodesWithStorages.add(new StorageDetails(maxSize2Move)); + } + + /** + * Returns datanode storages which has the given storage type. + * + * @param type + * - storage type + * @return datanodes for the given storage type + */ + private List getNodesWithStorages(StorageType type) { + return storageMap.get(type); + } + + @Override + public String toString() { + return new StringBuilder().append("DatanodeWithStorageInfo(\n ") + .append("Datanode: ").append(datanode).append(" StorageTypeNodeMap: ") + .append(storageMap).append(")").toString(); + } + + /** Storage details in a datanode storage type. */ + final class StorageDetails { + private final long maxSize2Move; + private long scheduledSize = 0L; + + private StorageDetails(long maxSize2Move) { + this.maxSize2Move = maxSize2Move; + } + + private DatanodeInfo getDatanodeInfo() { + return DatanodeWithStorage.this.datanode; + } + + /** + * Checks whether this datanode storage has sufficient space to occupy the + * given block size. + */ + private synchronized boolean hasSpaceForScheduling(long size) { + return availableSizeToMove() > size; + } + + /** + * @return the total number of bytes that need to be moved. + */ + private synchronized long availableSizeToMove() { + return maxSize2Move - scheduledSize; + } + + /** Increment scheduled size. */ + private synchronized void incScheduledSize(long size) { + scheduledSize += size; + } + + @Override + public String toString() { + return new StringBuilder().append("StorageDetails(\n ") + .append("maxSize2Move: ").append(maxSize2Move) + .append(" scheduledSize: ").append(scheduledSize).append(")") + .toString(); + } + } + } + + /** + * Receives storage movement attempt finished block report. 
+ * + * @param dnInfo + * reported datanode + * @param storageType + * - storage type + * @param block + * movement attempt finished block. + */ + @Override + public void notifyStorageMovementAttemptFinishedBlk(DatanodeInfo dnInfo, + StorageType storageType, Block block) { + storageMovementsMonitor.notifyReportedBlock(dnInfo, storageType, block); + } + + @VisibleForTesting + public BlockStorageMovementAttemptedItems getAttemptedItemsMonitor() { + return storageMovementsMonitor; + } + + /** + * Clear the queues from to be storage movement needed lists and items tracked + * in storage movement monitor. + */ + public void clearQueues() { + LOG.warn("Clearing all the queues from StoragePolicySatisfier. So, " + + "user requests on satisfying block storages would be discarded."); + storageMovementNeeded.clearAll(); + } + + /** + * This class contains information of an attempted blocks and its last + * attempted or reported time stamp. This is used by + * {@link BlockStorageMovementAttemptedItems#storageMovementAttemptedItems}. + */ + final static class AttemptedItemInfo extends ItemInfo { + private long lastAttemptedOrReportedTime; + private final Set blocks; + + /** + * AttemptedItemInfo constructor. + * + * @param rootId + * rootId for trackId + * @param trackId + * trackId for file. + * @param lastAttemptedOrReportedTime + * last attempted or reported time + * @param blocks + * scheduled blocks + * @param retryCount + * file retry count + */ + AttemptedItemInfo(long rootId, long trackId, + long lastAttemptedOrReportedTime, + Set blocks, int retryCount) { + super(rootId, trackId, retryCount); + this.lastAttemptedOrReportedTime = lastAttemptedOrReportedTime; + this.blocks = blocks; + } + + /** + * @return last attempted or reported time stamp. + */ + long getLastAttemptedOrReportedTime() { + return lastAttemptedOrReportedTime; + } + + /** + * Update lastAttemptedOrReportedTime, so that the expiration time will be + * postponed to future. + */ + void touchLastReportedTimeStamp() { + this.lastAttemptedOrReportedTime = monotonicNow(); + } + + Set getBlocks() { + return this.blocks; + } + } + + @Override + public void addFileToProcess(ItemInfo trackInfo, boolean scanCompleted) { + storageMovementNeeded.add(trackInfo, scanCompleted); + if (LOG.isDebugEnabled()) { + LOG.debug("Added track info for inode {} to block " + + "storageMovementNeeded queue", trackInfo.getFile()); + } + } + + @Override + public void addAllFilesToProcess(long startPath, List itemInfoList, + boolean scanCompleted) { + getStorageMovementQueue().addAll(startPath, itemInfoList, scanCompleted); + } + + @Override + public int processingQueueSize() { + return storageMovementNeeded.size(); + } + + @Override + public Configuration getConf() { + return conf; + } + + @VisibleForTesting + public BlockStorageMovementNeeded getStorageMovementQueue() { + return storageMovementNeeded; + } + + @Override + public void markScanCompletedForPath(long inodeId) { + getStorageMovementQueue().markScanCompletedForDir(inodeId); + } + + /** + * Join main SPS thread. + */ + public void join() throws InterruptedException { + storagePolicySatisfierThread.join(); + } + + /** + * Remove the overlap between the expected types and the existing types. + * + * @param expected + * - Expected storage types list. + * @param existing + * - Existing storage types list. + * @param ignoreNonMovable + * ignore non-movable storage types by removing them from both + * expected and existing storage type list to prevent non-movable + * storage from being moved. 
+ * @returns if the existing types or the expected types is empty after + * removing the overlap. + */ + private static boolean removeOverlapBetweenStorageTypes( + List expected, + List existing, boolean ignoreNonMovable) { + for (Iterator i = existing.iterator(); i.hasNext();) { + final StorageType t = i.next(); + if (expected.remove(t)) { + i.remove(); + } + } + if (ignoreNonMovable) { + removeNonMovable(existing); + removeNonMovable(expected); + } + return expected.isEmpty() || existing.isEmpty(); + } + + private static void removeNonMovable(List types) { + for (Iterator i = types.iterator(); i.hasNext();) { + final StorageType t = i.next(); + if (!t.isMovable()) { + i.remove(); + } + } + } + + /** + * Get DFS_SPS_WORK_MULTIPLIER_PER_ITERATION from + * configuration. + * + * @param conf Configuration + * @return Value of DFS_SPS_WORK_MULTIPLIER_PER_ITERATION + */ + private static int getSPSWorkMultiplier(Configuration conf) { + int spsWorkMultiplier = conf + .getInt( + DFSConfigKeys.DFS_SPS_WORK_MULTIPLIER_PER_ITERATION, + DFSConfigKeys.DFS_SPS_WORK_MULTIPLIER_PER_ITERATION_DEFAULT); + Preconditions.checkArgument( + (spsWorkMultiplier > 0), + DFSConfigKeys.DFS_SPS_WORK_MULTIPLIER_PER_ITERATION + + " = '" + spsWorkMultiplier + "' is invalid. " + + "It should be a positive, non-zero integer value."); + return spsWorkMultiplier; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfyManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfyManager.java new file mode 100644 index 00000000000..074eab64ba0 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/StoragePolicySatisfyManager.java @@ -0,0 +1,293 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Queue; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.namenode.Namesystem; +import org.apache.hadoop.hdfs.server.sps.ExternalStoragePolicySatisfier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * This manages satisfy storage policy invoked path ids and expose methods to + * process these path ids. It maintains sps mode(EXTERNAL/NONE) + * configured by the administrator. + * + *
+ * <p>
+ * If the configured mode is {@link StoragePolicySatisfierMode.EXTERNAL}, then + * it won't do anything, just maintains the sps invoked path ids. Administrator + * requires to start external sps service explicitly, to fetch the sps invoked + * path ids from namenode, then do necessary computations and block movement in + * order to satisfy the storage policy. Please refer + * {@link ExternalStoragePolicySatisfier} class to understand more about the + * external sps service functionality. + * + *
+ * <p>
+ * If the configured mode is {@link StoragePolicySatisfierMode.NONE}, then it + * will disable the sps feature completely by clearing all queued up sps path's + * hint. + * + * This class is instantiated by the BlockManager. + */ +public class StoragePolicySatisfyManager { + private static final Logger LOG = LoggerFactory + .getLogger(StoragePolicySatisfyManager.class); + private final StoragePolicySatisfier spsService; + private final boolean storagePolicyEnabled; + private volatile StoragePolicySatisfierMode mode; + private final Queue pathsToBeTraveresed; + private final int outstandingPathsLimit; + private final Namesystem namesystem; + + public StoragePolicySatisfyManager(Configuration conf, + Namesystem namesystem) { + // StoragePolicySatisfier(SPS) configs + storagePolicyEnabled = conf.getBoolean( + DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY, + DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT); + String modeVal = conf.get( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT); + outstandingPathsLimit = conf.getInt( + DFSConfigKeys.DFS_SPS_MAX_OUTSTANDING_PATHS_KEY, + DFSConfigKeys.DFS_SPS_MAX_OUTSTANDING_PATHS_DEFAULT); + mode = StoragePolicySatisfierMode.fromString(modeVal); + pathsToBeTraveresed = new LinkedList(); + this.namesystem = namesystem; + // instantiate SPS service by just keeps config reference and not starting + // any supporting threads. + spsService = new StoragePolicySatisfier(conf); + } + + /** + * This function will do following logic based on the configured sps mode: + * + *
+ * <p>
+ * If the configured mode is {@link StoragePolicySatisfierMode.EXTERNAL}, then + * it won't do anything. Administrator requires to start external sps service + * explicitly. + * + *
+ * <p>
+ * If the configured mode is {@link StoragePolicySatisfierMode.NONE}, then the + * service is disabled and won't do any action. + */ + public void start() { + if (!storagePolicyEnabled) { + LOG.info("Disabling StoragePolicySatisfier service as {} set to {}.", + DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY, storagePolicyEnabled); + return; + } + + switch (mode) { + case EXTERNAL: + LOG.info("Storage policy satisfier is configured as external, " + + "please start external sps service explicitly to satisfy policy"); + break; + case NONE: + LOG.info("Storage policy satisfier is disabled"); + break; + default: + LOG.info("Given mode: {} is invalid", mode); + break; + } + } + + /** + * This function will do following logic based on the configured sps mode: + * + *
+ * <p>
+ * If the configured mode is {@link StoragePolicySatisfierMode.EXTERNAL}, then + * it won't do anything. Administrator requires to stop external sps service + * explicitly, if needed. + * + *
+ * <p>
+ * If the configured mode is {@link StoragePolicySatisfierMode.NONE}, then the + * service is disabled and won't do any action. + */ + public void stop() { + if (!storagePolicyEnabled) { + if (LOG.isDebugEnabled()) { + LOG.debug("Storage policy is not enabled, ignoring"); + } + return; + } + + switch (mode) { + case EXTERNAL: + removeAllPathIds(); + if (LOG.isDebugEnabled()) { + LOG.debug( + "Storage policy satisfier service is running outside namenode" + + ", ignoring"); + } + break; + case NONE: + if (LOG.isDebugEnabled()) { + LOG.debug("Storage policy satisfier is not enabled, ignoring"); + } + break; + default: + if (LOG.isDebugEnabled()) { + LOG.debug("Invalid mode:{}, ignoring", mode); + } + break; + } + } + + /** + * Sets new sps mode. If the new mode is none, then it will disable the sps + * feature completely by clearing all queued up sps path's hint. + */ + public void changeModeEvent(StoragePolicySatisfierMode newMode) { + if (!storagePolicyEnabled) { + LOG.info("Failed to change storage policy satisfier as {} set to {}.", + DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY, storagePolicyEnabled); + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Updating SPS service status, current mode:{}, new mode:{}", + mode, newMode); + } + + switch (newMode) { + case EXTERNAL: + if (mode == newMode) { + LOG.info("Storage policy satisfier is already in mode:{}," + + " so ignoring change mode event.", newMode); + return; + } + spsService.stopGracefully(); + break; + case NONE: + if (mode == newMode) { + LOG.info("Storage policy satisfier is already disabled, mode:{}" + + " so ignoring change mode event.", newMode); + return; + } + LOG.info("Disabling StoragePolicySatisfier, mode:{}", newMode); + spsService.stop(true); + clearPathIds(); + break; + default: + if (LOG.isDebugEnabled()) { + LOG.debug("Given mode: {} is invalid", newMode); + } + break; + } + + // update sps mode + mode = newMode; + } + + /** + * @return true if the internal storage policy satisfier daemon is running, + * false otherwise. + */ + @VisibleForTesting + public boolean isSatisfierRunning() { + return spsService.isRunning(); + } + + /** + * @return the next SPS path id, on which path users has invoked to satisfy + * storages. + */ + public Long getNextPathId() { + synchronized (pathsToBeTraveresed) { + return pathsToBeTraveresed.poll(); + } + } + + /** + * Verify that satisfier queue limit exceeds allowed outstanding limit. + */ + public void verifyOutstandingPathQLimit() throws IOException { + long size = pathsToBeTraveresed.size(); + // Checking that the SPS call Q exceeds the allowed limit. + if (outstandingPathsLimit - size <= 0) { + LOG.debug("Satisifer Q - outstanding limit:{}, current size:{}", + outstandingPathsLimit, size); + throw new IOException("Outstanding satisfier queue limit: " + + outstandingPathsLimit + " exceeded, try later!"); + } + } + + /** + * Removes the SPS path id from the list of sps paths. + * + * @throws IOException + */ + private void clearPathIds(){ + synchronized (pathsToBeTraveresed) { + Iterator iterator = pathsToBeTraveresed.iterator(); + while (iterator.hasNext()) { + Long trackId = iterator.next(); + try { + namesystem.removeXattr(trackId, + HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY); + } catch (IOException e) { + LOG.debug("Failed to remove sps xatttr!", e); + } + iterator.remove(); + } + } + } + + /** + * Clean up all sps path ids. 
+ */ + public void removeAllPathIds() { + synchronized (pathsToBeTraveresed) { + pathsToBeTraveresed.clear(); + } + } + + /** + * Adds the sps path to SPSPathIds list. + */ + public void addPathId(long id) { + synchronized (pathsToBeTraveresed) { + pathsToBeTraveresed.add(id); + } + } + + /** + * @return true if sps is configured as an external + * service, false otherwise. + */ + public boolean isEnabled() { + return mode == StoragePolicySatisfierMode.EXTERNAL; + } + + /** + * @return sps service mode. + */ + public StoragePolicySatisfierMode getMode() { + return mode; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/package-info.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/package-info.java new file mode 100644 index 00000000000..d1d69fb388b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/sps/package-info.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides a mechanism for satisfying the storage policy of a + * path. 
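The manager above keeps user-invoked path ids in a plain queue guarded by synchronized blocks, hands them out with poll(), and rejects new satisfier calls once the backlog reaches the configured limit. A compact stand-alone sketch of that pattern (PathQueueSketch and its limit are illustrative, not the HDFS class):

import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;

public class PathQueueSketch {
  private final Queue<Long> pathsToProcess = new LinkedList<>();
  private final int outstandingLimit;

  PathQueueSketch(int outstandingLimit) { this.outstandingLimit = outstandingLimit; }

  /** Reject new satisfy-storage-policy calls once the backlog is full. */
  void verifyLimit() throws IOException {
    synchronized (pathsToProcess) {
      if (pathsToProcess.size() >= outstandingLimit) {
        throw new IOException("Outstanding satisfier queue limit: "
            + outstandingLimit + " exceeded, try later!");
      }
    }
  }

  void add(long inodeId) {
    synchronized (pathsToProcess) {
      pathsToProcess.add(inodeId);
    }
  }

  /** @return the next queued path id, or null when nothing is pending. */
  Long next() {
    synchronized (pathsToProcess) {
      return pathsToProcess.poll();
    }
  }

  public static void main(String[] args) throws IOException {
    PathQueueSketch q = new PathQueueSketch(2);
    q.verifyLimit();
    q.add(16386L);
    q.add(16387L);
    System.out.println(q.next()); // 16386
  }
}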
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.hdfs.server.namenode.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index a8ab798299d..619d439fbaa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -56,8 +56,8 @@ import javax.ws.rs.core.Response.ResponseBuilder; import javax.ws.rs.core.Response.Status; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileEncryptionInfo; @@ -117,7 +117,8 @@ @Path("") @ResourceFilters(ParamFilter.class) public class NamenodeWebHdfsMethods { - public static final Log LOG = LogFactory.getLog(NamenodeWebHdfsMethods.class); + public static final Logger LOG = + LoggerFactory.getLogger(NamenodeWebHdfsMethods.class); private static final UriFsPathParam ROOT = new UriFsPathParam(""); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockStorageMovementCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockStorageMovementCommand.java new file mode 100644 index 00000000000..e90317da1a8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlockStorageMovementCommand.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.protocol; + +import java.util.Collection; + +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; + +/** + * A BlockStorageMovementCommand is an instruction to a DataNode to move the + * given set of blocks to specified target DataNodes to fulfill the block + * storage policy. + * + * Upon receiving this command, this DataNode pass the array of block movement + * details to + * {@link org.apache.hadoop.hdfs.server.datanode.StoragePolicySatisfyWorker} + * service. Later, StoragePolicySatisfyWorker will schedule block movement tasks + * for these blocks and monitors the completion of each task. 
After the block + * movement attempt is finished(with success or failure) this DataNode will send + * response back to NameNode about the block movement attempt finished details. + */ +public class BlockStorageMovementCommand extends DatanodeCommand { + private final String blockPoolId; + private final Collection blockMovingTasks; + + /** + * Block storage movement command constructor. + * + * @param action + * protocol specific action + * @param blockMovingInfos + * block to storage info that will be used for movement + */ + public BlockStorageMovementCommand(int action, String blockPoolId, + Collection blockMovingInfos) { + super(action); + this.blockPoolId = blockPoolId; + this.blockMovingTasks = blockMovingInfos; + } + + /** + * Returns block pool ID. + */ + public String getBlockPoolId() { + return blockPoolId; + } + + /** + * Returns the list of blocks to be moved. + */ + public Collection getBlockMovingTasks() { + return blockMovingTasks; + } + + /** + * Stores block to storage info that can be used for block movement. + */ + public static class BlockMovingInfo { + private Block blk; + private DatanodeInfo sourceNode; + private DatanodeInfo targetNode; + private StorageType sourceStorageType; + private StorageType targetStorageType; + + /** + * Block to storage info constructor. + * + * @param block + * block info + * @param sourceDnInfo + * node that can be the source of a block move + * @param srcStorageType + * type of source storage media + */ + public BlockMovingInfo(Block block, DatanodeInfo sourceDnInfo, + DatanodeInfo targetDnInfo, StorageType srcStorageType, + StorageType targetStorageType) { + this.blk = block; + this.sourceNode = sourceDnInfo; + this.targetNode = targetDnInfo; + this.sourceStorageType = srcStorageType; + this.targetStorageType = targetStorageType; + } + + public void addBlock(Block block) { + this.blk = block; + } + + public Block getBlock() { + return blk; + } + + public DatanodeInfo getSource() { + return sourceNode; + } + + public DatanodeInfo getTarget() { + return targetNode; + } + + public StorageType getTargetStorageType() { + return targetStorageType; + } + + public StorageType getSourceStorageType() { + return sourceStorageType; + } + + @Override + public String toString() { + return new StringBuilder().append("BlockMovingInfo(\n ") + .append("Moving block: ").append(blk).append(" From: ") + .append(sourceNode).append(" To: [").append(targetNode).append("\n ") + .append(" sourceStorageType: ").append(sourceStorageType) + .append(" targetStorageType: ").append(targetStorageType).append(")") + .toString(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksStorageMoveAttemptFinished.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksStorageMoveAttemptFinished.java new file mode 100644 index 00000000000..c837e013ac6 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/BlocksStorageMoveAttemptFinished.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.protocol; + +import java.util.Arrays; + +import org.apache.hadoop.hdfs.protocol.Block; + +/** + * This class represents, the blocks for which storage movements has done by + * datanodes. The movementFinishedBlocks array contains all the blocks that are + * attempted to do the movement and it could be finished with either success or + * failure. + */ +public class BlocksStorageMoveAttemptFinished { + + private final Block[] movementFinishedBlocks; + + public BlocksStorageMoveAttemptFinished(Block[] moveAttemptFinishedBlocks) { + this.movementFinishedBlocks = moveAttemptFinishedBlocks; + } + + public Block[] getBlocks() { + return movementFinishedBlocks; + } + + @Override + public String toString() { + return new StringBuilder().append("BlocksStorageMovementFinished(\n ") + .append(" blockID: ").append(Arrays.toString(movementFinishedBlocks)) + .append(")").toString(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java index 1f55100af98..311b68f6248 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java @@ -79,6 +79,8 @@ final static int DNA_CACHE = 9; // cache blocks final static int DNA_UNCACHE = 10; // uncache blocks final static int DNA_ERASURE_CODING_RECONSTRUCTION = 11; // erasure coding reconstruction command + int DNA_BLOCK_STORAGE_MOVEMENT = 12; // block storage movement command + int DNA_DROP_SPS_WORK_COMMAND = 13; // drop sps work command /** * Register Datanode. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DropSPSWorkCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DropSPSWorkCommand.java new file mode 100644 index 00000000000..806f713fbb8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DropSPSWorkCommand.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.protocol; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * A DropSPSWorkCommand is an instruction to a datanode to drop the SPSWorker's + * pending block storage movement queues. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class DropSPSWorkCommand extends DatanodeCommand { + public static final DropSPSWorkCommand DNA_DROP_SPS_WORK_COMMAND = + new DropSPSWorkCommand(); + + public DropSPSWorkCommand() { + super(DatanodeProtocol.DNA_DROP_SPS_WORK_COMMAND); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java index 0c8adc639a2..f80477b7f50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java @@ -31,7 +31,8 @@ /***************************************************************************** * Protocol that a secondary NameNode uses to communicate with the NameNode. - * It's used to get part of the name node state + * Also used by external storage policy satisfier. It's used to get part of the + * name node state *****************************************************************************/ @KerberosInfo( serverPrincipal = DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY) @@ -202,5 +203,12 @@ public RemoteEditLogManifest getEditLogManifest(long sinceTxId) */ @Idempotent boolean isRollingUpgrade() throws IOException; + + /** + * @return Gets the next available sps path, otherwise null. This API used + * by External SPS. + */ + @AtMostOnce + Long getNextSPSPath() throws IOException; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSBlockMoveTaskHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSBlockMoveTaskHandler.java new file mode 100644 index 00000000000..3ea02947106 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSBlockMoveTaskHandler.java @@ -0,0 +1,223 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.server.sps; + +import java.io.IOException; +import java.net.Socket; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtilClient; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver; +import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil; +import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferClient; +import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.server.balancer.KeyManager; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.common.sps.BlockDispatcher; +import org.apache.hadoop.hdfs.server.common.sps.BlockMovementAttemptFinished; +import org.apache.hadoop.hdfs.server.common.sps.BlockMovementStatus; +import org.apache.hadoop.hdfs.server.common.sps.BlockStorageMovementTracker; +import org.apache.hadoop.hdfs.server.common.sps.BlocksMovementsStatusHandler; +import org.apache.hadoop.hdfs.server.namenode.sps.BlockMoveTaskHandler; +import org.apache.hadoop.hdfs.server.namenode.sps.SPSService; +import org.apache.hadoop.hdfs.server.protocol.BlockStorageMovementCommand.BlockMovingInfo; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.Daemon; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class handles the external SPS block movements. This will move the + * given block to a target datanode by directly establishing socket connection + * to it and invokes function + * {@link Sender#replaceBlock(ExtendedBlock, StorageType, Token, String, + * DatanodeInfo, String)}. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class ExternalSPSBlockMoveTaskHandler implements BlockMoveTaskHandler { + private static final Logger LOG = LoggerFactory + .getLogger(ExternalSPSBlockMoveTaskHandler.class); + + private final ExecutorService moveExecutor; + private final CompletionService mCompletionServ; + private final NameNodeConnector nnc; + private final SaslDataTransferClient saslClient; + private final BlockStorageMovementTracker blkMovementTracker; + private Daemon movementTrackerThread; + private final SPSService service; + private final BlockDispatcher blkDispatcher; + + public ExternalSPSBlockMoveTaskHandler(Configuration conf, + NameNodeConnector nnc, SPSService spsService) { + int moverThreads = conf.getInt(DFSConfigKeys.DFS_MOVER_MOVERTHREADS_KEY, + DFSConfigKeys.DFS_MOVER_MOVERTHREADS_DEFAULT); + moveExecutor = initializeBlockMoverThreadPool(moverThreads); + mCompletionServ = new ExecutorCompletionService<>(moveExecutor); + this.nnc = nnc; + this.saslClient = new SaslDataTransferClient(conf, + DataTransferSaslUtil.getSaslPropertiesResolver(conf), + TrustedChannelResolver.getInstance(conf), + nnc.getFallbackToSimpleAuth()); + this.blkMovementTracker = new BlockStorageMovementTracker( + mCompletionServ, new ExternalBlocksMovementsStatusHandler()); + this.service = spsService; + + boolean connectToDnViaHostname = conf.getBoolean( + HdfsClientConfigKeys.DFS_CLIENT_USE_DN_HOSTNAME, + HdfsClientConfigKeys.DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT); + int ioFileBufferSize = DFSUtilClient.getIoFileBufferSize(conf); + blkDispatcher = new BlockDispatcher(HdfsConstants.READ_TIMEOUT, + ioFileBufferSize, connectToDnViaHostname); + + startMovementTracker(); + } + + /** + * Initializes block movement tracker daemon and starts the thread. + */ + private void startMovementTracker() { + movementTrackerThread = new Daemon(this.blkMovementTracker); + movementTrackerThread.setName("BlockStorageMovementTracker"); + movementTrackerThread.start(); + } + + private ThreadPoolExecutor initializeBlockMoverThreadPool(int num) { + LOG.debug("Block mover to satisfy storage policy; pool threads={}", num); + + ThreadPoolExecutor moverThreadPool = new ThreadPoolExecutor(1, num, 60, + TimeUnit.SECONDS, new SynchronousQueue(), + new Daemon.DaemonFactory() { + private final AtomicInteger threadIndex = new AtomicInteger(0); + + @Override + public Thread newThread(Runnable r) { + Thread t = super.newThread(r); + t.setName("BlockMoverTask-" + threadIndex.getAndIncrement()); + return t; + } + }, new ThreadPoolExecutor.CallerRunsPolicy() { + @Override + public void rejectedExecution(Runnable runnable, + ThreadPoolExecutor e) { + LOG.info("Execution for block movement to satisfy storage policy" + + " got rejected, Executing in current thread"); + // will run in the current thread. + super.rejectedExecution(runnable, e); + } + }); + + moverThreadPool.allowCoreThreadTimeOut(true); + return moverThreadPool; + } + + @Override + public void submitMoveTask(BlockMovingInfo blkMovingInfo) throws IOException { + // TODO: Need to increment scheduled block size on the target node. This + // count will be used to calculate the remaining space of target datanode + // during block movement assignment logic. 
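initializeBlockMoverThreadPool above builds a bounded pool of daemon threads over a SynchronousQueue and, when every thread is busy, runs the rejected move in the submitting thread. A JDK-only sketch of an equivalent configuration, using a plain ThreadFactory in place of Hadoop's Daemon.DaemonFactory:

import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

public class MoverPoolSketch {
  static ThreadPoolExecutor newMoverPool(int maxThreads) {
    ThreadFactory factory = new ThreadFactory() {
      private final AtomicInteger index = new AtomicInteger(0);
      @Override
      public Thread newThread(Runnable r) {
        Thread t = new Thread(r, "BlockMoverTask-" + index.getAndIncrement());
        t.setDaemon(true); // mover threads should not block JVM shutdown
        return t;
      }
    };
    ThreadPoolExecutor pool = new ThreadPoolExecutor(1, maxThreads, 60, TimeUnit.SECONDS,
        new SynchronousQueue<>(), factory,
        // When all threads are busy, run the move in the caller's thread
        // instead of queueing it, mirroring the CallerRunsPolicy used above.
        new ThreadPoolExecutor.CallerRunsPolicy());
    pool.allowCoreThreadTimeOut(true);
    return pool;
  }

  public static void main(String[] args) throws InterruptedException {
    ThreadPoolExecutor pool = newMoverPool(4);
    pool.execute(() ->
        System.out.println("moving block on " + Thread.currentThread().getName()));
    pool.shutdown();
    pool.awaitTermination(5, TimeUnit.SECONDS);
  }
}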
In the internal movement, + // remaining space is bookkeeping at the DatanodeDescriptor, please refer + // IntraSPSNameNodeBlockMoveTaskHandler#submitMoveTask implementation and + // updating via the funcation call - + // dn.incrementBlocksScheduled(blkMovingInfo.getTargetStorageType()); + LOG.debug("Received BlockMovingTask {}", blkMovingInfo); + BlockMovingTask blockMovingTask = new BlockMovingTask(blkMovingInfo); + mCompletionServ.submit(blockMovingTask); + } + + private class ExternalBlocksMovementsStatusHandler + implements BlocksMovementsStatusHandler { + @Override + public void handle(BlockMovementAttemptFinished attemptedMove) { + service.notifyStorageMovementAttemptFinishedBlk( + attemptedMove.getTargetDatanode(), attemptedMove.getTargetType(), + attemptedMove.getBlock()); + } + } + + /** + * This class encapsulates the process of moving the block replica to the + * given target. + */ + private class BlockMovingTask + implements Callable { + private final BlockMovingInfo blkMovingInfo; + + BlockMovingTask(BlockMovingInfo blkMovingInfo) { + this.blkMovingInfo = blkMovingInfo; + } + + @Override + public BlockMovementAttemptFinished call() { + BlockMovementStatus blkMovementStatus = moveBlock(); + return new BlockMovementAttemptFinished(blkMovingInfo.getBlock(), + blkMovingInfo.getSource(), blkMovingInfo.getTarget(), + blkMovingInfo.getTargetStorageType(), + blkMovementStatus); + } + + private BlockMovementStatus moveBlock() { + ExtendedBlock eb = new ExtendedBlock(nnc.getBlockpoolID(), + blkMovingInfo.getBlock()); + + final KeyManager km = nnc.getKeyManager(); + Token accessToken; + try { + accessToken = km.getAccessToken(eb, + new StorageType[]{blkMovingInfo.getTargetStorageType()}, + new String[0]); + } catch (IOException e) { + // TODO: handle failure retries + LOG.warn( + "Failed to move block:{} from src:{} to destin:{} to satisfy " + + "storageType:{}", + blkMovingInfo.getBlock(), blkMovingInfo.getSource(), + blkMovingInfo.getTarget(), blkMovingInfo.getTargetStorageType(), e); + return BlockMovementStatus.DN_BLK_STORAGE_MOVEMENT_FAILURE; + } + return blkDispatcher.moveBlock(blkMovingInfo, saslClient, eb, + new Socket(), km, accessToken); + } + } + + /** + * Cleanup the resources. + */ + void cleanUp() { + blkMovementTracker.stopTracking(); + if (movementTrackerThread != null) { + movementTrackerThread.interrupt(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSContext.java new file mode 100644 index 00000000000..8427e93a709 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSContext.java @@ -0,0 +1,211 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
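The handler above submits each BlockMovingTask to an ExecutorCompletionService while a separate tracker thread consumes whichever attempt finishes first. A small JDK-only illustration of that submit/track split, with a String result standing in for BlockMovementAttemptFinished:

import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class CompletionServiceSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService executor = Executors.newFixedThreadPool(2);
    CompletionService<String> completion = new ExecutorCompletionService<>(executor);

    // Submit "block move" tasks; each returns an attempt-finished result.
    for (int i = 0; i < 3; i++) {
      final int blockId = i;
      completion.submit((Callable<String>) () -> "block-" + blockId + ": SUCCESS");
    }

    // A tracker loop takes results in completion order, much like the
    // BlockStorageMovementTracker draining the completion service above.
    for (int i = 0; i < 3; i++) {
      System.out.println(completion.take().get());
    }
    executor.shutdown();
  }
}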
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.sps; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSUtilClient; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.namenode.sps.BlockMoveTaskHandler; +import org.apache.hadoop.hdfs.server.namenode.sps.BlockMovementListener; +import org.apache.hadoop.hdfs.server.namenode.sps.Context; +import org.apache.hadoop.hdfs.server.namenode.sps.FileCollector; +import org.apache.hadoop.hdfs.server.namenode.sps.SPSService; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier.DatanodeMap; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier.DatanodeWithStorage; +import org.apache.hadoop.hdfs.server.protocol.BlockStorageMovementCommand.BlockMovingInfo; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; +import org.apache.hadoop.net.NetworkTopology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class used to connect to Namenode and gets the required information to + * SPS from Namenode state. + */ +@InterfaceAudience.Private +public class ExternalSPSContext implements Context { + public static final Logger LOG = LoggerFactory + .getLogger(ExternalSPSContext.class); + private final SPSService service; + private final NameNodeConnector nnc; + private final BlockStoragePolicySuite createDefaultSuite = + BlockStoragePolicySuite.createDefaultSuite(); + private final FileCollector fileCollector; + private final BlockMoveTaskHandler externalHandler; + private final BlockMovementListener blkMovementListener; + + public ExternalSPSContext(SPSService service, NameNodeConnector nnc) { + this.service = service; + this.nnc = nnc; + this.fileCollector = new ExternalSPSFilePathCollector(service); + this.externalHandler = new ExternalSPSBlockMoveTaskHandler( + service.getConf(), nnc, service); + this.blkMovementListener = new ExternalBlockMovementListener(); + } + + @Override + public boolean isRunning() { + return service.isRunning(); + } + + @Override + public boolean isInSafeMode() { + try { + return nnc != null ? nnc.getDistributedFileSystem().isInSafeMode() + : false; + } catch (IOException e) { + LOG.warn("Exception while creating Namenode Connector..", e); + return false; + } + } + + @Override + public NetworkTopology getNetworkTopology(DatanodeMap datanodeMap) { + // create network topology. 
+ NetworkTopology cluster = NetworkTopology.getInstance(service.getConf()); + List targets = datanodeMap.getTargets(); + for (DatanodeWithStorage node : targets) { + cluster.add(node.getDatanodeInfo()); + } + return cluster; + } + + @Override + public boolean isFileExist(long path) { + Path filePath = DFSUtilClient.makePathFromFileId(path); + try { + return nnc.getDistributedFileSystem().exists(filePath); + } catch (IllegalArgumentException | IOException e) { + LOG.warn("Exception while getting file is for the given path:{}", + filePath, e); + } + return false; + } + + @Override + public BlockStoragePolicy getStoragePolicy(byte policyId) { + return createDefaultSuite.getPolicy(policyId); + } + + @Override + public void removeSPSHint(long inodeId) throws IOException { + Path filePath = DFSUtilClient.makePathFromFileId(inodeId); + try { + nnc.getDistributedFileSystem().removeXAttr(filePath, + HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY); + } catch (IOException e) { + List listXAttrs = nnc.getDistributedFileSystem() + .listXAttrs(filePath); + if (!listXAttrs + .contains(HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY)) { + LOG.info("SPS hint already removed for the inodeId:{}." + + " Ignoring exception:{}", inodeId, e.getMessage()); + } + } + } + + @Override + public int getNumLiveDataNodes() { + try { + return nnc.getDistributedFileSystem() + .getDataNodeStats(DatanodeReportType.LIVE).length; + } catch (IOException e) { + LOG.warn("Exception while getting number of live datanodes.", e); + } + return 0; + } + + @Override + public HdfsFileStatus getFileInfo(long path) throws IOException { + HdfsLocatedFileStatus fileInfo = null; + try { + Path filePath = DFSUtilClient.makePathFromFileId(path); + fileInfo = nnc.getDistributedFileSystem().getClient() + .getLocatedFileInfo(filePath.toString(), false); + } catch (FileNotFoundException e) { + LOG.debug("Path:{} doesn't exists!", path, e); + } + return fileInfo; + } + + @Override + public DatanodeStorageReport[] getLiveDatanodeStorageReport() + throws IOException { + return nnc.getLiveDatanodeStorageReport(); + } + + @Override + public Long getNextSPSPath() { + try { + return nnc.getNNProtocolConnection().getNextSPSPath(); + } catch (IOException e) { + LOG.warn("Exception while getting next sps path id from Namenode.", e); + return null; + } + } + + @Override + public void scanAndCollectFiles(long path) + throws IOException, InterruptedException { + fileCollector.scanAndCollectFiles(path); + } + + @Override + public void submitMoveTask(BlockMovingInfo blkMovingInfo) throws IOException { + externalHandler.submitMoveTask(blkMovingInfo); + } + + @Override + public void notifyMovementTriedBlocks(Block[] moveAttemptFinishedBlks) { + // External listener if it is plugged-in + if (blkMovementListener != null) { + blkMovementListener.notifyMovementTriedBlocks(moveAttemptFinishedBlks); + } + } + + /** + * Its an implementation of BlockMovementListener. 
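Editor's note: these Context callbacks are essentially the whole surface the satisfier needs from the NameNode. A hypothetical driver fragment, not part of the patch, showing how they are meant to be combined when a queued path is picked up:

    import java.io.IOException;
    import org.apache.hadoop.hdfs.server.namenode.sps.Context;

    // Hypothetical helper; the real processing loop lives inside the satisfier.
    class ContextDriverSketch {
      private final Context ctx;

      ContextDriverSketch(Context ctx) {
        this.ctx = ctx;
      }

      void pollOnce() throws IOException, InterruptedException {
        Long inodeId = ctx.getNextSPSPath();   // ask the NameNode for the next queued path
        if (inodeId == null) {
          return;                              // nothing queued right now
        }
        if (ctx.isFileExist(inodeId)) {
          ctx.scanAndCollectFiles(inodeId);    // enqueue the file(s) under this inode
        } else {
          ctx.removeSPSHint(inodeId);          // the path is gone; drop the xattr hint
        }
      }
    }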
+ */ + private static class ExternalBlockMovementListener + implements BlockMovementListener { + + private List actualBlockMovements = new ArrayList<>(); + + @Override + public void notifyMovementTriedBlocks(Block[] moveAttemptFinishedBlks) { + for (Block block : moveAttemptFinishedBlks) { + actualBlockMovements.add(block); + } + LOG.info("Movement attempted blocks", actualBlockMovements); + } + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSFilePathCollector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSFilePathCollector.java new file mode 100644 index 00000000000..611ff659a5d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalSPSFilePathCollector.java @@ -0,0 +1,174 @@ +package org.apache.hadoop.hdfs.server.sps; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtilClient; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.protocol.DirectoryListing; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.server.namenode.sps.FileCollector; +import org.apache.hadoop.hdfs.server.namenode.sps.ItemInfo; +import org.apache.hadoop.hdfs.server.namenode.sps.SPSService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is to scan the paths recursively. If file is directory, then it + * will scan for files recursively. If the file is non directory, then it will + * just submit the same file to process. This will use file string path + * representation. + */ +@InterfaceAudience.Private +public class ExternalSPSFilePathCollector implements FileCollector { + public static final Logger LOG = + LoggerFactory.getLogger(ExternalSPSFilePathCollector.class); + private DistributedFileSystem dfs; + private SPSService service; + private int maxQueueLimitToScan; + + public ExternalSPSFilePathCollector(SPSService service) { + this.service = service; + this.maxQueueLimitToScan = service.getConf().getInt( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_QUEUE_LIMIT_KEY, + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_QUEUE_LIMIT_DEFAULT); + try { + // TODO: probably we could get this dfs from external context? but this is + // too specific to external. + dfs = getFS(service.getConf()); + } catch (IOException e) { + LOG.error("Unable to get the filesystem. 
Make sure Namenode running and " + + "configured namenode address is correct.", e); + } + } + + private DistributedFileSystem getFS(Configuration conf) throws IOException { + return (DistributedFileSystem) FileSystem + .get(FileSystem.getDefaultUri(conf), conf); + } + + /** + * Recursively scan the given path and add the file info to SPS service for + * processing. + */ + private long processPath(Long startID, String childPath) { + long pendingWorkCount = 0; // to be satisfied file counter + for (byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;;) { + final DirectoryListing children; + try { + children = dfs.getClient().listPaths(childPath, + lastReturnedName, false); + } catch (IOException e) { + LOG.warn("Failed to list directory " + childPath + + ". Ignore the directory and continue.", e); + return pendingWorkCount; + } + if (children == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("The scanning start dir/sub dir " + childPath + + " does not have childrens."); + } + return pendingWorkCount; + } + + for (HdfsFileStatus child : children.getPartialListing()) { + if (child.isFile()) { + service.addFileToProcess(new ItemInfo(startID, child.getFileId()), + false); + checkProcessingQueuesFree(); + pendingWorkCount++; // increment to be satisfied file count + } else { + String childFullPathName = child.getFullName(childPath); + if (child.isDirectory()) { + if (!childFullPathName.endsWith(Path.SEPARATOR)) { + childFullPathName = childFullPathName + Path.SEPARATOR; + } + pendingWorkCount += processPath(startID, childFullPathName); + } + } + } + + if (children.hasMore()) { + lastReturnedName = children.getLastName(); + } else { + return pendingWorkCount; + } + } + } + + private void checkProcessingQueuesFree() { + int remainingCapacity = remainingCapacity(); + // wait for queue to be free + while (remainingCapacity <= 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("Waiting for storageMovementNeeded queue to be free!"); + } + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + remainingCapacity = remainingCapacity(); + } + } + + /** + * Returns queue remaining capacity. + */ + public int remainingCapacity() { + int size = service.processingQueueSize(); + int remainingSize = 0; + if (size < maxQueueLimitToScan) { + remainingSize = maxQueueLimitToScan - size; + } + if (LOG.isDebugEnabled()) { + LOG.debug("SPS processing Q -> maximum capacity:{}, current size:{}," + + " remaining size:{}", maxQueueLimitToScan, size, remainingSize); + } + return remainingSize; + } + + @Override + public void scanAndCollectFiles(long pathId) throws IOException { + if (dfs == null) { + dfs = getFS(service.getConf()); + } + Path filePath = DFSUtilClient.makePathFromFileId(pathId); + long pendingSatisfyItemsCount = processPath(pathId, filePath.toString()); + // Check whether the given path contains any item to be tracked + // or the no to be satisfied paths. In case of empty list, add the given + // inodeId to the 'pendingWorkForDirectory' with empty list so that later + // SPSPathIdProcessor#run function will remove the SPS hint considering that + // this path is already satisfied the storage policy. 
+ if (pendingSatisfyItemsCount <= 0) { + LOG.debug("There is no pending items to satisfy the given path " + + "inodeId:{}", pathId); + service.addAllFilesToProcess(pathId, new ArrayList<>(), true); + } else { + service.markScanCompletedForPath(pathId); + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalStoragePolicySatisfier.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalStoragePolicySatisfier.java new file mode 100644 index 00000000000..15cdc6eb47b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/ExternalStoragePolicySatisfier.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.sps; + +import static org.apache.hadoop.util.ExitUtil.terminate; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class starts and runs external SPS service. + */ +@InterfaceAudience.Private +public final class ExternalStoragePolicySatisfier { + public static final Logger LOG = LoggerFactory + .getLogger(ExternalStoragePolicySatisfier.class); + + private ExternalStoragePolicySatisfier() { + // This is just a class to start and run external sps. + } + + /** + * Main method to start SPS service. 
+ */ + public static void main(String[] args) throws Exception { + NameNodeConnector nnc = null; + try { + StringUtils.startupShutdownMessage(StoragePolicySatisfier.class, args, + LOG); + HdfsConfiguration spsConf = new HdfsConfiguration(); + // login with SPS keytab + secureLogin(spsConf); + StoragePolicySatisfier sps = new StoragePolicySatisfier(spsConf); + nnc = getNameNodeConnector(spsConf); + + ExternalSPSContext context = new ExternalSPSContext(sps, nnc); + sps.init(context); + sps.start(StoragePolicySatisfierMode.EXTERNAL); + if (sps != null) { + sps.join(); + } + } catch (Throwable e) { + LOG.error("Failed to start storage policy satisfier.", e); + terminate(1, e); + } finally { + if (nnc != null) { + nnc.close(); + } + } + } + + private static void secureLogin(Configuration conf) + throws IOException { + UserGroupInformation.setConfiguration(conf); + String addr = conf.get(DFSConfigKeys.DFS_SPS_ADDRESS_KEY, + DFSConfigKeys.DFS_SPS_ADDRESS_DEFAULT); + InetSocketAddress socAddr = NetUtils.createSocketAddr(addr, 0, + DFSConfigKeys.DFS_SPS_ADDRESS_KEY); + SecurityUtil.login(conf, DFSConfigKeys.DFS_SPS_KEYTAB_FILE_KEY, + DFSConfigKeys.DFS_SPS_KERBEROS_PRINCIPAL_KEY, + socAddr.getHostName()); + } + + private static NameNodeConnector getNameNodeConnector(Configuration conf) + throws IOException, InterruptedException { + final Collection namenodes = DFSUtil.getInternalNsRpcUris(conf); + final Path externalSPSPathId = HdfsServerConstants.MOVER_ID_PATH; + while (true) { + try { + final List nncs = NameNodeConnector + .newNameNodeConnectors(namenodes, + ExternalStoragePolicySatisfier.class.getSimpleName(), + externalSPSPathId, conf, + NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS); + return nncs.get(0); + } catch (IOException e) { + LOG.warn("Failed to connect with namenode", e); + Thread.sleep(3000); // retry the connection after few secs + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/package-info.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/package-info.java new file mode 100644 index 00000000000..f705df2ba10 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/sps/package-info.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides a mechanism for satisfying the storage policy of a + * path. 
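Editor's note: secureLogin() above only works if the satisfier keys are present in the configuration the process is handed. An illustrative snippet setting them programmatically; the key names come from the hdfs-default.xml additions later in this patch, while the keytab path and realm are made-up examples:

    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class ExternalSpsConfSketch {
      public static void main(String[] args) {
        HdfsConfiguration conf = new HdfsConfiguration();
        conf.set("dfs.storage.policy.satisfier.mode", "external");
        conf.set("dfs.storage.policy.satisfier.address", "0.0.0.0:0");
        conf.set("dfs.storage.policy.satisfier.keytab.file",
            "/etc/security/keytabs/sps.service.keytab");      // example path only
        conf.set("dfs.storage.policy.satisfier.kerberos.principal",
            "satisfier/_HOST@EXAMPLE.COM");                   // example principal only
        System.out.println("SPS mode = "
            + conf.get("dfs.storage.policy.satisfier.mode"));
      }
    }

In a real deployment these values would normally live in hdfs-site.xml rather than be set in code.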
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.hdfs.server.sps; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java index 9781ea14dcf..9e7a3cb9c75 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java @@ -22,7 +22,7 @@ import java.util.LinkedList; import java.util.List; -import org.apache.commons.lang3.text.WordUtils; +import org.apache.commons.text.WordUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index 99a8e3e7886..aa67e72dd11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -40,8 +40,8 @@ import com.google.common.base.Joiner; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.ReconfigurationTaskStatus; @@ -114,7 +114,7 @@ HdfsConfiguration.init(); } - private static final Log LOG = LogFactory.getLog(DFSAdmin.class); + private static final Logger LOG = LoggerFactory.getLogger(DFSAdmin.class); /** * An abstract class for the execution of a file system command @@ -549,6 +549,11 @@ public void report(String[] argv, int i) throws IOException { replicatedBlockStats.getMissingReplicaBlocks()); System.out.println("\tMissing blocks (with replication factor 1): " + replicatedBlockStats.getMissingReplicationOneBlocks()); + if (replicatedBlockStats.hasHighestPriorityLowRedundancyBlocks()) { + System.out.println("\tLow redundancy blocks with highest priority " + + "to recover: " + + replicatedBlockStats.getHighestPriorityLowRedundancyBlocks()); + } System.out.println("\tPending deletion blocks: " + replicatedBlockStats.getPendingDeletionBlocks()); @@ -561,6 +566,11 @@ public void report(String[] argv, int i) throws IOException { ecBlockGroupStats.getCorruptBlockGroups()); System.out.println("\tMissing block groups: " + ecBlockGroupStats.getMissingBlockGroups()); + if (ecBlockGroupStats.hasHighestPriorityLowRedundancyBlocks()) { + System.out.println("\tLow redundancy blocks with highest priority " + + "to recover: " + + ecBlockGroupStats.getHighestPriorityLowRedundancyBlocks()); + } System.out.println("\tPending deletion blocks: " + ecBlockGroupStats.getPendingDeletionBlocks()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java index 4c0ddb29859..71a66d47e11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java @@ -21,8 +21,8 @@ import java.util.Arrays; import java.util.Collection; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ha.HAAdmin; @@ -38,7 +38,7 @@ */ public class DFSHAAdmin extends HAAdmin { - private static final Log LOG = LogFactory.getLog(DFSHAAdmin.class); + private static final Logger LOG = LoggerFactory.getLogger(DFSHAAdmin.class); private String nameserviceId; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java index e0a4f70f301..5ae535a2c4f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java @@ -30,8 +30,8 @@ import java.util.List; import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -61,8 +61,8 @@ @InterfaceAudience.Private public class DFSZKFailoverController extends ZKFailoverController { - private static final Log LOG = - LogFactory.getLog(DFSZKFailoverController.class); + private static final Logger LOG = + LoggerFactory.getLogger(DFSZKFailoverController.class); private final AccessControlList adminAcl; /* the same as superclass's localTarget, but with the more specfic NN type */ private final NNHAServiceTarget localNNTarget; @@ -194,7 +194,7 @@ public static void main(String args[]) try { System.exit(zkfc.run(parser.getRemainingArgs())); } catch (Throwable t) { - LOG.fatal("DFSZKFailOverController exiting due to earlier exception " + LOG.error("DFSZKFailOverController exiting due to earlier exception " + t); terminate(1, t); } @@ -243,7 +243,7 @@ private void getLocalNNThreadDump() { new StringBuilder("-- Local NN thread dump -- \n"); localNNThreadDumpContent.append(out); localNNThreadDumpContent.append("\n -- Local NN thread dump -- "); - LOG.info(localNNThreadDumpContent); + LOG.info("{}", localNNThreadDumpContent.toString()); isThreadDumpCaptured = true; } catch (IOException e) { LOG.warn("Can't get local NN thread dump due to " + e.getMessage()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java index 10b0012016b..31714972c9e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java @@ -210,8 +210,14 @@ private Integer listCorruptFileBlocks(String dir, String baseUrl) allDone = true; break; } + if (line.startsWith("Access denied for user")) { + out.println("Failed to open path '" + dir + "': Permission denied"); + errCode = -1; + return errCode; + } if ((line.isEmpty()) || (line.startsWith("FSCK started by")) + || (line.startsWith("FSCK ended at")) 
|| (line.startsWith("The filesystem under path"))) continue; numCorrupt++; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java index c6ea91ca872..7d1e88fde76 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java @@ -28,8 +28,8 @@ import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Options; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -57,8 +57,8 @@ private static final String CANCEL = "cancel"; private static final String HELP = "help"; private static final String HELP_SHORT = "h"; - private static final Log LOG = LogFactory - .getLog(DelegationTokenFetcher.class); + private static final Logger LOG = LoggerFactory + .getLogger(DelegationTokenFetcher.class); private static final String PRINT = "print"; private static final String RENEW = "renew"; private static final String RENEWER = "renewer"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DiskBalancerCLI.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DiskBalancerCLI.java index 00e6f0499a9..34bd68bc684 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DiskBalancerCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DiskBalancerCLI.java @@ -172,7 +172,9 @@ public static void main(String[] argv) throws Exception { try { res = ToolRunner.run(shell, argv); } catch (Exception ex) { - LOG.error(ex.toString()); + String msg = String.format("Exception thrown while running %s.", + DiskBalancerCLI.class.getSimpleName()); + LOG.error(msg, ex); res = 1; } System.exit(res); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java index 9b9fe14e46d..903a1e2ef9f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; +import org.apache.hadoop.hdfs.protocol.NoECPolicySetException; import org.apache.hadoop.hdfs.util.ECPolicyLoader; import org.apache.hadoop.io.erasurecode.ErasureCodeConstants; import org.apache.hadoop.tools.TableListing; @@ -154,7 +155,7 @@ public String getLongUsage() { listing.addRow("", "The path of the xml file which defines the EC policies to add"); return getShortUsage() + "\n" + - "Add a list of erasure coding policies.\n" + + "Add a list of user defined erasure coding policies.\n" + listing.toString(); } @@ -268,7 +269,7 @@ public String getLongUsage() { TableListing listing = AdminHelper.getOptionDescriptionListing(); listing.addRow("", "The name of the erasure coding policy"); return getShortUsage() + "\n" + 
- "Remove an erasure coding policy.\n" + + "Remove an user defined erasure coding policy.\n" + listing.toString(); } @@ -357,16 +358,16 @@ public int run(Configuration conf, List args) throws IOException { final DistributedFileSystem dfs = AdminHelper.getDFS(p.toUri(), conf); try { dfs.setErasureCodingPolicy(p, ecPolicyName); - if (ecPolicyName == null){ - ecPolicyName = "default"; - } - System.out.println("Set " + ecPolicyName + " erasure coding policy on" + - " " + path); + + String actualECPolicyName = dfs.getErasureCodingPolicy(p).getName(); + + System.out.println("Set " + actualECPolicyName + + " erasure coding policy on "+ path); RemoteIterator dirIt = dfs.listStatusIterator(p); if (dirIt.hasNext()) { System.out.println("Warning: setting erasure coding policy on a " + "non-empty directory will not automatically convert existing " + - "files to " + ecPolicyName + " erasure coding policy"); + "files to " + actualECPolicyName + " erasure coding policy"); } } catch (Exception e) { System.err.println(AdminHelper.prettifyException(e)); @@ -424,6 +425,12 @@ public int run(Configuration conf, List args) throws IOException { "non-empty directory will not automatically convert existing" + " files to replicated data."); } + } catch (NoECPolicySetException e) { + System.err.println(AdminHelper.prettifyException(e)); + System.err.println("Use '-setPolicy -path -replicate' to enforce" + + " default replication policy irrespective of EC policy" + + " defined on parent."); + return 2; } catch (Exception e) { System.err.println(AdminHelper.prettifyException(e)); return 2; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java index e03e787a980..e71e4d82167 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java @@ -21,8 +21,8 @@ import java.io.PrintStream; import java.net.InetSocketAddress; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -43,7 +43,7 @@ @InterfaceAudience.Private public class GetGroups extends GetGroupsBase { - private static final Log LOG = LogFactory.getLog(GetGroups.class); + private static final Logger LOG = LoggerFactory.getLogger(GetGroups.class); static final String USAGE = "Usage: hdfs groups [username ...]"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/StoragePolicyAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/StoragePolicyAdmin.java index aeb10d91ab9..e02208c5ab3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/StoragePolicyAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/StoragePolicyAdmin.java @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.BlockStoragePolicySpi; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; @@ -32,6 +33,7 @@ import 
org.apache.hadoop.util.ToolRunner; import java.io.FileNotFoundException; + import java.io.IOException; import java.util.Arrays; import java.util.Collection; @@ -245,6 +247,62 @@ public int run(Configuration conf, List args) throws IOException { } } + /** Command to schedule blocks to move based on specified policy. */ + private static class SatisfyStoragePolicyCommand + implements AdminHelper.Command { + @Override + public String getName() { + return "-satisfyStoragePolicy"; + } + + @Override + public String getShortUsage() { + return "[" + getName() + " [-w] -path ]\n"; + } + + @Override + public String getLongUsage() { + TableListing listing = AdminHelper.getOptionDescriptionListing(); + listing.addRow("", "The path of the file/directory to satisfy" + + " storage policy"); + listing.addRow("-w", + "It requests that the command wait till all the files satisfy" + + " the policy in given path. This will print the current" + + "status of the path in each 10 sec and status are:\n" + + "PENDING : Path is in queue and not processed for satisfying" + + " the policy.\n" + + "IN_PROGRESS : Satisfying the storage policy for" + + " path.\n" + + "SUCCESS : Storage policy satisfied for the path.\n" + + "FAILURE : Few blocks failed to move.\n" + + "NOT_AVAILABLE : Status not available."); + return getShortUsage() + "\n" + + "Schedule blocks to move based on file/directory policy.\n\n" + + listing.toString(); + } + + @Override + public int run(Configuration conf, List args) throws IOException { + final String path = StringUtils.popOptionWithArgument("-path", args); + if (path == null) { + System.err.println("Please specify the path for setting the storage " + + "policy.\nUsage: " + getLongUsage()); + return 1; + } + + final DistributedFileSystem dfs = AdminHelper.getDFS(conf); + try { + dfs.satisfyStoragePolicy(new Path(path)); + System.out.println("Scheduled blocks to move based on the current" + + " storage policy on " + path); + } catch (Exception e) { + System.err.println(AdminHelper.prettifyException(e)); + return 2; + } + return 0; + } + } + /* Command to unset the storage policy set for a file/directory */ private static class UnsetStoragePolicyCommand implements AdminHelper.Command { @@ -295,6 +353,7 @@ public int run(Configuration conf, List args) throws IOException { new ListStoragePoliciesCommand(), new SetStoragePolicyCommand(), new GetStoragePolicyCommand(), - new UnsetStoragePolicyCommand() + new UnsetStoragePolicyCommand(), + new SatisfyStoragePolicyCommand() }; } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsBinaryLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsBinaryLoader.java index d2634b38feb..76f70a1037e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsBinaryLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsBinaryLoader.java @@ -19,8 +19,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer; @@ -40,8 +40,8 @@ private final boolean fixTxIds; private final 
boolean recoveryMode; private long nextTxId; - public static final Log LOG = - LogFactory.getLog(OfflineEditsBinaryLoader.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(OfflineEditsBinaryLoader.class.getName()); /** * Constructor @@ -102,7 +102,7 @@ public void loadEdits() throws IOException { } visitor.close(null); } finally { - IOUtils.cleanup(LOG, inputStream); + IOUtils.cleanupWithLogger(LOG, inputStream); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java index 2c504608ebc..ad51b72259a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java @@ -30,8 +30,8 @@ import io.netty.handler.codec.http.HttpRequest; import io.netty.handler.codec.http.HttpResponseStatus; import io.netty.handler.codec.http.QueryStringDecoder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.web.JsonUtil; import org.apache.hadoop.util.StringUtils; @@ -58,7 +58,8 @@ * Implement the read-only WebHDFS API for fsimage. */ class FSImageHandler extends SimpleChannelInboundHandler { - public static final Log LOG = LogFactory.getLog(FSImageHandler.class); + public static final Logger LOG = + LoggerFactory.getLogger(FSImageHandler.class); private final FSImageLoader image; private final ChannelGroup activeChannels; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java index 61494f4bcc9..380f35aaddd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java @@ -34,8 +34,8 @@ import com.google.protobuf.CodedInputStream; import com.google.protobuf.InvalidProtocolBufferException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.permission.AclEntry; @@ -65,7 +65,8 @@ * file status of the namespace of the fsimage. 
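Editor's note: the tool classes in this part of the patch all receive the same mechanical logging migration. A condensed sketch of the before/after pattern (the class name is illustrative):

    import java.io.Closeable;
    import org.apache.hadoop.io.IOUtils;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    class MigratedTool {
      // was: private static final Log LOG = LogFactory.getLog(MigratedTool.class);
      private static final Logger LOG = LoggerFactory.getLogger(MigratedTool.class);

      void closeQuietly(Closeable stream) {
        // was: IOUtils.cleanup(LOG, stream); that overload expects a commons-logging Log
        IOUtils.cleanupWithLogger(LOG, stream);
      }

      void reportFatal(Throwable t) {
        // was: LOG.fatal(...); SLF4J has no FATAL level, so ERROR is used instead
        LOG.error("exiting due to earlier exception", t);
      }
    }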
*/ class FSImageLoader { - public static final Log LOG = LogFactory.getLog(FSImageHandler.class); + public static final Logger LOG = + LoggerFactory.getLogger(FSImageHandler.class); private final String[] stringTable; // byte representation of inodes, sorted by id diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java index 7d0a4924f13..a97bb72fa7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java @@ -54,8 +54,8 @@ import com.google.protobuf.ByteString; import com.google.protobuf.TextFormat; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.permission.AclEntry; @@ -96,8 +96,8 @@ @InterfaceAudience.Private @InterfaceStability.Unstable class OfflineImageReconstructor { - public static final Log LOG = - LogFactory.getLog(OfflineImageReconstructor.class); + public static final Logger LOG = + LoggerFactory.getLogger(OfflineImageReconstructor.class); /** * The output stream. @@ -1839,7 +1839,7 @@ public static void run(String inputPath, String outputPath) new OfflineImageReconstructor(out, reader); oir.processXml(); } finally { - IOUtils.cleanup(LOG, reader, fis, out, fout); + IOUtils.cleanupWithLogger(LOG, reader, fis, out, fout); } // Write the md5 file MD5FileUtils.saveMD5File(new File(outputPath), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java index 46a9c75c799..dd50ab003cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java @@ -30,8 +30,8 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.IOUtils; @@ -44,7 +44,8 @@ */ @InterfaceAudience.Private public class OfflineImageViewer { - public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class); + public static final Logger LOG = + LoggerFactory.getLogger(OfflineImageViewer.class); private final static String usage = "Usage: bin/hdfs oiv_legacy [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" @@ -145,7 +146,7 @@ public void go() throws IOException { LOG.error("Failed to load image file."); } } - IOUtils.cleanup(LOG, in, tracker); + IOUtils.cleanupWithLogger(LOG, in, tracker); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java index e4afa994614..34a85a6dce7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java @@ -28,8 +28,8 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetUtils; @@ -44,7 +44,8 @@ public class OfflineImageViewerPB { private static final String HELP_OPT = "-h"; private static final String HELP_LONGOPT = "--help"; - public static final Log LOG = LogFactory.getLog(OfflineImageViewerPB.class); + public static final Logger LOG = + LoggerFactory.getLogger(OfflineImageViewerPB.class); private final static String usage = "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" + "Offline Image Viewer\n" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java index 8fab34c06f8..20a8350e310 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode; @@ -71,9 +73,19 @@ private void append(StringBuffer buffer, long field) { buffer.append(field); } + static final String CRLF = StringUtils.CR + StringUtils.LF; + private void append(StringBuffer buffer, String field) { buffer.append(delimiter); - buffer.append(field); + + String escapedField = StringEscapeUtils.escapeCsv(field); + if (escapedField.contains(CRLF)) { + escapedField = escapedField.replace(CRLF, "%x0D%x0A"); + } else if (escapedField.contains(StringUtils.LF)) { + escapedField = escapedField.replace(StringUtils.LF, "%x0A"); + } + + buffer.append(escapedField); } @Override @@ -82,7 +94,7 @@ public String getEntry(String parent, INode inode) { String inodeName = inode.getName().toStringUtf8(); Path path = new Path(parent.isEmpty() ? "/" : parent, inodeName.isEmpty() ? 
"/" : inodeName); - buffer.append(path.toString()); + append(buffer, path.toString()); PermissionStatus p = null; boolean isDir = false; boolean hasAcl = false; @@ -136,7 +148,7 @@ public String getEntry(String parent, INode inode) { append(buffer, dirString + p.getPermission().toString() + aclString); append(buffer, p.getUserName()); append(buffer, p.getGroupName()); - return buffer.toString(); + return buffer.substring(1); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java index a50e828e4a1..767ecd809e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/WebImageViewer.java @@ -32,8 +32,8 @@ import io.netty.handler.codec.http.HttpResponseEncoder; import io.netty.handler.codec.string.StringEncoder; import io.netty.util.concurrent.GlobalEventExecutor; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.security.UserGroupInformation; @@ -47,7 +47,8 @@ * namespace. */ public class WebImageViewer implements Closeable { - public static final Log LOG = LogFactory.getLog(WebImageViewer.class); + public static final Logger LOG = + LoggerFactory.getLogger(WebImageViewer.class); private Channel channel; private InetSocketAddress address; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java index a89b8cb07b2..33f976a7f7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java @@ -23,8 +23,8 @@ import java.io.FilterOutputStream; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.io.nativeio.NativeIOException; @@ -46,7 +46,7 @@ private static final String TMP_EXTENSION = ".tmp"; - private final static Log LOG = LogFactory.getLog( + private final static Logger LOG = LoggerFactory.getLogger( AtomicFileOutputStream.class); private final File origFile; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightHashSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightHashSet.java index c7bf9a67f9d..a190c9ef401 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightHashSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightHashSet.java @@ -25,8 +25,8 @@ import java.util.List; import java.util.NoSuchElementException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A low memory linked hash set 
implementation, which uses an array for storing @@ -66,7 +66,8 @@ public String toString() { protected static final int MINIMUM_CAPACITY = 16; static final int MAXIMUM_CAPACITY = 1 << 30; - private static final Log LOG = LogFactory.getLog(LightWeightHashSet.class); + private static final Logger LOG = + LoggerFactory.getLogger(LightWeightHashSet.class); /** * An internal array of entries, which are the rows of the hash table. The diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java index d87ffbf3154..95dcf7181cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java @@ -29,8 +29,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.util.StringUtils; @@ -42,7 +42,7 @@ * that the Unix "md5sum" utility writes. */ public abstract class MD5FileUtils { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( MD5FileUtils.class); public static final String MD5_SUFFIX = ".md5"; @@ -84,7 +84,7 @@ private static Matcher readStoredMd5(File md5File) throws IOException { } catch (IOException ioe) { throw new IOException("Error reading md5 file at " + md5File, ioe); } finally { - IOUtils.cleanup(LOG, reader); + IOUtils.cleanupWithLogger(LOG, reader); } Matcher matcher = LINE_REGEX.matcher(md5Line); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java index 67bb2bb9a45..777dd87cfeb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java @@ -23,8 +23,8 @@ import java.io.IOException; import java.io.InputStreamReader; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.IOUtils; @@ -37,7 +37,7 @@ */ @InterfaceAudience.Private public class PersistentLongFile { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( PersistentLongFile.class); private final File file; @@ -99,7 +99,7 @@ public static long readFile(File file, long defaultVal) throws IOException { br.close(); br = null; } finally { - IOUtils.cleanup(LOG, br); + IOUtils.cleanupWithLogger(LOG, br); } } return val; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java index 6f1257c5e7e..2e0a17efa4b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ExceptionHandler.java @@ -27,8 +27,8 @@ 
import javax.ws.rs.ext.ExceptionMapper; import javax.ws.rs.ext.Provider; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.web.JsonUtil; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.StandbyException; @@ -42,7 +42,8 @@ /** Handle exceptions. */ @Provider public class ExceptionHandler implements ExceptionMapper { - public static final Log LOG = LogFactory.getLog(ExceptionHandler.class); + public static final Logger LOG = + LoggerFactory.getLogger(ExceptionHandler.class); private static Exception toCause(Exception e) { final Throwable t = e.getCause(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/docs/libhdfs_footer.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/docs/libhdfs_footer.html index cb69b6316e7..35930c18c6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/docs/libhdfs_footer.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/docs/libhdfs_footer.html @@ -21,5 +21,5 @@

libhdfs - -Hadoop +Hadoop diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto index bf0df5bf144..4a8f9f00f22 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto @@ -197,6 +197,7 @@ message VolumeFailureSummaryProto { * volumeFailureSummary - info about volume failures * slowPeers - info about peer DataNodes that are suspected to be slow. * slowDisks - info about DataNode disks that are suspected to be slow. + * blksMovementResults - status of the scheduled blocks movements */ message HeartbeatRequestProto { required DatanodeRegistrationProto registration = 1; // Datanode info diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto index 683dc802322..89edfbf2ea6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto @@ -206,6 +206,21 @@ message IsRollingUpgradeResponseProto { required bool isRollingUpgrade = 1; } +message GetFilePathRequestProto { + required uint64 fileId = 1; +} + +message GetFilePathResponseProto { + required string srcPath = 1; +} + +message GetNextSPSPathRequestProto { +} + +message GetNextSPSPathResponseProto { + optional uint64 spsPath = 1; +} + /** * Protocol used by the sub-ordinate namenode to send requests * the active/primary namenode. @@ -287,4 +302,10 @@ service NamenodeProtocolService { */ rpc isRollingUpgrade(IsRollingUpgradeRequestProto) returns (IsRollingUpgradeResponseProto); + + /** + * Return the sps path from namenode + */ + rpc getNextSPSPath(GetNextSPSPathRequestProto) + returns (GetNextSPSPathResponseProto); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto index a37c7236a65..625966fd33c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto @@ -242,6 +242,7 @@ message FormatRequestProto { required JournalIdProto jid = 1; required NamespaceInfoProto nsInfo = 2; optional string nameServiceId = 3; + optional bool force = 4 [ default = false ]; } message FormatResponseProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 6dd2d927962..1573582c91a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -615,6 +615,15 @@ + + dfs.namenode.max-corrupt-file-blocks-returned + 100 + + The maximum number of corrupt file blocks listed by NameNode Web UI, + JMX and other client request. + + + dfs.blocksize 134217728 @@ -1267,13 +1276,19 @@ The number of volumes that are allowed to fail before a datanode stops offering service. By default any volume failure will cause a datanode to shutdown. + The range of the value is -1 now, -1 represents the minimum + of volume valids is 1. dfs.image.compress false - Should the dfs image be compressed? + When this value is true, the dfs image will be compressed. 
+ Enabling this will be very helpful if dfs image is large since it can + avoid consuming a lot of network bandwidth when SBN uploads a new dfs + image to ANN. The compressed codec is specified by the setting + dfs.image.compression.codec. @@ -1289,11 +1304,10 @@ dfs.image.transfer.timeout 60000 - Socket timeout for image transfer in milliseconds. This timeout and the related - dfs.image.transfer.bandwidthPerSec parameter should be configured such - that normal image transfer can complete successfully. - This timeout prevents client hangs when the sender fails during - image transfer. This is socket timeout during image transfer. + Socket timeout for the HttpURLConnection instance used in the image + transfer. This is measured in milliseconds. + This timeout prevents client hangs if the connection is idle + for this configured timeout, during image transfer. @@ -1304,9 +1318,7 @@ Maximum bandwidth used for regular image transfers (instead of bootstrapping the standby namenode), in bytes per second. This can help keep normal namenode operations responsive during - checkpointing. The maximum bandwidth and timeout in - dfs.image.transfer.timeout should be set such that normal image - transfers can complete successfully. + checkpointing. A default value of 0 indicates that throttling is disabled. The maximum bandwidth used for bootstrapping standby namenode is configured with dfs.image.transfer-bootstrap-standby.bandwidthPerSec. @@ -2460,6 +2472,17 @@ + + dfs.namenode.caching.enabled + true + + Set to true to enable block caching. This flag enables the NameNode to + maintain a mapping of cached blocks to DataNodes via processing DataNode + cache reports. Based on these reports and addition and removal of caching + directives, the NameNode will schedule caching and uncaching work. + + + dfs.namenode.path.based.cache.block.map.allocation.percent 0.25 @@ -4487,6 +4510,131 @@ + + dfs.storage.policy.satisfier.enabled + false + dfs.storage.policy.satisfier.mode + none + + Following values are supported - external, none. + If external, StoragePolicySatisfier will be enabled and started as an independent service outside namenode. + If none, StoragePolicySatisfier is disabled. + By default, StoragePolicySatisfier is disabled. + Administrator can dynamically change StoragePolicySatisfier mode by using reconfiguration option. + Dynamic mode change can be achieved in the following way. + 1. Edit/update this configuration property values in hdfs-site.xml + 2. Execute the reconfig command on hadoop command line prompt. + For example:$hdfs -reconfig namenode nn_host:port start + + + + + dfs.storage.policy.satisfier.queue.limit + 1000 + + Storage policy satisfier queue size. This queue contains the currently + scheduled file's inode ID for statisfy the policy. + Default value is 1000. + + + + + dfs.storage.policy.satisfier.work.multiplier.per.iteration + 1 + + *Note*: Advanced property. Change with caution. + This determines the total amount of block transfers to begin in + one iteration, for satisfy the policy. The actual number is obtained by + multiplying this multiplier with the total number of live nodes in the + cluster. The result number is the number of blocks to begin transfers + immediately. This number can be any positive, non-zero integer. + + + + + dfs.storage.policy.satisfier.recheck.timeout.millis + 60000 + + Blocks storage movements monitor re-check interval in milliseconds. 
+ This check will verify whether any blocks storage movement results arrived from DN + and also verify if any of file blocks movements not at all reported to DN + since dfs.storage.policy.satisfier.self.retry.timeout. + The default value is 1 * 60 * 1000 (1 mins) + + + + + dfs.storage.policy.satisfier.self.retry.timeout.millis + 300000 + + If any of file related block movements not at all reported by datanode, + then after this timeout(in milliseconds), the item will be added back to movement needed list + at namenode which will be retried for block movements. + The default value is 5 * 60 * 1000 (5 mins) + + + + + dfs.storage.policy.satisfier.retry.max.attempts + 3 + + Max retry to satisfy the block storage policy. After this retry block will be removed + from the movement needed queue. + + + + + dfs.storage.policy.satisfier.datanode.cache.refresh.interval.ms + 300000 + + How often to refresh the datanode storages cache in milliseconds. This cache + keeps live datanode storage reports fetched from namenode. After elapsed time, + it will again fetch latest datanodes from namenode. + By default, this parameter is set to 5 minutes. + + + + + dfs.storage.policy.satisfier.max.outstanding.paths + 10000 + + Defines the maximum number of paths to satisfy that can be queued up in the + Satisfier call queue in a period of time. Default value is 10000. + + + + + dfs.storage.policy.satisfier.address + 0.0.0.0:0 + + The hostname used for a keytab based Kerberos login. Keytab based login + is required when dfs.storage.policy.satisfier.mode is external. + + + + + dfs.storage.policy.satisfier.keytab.file + + + The keytab file used by external StoragePolicySatisfier to login as its + service principal. The principal name is configured with + dfs.storage.policy.satisfier.kerberos.principal. Keytab based login + is required when dfs.storage.policy.satisfier.mode is external. + + + + + dfs.storage.policy.satisfier.kerberos.principal + + + The StoragePolicySatisfier principal. This is typically set to + satisfier/_HOST@REALM.TLD. The StoragePolicySatisfier will substitute + _HOST with its own fully qualified hostname at startup. The _HOST placeholder + allows using the same configuration setting on different servers. Keytab + based login is required when dfs.storage.policy.satisfier.mode is external. + + + dfs.pipeline.ecn false @@ -4574,6 +4722,24 @@ + + dfs.qjournal.http.open.timeout.ms + 60000 + + Timeout in milliseconds when open a new HTTP connection to remote + journals. + + + + + dfs.qjournal.http.read.timeout.ms + 60000 + + Timeout in milliseconds when reading from a HTTP connection from remote + journals. + + + dfs.quota.by.storage.type.enabled true @@ -4859,6 +5025,15 @@ + + dfs.provided.aliasmap.inmemory.server.log + false + + Ensures that InMemoryAliasMap server logs every call to it. + Set to false by default. + + + dfs.provided.aliasmap.text.delimiter , @@ -5000,4 +5175,14 @@ will throw NameNodeFormatException. + + + dfs.namenode.block.deletion.increment + 1000 + + The number of block deletion increment. + This setting will control the block increment deletion rate to + ensure that other waiters on the lock can get in. + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html index 1d66f5591f9..0fdf552e083 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html @@ -37,6 +37,9 @@ Utilities
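The block of `dfs.storage.policy.satisfier.*` properties added to hdfs-default.xml above configures the external Storage Policy Satisfier service. As a rough illustration only (not part of this patch), the same settings can be assembled programmatically on a Hadoop `Configuration`; the keytab path and Kerberos realm below are placeholder values, and the numeric settings simply mirror the documented defaults.

    import org.apache.hadoop.conf.Configuration;

    public class SpsConfExample {
      public static Configuration externalSpsConf() {
        Configuration conf = new Configuration();
        // Run SPS as a service outside the NameNode.
        conf.set("dfs.storage.policy.satisfier.mode", "external");
        // Queue and retry tuning, mirroring the defaults documented above.
        conf.setInt("dfs.storage.policy.satisfier.queue.limit", 1000);
        conf.setInt("dfs.storage.policy.satisfier.retry.max.attempts", 3);
        conf.setLong("dfs.storage.policy.satisfier.recheck.timeout.millis", 60000L);
        // Kerberos identity used by the external satisfier (placeholder values).
        conf.set("dfs.storage.policy.satisfier.address", "0.0.0.0:0");
        conf.set("dfs.storage.policy.satisfier.keytab.file",
            "/etc/security/keytabs/sps.keytab");
        conf.set("dfs.storage.policy.satisfier.kerberos.principal",
            "satisfier/_HOST@EXAMPLE.COM");
        return conf;
      }
    }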

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js index 05f8df68e41..898b1f36d9c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js @@ -208,7 +208,7 @@ processData: false, crossDomain: true }).always(function(data) { - $('#file-info-preview-body').val(data.responseText); + $('#file-info-preview-body').val(data); $('#file-info-tail').show(); }).fail(function(jqXHR, textStatus, errorThrown) { show_err_msg("Couldn't preview the file. " + errorThrown); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js index a572282ea47..316a9947ca9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js @@ -20,7 +20,7 @@ var filters = { 'fmt_bytes': function (v) { - var UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'ZB']; + var UNITS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']; var prev = 0, i = 0; while (Math.floor(v) > 0 && i < UNITS.length) { prev = v; @@ -28,7 +28,7 @@ i += 1; } - if (i > 0 && i < UNITS.length) { + if (i > 0) { v = prev; i -= 1; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ArchivalStorage.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ArchivalStorage.md index 3c49cb1666c..5fd66125e29 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ArchivalStorage.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ArchivalStorage.md @@ -97,8 +97,45 @@ The effective storage policy can be retrieved by the "[`storagepolicies -getStor The default storage type of a datanode storage location will be DISK if it does not have a storage type tagged explicitly. -Mover - A New Data Migration Tool ---------------------------------- +Storage Policy Based Data Movement +---------------------------------- + +Setting a new storage policy on already existing file/dir will change the policy in Namespace, but it will not move the blocks physically across storage medias. +Following 2 options will allow users to move the blocks based on new policy set. So, once user change/set to a new policy on file/directory, user should also perform one of the following options to achieve the desired data movement. Note that both options cannot be allowed to run simultaneously. + +### Storage Policy Satisfier (SPS) + +When user changes the storage policy on a file/directory, user can call `HdfsAdmin` API `satisfyStoragePolicy()` to move the blocks as per the new policy set. +The SPS tool running external to namenode periodically scans for the storage mismatches between new policy set and the physical blocks placed. This will only track the files/directories for which user invoked satisfyStoragePolicy. If SPS identifies some blocks to be moved for a file, then it will schedule block movement tasks to datanodes. If there are any failures in movement, the SPS will re-attempt by sending new block movement tasks. + +SPS can be enabled as an external service outside Namenode or disabled dynamically without restarting the Namenode. 
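To make the workflow described above concrete, a minimal client-side sketch (not part of this patch) might look as follows. The NameNode URI, path, and policy name are placeholders; `satisfyStoragePolicy()` is the `HdfsAdmin` API referenced below.

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.client.HdfsAdmin;

    public class SatisfyPolicyExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        URI nnUri = URI.create("hdfs://namenode.example.com:8020"); // placeholder
        Path dir = new Path("/archive/cold-data");                  // placeholder

        // Step 1: change the policy in the namespace (metadata only).
        FileSystem fs = FileSystem.get(nnUri, conf);
        fs.setStoragePolicy(dir, "COLD");

        // Step 2: ask SPS to physically move the already-written blocks.
        new HdfsAdmin(nnUri, conf).satisfyStoragePolicy(dir);
      }
    }

The equivalent command-line path, shown in the commands section further down, is `hdfs storagepolicies -satisfyStoragePolicy -path <path>`.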
+ +Detailed design documentation can be found at [Storage Policy Satisfier(SPS) (HDFS-10285)](https://issues.apache.org/jira/browse/HDFS-10285) + +* **Note**: When user invokes `satisfyStoragePolicy()` API on a directory, SPS will scan all sub-directories and consider all the files for satisfy the policy.. + +* HdfsAdmin API : + `public void satisfyStoragePolicy(final Path path) throws IOException` + +* Arguments : + +| | | +|:---- |:---- | +| `path` | A path which requires blocks storage movement. | + +####Configurations: + +* **dfs.storage.policy.satisfier.mode** - Used to enable external service outside NN or disable SPS. + Following string values are supported - `external`, `none`. Configuring `external` value represents SPS is enable and `none` to disable. + The default value is `none`. + +* **dfs.storage.policy.satisfier.recheck.timeout.millis** - A timeout to re-check the processed block storage movement + command results from Datanodes. + +* **dfs.storage.policy.satisfier.self.retry.timeout.millis** - A timeout to retry if no block movement results reported from + Datanode in this configured timeout. + +### Mover - A New Data Migration Tool A new data migration tool is added for archiving data. The tool is similar to Balancer. It periodically scans the files in HDFS to check if the block placement satisfies the storage policy. For the blocks violating the storage policy, it moves the replicas to a different storage type in order to fulfill the storage policy requirement. Note that it always tries to move block replicas within the same node whenever possible. If that is not possible (e.g. when a node doesn’t have the target storage type) then it will copy the block replicas to another node over the network. @@ -115,6 +152,10 @@ A new data migration tool is added for archiving data. The tool is similar to Ba Note that, when both -p and -f options are omitted, the default path is the root directory. +####Administrator notes: + +`StoragePolicySatisfier` and `Mover tool` cannot run simultaneously. If a Mover instance is already triggered and running, SPS will be disabled while starting. In that case, administrator should make sure, Mover execution finished and then enable external SPS service again. Similarly when SPS enabled already, Mover cannot be run. If administrator is looking to run Mover tool explicitly, then he/she should make sure to disable SPS first and then run Mover. Please look at the commands section to know how to enable external service outside NN or disable SPS dynamically. + Storage Policy Commands ----------------------- @@ -171,5 +212,31 @@ Get the storage policy of a file or a directory. |:---- |:---- | | `-path ` | The path referring to either a directory or a file. | +### Satisfy Storage Policy + +Schedule blocks to move based on file's/directory's current storage policy. + +* Command: + + hdfs storagepolicies -satisfyStoragePolicy -path + +* Arguments: + +| | | +|:---- |:---- | +| `-path ` | The path referring to either a directory or a file. | + +### Enable external service outside NN or Disable SPS without restarting Namenode +If administrator wants to switch modes of SPS feature while Namenode is running, first he/she needs to update the desired value(external or none) for the configuration item `dfs.storage.policy.satisfier.mode` in configuration file (`hdfs-site.xml`) and then run the following Namenode reconfig command + +* Command: + + hdfs dfsadmin -reconfig namenode start + +### Start External SPS Service. 
+If administrator wants to start external sps, first he/she needs to configure property `dfs.storage.policy.satisfier.mode` with `external` value in configuration file (`hdfs-site.xml`) and then run Namenode reconfig command. Please ensure that network topology configurations in the configuration file are same as namenode, this cluster will be used for matching target nodes. After this, start external sps service using following command + +* Command: + hdfs --daemon start sps diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md index 89ad6709a37..7568949f3aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/CentralizedCacheManagement.md @@ -220,7 +220,7 @@ The following properties are not required, but may be specified for tuning: The NameNode will use this as the amount of milliseconds between subsequent path cache rescans. This calculates the blocks to cache and each DataNode containing a replica of the block that should cache it. - By default, this parameter is set to 300000, which is five minutes. + By default, this parameter is set to 30000, which is thirty seconds. * dfs.datanode.fsdatasetcache.max.threads.per.volume @@ -238,6 +238,11 @@ The following properties are not required, but may be specified for tuning: The percentage of the Java heap which we will allocate to the cached blocks map. The cached blocks map is a hash map which uses chained hashing. Smaller maps may be accessed more slowly if the number of cached blocks is large; larger maps will consume more memory. The default is 0.25 percent. +* dfs.namenode.caching.enabled + + This parameter can be used to enable/disable the centralized caching in NameNode. When centralized caching is disabled, NameNode will not process cache reports or store information about block cache locations on the cluster. Note that NameNode will continute to store the path based cache locations in the file-system metadata, even though it will not act on this information until the caching is enabled. The default value for this parameter is true (i.e. centralized caching is enabled). + + ### OS Limits If you get the error "Cannot start datanode because the configured max locked memory size... is more than the datanode's available RLIMIT\_MEMLOCK ulimit," that means that the operating system is imposing a lower limit on the amount of memory that you can lock than what you have configured. To fix this, you must adjust the ulimit -l value that the DataNode runs with. Usually, this value is configured in `/etc/security/limits.conf`. However, it will vary depending on what operating system and distribution you are using. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md index 9ed69bfd587..c2a6ae1d601 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md @@ -420,7 +420,7 @@ Runs a HDFS dfsadmin client. Usage: `hdfs dfsrouter` -Runs the DFS router. See [Router](./HDFSRouterFederation.html#Router) for more info. +Runs the DFS router. See [Router](../hadoop-hdfs-rbf/HDFSRouterFederation.html#Router) for more info. 
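Relating to the `dfs.namenode.caching.enabled` flag documented in the CentralizedCacheManagement change above: a minimal sketch (not part of this patch) of turning centralized caching off via configuration. The property name comes from this change; everything else is illustrative.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class DisableCachingExample {
      public static Configuration withCachingDisabled() {
        Configuration conf = new HdfsConfiguration();
        // With this flag off, the NameNode ignores DataNode cache reports and
        // schedules no caching or uncaching work; cache directives stay in metadata.
        conf.setBoolean("dfs.namenode.caching.enabled", false);
        return conf;
      }
    }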
### `dfsrouteradmin` @@ -449,7 +449,7 @@ Usage: | `-nameservice` `disable` `enable` *nameservice* | Disable/enable a name service from the federation. If disabled, requests will not go to that name service. | | `-getDisabledNameservices` | Get the name services that are disabled in the federation. | -The commands for managing Router-based federation. See [Mount table management](./HDFSRouterFederation.html#Mount_table_management) for more info. +The commands for managing Router-based federation. See [Mount table management](../hadoop-hdfs-rbf/HDFSRouterFederation.html#Mount_table_management) for more info. ### `diskbalancer` @@ -615,6 +615,8 @@ Usage: [-setStoragePolicy -path -policy ] [-getStoragePolicy -path ] [-unsetStoragePolicy -path ] + [-satisfyStoragePolicy -path ] + [-isSatisfierRunning] [-help ] Lists out all/Gets/sets/unsets storage policies. See the [HDFS Storage Policy Documentation](./ArchivalStorage.html) for more information. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSDiskbalancer.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSDiskbalancer.md index ed0233a97bd..5dd6ffc4608 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSDiskbalancer.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSDiskbalancer.md @@ -127,6 +127,7 @@ There is a set of diskbalancer settings that can be controlled via hdfs-site.xml |`dfs.disk.balancer.block.tolerance.percent`| The tolerance percent specifies when we have reached a good enough value for any copy step. For example, if you specify 10% then getting close to 10% of the target value is good enough.| |`dfs.disk.balancer.plan.threshold.percent`| The percentage threshold value for volume Data Density in a plan. If the absolute value of volume Data Density which is out of threshold value in a node, it means that the volumes corresponding to the disks should do the balancing in the plan. The default value is 10.| |`dfs.disk.balancer.plan.valid.interval`| Maximum amount of time disk balancer plan is valid. Supports the following suffixes (case insensitive): ms(millis), s(sec), m(min), h(hour), d(day) to specify the time (such as 2s, 2m, 1h, etc.). If no suffix is specified then milliseconds is assumed. Default value is 1d| + Debugging --------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md index 60fd3abf184..67e6b750a29 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md @@ -65,11 +65,11 @@ Architecture 2. _The size of a striping cell._ This determines the granularity of striped reads and writes, including buffer sizes and encoding work. - Policies are named *codec*-*num data blocks*-*num parity blocks*-*cell size*. Currently, six built-in policies are supported: `RS-3-2-1024k`, `RS-6-3-1024k`, `RS-10-4-1024k`, `RS-LEGACY-6-3-1024k`, `XOR-2-1-1024k` and `REPLICATION`. + Policies are named *codec*-*num data blocks*-*num parity blocks*-*cell size*. Currently, five built-in policies are supported: `RS-3-2-1024k`, `RS-6-3-1024k`, `RS-10-4-1024k`, `RS-LEGACY-6-3-1024k`, `XOR-2-1-1024k`. - `REPLICATION` is a special policy. It can only be set on directory, to force the directory to adopt 3x replication scheme, instead of inheriting its ancestor's erasure coding policy. This policy makes it possible to interleave 3x replication scheme directory with erasure coding directory. 
+ The default `REPLICATION` scheme is also supported. It can only be set on directory, to force the directory to adopt 3x replication scheme, instead of inheriting its ancestor's erasure coding policy. This policy makes it possible to interleave 3x replication scheme directory with erasure coding directory. - `REPLICATION` policy is always enabled. For other built-in policies, they are disabled by default. + `REPLICATION` is always enabled. Out of all the EC policies, RS(6,3) is enabled by default. Similar to HDFS storage policies, erasure coding policies are set on a directory. When a file is created, it inherits the EC policy of its nearest ancestor directory. @@ -107,10 +107,10 @@ Deployment This means that when reading and writing striped files, most operations are off-rack. Network bisection bandwidth is thus very important. - For rack fault-tolerance, it is also important to have at least as many racks as the configured EC stripe width. - For EC policy RS (6,3), this means minimally 9 racks, and ideally 10 or 11 to handle planned and unplanned outages. - For clusters with fewer racks than the stripe width, HDFS cannot maintain rack fault-tolerance, but will still attempt - to spread a striped file across multiple nodes to preserve node-level fault-tolerance. + For rack fault-tolerance, it is also important to have enough number of racks, so that on average, each rack holds number of blocks no more than the number of EC parity blocks. A formula to calculate this would be (data blocks + parity blocks) / parity blocks, rounding up. + For EC policy RS (6,3), this means minimally 3 racks (calculated by (6 + 3) / 3 = 3), and ideally 9 or more to handle planned and unplanned outages. + For clusters with fewer racks than the number of the parity cells, HDFS cannot maintain rack fault-tolerance, but will still attempt + to spread a striped file across multiple nodes to preserve node-level fault-tolerance. For this reason, it is recommended to setup racks with similar number of DataNodes. ### Configuration keys @@ -184,7 +184,7 @@ Below are the details about each command. This parameter can be omitted if a 'dfs.namenode.ec.system.default.policy' configuration is set. The EC policy of the path will be set with the default value in configuration. - `-replicate` apply the special `REPLICATION` policy on the directory, force the directory to adopt 3x replication scheme. + `-replicate` apply the default `REPLICATION` scheme on the directory, force the directory to adopt 3x replication scheme. `-replicate` and `-policy ` are optional arguments. They cannot be specified at the same time. @@ -203,7 +203,7 @@ Below are the details about each command. * `[-addPolicies -policyFile ]` - Add a list of erasure coding policies. Please refer etc/hadoop/user_ec_policies.xml.template for the example policy file. The maximum cell size is defined in property 'dfs.namenode.ec.policies.max.cellsize' with the default value 4MB. Currently HDFS allows the user to add 64 policies in total, and the added policy ID is in range of 64 to 127. Adding policy will fail if there are already 64 policies added. + Add a list of user defined erasure coding policies. Please refer etc/hadoop/user_ec_policies.xml.template for the example policy file. The maximum cell size is defined in property 'dfs.namenode.ec.policies.max.cellsize' with the default value 4MB. Currently HDFS allows the user to add 64 policies in total, and the added policy ID is in range of 64 to 127. Adding policy will fail if there are already 64 policies added. 
* `[-listCodecs]` @@ -211,7 +211,7 @@ Below are the details about each command. * `[-removePolicy -policy ]` - Remove an erasure coding policy. + Remove an user defined erasure coding policy. * `[-enablePolicy -policy ]` diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md index f32868acc07..e4363fbec73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md @@ -423,6 +423,14 @@ This guide describes high-level uses of each of these subcommands. For specific **Note:** This is not yet implemented, and at present will always return success, unless the given NameNode is completely down. + +### Load Balancer Setup + +If you are running a set of NameNodes behind a Load Balancer (e.g. [Azure](https://docs.microsoft.com/en-us/azure/load-balancer/load-balancer-custom-probe-overview) or [AWS](https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/elb-healthchecks.html) ) and would like the Load Balancer to point to the active NN, you can use the /isActive HTTP endpoint as a health probe. +http://NN_HOSTNAME/isActive will return a 200 status code response if the NN is in Active HA State, 405 otherwise. + + + Automatic Failover ------------------ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsProvidedStorage.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsProvidedStorage.md index 01e70766a0e..b8d53215343 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsProvidedStorage.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsProvidedStorage.md @@ -38,7 +38,7 @@ is limited to creating a *read-only image* of a remote namespace that implements to serve the image. Specifically, reads from a snapshot of a remote namespace are supported. Adding a remote namespace to an existing/running namenode, refreshing the remote snapshot, unmounting, and writes are not available in this release. One -can use [ViewFs](./ViewFs.html) and [RBF](HDFSRouterFederation.html) to +can use [ViewFs](./ViewFs.html) and [RBF](../hadoop-hdfs-rbf/HDFSRouterFederation.html) to integrate namespaces with `PROVIDED` storage into an existing deployment. 
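As an aside on the `/isActive` load-balancer probe described in the HA document above, a health check reduces to a plain HTTP GET against the NameNode web address. A minimal sketch (not part of this patch) is shown below; the host and port are placeholders for the cluster's `dfs.namenode.http-address` value, and the 200/405 semantics are those documented above.

    import java.net.HttpURLConnection;
    import java.net.URL;

    public class IsActiveProbe {
      // Returns true if the given NameNode HTTP address reports Active HA state.
      static boolean isActive(String httpAddress) throws Exception {
        URL url = new URL("http://" + httpAddress + "/isActive");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        conn.setConnectTimeout(2000);
        conn.setReadTimeout(2000);
        try {
          // 200 => Active; 405 => not the active NameNode.
          return conn.getResponseCode() == 200;
        } finally {
          conn.disconnect();
        }
      }

      public static void main(String[] args) throws Exception {
        System.out.println(isActive("nn1.example.com:9870")); // placeholder address
      }
    }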
Creating HDFS Clusters with `PROVIDED` Storage diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCacheAdminCLI.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCacheAdminCLI.java index 28321cb6131..2f8dfa5b36b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCacheAdminCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCacheAdminCLI.java @@ -20,8 +20,8 @@ import static org.junit.Assert.assertTrue; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.cli.util.CLICommand; import org.apache.hadoop.cli.util.CLICommandCacheAdmin; import org.apache.hadoop.cli.util.CLICommandTypes; @@ -44,7 +44,8 @@ public class TestCacheAdminCLI extends CLITestHelper { - public static final Log LOG = LogFactory.getLog(TestCacheAdminCLI.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestCacheAdminCLI.class); protected MiniDFSCluster dfsCluster = null; protected FileSystem fs = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java index 417d31ba520..90b4f11a66a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java @@ -37,8 +37,8 @@ import org.apache.commons.collections.map.LinkedMap; import org.apache.commons.lang3.SystemUtils; import org.apache.commons.lang3.mutable.MutableBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.client.impl.BlockReaderTestUtil; import org.apache.hadoop.hdfs.ClientContext; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -78,8 +78,8 @@ * This class tests if EnhancedByteBufferAccess works correctly. */ public class TestEnhancedByteBufferAccess { - private static final Log LOG = - LogFactory.getLog(TestEnhancedByteBufferAccess.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestEnhancedByteBufferAccess.class.getName()); static private TemporarySocketDirectory sockDir; @@ -575,7 +575,7 @@ public void testIndirectFallbackReads() throws Exception { fis = new FileInputStream(testPath); testFallbackImpl(fis, original); } finally { - IOUtils.cleanup(LOG, fos, fis); + IOUtils.cleanupWithLogger(LOG, fos, fis); new File(testPath).delete(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSMultipartUploader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSMultipartUploader.java deleted file mode 100644 index 96c50938b35..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSMultipartUploader.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs; - -import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.test.GenericTestUtils; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.rules.TestName; - -import java.io.IOException; - -public class TestHDFSMultipartUploader - extends AbstractSystemMultipartUploaderTest { - - private static MiniDFSCluster cluster; - private Path tmp; - - @Rule - public TestName name = new TestName(); - - @BeforeClass - public static void init() throws IOException { - HdfsConfiguration conf = new HdfsConfiguration(); - cluster = new MiniDFSCluster.Builder(conf, - GenericTestUtils.getRandomizedTestDir()) - .numDataNodes(1) - .build(); - cluster.waitClusterUp(); - } - - @AfterClass - public static void cleanup() throws IOException { - if (cluster != null) { - cluster.shutdown(); - cluster = null; - } - } - - @Before - public void setup() throws IOException { - tmp = new Path(cluster.getFileSystem().getWorkingDirectory(), - name.getMethodName()); - cluster.getFileSystem().mkdirs(tmp); - } - - @Override - public FileSystem getFS() throws IOException { - return cluster.getFileSystem(); - } - - @Override - public Path getBaseTestPath() { - return tmp; - } - -} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHdfsNativeCodeLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHdfsNativeCodeLoader.java index 34164f42d8a..4ecca5e55a1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHdfsNativeCodeLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHdfsNativeCodeLoader.java @@ -20,12 +20,13 @@ import org.junit.Test; import static org.junit.Assert.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.util.NativeCodeLoader; public class TestHdfsNativeCodeLoader { - static final Log LOG = LogFactory.getLog(TestHdfsNativeCodeLoader.class); + static final Logger LOG = + LoggerFactory.getLogger(TestHdfsNativeCodeLoader.class); private static boolean requireTestJni() { String rtj = System.getProperty("require.test.libhadoop"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java index 655d4534cd4..ef4c04d0f20 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.fs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ 
-33,8 +33,8 @@ import org.mockito.Mockito; public class TestUnbuffer { - private static final Log LOG = - LogFactory.getLog(TestUnbuffer.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestUnbuffer.class.getName()); @Rule public ExpectedException exception = ExpectedException.none(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractMultipartUploader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractMultipartUploader.java new file mode 100644 index 00000000000..f3a5265de75 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractMultipartUploader.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.contract.hdfs; + +import java.io.IOException; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractMultipartUploaderTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test MultipartUploader tests on HDFS. + */ +public class TestHDFSContractMultipartUploader extends + AbstractContractMultipartUploaderTest { + + @BeforeClass + public static void createCluster() throws IOException { + HDFSContract.createCluster(); + } + + @AfterClass + public static void teardownCluster() throws IOException { + HDFSContract.destroyCluster(); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new HDFSContract(conf); + } + + /** + * HDFS doesn't have any restriction on the part size. + * @return 1KB + */ + @Override + protected int partSizeInBytes() { + return 1024; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java index 5d96b7bb76a..1bc6b2c351f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java @@ -29,8 +29,8 @@ import java.util.Random; import com.google.common.collect.Lists; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; @@ -53,7 +53,8 @@ * This class provide utilities for testing of the admin operations of nodes. 
*/ public class AdminStatesBaseTest { - public static final Log LOG = LogFactory.getLog(AdminStatesBaseTest.class); + public static final Logger LOG = + LoggerFactory.getLogger(AdminStatesBaseTest.class); static final long seed = 0xDEADBEEFL; static final int blockSize = 8192; static final int fileSize = 16384; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java index 268bdf9df78..f7d90d2b198 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java @@ -26,8 +26,8 @@ import java.util.Arrays; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -44,7 +44,7 @@ */ static final Long RANDOM_NUMBER_GENERATOR_SEED = null; - static final Log LOG = LogFactory.getLog(AppendTestUtil.class); + static final Logger LOG = LoggerFactory.getLogger(AppendTestUtil.class); private static final Random SEED = new Random(); static { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java index 7f1792fdd07..515a0a07192 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java @@ -24,9 +24,8 @@ import java.io.InputStream; import java.io.OutputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.ChecksumFileSystem; @@ -37,7 +36,7 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Level; +import org.slf4j.event.Level; /** * This class benchmarks the performance of the local file system, raw local @@ -172,10 +171,8 @@ private static void printUsage() { @Override public int run(String[] args) throws IOException { // silence the minidfs cluster - Log hadoopLog = LogFactory.getLog("org"); - if (hadoopLog instanceof Log4JLogger) { - GenericTestUtils.setLogLevel(hadoopLog, Level.WARN); - } + Logger hadoopLog = LoggerFactory.getLogger("org"); + GenericTestUtils.setLogLevel(hadoopLog, Level.WARN); int reps = 1; if (args.length == 1) { try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index e6a2a002527..3e22b565dc5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -59,6 +59,7 @@ import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.EnumSet; import java.util.HashMap; import 
java.util.HashSet; @@ -81,8 +82,8 @@ import com.google.common.collect.Maps; import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.fs.BlockLocation; @@ -97,8 +98,11 @@ import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Options.Rename; +import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclEntryScope; import org.apache.hadoop.fs.permission.AclEntryType; @@ -136,6 +140,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; @@ -155,10 +160,14 @@ import org.apache.hadoop.hdfs.server.namenode.FSDirectory; import org.apache.hadoop.hdfs.server.namenode.FSEditLog; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.Namesystem; +import org.apache.hadoop.hdfs.server.namenode.XAttrStorage; import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; @@ -175,6 +184,7 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.RefreshUserMappingsProtocol; import org.apache.hadoop.security.ShellBasedUnixGroupsMapping; import org.apache.hadoop.security.UserGroupInformation; @@ -186,6 +196,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.VersionInfo; import org.apache.log4j.Level; +import org.junit.Assert; import org.junit.Assume; import org.apache.hadoop.util.ToolRunner; @@ -194,7 +205,7 @@ /** Utilities for HDFS tests */ public class DFSTestUtil { - private static final Log LOG = LogFactory.getLog(DFSTestUtil.class); + private static final Logger LOG = LoggerFactory.getLogger(DFSTestUtil.class); private static final Random gen = new Random(); private static final String[] dirNames = { @@ -2419,4 +2430,105 @@ public static void verifySnapshotDiffReport(DistributedFileSystem fs, } } } + + /** + * Check whether the Block movement has been successfully + * completed to satisfy the storage policy for the given file. + * @param fileName file name. 
+ * @param expectedStorageType storage type. + * @param expectedStorageCount expected storage type. + * @param timeout timeout. + * @param fs distributedFileSystem. + * @throws Exception + */ + public static void waitExpectedStorageType(String fileName, + final StorageType expectedStorageType, int expectedStorageCount, + int timeout, DistributedFileSystem fs) throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + final LocatedBlock lb; + try { + lb = fs.getClient().getLocatedBlocks(fileName, 0).get(0); + } catch (IOException e) { + LOG.error("Exception while getting located blocks", e); + return false; + } + int actualStorageCount = 0; + for(StorageType type : lb.getStorageTypes()) { + if (expectedStorageType == type) { + actualStorageCount++; + } + } + LOG.info( + expectedStorageType + " replica count, expected=" + + expectedStorageCount + " and actual=" + actualStorageCount); + return expectedStorageCount == actualStorageCount; + } + }, 500, timeout); + } + + /** + * Waits for removal of a specified Xattr on a specified file. + * + * @param srcPath + * file name. + * @param xattr + * name of the extended attribute. + * @param ns + * Namesystem + * @param timeout + * max wait time + * @throws Exception + */ + public static void waitForXattrRemoved(String srcPath, String xattr, + Namesystem ns, int timeout) throws TimeoutException, InterruptedException, + UnresolvedLinkException, AccessControlException, + ParentNotDirectoryException { + final INode inode = ns.getFSDirectory().getINode(srcPath); + final XAttr satisfyXAttr = XAttrHelper.buildXAttr(xattr); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + List existingXAttrs = XAttrStorage.readINodeXAttrs(inode); + return !existingXAttrs.contains(satisfyXAttr); + } + }, 100, timeout); + } + + /** + * Get namenode connector using the given configuration and file path. + * + * @param conf + * hdfs configuration + * @param filePath + * file path + * @param namenodeCount + * number of namenodes + * @param createMoverPath + * create move path flag to skip the path creation + * @return Namenode connector. 
+ * @throws IOException + */ + public static NameNodeConnector getNameNodeConnector(Configuration conf, + Path filePath, int namenodeCount, boolean createMoverPath) + throws IOException { + final Collection namenodes = DFSUtil.getInternalNsRpcUris(conf); + Assert.assertEquals(namenodeCount, namenodes.size()); + NameNodeConnector.checkOtherInstanceRunning(createMoverPath); + while (true) { + try { + final List nncs = NameNodeConnector + .newNameNodeConnectors(namenodes, + StoragePolicySatisfier.class.getSimpleName(), + filePath, conf, + NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS); + return nncs.get(0); + } catch (IOException e) { + LOG.warn("Failed to connect with namenode", e); + // Ignore + } + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/FileAppendTest4.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/FileAppendTest4.java index 9e7b598b9bd..a8f7378ca0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/FileAppendTest4.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/FileAppendTest4.java @@ -19,8 +19,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; @@ -41,7 +41,8 @@ * */ public class FileAppendTest4 { - public static final Log LOG = LogFactory.getLog(FileAppendTest4.class); + public static final Logger LOG = + LoggerFactory.getLogger(FileAppendTest4.class); private static final int BYTES_PER_CHECKSUM = 4; private static final int PACKET_SIZE = BYTES_PER_CHECKSUM; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index a2e59515d8b..11265b81ebe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -67,8 +67,8 @@ import com.google.common.base.Supplier; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -141,7 +141,8 @@ public class MiniDFSCluster implements AutoCloseable { private static final String NAMESERVICE_ID_PREFIX = "nameserviceId"; - private static final Log LOG = LogFactory.getLog(MiniDFSCluster.class); + private static final Logger LOG = + LoggerFactory.getLogger(MiniDFSCluster.class); /** System property to set the data dir: {@value} */ public static final String PROP_TEST_BUILD_DATA = GenericTestUtils.SYSPROP_TEST_DATA_DIR; @@ -2004,7 +2005,7 @@ public void shutdown(boolean deleteDfsDir, boolean closeFileSystem) { LOG.info("Shutting down the Mini HDFS Cluster"); if (checkExitOnShutdown) { if (ExitUtil.terminateCalled()) { - LOG.fatal("Test resulted in an unexpected exit", + LOG.error("Test resulted in an unexpected exit", ExitUtil.getFirstExitException()); ExitUtil.resetFirstExitException(); throw new AssertionError("Test resulted in an 
unexpected exit"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java index 5c011e31ba7..8b4e9e5ef73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java @@ -21,8 +21,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; @@ -39,7 +39,8 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster { private static String[] NODE_GROUPS = null; - private static final Log LOG = LogFactory.getLog(MiniDFSClusterWithNodeGroup.class); + private static final Logger LOG = + LoggerFactory.getLogger(MiniDFSClusterWithNodeGroup.class); public MiniDFSClusterWithNodeGroup(Builder builder) throws IOException { super(builder); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java index 7057010663b..e0e2c8b4023 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.Path; @@ -50,8 +48,7 @@ LoggerFactory.getLogger(ReadStripedFileWithDecodingHelper.class); static { - ((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class)) - .getLogger().setLevel(org.apache.log4j.Level.ALL); + GenericTestUtils.setLogLevel(BlockPlacementPolicy.LOG, Level.DEBUG); GenericTestUtils.setLogLevel(BlockManager.LOG, Level.DEBUG); GenericTestUtils.setLogLevel(BlockManager.blockLog, Level.DEBUG); GenericTestUtils.setLogLevel(NameNode.stateChangeLog, Level.DEBUG); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAbandonBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAbandonBlock.java index 301f6a7d730..e7d8b38aed9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAbandonBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAbandonBlock.java @@ -21,8 +21,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -39,7 +39,8 @@ * Test abandoning blocks, which clients do on pipeline creation failure. 
*/ public class TestAbandonBlock { - public static final Log LOG = LogFactory.getLog(TestAbandonBlock.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestAbandonBlock.class); private static final Configuration CONF = new HdfsConfiguration(); static final String FILE_NAME_PREFIX diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAclsEndToEnd.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAclsEndToEnd.java index 3ff705588a8..105836e1b44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAclsEndToEnd.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAclsEndToEnd.java @@ -28,8 +28,8 @@ import java.net.URI; import java.security.NoSuchAlgorithmException; import java.security.PrivilegedAction; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.kms.KMSClientProvider; @@ -64,8 +64,8 @@ * values before interpreting them.) */ public class TestAclsEndToEnd { - private static final Log LOG = - LogFactory.getLog(TestAclsEndToEnd.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestAclsEndToEnd.class.getName()); private static final String TEXT = "The blue zone is for loading and unloading only. " + "Please park in the red zone."; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendSnapshotTruncate.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendSnapshotTruncate.java index e56f8c7401e..def2ab788f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendSnapshotTruncate.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestAppendSnapshotTruncate.java @@ -34,8 +34,8 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -62,7 +62,8 @@ static { GenericTestUtils.setLogLevel(NameNode.stateChangeLog, Level.ALL); } - private static final Log LOG = LogFactory.getLog(TestAppendSnapshotTruncate.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestAppendSnapshotTruncate.class); private static final int BLOCK_SIZE = 1024; private static final int DATANODE_NUM = 4; private static final short REPLICATION = 3; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBalancerBandwidth.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBalancerBandwidth.java index 6bbe3a10bcc..1310f45fbaf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBalancerBandwidth.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBalancerBandwidth.java @@ -27,8 +27,8 @@ import java.util.concurrent.TimeoutException; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.tools.DFSAdmin; @@ -43,7 +43,8 @@ final static private Configuration conf = new Configuration(); final static private int NUM_OF_DATANODES = 2; final static private int DEFAULT_BANDWIDTH = 1024*1024; - public static final Log LOG = LogFactory.getLog(TestBalancerBandwidth.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestBalancerBandwidth.class); private static final Charset UTF8 = Charset.forName("UTF-8"); private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); private final PrintStream outStream = new PrintStream(outContent); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockMissingException.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockMissingException.java index 7287b5c8be3..e664f991637 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockMissingException.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockMissingException.java @@ -22,8 +22,8 @@ import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -36,7 +36,8 @@ import org.junit.Test; public class TestBlockMissingException { - final static Log LOG = LogFactory.getLog("org.apache.hadoop.hdfs.TestBlockMissing"); + final static Logger LOG = + LoggerFactory.getLogger("org.apache.hadoop.hdfs.TestBlockMissing"); final static int NUM_DATANODES = 3; Configuration conf; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java index ce2e79b73e4..935a6399201 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientReportBadBlock.java @@ -24,8 +24,8 @@ import java.util.Random; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -55,8 +55,8 @@ * replica. 
*/ public class TestClientReportBadBlock { - private static final Log LOG = LogFactory - .getLog(TestClientReportBadBlock.class); + private static final Logger LOG = LoggerFactory + .getLogger(TestClientReportBadBlock.class); static final long BLOCK_SIZE = 64 * 1024; private static int buffersize; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java index 3e0ad6daf2c..85a4d19539a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java @@ -22,8 +22,8 @@ import java.io.IOException; import java.net.InetSocketAddress; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.impl.BlockReaderTestUtil; @@ -35,7 +35,7 @@ * mini-cluster. */ public class TestConnCache { - static final Log LOG = LogFactory.getLog(TestConnCache.class); + static final Logger LOG = LoggerFactory.getLogger(TestConnCache.class); static final int BLOCK_SIZE = 4096; static final int FILE_SIZE = 3 * BLOCK_SIZE; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index c14ebb41dfa..64103b45293 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -34,8 +34,8 @@ import javax.net.SocketFactory; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileContext; @@ -67,7 +67,7 @@ public class TestDFSClientFailover { - private static final Log LOG = LogFactory.getLog(TestDFSClientFailover.class); + private static final Logger LOG = LoggerFactory.getLogger(TestDFSClientFailover.class); private static final Path TEST_FILE = new Path("/tmp/failover-test-file"); private static final int FILE_LENGTH_TO_VERIFY = 100; @@ -239,7 +239,7 @@ private NameService spyOnNameService() { List nsList = (List) f.get(null); NameService ns = nsList.get(0); - Log log = LogFactory.getLog("NameServiceSpy"); + Logger log = LoggerFactory.getLogger("NameServiceSpy"); ns = Mockito.mock(NameService.class, new GenericTestUtils.DelegateAnswer(log, ns)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java index 880e3dfc421..14d0ee00606 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java @@ -50,8 +50,8 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CryptoProtocolVersion; import org.apache.hadoop.fs.ChecksumException; @@ -113,8 +113,8 @@ private static final String ADDRESS = "0.0.0.0"; final static private int PING_INTERVAL = 1000; final static private int MIN_SLEEP_TIME = 1000; - public static final Log LOG = - LogFactory.getLog(TestDFSClientRetries.class.getName()); + public static final Logger LOG = + LoggerFactory.getLogger(TestDFSClientRetries.class.getName()); static private Configuration conf = null; private static class TestServer extends Server { @@ -523,7 +523,7 @@ public Boolean answer(InvocationOnMock invocation) throws Throwable { stm.close(); stm = null; } finally { - IOUtils.cleanup(LOG, stm); + IOUtils.cleanupWithLogger(LOG, stm); } // Make sure the mock was actually properly injected. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSFinalize.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSFinalize.java index d0df9fecf59..bf9e4a0811f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSFinalize.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSFinalize.java @@ -25,8 +25,8 @@ import java.util.Collections; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; @@ -43,7 +43,7 @@ */ public class TestDFSFinalize { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestDFSFinalize"); private Configuration conf; private int testCounter = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java index 1b462a9a8ec..b0b85e75af4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileSystem; @@ -48,7 +48,7 @@ public class TestDFSInotifyEventInputStream { private static final int BLOCK_SIZE = 1024; - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestDFSInotifyEventInputStream.class); public static EventBatch waitForNextEvents(DFSInotifyEventInputStream eis) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java index 2705e67e4fe..15ce06b69fb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java @@ -30,8 +30,8 @@ import java.util.Map; import 
java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -51,7 +51,8 @@ /** Unit tests for permission */ public class TestDFSPermission { - public static final Log LOG = LogFactory.getLog(TestDFSPermission.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestDFSPermission.class); final private static Configuration conf = new HdfsConfiguration(); final private static String GROUP1_NAME = "group1"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java index 8bc8b0df8e3..b2da68ad570 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java @@ -26,8 +26,8 @@ import java.util.Collections; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; @@ -50,7 +50,7 @@ */ public class TestDFSRollback { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestDFSRollback"); private Configuration conf; private int testCounter = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java index b19bdeab57a..5266fe409b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java @@ -38,8 +38,8 @@ import com.google.common.collect.Lists; import org.apache.commons.lang3.RandomStringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.log4j.Level; import org.junit.Test; import org.apache.hadoop.conf.Configuration; @@ -87,7 +87,7 @@ * This class tests commands from DFSShell. 
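
Editorial note: the hunks in this section repeat one mechanical change — the commons-logging Log/LogFactory pair becomes SLF4J's Logger/LoggerFactory, log arguments move to "{}" placeholders, and IOUtils.cleanup(LOG, ...) becomes IOUtils.cleanupWithLogger(LOG, ...). A minimal sketch of the resulting pattern; the class name below is hypothetical, everything else mirrors the hunks:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.io.IOUtils;

public class ExampleMigratedTest {            // hypothetical class, not part of the patch
  // Class-based logger instead of LogFactory.getLog(...)
  private static final Logger LOG =
      LoggerFactory.getLogger(ExampleMigratedTest.class);

  void example(java.io.Closeable stream) {
    // SLF4J takes parameterized messages rather than arbitrary Objects
    LOG.info("closing {}", stream);
    // cleanup(Log, ...) is commons-logging-specific; the tests switch to cleanupWithLogger
    IOUtils.cleanupWithLogger(LOG, stream);
  }
}
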
*/ public class TestDFSShell { - private static final Log LOG = LogFactory.getLog(TestDFSShell.class); + private static final Logger LOG = LoggerFactory.getLogger(TestDFSShell.class); private static final AtomicInteger counter = new AtomicInteger(); private final int SUCCESS = 0; private final int ERROR = 1; @@ -721,6 +721,14 @@ public void testErrOutPut() throws Exception { assertTrue(" -mkdir returned this is a file ", (returned.lastIndexOf("not a directory") != -1)); out.reset(); + argv[0] = "-mkdir"; + argv[1] = "/testParent/testChild"; + ret = ToolRunner.run(shell, argv); + returned = out.toString(); + assertEquals(" -mkdir returned 1", 1, ret); + assertTrue(" -mkdir returned there is No file or directory but has testChild in the path", + (returned.lastIndexOf("testChild") == -1)); + out.reset(); argv = new String[3]; argv[0] = "-mv"; argv[1] = "/testfile"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStartupVersions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStartupVersions.java index 0c09edafd20..7e7f4aa83e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStartupVersions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStartupVersions.java @@ -24,8 +24,8 @@ import java.io.File; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; @@ -41,7 +41,7 @@ */ public class TestDFSStartupVersions { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestDFSStartupVersions"); private MiniDFSCluster cluster = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java index cd51631ba8c..60839dc17bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java @@ -27,8 +27,8 @@ import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.Storage; @@ -44,7 +44,7 @@ */ public class TestDFSStorageStateRecovery { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestDFSStorageStateRecovery"); private Configuration conf = null; private int testCounter = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java index 48ecf9ae5e8..57341714f0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.Block; @@ -62,8 +62,8 @@ public class TestDFSStripedInputStream { - public static final Log LOG = - LogFactory.getLog(TestDFSStripedInputStream.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestDFSStripedInputStream.class); private MiniDFSCluster cluster; private Configuration conf = new Configuration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStreamWithRandomECPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStreamWithRandomECPolicy.java index 568b0184574..48430952525 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStreamWithRandomECPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStreamWithRandomECPolicy.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; /** @@ -28,14 +28,14 @@ public class TestDFSStripedInputStreamWithRandomECPolicy extends TestDFSStripedInputStream { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestDFSStripedInputStreamWithRandomECPolicy.class.getName()); private ErasureCodingPolicy ecPolicy; public TestDFSStripedInputStreamWithRandomECPolicy() { ecPolicy = StripedFileTestUtil.getRandomNonDefaultECPolicy(); - LOG.info(ecPolicy); + LOG.info("{}", ecPolicy.toString()); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java index 4b9e8763880..865a736b0cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java @@ -25,8 +25,8 @@ import java.io.InputStream; import java.util.ArrayList; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -46,7 +46,7 @@ import org.junit.rules.Timeout; public class TestDFSStripedOutputStream { - public static final Log LOG = LogFactory.getLog( + public static final Logger LOG = LoggerFactory.getLogger( TestDFSStripedOutputStream.class); static { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java index 800fac1e798..ff521463f6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java @@ -244,26 +244,29 @@ public void testCloseWithExceptionsInStreamer() throws Exception { // Full stripe and a partial on non-cell boundary (cellSize * dataBlocks) + 123, }; - try { - for (int length: fileLengths) { - // select the two DNs with partial block to kill - final int[] dnIndex = {dataBlocks - 2, dataBlocks - 1}; - final int[] killPos = getKillPositions(length, dnIndex.length); - try { - LOG.info("runTestWithMultipleFailure2: length==" + length - + ", killPos=" + Arrays.toString(killPos) - + ", dnIndex=" + Arrays.toString(dnIndex)); - setup(conf); - runTest(length, killPos, dnIndex, false); - } catch (Throwable e) { - final String err = "failed, killPos=" + Arrays.toString(killPos) - + ", dnIndex=" + Arrays.toString(dnIndex) + ", length=" + length; - LOG.error(err); - throw e; - } + // select the two DNs with partial block to kill + int[] dnIndex = null; + if (parityBlocks > 1) { + dnIndex = new int[] {dataBlocks - 2, dataBlocks - 1}; + } else { + dnIndex = new int[] {dataBlocks - 1}; + } + for (int length : fileLengths) { + final int[] killPos = getKillPositions(length, dnIndex.length); + try { + LOG.info("runTestWithMultipleFailure2: length==" + length + ", killPos=" + + Arrays.toString(killPos) + ", dnIndex=" + + Arrays.toString(dnIndex)); + setup(conf); + runTest(length, killPos, dnIndex, false); + } catch (Throwable e) { + final String err = "failed, killPos=" + Arrays.toString(killPos) + + ", dnIndex=" + Arrays.toString(dnIndex) + ", length=" + length; + LOG.error(err); + throw e; + } finally { + tearDown(); } - } finally { - tearDown(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailureWithRandomECPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailureWithRandomECPolicy.java index f6711498cc4..cfa7ad5d0c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailureWithRandomECPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailureWithRandomECPolicy.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.erasurecode.ECSchema; /** @@ -30,12 +30,12 @@ private final ECSchema schema; - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestDFSStripedOutputStreamWithRandomECPolicy.class.getName()); public TestDFSStripedOutputStreamWithFailureWithRandomECPolicy() { schema = StripedFileTestUtil.getRandomNonDefaultECPolicy().getSchema(); - LOG.info(schema); + LOG.info("{}", schema.toString()); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithRandomECPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithRandomECPolicy.java index 9a783cdabd2..eddfda5fcae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithRandomECPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithRandomECPolicy.java @@ -17,8 +17,8 @@ */ package 
org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; /** @@ -28,14 +28,14 @@ public class TestDFSStripedOutputStreamWithRandomECPolicy extends TestDFSStripedOutputStream { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestDFSStripedOutputStreamWithRandomECPolicy.class.getName()); private ErasureCodingPolicy ecPolicy; public TestDFSStripedOutputStreamWithRandomECPolicy() { ecPolicy = StripedFileTestUtil.getRandomNonDefaultECPolicy(); - LOG.info(ecPolicy); + LOG.info("{}", ecPolicy.toString()); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java index 0d9f50258f3..1c33cc4b5fb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java @@ -31,8 +31,8 @@ import java.io.IOException; import java.util.regex.Pattern; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; @@ -62,7 +62,8 @@ // TODO: Avoid hard-coding expected_txid. The test should be more robust. private static final int EXPECTED_TXID = 61; - private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestDFSUpgrade.class.getName()); private Configuration conf; private int testCounter = 0; private MiniDFSCluster cluster = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java index e42e08cf77e..5469ebbb757 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java @@ -29,8 +29,7 @@ import java.util.TreeMap; import java.util.zip.CRC32; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileStatus; @@ -65,8 +64,8 @@ */ public class TestDFSUpgradeFromImage { - private static final Log LOG = LogFactory - .getLog(TestDFSUpgradeFromImage.class); + private static final org.slf4j.Logger LOG = LoggerFactory + .getLogger(TestDFSUpgradeFromImage.class); private static final File TEST_ROOT_DIR = new File(MiniDFSCluster.getBaseDirectory()); private static final String HADOOP_DFS_DIR_TXT = "hadoop-dfs-dir.txt"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataStream.java index 3351b68373c..c57ef941f0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataStream.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataStream.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.Random; -import org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -48,8 +48,8 @@ public static void setup() throws IOException { @Test(timeout = 60000) public void testDfsClient() throws IOException, InterruptedException { - LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(LogFactory - .getLog(DataStreamer.class)); + LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(LoggerFactory + .getLogger(DataStreamer.class)); byte[] toWrite = new byte[PACKET_SIZE]; new Random(1).nextBytes(toWrite); final Path path = new Path("/file1"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java index 7a2ac1ba3f0..b9da5f446f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java @@ -33,8 +33,8 @@ import java.nio.ByteBuffer; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -73,7 +73,7 @@ */ public class TestDataTransferProtocol { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestDataTransferProtocol"); private static final DataChecksum DEFAULT_CHECKSUM = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java index 6421e8b42a9..37042dbb176 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.client.BlockReportOptions; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -56,7 +56,8 @@ */ public class TestDatanodeRegistration { - public static final Log LOG = LogFactory.getLog(TestDatanodeRegistration.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestDatanodeRegistration.class); private static class MonitorDNS extends SecurityManager { int lookups = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java index fea377f2a4a..b4d6fc99503 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java @@ -26,8 +26,8 @@ import 
java.util.Comparator; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; @@ -49,7 +49,7 @@ * This test ensures the all types of data node report work correctly. */ public class TestDatanodeReport { - static final Log LOG = LogFactory.getLog(TestDatanodeReport.class); + static final Logger LOG = LoggerFactory.getLogger(TestDatanodeReport.class); final static private Configuration conf = new HdfsConfiguration(); final static private int NUM_OF_DATANODES = 4; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java index 42b4257d710..bd266ed9053 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java @@ -38,7 +38,7 @@ import com.google.common.base.Supplier; import com.google.common.collect.Lists; -import org.apache.commons.lang3.text.StrBuilder; +import org.apache.commons.text.TextStringBuilder; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; @@ -661,7 +661,7 @@ public void testDecommissionWithOpenfile() } private static String scanIntoString(final ByteArrayOutputStream baos) { - final StrBuilder sb = new StrBuilder(); + final TextStringBuilder sb = new TextStringBuilder(); final Scanner scanner = new Scanner(baos.toString()); while (scanner.hasNextLine()) { sb.appendln(scanner.nextLine()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java index c9d831a29ab..51a28d294f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java @@ -19,8 +19,8 @@ import static org.junit.Assert.assertEquals; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; @@ -32,7 +32,7 @@ * mini-cluster. 
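
Alongside the logging swap, the TestDecommission hunk above moves from the deprecated org.apache.commons.lang3.text.StrBuilder to commons-text's TextStringBuilder. A small self-contained sketch of the scanIntoString idiom with the new class; the body mirrors the hunk, but taking a String argument (instead of a ByteArrayOutputStream) is an assumption made here for brevity:

import java.util.Scanner;
import org.apache.commons.text.TextStringBuilder;

static String scanIntoString(String captured) {
  final TextStringBuilder sb = new TextStringBuilder();
  try (Scanner scanner = new Scanner(captured)) {
    while (scanner.hasNextLine()) {
      sb.appendln(scanner.nextLine());   // appendln carries over unchanged from StrBuilder
    }
  }
  return sb.toString();
}
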
*/ public class TestDisableConnCache { - static final Log LOG = LogFactory.getLog(TestDisableConnCache.class); + static final Logger LOG = LoggerFactory.getLogger(TestDisableConnCache.class); static final int BLOCK_SIZE = 4096; static final int FILE_SIZE = 3 * BLOCK_SIZE; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index f09255e525b..cae0fbf0191 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -100,12 +100,12 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.concurrent.HadoopExecutors; -import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; import org.mockito.InOrder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.event.Level; public class TestDistributedFileSystem { private static final Random RAN = new Random(); @@ -113,7 +113,8 @@ TestDistributedFileSystem.class); static { - GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(LeaseRenewer.LOG, Level.DEBUG); } private boolean dualPortTesting = false; @@ -706,6 +707,7 @@ public void testStatistics() throws IOException { // Iterative ls test long mkdirOp = getOpStatistics(OpType.MKDIRS); long listStatusOp = getOpStatistics(OpType.LIST_STATUS); + long locatedListStatusOP = getOpStatistics(OpType.LIST_LOCATED_STATUS); for (int i = 0; i < 10; i++) { Path p = new Path(dir, Integer.toString(i)); fs.mkdirs(p); @@ -729,6 +731,12 @@ public void testStatistics() throws IOException { checkStatistics(fs, readOps, ++writeOps, largeReadOps); checkOpStatistics(OpType.MKDIRS, mkdirOp); checkOpStatistics(OpType.LIST_STATUS, listStatusOp); + + fs.listLocatedStatus(dir); + locatedListStatusOP++; + readOps++; + checkStatistics(fs, readOps, writeOps, largeReadOps); + checkOpStatistics(OpType.LIST_LOCATED_STATUS, locatedListStatusOP); } opCount = getOpStatistics(OpType.GET_STATUS); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptedTransfer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptedTransfer.java index 27a5b77bcfd..59230edeb8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptedTransfer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptedTransfer.java @@ -33,8 +33,8 @@ import java.util.concurrent.TimeoutException; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -84,7 +84,8 @@ return params; } - private static final Log LOG = LogFactory.getLog(TestEncryptedTransfer.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestEncryptedTransfer.class); private static final String PLAIN_TEXT = "this is very secret plain text"; private static final Path TEST_PATH = new Path("/non-encrypted-file"); @@ -167,9 +168,9 @@ private void testEncryptedRead(String 
algorithm, String cipherSuite, FileChecksum checksum = writeUnencryptedAndThenRestartEncryptedCluster(); LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs( - LogFactory.getLog(SaslDataTransferServer.class)); + LoggerFactory.getLogger(SaslDataTransferServer.class)); LogCapturer logs1 = GenericTestUtils.LogCapturer.captureLogs( - LogFactory.getLog(DataTransferSaslUtil.class)); + LoggerFactory.getLogger(DataTransferSaslUtil.class)); try { assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH)); assertEquals(checksum, fs.getFileChecksum(TEST_PATH)); @@ -238,7 +239,7 @@ public void testClientThatDoesNotSupportEncryption() throws IOException { DFSClientAdapter.setDFSClient((DistributedFileSystem) fs, spyClient); LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs( - LogFactory.getLog(DataNode.class)); + LoggerFactory.getLogger(DataNode.class)); try { assertEquals(PLAIN_TEXT, DFSTestUtil.readFile(fs, TEST_PATH)); if (resolverClazz != null && @@ -458,9 +459,9 @@ private void testEncryptedWrite(int numDns) throws IOException { fs = getFileSystem(conf); LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs( - LogFactory.getLog(SaslDataTransferServer.class)); + LoggerFactory.getLogger(SaslDataTransferServer.class)); LogCapturer logs1 = GenericTestUtils.LogCapturer.captureLogs( - LogFactory.getLog(DataTransferSaslUtil.class)); + LoggerFactory.getLogger(DataTransferSaslUtil.class)); try { writeTestDataToFile(fs); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java index 7d97cce0b90..835d18f3a08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java @@ -19,6 +19,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -153,6 +154,19 @@ public void testReplicatedFileUnderECDir() throws IOException { assertNotNull(files[1].getErasureCodingPolicy()); } + @Test + public void testContentSummaryOfECSubdir() throws IOException { + final Path testDir = new Path("/ec"); + fs.mkdir(testDir, FsPermission.getDirDefault()); + fs.setErasureCodingPolicy(testDir, ecPolicy.getName()); + final Path fPath = new Path("ec/file"); + fs.create(fPath).close(); + final Path subdir = new Path("/ec/sub"); + fs.mkdir(subdir, FsPermission.getDirDefault()); + ContentSummary contentSummary = fs.getContentSummary(subdir); + assertEquals(ecPolicy.getName(),contentSummary.getErasureCodingPolicy()); + } + @Test public void testBasicSetECPolicy() throws IOException, InterruptedException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java index 8acf4bf2868..85e25dd687a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs; import com.google.common.primitives.Ints; -import org.apache.commons.logging.Log; 
-import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; @@ -39,8 +39,8 @@ import java.util.UUID; public class TestExternalBlockReader { - private static final Log LOG = - LogFactory.getLog(TestExternalBlockReader.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestExternalBlockReader.class); private static long SEED = 1234; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend4.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend4.java index 40bc314dee1..ff0fd1964b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend4.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend4.java @@ -32,8 +32,8 @@ import java.util.List; import java.util.concurrent.atomic.AtomicReference; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -57,7 +57,7 @@ * using append()/sync() to recover block information */ public class TestFileAppend4 { - static final Log LOG = LogFactory.getLog(TestFileAppend4.class); + static final Logger LOG = LoggerFactory.getLogger(TestFileAppend4.class); static final long BLOCK_SIZE = 1024; static final long BBW_SIZE = 500; // don't align on bytes/checksum diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSServerPorts.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSServerPorts.java index 12d92538a92..c26c648fd9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSServerPorts.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSServerPorts.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -50,7 +50,8 @@ * a free port and start on it. 
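
Two GenericTestUtils helpers change shape along with the loggers, as the TestDataStream, TestDistributedFileSystem and TestEncryptedTransfer hunks above show: setLogLevel now accepts org.slf4j.event.Level (which has no ALL constant, hence the moves to TRACE), and LogCapturer.captureLogs takes an org.slf4j.Logger. A condensed fragment of both usages, assuming it sits inside a test class with the usual HDFS imports (DFSClient, SaslDataTransferServer):

import org.slf4j.LoggerFactory;
import org.slf4j.event.Level;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;

// Raise verbosity on an SLF4J-backed logger; TRACE is the closest match for log4j's ALL.
GenericTestUtils.setLogLevel(DFSClient.LOG, Level.TRACE);

// Capture what a server-side class logs during the test and inspect it afterwards.
LogCapturer logs = LogCapturer.captureLogs(
    LoggerFactory.getLogger(SaslDataTransferServer.class));
try {
  // ... exercise the code path under test ...
} finally {
  logs.stopCapturing();
}
String captured = logs.getOutput();  // assertions on the captured text go here
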
*/ public class TestHDFSServerPorts { - public static final Log LOG = LogFactory.getLog(TestHDFSServerPorts.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestHDFSServerPorts.class); // reset default 0.0.0.0 addresses in order to avoid IPv6 problem static final String THIS_HOST = getFullHostName() + ":0"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java index b81cdb13205..fb28726b63a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java @@ -24,8 +24,8 @@ import java.io.IOException; import java.util.UUID; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; @@ -46,7 +46,7 @@ */ public class TestHDFSTrash { - public static final Log LOG = LogFactory.getLog(TestHDFSTrash.class); + public static final Logger LOG = LoggerFactory.getLogger(TestHDFSTrash.class); private static MiniDFSCluster cluster = null; private static FileSystem fs; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestInjectionForSimulatedStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestInjectionForSimulatedStorage.java index e49cf5838cb..a8affa26273 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestInjectionForSimulatedStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestInjectionForSimulatedStorage.java @@ -26,8 +26,8 @@ import java.util.Map; import java.util.Set; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.Block; @@ -50,7 +50,7 @@ private final int numBlocks = 4; private final int filesize = blockSize*numBlocks; private final int numDataNodes = 4; - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestInjectionForSimulatedStorage"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java index a37da35a893..137571cac43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java @@ -22,8 +22,8 @@ import java.io.IOException; import java.util.Arrays; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -45,11 +45,13 @@ GenericTestUtils.setLogLevel(TestLargeBlock.LOG, Level.ALL); } */ - private static final Log LOG = LogFactory.getLog(TestLargeBlock.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestLargeBlock.class); // should we 
verify the data read back from the file? (slow) static final boolean verifyData = true; static final byte[] pattern = { 'D', 'E', 'A', 'D', 'B', 'E', 'E', 'F'}; + static final int numDatanodes = 3; // creates a file static FSDataOutputStream createFile(FileSystem fileSys, Path name, int repl, @@ -158,7 +160,7 @@ static void checkFullFile(FileSystem fs, Path name, final long fileSize) * timeout here. * @throws IOException in case of errors */ - @Test (timeout = 900000) + @Test (timeout = 1800000) public void testLargeBlockSize() throws IOException { final long blockSize = 2L * 1024L * 1024L * 1024L + 512L; // 2GB + 512B runTest(blockSize); @@ -175,7 +177,8 @@ public void runTest(final long blockSize) throws IOException { final long fileSize = blockSize + 1L; Configuration conf = new Configuration(); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numDatanodes).build(); FileSystem fs = cluster.getFileSystem(); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java index fa74fadd892..381fe7148b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java @@ -31,8 +31,8 @@ import java.io.IOException; import java.security.PrivilegedExceptionAction; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CryptoProtocolVersion; import org.apache.hadoop.fs.CreateFlag; @@ -69,7 +69,7 @@ static int leaseCount(MiniDFSCluster cluster) { static final String dirString = "/test/lease"; final Path dir = new Path(dirString); - static final Log LOG = LogFactory.getLog(TestLease.class); + static final Logger LOG = LoggerFactory.getLogger(TestLease.class); final Configuration conf = new HdfsConfiguration(); @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java index a96d8b3b431..d65fed2b934 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java @@ -25,12 +25,13 @@ import static org.mockito.Mockito.spy; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -61,7 +62,8 @@ public class TestLeaseRecovery2 { - public static final Log LOG = LogFactory.getLog(TestLeaseRecovery2.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestLeaseRecovery2.class); { GenericTestUtils.setLogLevel(DataNode.LOG, Level.TRACE); @@ -163,6 +165,70 @@ public void testImmediateRecoveryOfLease() throws Exception { verifyFile(dfs, filepath1, actual, size); } + @Test + public void testCloseWhileRecoverLease() throws Exception { + 
// test recoverLease + // set the soft limit to be 1 hour but recoverLease should + // close the file immediately + cluster.setLeasePeriod(LONG_LEASE_PERIOD, LONG_LEASE_PERIOD); + int size = AppendTestUtil.nextInt(FILE_SIZE); + String filestr = "/testCloseWhileRecoverLease"; + + AppendTestUtil.LOG.info("filestr=" + filestr); + Path filepath = new Path(filestr); + FSDataOutputStream stm = dfs.create(filepath, true, BUF_SIZE, + REPLICATION_NUM, BLOCK_SIZE); + assertTrue(dfs.dfs.exists(filestr)); + + // hflush file + AppendTestUtil.LOG.info("hflush"); + stm.hflush(); + + // Pause DN block report. + // Let client recover lease, and then close the file, and then let DN + // report blocks. + ArrayList dataNodes = cluster.getDataNodes(); + for (DataNode dn: dataNodes) { + DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, false); + } + + LOG.info("pause IBR"); + for (DataNode dn: dataNodes) { + DataNodeTestUtils.pauseIBR(dn); + } + + AppendTestUtil.LOG.info("size=" + size); + stm.write(buffer, 0, size); + + // hflush file + AppendTestUtil.LOG.info("hflush"); + stm.hflush(); + + LOG.info("recover lease"); + dfs.recoverLease(filepath); + try { + stm.close(); + fail("close() should fail because the file is under recovery."); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "whereas it is under recovery", ioe); + } + + for (DataNode dn: dataNodes) { + DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, false); + } + + LOG.info("trigger heartbeats"); + // resume DN block report + for (DataNode dn: dataNodes) { + DataNodeTestUtils.triggerHeartbeat(dn); + } + + stm.close(); + assertEquals(cluster.getNamesystem().getBlockManager(). + getMissingBlocksCount(), 0); + } + @Test public void testLeaseRecoverByAnotherUser() throws Exception { byte [] actual = new byte[FILE_SIZE]; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java index ca2fe92ea2c..f0dd5a0bc38 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataInputStream; @@ -44,8 +44,8 @@ */ public class TestMissingBlocksAlert { - private static final Log LOG = - LogFactory.getLog(TestMissingBlocksAlert.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestMissingBlocksAlert.class); @Test public void testMissingBlocksAlert() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java index d54164fc3c0..a3f4dbc7b85 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java @@ -25,8 +25,8 @@ import java.nio.ByteBuffer; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; 
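
The new TestLeaseRecovery2#testCloseWhileRecoverLease above exercises a specific ordering: incremental block reports are held back, recoverLease is issued, and close() on the still-open stream must fail until the DataNodes report and recovery completes. The skeleton of that ordering, reduced to its control flow, with helper names exactly as in the hunk:

// Hold back incremental block reports so lease recovery cannot finish yet.
for (DataNode dn : cluster.getDataNodes()) {
  DataNodeTestUtils.pauseIBR(dn);
}

dfs.recoverLease(filepath);          // NameNode starts lease recovery

try {
  stm.close();                       // must fail: the file is still under recovery
  fail("close() should fail because the file is under recovery.");
} catch (IOException ioe) {
  GenericTestUtils.assertExceptionContains("whereas it is under recovery", ioe);
}

// Let the DataNodes heartbeat and report so recovery can complete; close now succeeds.
for (DataNode dn : cluster.getDataNodes()) {
  DataNodeTestUtils.triggerHeartbeat(dn);
}
stm.close();
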
import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.impl.BlockReaderTestUtil; import org.apache.hadoop.hdfs.server.datanode.DataNode; @@ -46,7 +46,7 @@ @Ignore public class TestParallelReadUtil { - static final Log LOG = LogFactory.getLog(TestParallelReadUtil.class); + static final Logger LOG = LoggerFactory.getLogger(TestParallelReadUtil.class); static BlockReaderTestUtil util = null; static DFSClient dfsClient = null; static final int FILE_SIZE_K = 256; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPipelines.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPipelines.java index 5804d35fef7..7125b0e7c84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPipelines.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPipelines.java @@ -24,8 +24,8 @@ import java.util.List; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -41,7 +41,7 @@ import org.junit.Test; public class TestPipelines { - public static final Log LOG = LogFactory.getLog(TestPipelines.class); + public static final Logger LOG = LoggerFactory.getLogger(TestPipelines.class); private static final short REPL_FACTOR = 3; private static final int RAND_LIMIT = 2000; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java index 756adbe0ef1..4c864b384a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.Path; @@ -38,8 +38,8 @@ * in the block locations returned by the NameNode). 
*/ public class TestReadStripedFileWithMissingBlocks { - public static final Log LOG = LogFactory - .getLog(TestReadStripedFileWithMissingBlocks.class); + public static final Logger LOG = LoggerFactory + .getLogger(TestReadStripedFileWithMissingBlocks.class); private MiniDFSCluster cluster; private DistributedFileSystem fs; private DFSClient dfsClient; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java index 2adddb6156c..2abfff7876c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java @@ -34,8 +34,8 @@ import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; @@ -66,7 +66,8 @@ import org.junit.Test; public class TestReconstructStripedFile { - public static final Log LOG = LogFactory.getLog(TestReconstructStripedFile.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestReconstructStripedFile.class); private ErasureCodingPolicy ecPolicy; private int dataBlkNum; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeFailureReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeFailureReplication.java index 9591cb4347b..432a29794c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeFailureReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeFailureReplication.java @@ -21,8 +21,8 @@ import java.util.Arrays; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; @@ -47,8 +47,8 @@ * if a replacement could not be found. 
*/ public class TestReplaceDatanodeFailureReplication { - static final Log LOG = LogFactory - .getLog(TestReplaceDatanodeFailureReplication.class); + static final Logger LOG = LoggerFactory + .getLogger(TestReplaceDatanodeFailureReplication.class); static final String DIR = "/" + TestReplaceDatanodeFailureReplication.class.getSimpleName() + "/"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeOnFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeOnFailure.java index aa5c70faa5c..2e455f7d346 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeOnFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplaceDatanodeOnFailure.java @@ -23,8 +23,8 @@ import java.util.Arrays; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -45,7 +45,8 @@ * This class tests that data nodes are correctly replaced on failure. */ public class TestReplaceDatanodeOnFailure { - static final Log LOG = LogFactory.getLog(TestReplaceDatanodeOnFailure.class); + static final Logger LOG = + LoggerFactory.getLogger(TestReplaceDatanodeOnFailure.class); static final String DIR = "/" + TestReplaceDatanodeOnFailure.class.getSimpleName() + "/"; static final short REPLICATION = 3; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java index 0f1bedd02e3..7c5a0ced3b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java @@ -33,8 +33,8 @@ import java.util.List; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; @@ -78,8 +78,8 @@ "/d1/r1", "/d1/r1", "/d1/r2", "/d1/r2", "/d1/r2", "/d2/r3", "/d2/r3" }; private static final int numDatanodes = racks.length; - private static final Log LOG = LogFactory.getLog( - "org.apache.hadoop.hdfs.TestReplication"); + private static final Logger LOG = LoggerFactory.getLogger( + TestReplication.class); /* check if there are at least two nodes are on the same rack */ private void checkFile(FileSystem fileSys, Path name, int repl) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java index 0545b040f3d..a6b26065d19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java @@ -32,8 +32,8 @@ import javax.management.ReflectionException; import javax.management.openmbean.CompositeDataSupport; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; 
+import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -66,7 +66,8 @@ * This class tests rolling upgrade. */ public class TestRollingUpgrade { - private static final Log LOG = LogFactory.getLog(TestRollingUpgrade.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestRollingUpgrade.class); public static void runCmd(DFSAdmin dfsadmin, boolean success, String... args) throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index f25d28f22c3..0fde81ecdfe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -29,8 +29,8 @@ import java.security.PrivilegedExceptionAction; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -66,7 +66,7 @@ * Tests to verify safe mode correctness. */ public class TestSafeMode { - public static final Log LOG = LogFactory.getLog(TestSafeMode.class); + public static final Logger LOG = LoggerFactory.getLogger(TestSafeMode.class); private static final Path TEST_PATH = new Path("/test"); private static final int BLOCK_SIZE = 1024; private static final String NEWLINE = System.getProperty("line.separator"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java index 52cf163e26d..4f33ce75962 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java @@ -17,12 +17,13 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.NoECPolicySetException; import org.apache.hadoop.io.erasurecode.CodecUtil; import org.apache.hadoop.io.erasurecode.ErasureCodeNative; import org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawErasureCoderFactory; @@ -44,8 +45,8 @@ */ public class TestUnsetAndChangeDirectoryEcPolicy { - public static final Log LOG = - LogFactory.getLog(TestUnsetAndChangeDirectoryEcPolicy.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestUnsetAndChangeDirectoryEcPolicy.class); private MiniDFSCluster cluster; private Configuration conf = new Configuration(); @@ -98,7 +99,11 @@ public void testUnsetEcPolicy() throws Exception { fs.mkdirs(dirPath); // Test unset a directory which has no EC policy - fs.unsetErasureCodingPolicy(dirPath); + try { + fs.unsetErasureCodingPolicy(dirPath); + fail(); + } catch (NoECPolicySetException e) { + } // Set EC policy on directory 
fs.setErasureCodingPolicy(dirPath, ecPolicy.getName()); @@ -126,8 +131,8 @@ public void testUnsetEcPolicy() throws Exception { } /* - * Test nested directory with different EC policy. - */ + * Test nested directory with different EC policy. + */ @Test public void testNestedEcPolicy() throws Exception { final int numBlocks = 1; @@ -199,7 +204,11 @@ public void testUnsetRootDirEcPolicy() throws Exception { final Path replicateFilePath = new Path(rootPath, "rep_file"); // Test unset root path which has no EC policy - fs.unsetErasureCodingPolicy(rootPath); + try { + fs.unsetErasureCodingPolicy(rootPath); + fail(); + } catch (NoECPolicySetException e) { + } // Set EC policy on root path fs.setErasureCodingPolicy(rootPath, ecPolicy.getName()); DFSTestUtil.createFile(fs, ecFilePath, fileLen, (short) 1, 0L); @@ -238,7 +247,11 @@ public void testChangeRootDirEcPolicy() throws Exception { final ErasureCodingPolicy ec32Policy = SystemErasureCodingPolicies .getByID(SystemErasureCodingPolicies.RS_3_2_POLICY_ID); - fs.unsetErasureCodingPolicy(rootPath); + try { + fs.unsetErasureCodingPolicy(rootPath); + fail(); + } catch (NoECPolicySetException e) { + } fs.setErasureCodingPolicy(rootPath, ecPolicy.getName()); // Create RS(6,3) EC policy file DFSTestUtil.createFile(fs, ec63FilePath, fileLen, (short) 1, 0L); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteRead.java index 623dafff8b9..3a9065a152f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteRead.java @@ -21,8 +21,8 @@ import java.io.IOException; import java.util.EnumSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; @@ -64,7 +64,8 @@ private boolean truncateOption = false; private final boolean abortTestOnFailure = true; - static private Log LOG = LogFactory.getLog(TestWriteRead.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestWriteRead.class); @Before public void initJunitModeTest() throws Exception { @@ -95,7 +96,6 @@ public void shutdown() { // Equivalence of @Before for cluster mode testing. 
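Editor's note: most hunks in this section apply one mechanical migration, replacing commons-logging's Log/LogFactory with SLF4J's Logger/LoggerFactory. The sketch below is illustrative only and not part of the patch (the class name is hypothetical); it shows the resulting declaration style together with SLF4J's parameterized messages, which avoid building strings when the log level is disabled.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Hypothetical sketch; not part of this patch. */
public class ExampleSlf4jLogging {
  // Declared against the SLF4J API rather than commons-logging.
  private static final Logger LOG =
      LoggerFactory.getLogger(ExampleSlf4jLogging.class);

  public static void main(String[] args) {
    // {} placeholders defer message construction until the level is enabled.
    LOG.info("Started with {} argument(s)", args.length);
    try {
      Integer.parseInt("not-a-number");
    } catch (NumberFormatException e) {
      // A trailing Throwable argument is logged with its stack trace.
      LOG.error("Failed to parse input {}", "not-a-number", e);
    }
  }
}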
private void initClusterModeTest() throws IOException { - LOG = LogFactory.getLog(TestWriteRead.class); LOG.info("initClusterModeTest"); conf = new Configuration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java index 805bcea85bb..49f578a823e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.event.Level; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; @@ -32,7 +32,6 @@ import org.apache.hadoop.hdfs.web.WebHdfsTestUtil; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -46,7 +45,8 @@ import java.util.Random; public class TestWriteReadStripedFile { - public static final Log LOG = LogFactory.getLog(TestWriteReadStripedFile.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestWriteReadStripedFile.class); private final ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByID( SystemErasureCodingPolicies.RS_3_2_POLICY_ID); @@ -63,11 +63,10 @@ private Configuration conf = new HdfsConfiguration(); static { - GenericTestUtils.setLogLevel(DFSOutputStream.LOG, Level.ALL); - GenericTestUtils.setLogLevel(DataStreamer.LOG, Level.ALL); - GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); - ((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class)) - .getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DFSOutputStream.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(DataStreamer.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(BlockPlacementPolicy.LOG, Level.TRACE); } @Rule diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteStripedFileWithFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteStripedFileWithFailure.java index c859b710976..76893615f99 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteStripedFileWithFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteStripedFileWithFailure.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -34,8 +34,8 @@ import java.util.concurrent.atomic.AtomicInteger; public class TestWriteStripedFileWithFailure { - public static final Log LOG = LogFactory - .getLog(TestWriteStripedFileWithFailure.class); + public static final Logger LOG = LoggerFactory + .getLogger(TestWriteStripedFileWithFailure.class); private MiniDFSCluster cluster; private FileSystem fs; private Configuration conf = new 
HdfsConfiguration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java index 26d96b2309e..42b1928e46e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.net; import com.google.common.collect.Sets; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.DFSTestUtil; @@ -45,8 +45,8 @@ * DFSNetworkTopology. */ public class TestDFSNetworkTopology { - private static final Log LOG = - LogFactory.getLog(TestDFSNetworkTopology.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDFSNetworkTopology.class); private final static DFSNetworkTopology CLUSTER = DFSNetworkTopology.getInstance(new Configuration()); private DatanodeDescriptor[] dataNodes; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/TestLocatedBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/TestLocatedBlock.java index e349da22947..3546c89938f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/TestLocatedBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/TestLocatedBlock.java @@ -18,15 +18,16 @@ package org.apache.hadoop.hdfs.protocol; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.junit.Test; import static org.junit.Assert.fail; public class TestLocatedBlock { - public static final Log LOG = LogFactory.getLog(TestLocatedBlock.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestLocatedBlock.class); @Test(timeout = 10000) public void testAddCachedLocWhenEmpty() { @@ -43,4 +44,4 @@ public void testAddCachedLocWhenEmpty() { LOG.info("Expected exception:", e); } } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java index efbc1d846c5..d6612c1c709 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java @@ -34,7 +34,7 @@ import java.net.SocketTimeoutException; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; @@ -139,7 +139,7 @@ public void testServerSaslNoClientSasl() throws Exception { clientConf.set(DFS_DATA_TRANSFER_PROTECTION_KEY, ""); LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs( - 
LogFactory.getLog(DataNode.class)); + LoggerFactory.getLogger(DataNode.class)); try { doTest(clientConf); Assert.fail("Should fail if SASL data transfer protection is not " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java index f936d75f06f..b81b710c009 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java @@ -29,8 +29,8 @@ import java.util.concurrent.TimeoutException; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -92,7 +92,8 @@ private JNInfo(JournalNode node) { } } - private static final Log LOG = LogFactory.getLog(MiniJournalCluster.class); + private static final Logger LOG = + LoggerFactory.getLogger(MiniJournalCluster.class); private final File baseDir; private final JNInfo[] nodes; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java index f1f74dcb149..6a68bd43312 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.qjournal; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -38,7 +38,8 @@ private MiniDFSCluster cluster; private MiniJournalCluster journalCluster; private final Configuration conf; - private static final Log LOG = LogFactory.getLog(MiniQJMHACluster.class); + private static final Logger LOG = + LoggerFactory.getLogger(MiniQJMHACluster.class); public static final String NAMESERVICE = "ns1"; private static final Random RANDOM = new Random(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java index 18adc4ecff8..8e8bb22229a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hdfs.qjournal; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import static org.junit.Assert.*; import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java index 5101a41f0e5..a8099cd46c1 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java @@ -24,8 +24,8 @@ import java.net.URI; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster; import org.apache.hadoop.hdfs.qjournal.client.AsyncLogger; @@ -41,7 +41,8 @@ public class TestEpochsAreUnique { - private static final Log LOG = LogFactory.getLog(TestEpochsAreUnique.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestEpochsAreUnique.class); private static final String JID = "testEpochsAreUnique-jid"; private static final NamespaceInfo FAKE_NSINFO = new NamespaceInfo( 12345, "mycluster", "my-bp", 0L); @@ -56,7 +57,7 @@ public void testSingleThreaded() throws IOException { QuorumJournalManager qjm = new QuorumJournalManager( conf, uri, FAKE_NSINFO); try { - qjm.format(FAKE_NSINFO); + qjm.format(FAKE_NSINFO, false); } finally { qjm.close(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestIPCLoggerChannel.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestIPCLoggerChannel.java index ba51372222f..d64968651ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestIPCLoggerChannel.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestIPCLoggerChannel.java @@ -24,8 +24,8 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.qjournal.client.IPCLoggerChannel; @@ -43,7 +43,7 @@ import com.google.common.base.Supplier; public class TestIPCLoggerChannel { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestIPCLoggerChannel.class); private final Configuration conf = new Configuration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java index 6ad43f5835e..946358c7a61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java @@ -37,8 +37,8 @@ import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster; @@ -67,7 +67,7 @@ public class TestQJMWithFaults { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestQJMWithFaults.class); private static final 
String RAND_SEED_PROPERTY = @@ -105,7 +105,7 @@ private static long determineMaxIpcNumber() throws Exception { long ret; try { qjm = createInjectableQJM(cluster); - qjm.format(FAKE_NSINFO); + qjm.format(FAKE_NSINFO, false); doWorkload(cluster, qjm); SortedSet ipcCounts = Sets.newTreeSet(); @@ -156,7 +156,7 @@ public void testRecoverAfterDoubleFailures() throws Exception { QuorumJournalManager qjm = null; try { qjm = createInjectableQJM(cluster); - qjm.format(FAKE_NSINFO); + qjm.format(FAKE_NSINFO, false); List loggers = qjm.getLoggerSetForTests().getLoggersForTests(); failIpcNumber(loggers.get(0), failA); failIpcNumber(loggers.get(1), failB); @@ -240,7 +240,7 @@ public void testRandomized() throws Exception { // Format the cluster using a non-faulty QJM. QuorumJournalManager qjmForInitialFormat = createInjectableQJM(cluster); - qjmForInitialFormat.format(FAKE_NSINFO); + qjmForInitialFormat.format(FAKE_NSINFO, false); qjmForInitialFormat.close(); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java index 69856ae3fa9..f7c3a274047 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java @@ -40,8 +40,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster; @@ -72,7 +72,7 @@ * For true unit tests, see {@link TestQuorumJournalManagerUnit}. */ public class TestQuorumJournalManager { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestQuorumJournalManager.class); private MiniJournalCluster cluster; @@ -100,7 +100,7 @@ public void setup() throws Exception { qjm = createSpyingQJM(); spies = qjm.getLoggerSetForTests().getLoggersForTests(); - qjm.format(QJMTestUtil.FAKE_NSINFO); + qjm.format(QJMTestUtil.FAKE_NSINFO, false); qjm.recoverUnfinalizedSegments(); assertEquals(1, qjm.getLoggerSetForTests().getEpoch()); } @@ -108,7 +108,7 @@ public void setup() throws Exception { @After public void shutdown() throws IOException, InterruptedException, TimeoutException { - IOUtils.cleanup(LOG, toClose.toArray(new Closeable[0])); + IOUtils.cleanupWithLogger(LOG, toClose.toArray(new Closeable[0])); // Should not leak clients between tests -- this can cause flaky tests. 
// (See HDFS-4643) @@ -149,7 +149,7 @@ public void testFormat() throws Exception { QuorumJournalManager qjm = closeLater(new QuorumJournalManager( conf, cluster.getQuorumJournalURI("testFormat-jid"), FAKE_NSINFO)); assertFalse(qjm.hasSomeData()); - qjm.format(FAKE_NSINFO); + qjm.format(FAKE_NSINFO, false); assertTrue(qjm.hasSomeData()); } @@ -172,7 +172,7 @@ public void testReaderWhileAnotherWrites() throws Exception { verifyEdits(streams, 1, 3); assertNull(stream.readOp()); } finally { - IOUtils.cleanup(LOG, streams.toArray(new Closeable[0])); + IOUtils.cleanupWithLogger(LOG, streams.toArray(new Closeable[0])); streams.clear(); } @@ -187,7 +187,7 @@ public void testReaderWhileAnotherWrites() throws Exception { assertEquals(3, stream.getLastTxId()); verifyEdits(streams, 1, 3); } finally { - IOUtils.cleanup(LOG, streams.toArray(new Closeable[0])); + IOUtils.cleanupWithLogger(LOG, streams.toArray(new Closeable[0])); streams.clear(); } @@ -205,7 +205,7 @@ public void testReaderWhileAnotherWrites() throws Exception { verifyEdits(streams, 1, 6); } finally { - IOUtils.cleanup(LOG, streams.toArray(new Closeable[0])); + IOUtils.cleanupWithLogger(LOG, streams.toArray(new Closeable[0])); streams.clear(); } } @@ -234,7 +234,7 @@ public void testOneJNMissingSegments() throws Exception { readerQjm.selectInputStreams(streams, 1, false); verifyEdits(streams, 1, 9); } finally { - IOUtils.cleanup(LOG, streams.toArray(new Closeable[0])); + IOUtils.cleanupWithLogger(LOG, streams.toArray(new Closeable[0])); readerQjm.close(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java index 75dcf2fbda8..ebd1b15b4ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java @@ -19,6 +19,7 @@ import static org.junit.Assert.fail; import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyBoolean; import static org.mockito.Matchers.eq; import java.io.IOException; @@ -37,7 +38,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; @@ -55,7 +56,7 @@ */ public class TestQuorumJournalManagerUnit { static { - GenericTestUtils.setLogLevel(QuorumJournalManager.LOG, Level.ALL); + GenericTestUtils.setLogLevel(QuorumJournalManager.LOG, Level.TRACE); } private static final NamespaceInfo FAKE_NSINFO = new NamespaceInfo( 12345, "mycluster", "my-bp", 0L); @@ -89,7 +90,8 @@ public void setup() throws Exception { NewEpochResponseProto.newBuilder().build() ).when(logger).newEpoch(Mockito.anyLong()); - futureReturns(null).when(logger).format(Mockito.any()); + futureReturns(null).when(logger).format(Mockito.any(), + anyBoolean()); } qjm.recoverUnfinalizedSegments(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java index b71d69445c7..b8d2652ef46 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java @@ -73,7 +73,7 @@ public void setup() throws Exception { conf = new Configuration(); journal = new Journal(conf, TEST_LOG_DIR, JID, StartupOption.REGULAR, mockErrorReporter); - journal.format(FAKE_NSINFO); + journal.format(FAKE_NSINFO, false); } @After @@ -207,7 +207,7 @@ public void testFormatResetsCachedValues() throws Exception { // Clear the storage directory before reformatting it journal.getStorage().getJournalManager() .getStorageDirectory().clearDirectory(); - journal.format(FAKE_NSINFO_2); + journal.format(FAKE_NSINFO_2, false); assertEquals(0, journal.getLastPromisedEpoch()); assertEquals(0, journal.getLastWriterEpoch()); @@ -425,7 +425,7 @@ public void testFormatNonEmptyStorageDirectories() throws Exception { try { // Format again here and to format the non-empty directories in // journal node. - journal.format(FAKE_NSINFO); + journal.format(FAKE_NSINFO, false); fail("Did not fail to format non-empty directories in journal node."); } catch (IOException ioe) { GenericTestUtils.assertExceptionContains( @@ -434,4 +434,15 @@ public void testFormatNonEmptyStorageDirectories() throws Exception { } } + @Test + public void testFormatNonEmptyStorageDirectoriesWhenForceOptionIsTrue() + throws Exception { + try { + // Format the non-empty directories in the journal node again, this + // time with the force option enabled. + journal.format(FAKE_NSINFO, true); + } catch (IOException ioe) { + fail("Format should succeed when the force option is set."); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java index 8d587927fba..4cc5968e6b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java @@ -159,11 +159,11 @@ public void setup() throws Exception { HdfsServerConstants.StartupOption.REGULAR); NamespaceInfo fakeNameSpaceInfo = new NamespaceInfo( 12345, "mycluster", "my-bp"+nsId, 0L); - journal.format(fakeNameSpaceInfo); + journal.format(fakeNameSpaceInfo, false); } } else { journal = jn.getOrCreateJournal(journalId); - journal.format(FAKE_NSINFO); + journal.format(FAKE_NSINFO, false); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java index 1de37a4d245..7550c4e9e79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java @@ -82,7 +82,7 @@ public void testJournalNodeMXBean() throws Exception { // format the journal ns1 final NamespaceInfo FAKE_NSINFO = new NamespaceInfo(12345, "mycluster", "my-bp", 0L); - jn.getOrCreateJournal(NAMESERVICE).format(FAKE_NSINFO); + jn.getOrCreateJournal(NAMESERVICE).format(FAKE_NSINFO, false); // check again after format // getJournalsStatus diff --git
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeRespectsBindHostKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeRespectsBindHostKeys.java index 79f35987543..9d5af1cfd90 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeRespectsBindHostKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeRespectsBindHostKeys.java @@ -41,8 +41,8 @@ import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import static org.junit.Assert.assertTrue; @@ -57,7 +57,7 @@ */ public class TestJournalNodeRespectsBindHostKeys { - public static final Log LOG = LogFactory.getLog( + public static final Logger LOG = LoggerFactory.getLogger( TestJournalNodeRespectsBindHostKeys.class); private static final String WILDCARD_ADDRESS = "0.0.0.0"; private static final String LOCALHOST_SERVER_ADDRESS = "127.0.0.1:0"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java index 8de96417536..c23604b9988 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java @@ -341,7 +341,7 @@ public void testSyncAfterJNformat() throws Exception{ } // Format the JN - journal1.format(nsInfo); + journal1.format(nsInfo, false); // Roll some more edits for (int i = 4; i < 10; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java index 6112b6a9874..e50c397b9c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java @@ -28,8 +28,8 @@ import java.io.IOException; import java.net.URI; import java.security.PrivilegedExceptionAction; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -53,7 +53,7 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -63,7 +63,8 @@ private MiniDFSCluster cluster; private DelegationTokenSecretManager dtSecretManager; private Configuration config; - private static final Log LOG = LogFactory.getLog(TestDelegationToken.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDelegationToken.class); @Before public void setUp() throws Exception { @@ -170,7 +171,7 @@ public void testAddDelegationTokensDFSApi() throws Exception { @Test public void 
testDelegationTokenWebHdfsApi() throws Exception { - GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.ALL); + GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.TRACE); final String uri = WebHdfsConstants.WEBHDFS_SCHEME + "://" + config.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); //get file system as JobTracker diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java index 9718bc6fffb..d79ec61470b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java @@ -29,8 +29,8 @@ import java.util.ArrayList; import java.util.Enumeration; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -67,7 +67,8 @@ private static UserGroupInformation ugi; private static UserGroupInformation proxyUgi; - private static final Log LOG = LogFactory.getLog(TestDoAsEffectiveUser.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDoAsEffectiveUser.class); private static void configureSuperUserIPAddresses(Configuration conf, String superUserShortName) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java index aaddb3654aa..a8f424005fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java @@ -39,8 +39,8 @@ import java.util.GregorianCalendar; import java.util.Set; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -94,7 +94,8 @@ /** Unit tests for block tokens */ public class TestBlockToken { - public static final Log LOG = LogFactory.getLog(TestBlockToken.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestBlockToken.class); private static final String ADDRESS = "0.0.0.0"; static { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 35ebe781ff2..e6405267296 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -74,8 +74,8 @@ import java.util.concurrent.TimeoutException; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -126,7 +126,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Tool; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; @@ -136,10 +136,10 @@ * This class tests if a balancer schedules tasks correctly. */ public class TestBalancer { - private static final Log LOG = LogFactory.getLog(TestBalancer.class); + private static final Logger LOG = LoggerFactory.getLogger(TestBalancer.class); static { - GenericTestUtils.setLogLevel(Balancer.LOG, Level.ALL); + GenericTestUtils.setLogLevel(Balancer.LOG, Level.TRACE); GenericTestUtils.setLogLevel(Dispatcher.LOG, Level.DEBUG); } @@ -1022,7 +1022,7 @@ private static int runBalancer(Collection namenodes, } } finally { for(NameNodeConnector nnc : connectors) { - IOUtils.cleanup(LOG, nnc); + IOUtils.cleanupWithLogger(LOG, nnc); } } return ExitStatus.SUCCESS.getExitCode(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index c8929d9cf0b..cafde333944 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -30,7 +30,7 @@ import java.util.Set; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -50,7 +50,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Assert; import org.junit.Test; @@ -58,10 +58,10 @@ * Test balancer with multiple NameNodes */ public class TestBalancerWithMultipleNameNodes { - static final Log LOG = Balancer.LOG; + static final Logger LOG = Balancer.LOG; { - GenericTestUtils.setLogLevel(LOG, Level.ALL); - DFSTestUtil.setNameNodeLogLevel(Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.TRACE); + DFSTestUtil.setNameNodeLogLevel(org.apache.log4j.Level.TRACE); } @@ -352,7 +352,7 @@ private static void sleep(long ms) { try { Thread.sleep(ms); } catch(InterruptedException e) { - LOG.error(e); + LOG.error("{}", e); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java index a6732c7c978..97687619b67 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java @@ -27,8 +27,8 @@ import java.util.Set; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.Path; @@ -54,7 +54,7 @@ * This class tests if a balancer schedules tasks correctly. */ public class TestBalancerWithNodeGroup { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestBalancerWithNodeGroup"); final private static long CAPACITY = 5000L; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java index 99986e6ac84..c2a5a097ac3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BaseReplicationPolicyTest.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.Set; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -158,4 +157,4 @@ boolean isOnSameRack(DatanodeStorageInfo left, DatanodeDescriptor right) { chosenNodes, false, excludedNodes, BLOCK_SIZE, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY, null); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockInfo.java index 2dfa9003b53..fa0dd70a7e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockInfo.java @@ -20,8 +20,8 @@ import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID; import static org.hamcrest.core.Is.is; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo.AddBlockResult; @@ -38,8 +38,8 @@ public class TestBlockInfo { - private static final Log LOG = LogFactory - .getLog("org.apache.hadoop.hdfs.TestBlockInfo"); + private static final Logger LOG = LoggerFactory + .getLogger("org.apache.hadoop.hdfs.TestBlockInfo"); @Test public void testIsDeleted() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java index 58ca2e3d598..0097da80c49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java @@ -21,8 +21,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.Lists; -import org.apache.commons.logging.Log; -import 
org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; @@ -75,8 +74,7 @@ import org.apache.hadoop.test.MetricsAsserts; import org.apache.hadoop.util.GSet; import org.apache.hadoop.util.LightWeightGSet; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; +import org.slf4j.event.Level; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -140,7 +138,8 @@ */ private static final int NUM_TEST_ITERS = 30; private static final int BLOCK_SIZE = 64*1024; - private static final Log LOG = LogFactory.getLog(TestBlockManager.class); + private static final org.slf4j.Logger LOG = + LoggerFactory.getLogger(TestBlockManager.class); private FSNamesystem fsn; private BlockManager bm; @@ -1156,7 +1155,8 @@ public void run() { // spam the block manager with IBRs to verify queuing is occurring. @Test public void testAsyncIBR() throws Exception { - Logger.getRootLogger().setLevel(Level.WARN); + GenericTestUtils.setLogLevel( + LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME), Level.WARN); // will create files with many small blocks. final int blkSize = 4*1024; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java index 866b533b267..6ad530d7c46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java @@ -237,15 +237,7 @@ public void testIncrementSafeBlockCount() { BlockInfo blockInfo = mock(BlockInfo.class); doReturn(false).when(blockInfo).isStriped(); bmSafeMode.incrementSafeBlockCount(1, blockInfo); - if (i < BLOCK_THRESHOLD) { - assertEquals(i, getblockSafe()); - assertTrue(bmSafeMode.isInSafeMode()); - } else { - // block manager leaves safe mode if block threshold is met - assertFalse(bmSafeMode.isInSafeMode()); - // the increment will be a no-op if safe mode is OFF - assertEquals(BLOCK_THRESHOLD, getblockSafe()); - } + assertSafeModeIsLeftAtThreshold(i); } } @@ -314,14 +306,35 @@ public void testIncrementAndDecrementSafeBlockCount() { bmSafeMode.decrementSafeBlockCount(blockInfo); bmSafeMode.incrementSafeBlockCount(1, blockInfo); - if (i < BLOCK_THRESHOLD) { - assertEquals(i, getblockSafe()); - assertTrue(bmSafeMode.isInSafeMode()); - } else { - // block manager leaves safe mode if block threshold is met - assertEquals(BLOCK_THRESHOLD, getblockSafe()); - assertFalse(bmSafeMode.isInSafeMode()); - } + assertSafeModeIsLeftAtThreshold(i); + } + } + + /** + * Test when the block safe increment and decrement interleave + * for striped blocks. + * + * Both the increment and decrement will be a no-op if the safe mode is OFF. 
+ * The safe mode status lifecycle: OFF -> PENDING_THRESHOLD -> OFF + */ + @Test(timeout = 30000) + public void testIncrementAndDecrementStripedSafeBlockCount() { + bmSafeMode.activate(BLOCK_TOTAL); + Whitebox.setInternalState(bmSafeMode, "extension", 0); + + // this number is used only by the decrementSafeBlockCount method + final int liveReplicasWhenDecrementing = 1; + final short realDataBlockNum = 2; + mockBlockManagerForStripedBlockSafeDecrement(liveReplicasWhenDecrementing); + for (long i = 1; i <= BLOCK_TOTAL; i++) { + BlockInfoStriped blockInfo = mock(BlockInfoStriped.class); + when(blockInfo.getRealDataBlockNum()).thenReturn(realDataBlockNum); + + bmSafeMode.incrementSafeBlockCount(realDataBlockNum, blockInfo); + bmSafeMode.decrementSafeBlockCount(blockInfo); + bmSafeMode.incrementSafeBlockCount(realDataBlockNum, blockInfo); + + assertSafeModeIsLeftAtThreshold(i); } } @@ -508,10 +521,27 @@ public void testGetSafeModeTipForBlocksWithFutureGS() throws Exception { */ private void mockBlockManagerForBlockSafeDecrement() { BlockInfo storedBlock = mock(BlockInfo.class); + mockBlockManagerForBlockSafeDecrement(storedBlock, 0); + } + + /** + * Mock block manager internal state for decrement safe block + * in case of striped block. + */ + private void mockBlockManagerForStripedBlockSafeDecrement(int liveReplicas) { + BlockInfo storedBlock = mock(BlockInfoStriped.class); + mockBlockManagerForBlockSafeDecrement(storedBlock, liveReplicas); + } + + /** + * Mock block manager internal state for decrement safe block. + */ + private void mockBlockManagerForBlockSafeDecrement(BlockInfo storedBlock, + int liveReplicas) { when(storedBlock.isComplete()).thenReturn(true); doReturn(storedBlock).when(bm).getStoredBlock(any(Block.class)); NumberReplicas numberReplicas = mock(NumberReplicas.class); - when(numberReplicas.liveReplicas()).thenReturn(0); + when(numberReplicas.liveReplicas()).thenReturn(liveReplicas); doReturn(numberReplicas).when(bm).countNodes(any(BlockInfo.class)); } @@ -552,4 +582,19 @@ private void setBlockSafe(long blockSafe) { private long getblockSafe() { return (long)Whitebox.getInternalState(bmSafeMode, "blockSafe"); } + + private void assertSafeModeIsLeftAtThreshold(long blockIndex) { + if (blockIndex < BLOCK_THRESHOLD) { + assertEquals("Current block index should be equal to " + + "the safe block counter.", blockIndex, getblockSafe()); + assertTrue("Block Manager should stay in safe mode until " + + "the safe block threshold is reached.", bmSafeMode.isInSafeMode()); + } else { + assertEquals("If safe block threshold is reached, safe block " + + "counter should not increase further.", + BLOCK_THRESHOLD, getblockSafe()); + assertFalse("Block manager leaves safe mode if block " + + "threshold is met.", bmSafeMode.isInSafeMode()); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java index 3cc1b026ade..20a5fca7b74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java @@ -24,8 +24,8 @@ import com.google.common.base.Joiner; import com.google.common.base.Supplier; import com.google.common.util.concurrent.Uninterruptibles; -import org.apache.commons.logging.Log; 
-import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -44,7 +44,8 @@ import java.util.concurrent.atomic.AtomicReference; public class TestBlockReportRateLimiting { - static final Log LOG = LogFactory.getLog(TestBlockReportRateLimiting.class); + static final Logger LOG = + LoggerFactory.getLogger(TestBlockReportRateLimiting.class); private static void setFailure(AtomicReference failure, String what) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java index 2bf6045b6f4..5e59443f1ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java @@ -23,8 +23,8 @@ import java.util.ArrayList; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; @@ -44,7 +44,8 @@ import org.slf4j.event.Level; public class TestBlocksWithNotEnoughRacks { - public static final Log LOG = LogFactory.getLog(TestBlocksWithNotEnoughRacks.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestBlocksWithNotEnoughRacks.class); static { GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.TRACE); GenericTestUtils.setLogLevel(LOG, Level.TRACE); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCachedBlocksList.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCachedBlocksList.java index 0643346fee0..eda60bd9e98 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCachedBlocksList.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCachedBlocksList.java @@ -21,8 +21,8 @@ import java.util.Iterator; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList; @@ -31,7 +31,8 @@ import org.junit.Test; public class TestCachedBlocksList { - public static final Log LOG = LogFactory.getLog(TestCachedBlocksList.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestCachedBlocksList.class); @Test(timeout=60000) public void testSingleList() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java index 299df56b2b0..9716d4061c3 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java @@ -29,8 +29,8 @@ import java.util.HashMap; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.StripedFileTestUtil; import org.apache.hadoop.hdfs.protocol.Block; @@ -48,7 +48,7 @@ */ public class TestCorruptReplicaInfo { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestCorruptReplicaInfo.class); private final Map replicaMap = new HashMap<>(); private final Map stripedBlocksMap = new HashMap<>(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java index dd6f40adeab..aa7f4d2f7ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java @@ -34,8 +34,8 @@ import java.util.Map.Entry; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileUtil; @@ -75,7 +75,8 @@ public class TestDatanodeManager { - public static final Log LOG = LogFactory.getLog(TestDatanodeManager.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestDatanodeManager.class); //The number of times the registration / removal of nodes should happen final int NUM_ITERATIONS = 500; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java index 97a5a6e2227..cf40c39993a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java @@ -63,7 +63,8 @@ private BlockInfo genStripedBlockInfo(long id, long numBytes) { private void verifyBlockStats(LowRedundancyBlocks queues, int lowRedundancyReplicaCount, int corruptReplicaCount, int corruptReplicationOneCount, int lowRedundancyStripedCount, - int corruptStripedCount) { + int corruptStripedCount, int highestPriorityReplicatedBlockCount, + int highestPriorityECBlockCount) { assertEquals("Low redundancy replica count incorrect!", lowRedundancyReplicaCount, queues.getLowRedundancyBlocks()); assertEquals("Corrupt replica count incorrect!", @@ -81,6 +82,14 @@ private void verifyBlockStats(LowRedundancyBlocks queues, assertEquals("LowRedundancyBlocks queue size incorrect!", (lowRedundancyReplicaCount + corruptReplicaCount + lowRedundancyStripedCount + corruptStripedCount), queues.size()); + assertEquals("Highest priority replicated low 
redundancy " + + "blocks count is incorrect!", + highestPriorityReplicatedBlockCount, + queues.getHighestPriorityReplicatedBlockCount()); + assertEquals("Highest priority erasure coded low redundancy " + + "blocks count is incorrect!", + highestPriorityECBlockCount, + queues.getHighestPriorityECBlockCount()); } /** @@ -100,42 +109,46 @@ public void testBlockPriorities() throws Throwable { // Add a block with a single entry assertAdded(queues, block1, 1, 0, 3); assertInLevel(queues, block1, LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY); - verifyBlockStats(queues, 1, 0, 0, 0, 0); + verifyBlockStats(queues, 1, 0, 0, 0, 0, 1, 0); // Repeated additions fail assertFalse(queues.add(block1, 1, 0, 0, 3)); - verifyBlockStats(queues, 1, 0, 0, 0, 0); + verifyBlockStats(queues, 1, 0, 0, 0, 0, 1, 0); // Add a second block with two replicas assertAdded(queues, block2, 2, 0, 3); assertInLevel(queues, block2, LowRedundancyBlocks.QUEUE_LOW_REDUNDANCY); - verifyBlockStats(queues, 2, 0, 0, 0, 0); + verifyBlockStats(queues, 2, 0, 0, 0, 0, 1, 0); // Now try to add a block that is corrupt assertAdded(queues, block_corrupt, 0, 0, 3); assertInLevel(queues, block_corrupt, LowRedundancyBlocks.QUEUE_WITH_CORRUPT_BLOCKS); - verifyBlockStats(queues, 2, 1, 0, 0, 0); + verifyBlockStats(queues, 2, 1, 0, 0, 0, 1, 0); // Insert a very insufficiently redundancy block assertAdded(queues, block_very_low_redundancy, 4, 0, 25); assertInLevel(queues, block_very_low_redundancy, LowRedundancyBlocks.QUEUE_VERY_LOW_REDUNDANCY); - verifyBlockStats(queues, 3, 1, 0, 0, 0); + verifyBlockStats(queues, 3, 1, 0, 0, 0, 1, 0); // Insert a corrupt block with replication factor 1 assertAdded(queues, block_corrupt_repl_one, 0, 0, 1); - verifyBlockStats(queues, 3, 2, 1, 0, 0); + verifyBlockStats(queues, 3, 2, 1, 0, 0, 1, 0); // Bump up the expected count for corrupt replica one block from 1 to 3 queues.update(block_corrupt_repl_one, 0, 0, 0, 3, 0, 2); - verifyBlockStats(queues, 3, 2, 0, 0, 0); + verifyBlockStats(queues, 3, 2, 0, 0, 0, 1, 0); // Reduce the expected replicas to 1 queues.update(block_corrupt, 0, 0, 0, 1, 0, -2); - verifyBlockStats(queues, 3, 2, 1, 0, 0); + verifyBlockStats(queues, 3, 2, 1, 0, 0, 1, 0); queues.update(block_very_low_redundancy, 0, 0, 0, 1, -4, -24); - verifyBlockStats(queues, 2, 3, 2, 0, 0); + verifyBlockStats(queues, 2, 3, 2, 0, 0, 1, 0); + + // Reduce the expected replicas to 1 for block1 + queues.update(block1, 1, 0, 0, 1, 0, 0); + verifyBlockStats(queues, 2, 3, 2, 0, 0, 0, 0); } @Test @@ -145,12 +158,12 @@ public void testRemoveWithWrongPriority() { assertAdded(queues, corruptBlock, 0, 0, 3); assertInLevel(queues, corruptBlock, LowRedundancyBlocks.QUEUE_WITH_CORRUPT_BLOCKS); - verifyBlockStats(queues, 0, 1, 0, 0, 0); + verifyBlockStats(queues, 0, 1, 0, 0, 0, 0, 0); // Remove with wrong priority queues.remove(corruptBlock, LowRedundancyBlocks.QUEUE_LOW_REDUNDANCY); // Verify the number of corrupt block is decremented - verifyBlockStats(queues, 0, 0, 0, 0, 0); + verifyBlockStats(queues, 0, 0, 0, 0, 0, 0, 0); } @Test @@ -186,17 +199,17 @@ private void doTestStripedBlockPriorities(int dataBlkNum, int parityBlkNum) assertInLevel(queues, block, LowRedundancyBlocks.QUEUE_LOW_REDUNDANCY); } - verifyBlockStats(queues, 0, 0, 0, numUR, 0); + verifyBlockStats(queues, 0, 0, 0, numUR, 0, 0, 1); } // add a corrupted block BlockInfo block_corrupt = genStripedBlockInfo(-10, numBytes); assertEquals(numCorrupt, queues.getCorruptBlockSize()); - verifyBlockStats(queues, 0, 0, 0, numUR, numCorrupt); + verifyBlockStats(queues, 0, 0, 0, 
numUR, numCorrupt, 0, 1); assertAdded(queues, block_corrupt, dataBlkNum - 1, 0, groupSize); numCorrupt++; - verifyBlockStats(queues, 0, 0, 0, numUR, numCorrupt); + verifyBlockStats(queues, 0, 0, 0, numUR, numCorrupt, 0, 1); assertInLevel(queues, block_corrupt, LowRedundancyBlocks.QUEUE_WITH_CORRUPT_BLOCKS); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java index 05b6d3023b3..f64767afe26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNameNodePrunesMissingStorages.java @@ -22,8 +22,8 @@ import java.util.ArrayList; import java.util.Collection; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -69,7 +69,8 @@ public class TestNameNodePrunesMissingStorages { - static final Log LOG = LogFactory.getLog(TestNameNodePrunesMissingStorages.class); + static final Logger LOG = + LoggerFactory.getLogger(TestNameNodePrunesMissingStorages.class); private static void runTest(final String testCaseName, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java index cb2ee9c8506..40f54cba345 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java @@ -24,8 +24,8 @@ import java.io.IOException; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -52,7 +52,8 @@ * and then the under replicated block gets replicated to the datanode. 
*/ public class TestRBWBlockInvalidation { - private static final Log LOG = LogFactory.getLog(TestRBWBlockInvalidation.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestRBWBlockInvalidation.class); private static NumberReplicas countReplicas(final FSNamesystem namesystem, ExtendedBlock block) { @@ -236,7 +237,7 @@ public void testRWRInvalidation() throws Exception { assertEquals("old gs data\n" + "new gs data\n", ret); } } finally { - IOUtils.cleanup(LOG, streams.toArray(new Closeable[0])); + IOUtils.cleanupWithLogger(LOG, streams.toArray(new Closeable[0])); } } finally { cluster.shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java index 27dcbf1856b..f08fa131bf7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hdfs.server.namenode.TestINodeFile; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.net.Node; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.spi.LoggingEvent; @@ -1559,4 +1560,31 @@ public void testAvoidLocalWriteNoEnoughNodes() throws IOException { } assertTrue(found); } + + @Test + public void testMaxLoad() { + FSClusterStats statistics = mock(FSClusterStats.class); + DatanodeDescriptor node = mock(DatanodeDescriptor.class); + + when(statistics.getInServiceXceiverAverage()).thenReturn(0.0); + when(node.getXceiverCount()).thenReturn(1); + + final Configuration conf = new Configuration(); + final Class replicatorClass = conf + .getClass(DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY, + DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_DEFAULT, + BlockPlacementPolicy.class); + BlockPlacementPolicy bpp = ReflectionUtils. + newInstance(replicatorClass, conf); + assertTrue(bpp instanceof BlockPlacementPolicyDefault); + + BlockPlacementPolicyDefault bppd = (BlockPlacementPolicyDefault) bpp; + bppd.initialize(conf, statistics, null, null); + assertFalse(bppd.excludeNodeByLoad(node)); + + when(statistics.getInServiceXceiverAverage()).thenReturn(1.0); + when(node.getXceiverCount()).thenReturn(10); + assertTrue(bppd.excludeNodeByLoad(node)); + + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java index 8a0f75ed7f3..25b2a028835 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java @@ -28,8 +28,8 @@ import java.io.IOException; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -54,8 +54,8 @@ * collision handling. 
*/ public class TestSequentialBlockGroupId { - private static final Log LOG = LogFactory - .getLog("TestSequentialBlockGroupId"); + private static final Logger LOG = LoggerFactory + .getLogger("TestSequentialBlockGroupId"); private final ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockId.java index bfda3931c98..89fe8a40abb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockId.java @@ -20,8 +20,8 @@ import java.io.IOException; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -43,7 +43,8 @@ * collision handling. */ public class TestSequentialBlockId { - private static final Log LOG = LogFactory.getLog("TestSequentialBlockId"); + private static final Logger LOG = + LoggerFactory.getLogger("TestSequentialBlockId"); final int BLOCK_SIZE = 1024; final int IO_SIZE = BLOCK_SIZE; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestGetUriFromString.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestGetUriFromString.java index e4f9697f465..e033f18f620 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestGetUriFromString.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestGetUriFromString.java @@ -23,8 +23,8 @@ import java.io.IOException; import java.net.URI; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.junit.Test; /** @@ -32,7 +32,8 @@ * for Windows and Unix style file paths. 
*/ public class TestGetUriFromString { - private static final Log LOG = LogFactory.getLog(TestGetUriFromString.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestGetUriFromString.class); private static final String RELATIVE_FILE_PATH = "relativeFilePath"; private static final String ABSOLUTE_PATH_UNIX = "/tmp/file1"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TestInMemoryLevelDBAliasMapClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TestInMemoryLevelDBAliasMapClient.java index f0626335bb0..fccb6f2ab38 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TestInMemoryLevelDBAliasMapClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/blockaliasmap/impl/TestInMemoryLevelDBAliasMapClient.java @@ -28,14 +28,19 @@ import org.apache.hadoop.hdfs.server.aliasmap.InMemoryLevelDBAliasMapServer; import org.apache.hadoop.hdfs.server.common.blockaliasmap.BlockAliasMap; import org.apache.hadoop.hdfs.server.common.FileRegion; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.After; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; @@ -59,6 +64,9 @@ private Configuration conf; private final static String BPID = "BPID-0"; + @Rule + public final ExpectedException exception = ExpectedException.none(); + @Before public void setUp() throws IOException { conf = new Configuration(); @@ -348,4 +356,35 @@ public void testServerBindHost() throws Exception { conf.set(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, "0.0.0.0"); writeRead(); } + + @Test + public void testNonExistentFile() throws Exception { + // delete alias map location + FileUtils.deleteDirectory(tempDir); + // expect a RuntimeException when the aliasmap is started. 
+ exception.expect(RuntimeException.class); + levelDBAliasMapServer.setConf(conf); + } + + @Test + public void testNonExistentBlock() throws Exception { + inMemoryLevelDBAliasMapClient.setConf(conf); + levelDBAliasMapServer.setConf(conf); + levelDBAliasMapServer.start(); + Block block1 = new Block(100, 43, 44); + ProvidedStorageLocation providedStorageLocation1 = null; + BlockAliasMap.Writer writer1 = + inMemoryLevelDBAliasMapClient.getWriter(null, BPID); + try { + writer1.store(new FileRegion(block1, providedStorageLocation1)); + fail("Should fail on writing a region with null ProvidedLocation"); + } catch (IOException | IllegalArgumentException e) { + assertTrue(e.getMessage().contains("not be null")); + } + + BlockAliasMap.Reader reader = + inMemoryLevelDBAliasMapClient.getReader(null, BPID); + LambdaTestUtils.assertOptionalUnset("Expected empty BlockAlias", + reader.resolve(block1)); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java index c9ff57221f1..535b629a676 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java @@ -38,8 +38,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -87,7 +87,7 @@ * and messages. */ public abstract class BlockReportTestBase { - public static final Log LOG = LogFactory.getLog(BlockReportTestBase.class); + public static final Logger LOG = LoggerFactory.getLogger(BlockReportTestBase.class); private static short REPL_FACTOR = 1; private static final int RAND_LIMIT = 2000; @@ -879,7 +879,7 @@ public boolean accept(File file, String s) { private static void initLoggers() { DFSTestUtil.setNameNodeLogLevel(Level.ALL); GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); - GenericTestUtils.setLogLevel(BlockReportTestBase.LOG, Level.ALL); + GenericTestUtils.setLogLevel(BlockReportTestBase.LOG, org.slf4j.event.Level.DEBUG); } private Block findBlock(Path path, long size) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java index 19d9dfcd47d..04b99e65f19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java @@ -22,8 +22,8 @@ import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.ReconfigurationException; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -53,8 +53,8 @@ * dependencies to {@link MiniDFSCluster}. 
*/ public class DataNodeTestUtils { - private static final Log LOG = - LogFactory.getLog(DataNodeTestUtils.class); + private static final Logger LOG = + LoggerFactory.getLogger(DataNodeTestUtils.class); private static final String DIR_FAILURE_SUFFIX = ".origin"; public final static String TEST_CLUSTER_ID = "testClusterID"; @@ -98,6 +98,9 @@ public static void triggerBlockReport(DataNode dn) throws IOException { } } + public static void pauseIBR(DataNode dn) { + dn.setIBRDisabledForTest(true); + } public static InterDatanodeProtocol createInterDatanodeProtocolProxy( DataNode dn, DatanodeID datanodeid, final Configuration conf, boolean connectToDnViaHostname) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimpleBlocksMovementsStatusHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimpleBlocksMovementsStatusHandler.java new file mode 100644 index 00000000000..b361ce55c13 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimpleBlocksMovementsStatusHandler.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.datanode; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.server.common.sps.BlockMovementAttemptFinished; +import org.apache.hadoop.hdfs.server.common.sps.BlocksMovementsStatusHandler; + +/** + * Blocks movements status handler, which is used to collect details of the + * completed block movements and later these attempted finished(with success or + * failure) blocks can be accessed to notify respective listeners, if any. + */ +public class SimpleBlocksMovementsStatusHandler + implements BlocksMovementsStatusHandler { + private final List blockIdVsMovementStatus = new ArrayList<>(); + + /** + * Collect all the storage movement attempt finished blocks. Later this will + * be send to namenode via heart beat. + * + * @param moveAttemptFinishedBlk + * storage movement attempt finished block + */ + public void handle(BlockMovementAttemptFinished moveAttemptFinishedBlk) { + // Adding to the tracking report list. Later this can be accessed to know + // the attempted block movements. + synchronized (blockIdVsMovementStatus) { + blockIdVsMovementStatus.add(moveAttemptFinishedBlk.getBlock()); + } + } + + /** + * @return unmodifiable list of storage movement attempt finished blocks. + */ + public List getMoveAttemptFinishedBlocks() { + List moveAttemptFinishedBlks = new ArrayList<>(); + // 1. Adding all the completed block ids. 
+ synchronized (blockIdVsMovementStatus) { + if (blockIdVsMovementStatus.size() > 0) { + moveAttemptFinishedBlks = Collections + .unmodifiableList(blockIdVsMovementStatus); + } + } + return moveAttemptFinishedBlks; + } + + /** + * Remove the storage movement attempt finished blocks from the tracking list. + * + * @param moveAttemptFinishedBlks + * set of storage movement attempt finished blocks + */ + public void remove(List moveAttemptFinishedBlks) { + if (moveAttemptFinishedBlks != null) { + blockIdVsMovementStatus.removeAll(moveAttemptFinishedBlks); + } + } + + /** + * Clear the blockID vs movement status tracking map. + */ + public void removeAll() { + synchronized (blockIdVsMovementStatus) { + blockIdVsMovementStatus.clear(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index 4863ca18f92..26a9f378c9a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -36,8 +36,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSTestUtil; @@ -85,7 +85,7 @@ private static final String FAKE_BPID = "fake bpid"; private static final String FAKE_CLUSTERID = "fake cluster"; - protected static final Log LOG = LogFactory.getLog( + protected static final Logger LOG = LoggerFactory.getLogger( TestBPOfferService.class); private static final ExtendedBlock FAKE_BLOCK = new ExtendedBlock(FAKE_BPID, 12345L); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBatchIbr.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBatchIbr.java index 38c8a386831..f44075014e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBatchIbr.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBatchIbr.java @@ -30,8 +30,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -44,7 +44,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MetricsAsserts; import org.apache.hadoop.util.Time; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Assert; import org.junit.Test; @@ -53,7 +53,7 @@ * and the namenode allows closing a file with COMMITTED blocks. 
*/ public class TestBatchIbr { - public static final Log LOG = LogFactory.getLog(TestBatchIbr.class); + public static final Logger LOG = LoggerFactory.getLogger(TestBatchIbr.class); private static final short NUM_DATANODES = 4; private static final int BLOCK_SIZE = 1024; @@ -66,7 +66,8 @@ static { GenericTestUtils.setLogLevel( - LogFactory.getLog(IncrementalBlockReportManager.class), Level.ALL); + LoggerFactory.getLogger(IncrementalBlockReportManager.class), + Level.TRACE); } static HdfsConfiguration newConf(long ibrInterval) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java index 61321e420e7..fb65d0e2227 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java @@ -22,8 +22,8 @@ import java.util.Collections; import java.util.Comparator; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSClient; @@ -56,7 +56,8 @@ * the same DataNode. Excess replicas on the same DN should be ignored by the NN. */ public class TestBlockHasMultipleReplicasOnSameDN { - public static final Log LOG = LogFactory.getLog(TestBlockHasMultipleReplicasOnSameDN.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestBlockHasMultipleReplicasOnSameDN.class); private static final short NUM_DATANODES = 2; private static final int BLOCK_SIZE = 1024; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java index 63ae36d936d..e061e18242f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java @@ -25,8 +25,8 @@ import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; @@ -40,7 +40,8 @@ public class TestBlockPoolManager { - private final Log LOG = LogFactory.getLog(TestBlockPoolManager.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestBlockPoolManager.class); private final DataNode mockDN = Mockito.mock(DataNode.class); private BlockPoolManager bpm; private final StringBuilder log = new StringBuilder(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java index 07fd4ae58d7..80f9fb6b002 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java @@ -61,8 +61,8 @@ import java.util.concurrent.atomic.AtomicReference; import com.google.common.collect.Iterators; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; @@ -107,7 +107,7 @@ import org.apache.hadoop.test.GenericTestUtils.SleepAnswer; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Time; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -124,7 +124,8 @@ * This tests if sync all replicas in block recovery works correctly. */ public class TestBlockRecovery { - private static final Log LOG = LogFactory.getLog(TestBlockRecovery.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestBlockRecovery.class); private static final String DATA_DIR = MiniDFSCluster.getBaseDirectory() + "data"; private DataNode dn; @@ -173,8 +174,8 @@ 11 * cellSize}, {36 * cellSize}}, }; static { - GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); - GenericTestUtils.setLogLevel(LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(LOG, Level.TRACE); } private final long diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReplacement.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReplacement.java index f75b23dd3ff..67b41f80612 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReplacement.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReplacement.java @@ -31,8 +31,8 @@ import java.util.Random; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -63,7 +63,7 @@ * This class tests if block replacement request to data nodes work correctly. 
*/ public class TestBlockReplacement { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( "org.apache.hadoop.hdfs.TestBlockReplacement"); MiniDFSCluster cluster; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java index 753c3a8d6fb..438be89be92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.datanode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.server.datanode.BPServiceActor.Scheduler; import org.junit.Rule; import org.junit.Test; @@ -43,7 +43,8 @@ * using a few different values . */ public class TestBpServiceActorScheduler { - protected static final Log LOG = LogFactory.getLog(TestBpServiceActorScheduler.class); + protected static final Logger LOG = + LoggerFactory.getLogger(TestBpServiceActorScheduler.class); @Rule public Timeout timeout = new Timeout(300000); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java index 7194385090e..ce65b6b6749 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -51,8 +51,8 @@ * This file tests the erasure coding metrics in DataNode. */ public class TestDataNodeErasureCodingMetrics { - public static final Log LOG = LogFactory. - getLog(TestDataNodeErasureCodingMetrics.class); + public static final Logger LOG = LoggerFactory. 
+ getLogger(TestDataNodeErasureCodingMetrics.class); private final ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy(); private final int dataBlocks = ecPolicy.getNumDataUnits(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeFaultInjector.java index 4afacd9a7d8..6879dc0eab5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeFaultInjector.java @@ -21,8 +21,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -39,8 +39,8 @@ * This class tests various cases where faults are injected to DataNode. */ public class TestDataNodeFaultInjector { - private static final Log LOG = LogFactory - .getLog(TestDataNodeFaultInjector.class); + private static final Logger LOG = LoggerFactory + .getLogger(TestDataNodeFaultInjector.class); private static class MetricsDataNodeFaultInjector extends DataNodeFaultInjector { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java index 6530720b361..c19c8493194 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java @@ -74,8 +74,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -98,7 +98,7 @@ import static org.mockito.Mockito.timeout; public class TestDataNodeHotSwapVolumes { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestDataNodeHotSwapVolumes.class); private static final int BLOCK_SIZE = 512; private static final int DEFAULT_STORAGES_PER_DATANODE = 2; @@ -757,7 +757,7 @@ private static void assertFileLocksReleased(Collection dirs) try { FsDatasetTestUtil.assertFileLockReleased(dir); } catch (IOException e) { - LOG.warn(e); + LOG.warn("{}", e); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeInitStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeInitStorage.java index 07a26cc60e4..df8be222069 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeInitStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeInitStorage.java @@ -20,8 +20,8 @@ import java.io.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; 
+import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -38,7 +38,8 @@ * FsDataSet initialization. */ public class TestDataNodeInitStorage { - public static final Log LOG = LogFactory.getLog(TestDataNodeInitStorage.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeInitStorage.class); static private class SimulatedFsDatasetVerifier extends SimulatedFSDataset { static class Factory extends FsDatasetSpi.Factory { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java index 9107aaef600..c0d3a4944bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java @@ -29,8 +29,8 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; @@ -38,6 +38,8 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferTestCase; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Assert; import org.junit.Test; @@ -49,9 +51,10 @@ /** * Class for testing {@link DataNodeMXBean} implementation */ -public class TestDataNodeMXBean { +public class TestDataNodeMXBean extends SaslDataTransferTestCase { - public static final Log LOG = LogFactory.getLog(TestDataNodeMXBean.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeMXBean.class); @Test public void testDataNodeMXBean() throws Exception { @@ -117,6 +120,49 @@ public void testDataNodeMXBean() throws Exception { } } } + + @Test + public void testDataNodeMXBeanSecurityEnabled() throws Exception { + Configuration simpleConf = new Configuration(); + Configuration secureConf = createSecureConfig("authentication"); + + // get attribute "SecurityEnabled" with simple configuration + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(simpleConf).build()) { + List datanodes = cluster.getDataNodes(); + Assert.assertEquals(datanodes.size(), 1); + DataNode datanode = datanodes.get(0); + + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName mxbeanName = new ObjectName( + "Hadoop:service=DataNode,name=DataNodeInfo"); + + boolean securityEnabled = (boolean) mbs.getAttribute(mxbeanName, + "SecurityEnabled"); + Assert.assertFalse(securityEnabled); + Assert.assertEquals(datanode.isSecurityEnabled(), securityEnabled); + } + + // get attribute "SecurityEnabled" with secure configuration + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(secureConf).build()) { + List datanodes = cluster.getDataNodes(); + Assert.assertEquals(datanodes.size(), 1); + DataNode datanode = datanodes.get(0); + + MBeanServer mbs = 
ManagementFactory.getPlatformMBeanServer(); + ObjectName mxbeanName = new ObjectName( + "Hadoop:service=DataNode,name=DataNodeInfo"); + + boolean securityEnabled = (boolean) mbs.getAttribute(mxbeanName, + "SecurityEnabled"); + Assert.assertTrue(securityEnabled); + Assert.assertEquals(datanode.isSecurityEnabled(), securityEnabled); + } + + // setting back the authentication method + UserGroupInformation.setConfiguration(simpleConf); + } private static String replaceDigits(final String s) { return s.replaceAll("[0-9]+", "_DIGITS_"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java index 7b3dea70e5c..98ccd8eea6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java @@ -35,8 +35,8 @@ import com.google.common.collect.Lists; import net.jcip.annotations.NotThreadSafe; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -65,7 +65,8 @@ @NotThreadSafe public class TestDataNodeMetrics { - private static final Log LOG = LogFactory.getLog(TestDataNodeMetrics.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeMetrics.class); @Test public void testDataNodeMetrics() throws Exception { @@ -253,7 +254,7 @@ public void testTimeoutMetric() throws Exception { assertTrue("expected to see networkErrors", allDnc.indexOf("networkErrors") >= 0); } finally { - IOUtils.cleanup(LOG, streams.toArray(new Closeable[0])); + IOUtils.cleanupWithLogger(LOG, streams.toArray(new Closeable[0])); if (cluster != null) { cluster.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java index 32fda370161..4c443afc19e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetricsLogger.java @@ -33,8 +33,9 @@ import java.util.regex.Pattern; import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileUtil; @@ -59,7 +60,8 @@ * Test periodic logging of DataNode metrics. 
*/ public class TestDataNodeMetricsLogger { - static final Log LOG = LogFactory.getLog(TestDataNodeMetricsLogger.class); + static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeMetricsLogger.class); private static final String DATA_DIR = MiniDFSCluster.getBaseDirectory() + "data"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java index 8e1e23673af..4a49477c85f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java @@ -29,8 +29,8 @@ import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -47,8 +47,8 @@ import org.junit.Test; public class TestDataNodeMultipleRegistrations { - private static final Log LOG = - LogFactory.getLog(TestDataNodeMultipleRegistrations.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeMultipleRegistrations.class); Configuration conf; @Before diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java index 1dfd3c31db6..4e6f5699aca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java @@ -28,8 +28,8 @@ import java.io.IOException; import java.net.InetSocketAddress; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.ReconfigurationException; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -48,7 +48,8 @@ */ public class TestDataNodeReconfiguration { - private static final Log LOG = LogFactory.getLog(TestBlockRecovery.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestBlockRecovery.class); private static final String DATA_DIR = MiniDFSCluster.getBaseDirectory() + "data"; private final static InetSocketAddress NN_ADDR = new InetSocketAddress( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java index f08606ec5e2..04f6ce1f59e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java @@ -27,8 +27,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSClient; @@ -56,7 +56,8 @@ * finalize and rollback. */ public class TestDataNodeRollingUpgrade { - private static final Log LOG = LogFactory.getLog(TestDataNodeRollingUpgrade.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeRollingUpgrade.class); private static final short REPL_FACTOR = 1; private static final int BLOCK_SIZE = 1024 * 1024; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeTcpNoDelay.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeTcpNoDelay.java index 4d9f11981d2..ead7baa72c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeTcpNoDelay.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeTcpNoDelay.java @@ -21,8 +21,8 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.Path; @@ -55,8 +55,8 @@ * Checks that used sockets have TCP_NODELAY set when configured. */ public class TestDataNodeTcpNoDelay { - private static final Log LOG = - LogFactory.getLog(TestDataNodeTcpNoDelay.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeTcpNoDelay.class); private static Configuration baseConf; @BeforeClass diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java index f7c716d871d..326c54c8f36 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java @@ -37,8 +37,8 @@ import javax.management.MBeanServer; import javax.management.ObjectName; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.ReconfigurationException; import org.apache.hadoop.fs.FileSystem; @@ -55,7 +55,7 @@ import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -68,11 +68,11 @@ */ public class TestDataNodeVolumeFailureReporting { - private static final Log LOG = - LogFactory.getLog(TestDataNodeVolumeFailureReporting.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeVolumeFailureReporting.class); { GenericTestUtils.setLogLevel(TestDataNodeVolumeFailureReporting.LOG, - Level.ALL); + Level.TRACE); } private FileSystem fs; @@ -103,7 +103,7 @@ public void setUp() throws Exception { @After public void tearDown() throws Exception { - IOUtils.cleanup(LOG, 
fs); + IOUtils.cleanupWithLogger(LOG, fs); if (cluster != null) { cluster.shutdown(); cluster = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java index f83609a08db..825887c1af5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java @@ -201,7 +201,11 @@ private void restartDatanodes(int volTolerated, boolean manageDfsDirs) @Test public void testVolumeAndTolerableConfiguration() throws Exception { // Check if Block Pool Service exit for an invalid conf value. - testVolumeConfig(-1, 0, false, true); + testVolumeConfig(-2, 0, false, true); + // Test for one good volume at least + testVolumeConfig(-1, 0, true, true); + testVolumeConfig(-1, 1, true, true); + testVolumeConfig(-1, 2, false, true); // Ditto if the value is too big. testVolumeConfig(100, 0, false, true); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeMetrics.java index aa9a70728d5..d2c9c62eb02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeMetrics.java @@ -26,8 +26,8 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -50,8 +50,8 @@ * Test class for DataNodeVolumeMetrics. 
*/ public class TestDataNodeVolumeMetrics { - private static final Log LOG = - LogFactory.getLog(TestDataNodeVolumeMetrics.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDataNodeVolumeMetrics.class); private static final int BLOCK_SIZE = 1024; private static final short REPL = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java index bb1d9eff0ee..634968b5051 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java @@ -31,8 +31,8 @@ import java.util.concurrent.ThreadLocalRandom; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; @@ -55,7 +55,7 @@ import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -68,7 +68,7 @@ * This tests DatanodeProtocol retry policy */ public class TestDatanodeProtocolRetryPolicy { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestDatanodeProtocolRetryPolicy.class); private static final String DATA_DIR = MiniDFSCluster.getBaseDirectory() + "data"; @@ -84,7 +84,7 @@ DFSTestUtil.getLocalDatanodeRegistration(); static { - GenericTestUtils.setLogLevel(LOG, Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.TRACE); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java index f2a5d089bca..38eb0545c28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java @@ -27,8 +27,8 @@ import java.io.IOException; import java.net.InetSocketAddress; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; @@ -39,7 +39,8 @@ import org.junit.Test; public class TestDatanodeRegister { - public static final Log LOG = LogFactory.getLog(TestDatanodeRegister.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestDatanodeRegister.class); // Invalid address private static final InetSocketAddress INVALID_ADDR = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java index f79252318ff..312bc86bd21 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java @@ -40,11 +40,12 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.DF; import org.apache.hadoop.fs.FileSystem; @@ -82,7 +83,8 @@ * between blocks on the disk and block in memory. */ public class TestDirectoryScanner { - private static final Log LOG = LogFactory.getLog(TestDirectoryScanner.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDirectoryScanner.class); private static final Configuration CONF = new HdfsConfiguration(); private static final int DEFAULT_GEN_STAMP = 9999; @@ -137,7 +139,7 @@ private long truncateBlockFile() throws IOException { LOG.info("Truncated block file " + f.getAbsolutePath()); return b.getBlockId(); } finally { - IOUtils.cleanup(LOG, channel, s); + IOUtils.cleanupWithLogger(LOG, channel, s); } } } @@ -312,18 +314,29 @@ private long createBlockMetaFile() throws IOException { return id; } - private void scan(long totalBlocks, int diffsize, long missingMetaFile, long missingBlockFile, - long missingMemoryBlocks, long mismatchBlocks) throws IOException { + private void scan(long totalBlocks, int diffsize, long missingMetaFile, + long missingBlockFile, long missingMemoryBlocks, long mismatchBlocks) + throws IOException, InterruptedException, TimeoutException { scan(totalBlocks, diffsize, missingMetaFile, missingBlockFile, missingMemoryBlocks, mismatchBlocks, 0); } private void scan(long totalBlocks, int diffsize, long missingMetaFile, long missingBlockFile, long missingMemoryBlocks, long mismatchBlocks, - long duplicateBlocks) throws IOException { + long duplicateBlocks) + throws IOException, InterruptedException, TimeoutException { scanner.reconcile(); - verifyStats(totalBlocks, diffsize, missingMetaFile, missingBlockFile, - missingMemoryBlocks, mismatchBlocks, duplicateBlocks); + + GenericTestUtils.waitFor(() -> { + try { + verifyStats(totalBlocks, diffsize, missingMetaFile, missingBlockFile, + missingMemoryBlocks, mismatchBlocks, duplicateBlocks); + } catch (AssertionError ex) { + return false; + } + + return true; + }, 50, 2000); } private void verifyStats(long totalBlocks, int diffsize, long missingMetaFile, @@ -785,7 +798,8 @@ public void run() { } } - private float runThrottleTest(int blocks) throws IOException { + private float runThrottleTest(int blocks) + throws IOException, InterruptedException, TimeoutException { scanner.setRetainDiffs(true); scan(blocks, 0, 0, 0, 0, 0); scanner.shutdown(); @@ -1069,10 +1083,19 @@ public void testDirectoryScannerInFederatedCluster() throws Exception { scanner.setRetainDiffs(true); scanner.reconcile(); //Check blocks in corresponding BP - bpid = cluster.getNamesystem(1).getBlockPoolId(); - verifyStats(bp1Files, 0, 0, 0, 0, 0, 0); - bpid = cluster.getNamesystem(3).getBlockPoolId(); - 
verifyStats(bp2Files, 0, 0, 0, 0, 0, 0); + + GenericTestUtils.waitFor(() -> { + try { + bpid = cluster.getNamesystem(1).getBlockPoolId(); + verifyStats(bp1Files, 0, 0, 0, 0, 0, 0); + bpid = cluster.getNamesystem(3).getBlockPoolId(); + verifyStats(bp2Files, 0, 0, 0, 0, 0, 0); + } catch (AssertionError ex) { + return false; + } + + return true; + }, 50, 2000); } finally { if (scanner != null) { scanner.shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java index bf0e3c11bdd..d168c2e9c53 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java @@ -21,8 +21,8 @@ import java.io.IOException; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.*; @@ -50,7 +50,8 @@ * {@link DFSConfigKeys#DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY} */ public class TestDnRespectsBlockReportSplitThreshold { - public static final Log LOG = LogFactory.getLog(TestStorageReport.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestStorageReport.class); private static final int BLOCK_SIZE = 1024; private static final short REPL_FACTOR = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java index 2dbd5b9bd01..a0c64983319 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java @@ -38,8 +38,7 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -88,8 +87,7 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import org.apache.log4j.Level; -import org.apache.log4j.LogManager; +import org.slf4j.event.Level; import com.google.common.base.Supplier; import com.google.common.primitives.Ints; @@ -98,7 +96,8 @@ @NotThreadSafe public class TestFsDatasetCache { - private static final Log LOG = LogFactory.getLog(TestFsDatasetCache.class); + private static final org.slf4j.Logger LOG = + LoggerFactory.getLogger(TestFsDatasetCache.class); // Most Linux installs allow a default of 64KB locked memory static final long CACHE_CAPACITY = 64 * 1024; @@ -126,7 +125,8 @@ private static DataNodeFaultInjector oldInjector; static { - LogManager.getLogger(FsDatasetCache.class).setLevel(Level.DEBUG); + GenericTestUtils.setLogLevel( + LoggerFactory.getLogger(FsDatasetCache.class), Level.DEBUG); } @BeforeClass @@ -263,7 +263,7 @@ private static DatanodeCommand 
getResponse(HdfsBlockLocation[] locs, blockChannel = blockInputStream.getChannel(); sizes[i] = blockChannel.size(); } finally { - IOUtils.cleanup(LOG, blockChannel, blockInputStream); + IOUtils.cleanupWithLogger(LOG, blockChannel, blockInputStream); } } return sizes; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java index 03553fed4f3..3a6276c7d71 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java @@ -25,8 +25,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.*; import org.apache.hadoop.hdfs.protocol.Block; @@ -48,7 +48,8 @@ * block additions/deletions. */ public class TestIncrementalBlockReports { - public static final Log LOG = LogFactory.getLog(TestIncrementalBlockReports.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestIncrementalBlockReports.class); private static final short DN_COUNT = 1; private static final long DUMMY_BLOCK_ID = 5678; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBrVariations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBrVariations.java index 78b5cf91c0d..c556d0d3810 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBrVariations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBrVariations.java @@ -26,8 +26,9 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.util.UUID; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.event.Level; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSClient; @@ -45,7 +46,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -61,7 +61,8 @@ * in the future). 
*/ public class TestIncrementalBrVariations { - public static final Log LOG = LogFactory.getLog(TestIncrementalBrVariations.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestIncrementalBrVariations.class); private static final short NUM_DATANODES = 1; static final int BLOCK_SIZE = 1024; @@ -79,13 +80,13 @@ private DatanodeRegistration dn0Reg; // DataNodeRegistration for dn0 static { - GenericTestUtils.setLogLevel(NameNode.stateChangeLog, Level.ALL); - GenericTestUtils.setLogLevel(BlockManager.blockLog, Level.ALL); - GenericTestUtils.setLogLevel(NameNode.blockStateChangeLog, Level.ALL); + GenericTestUtils.setLogLevel(NameNode.stateChangeLog, Level.TRACE); + GenericTestUtils.setLogLevel(BlockManager.blockLog, Level.TRACE); + GenericTestUtils.setLogLevel(NameNode.blockStateChangeLog, Level.TRACE); GenericTestUtils - .setLogLevel(LogFactory.getLog(FSNamesystem.class), Level.ALL); - GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); - GenericTestUtils.setLogLevel(TestIncrementalBrVariations.LOG, Level.ALL); + .setLogLevel(LoggerFactory.getLogger(FSNamesystem.class), Level.TRACE); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(TestIncrementalBrVariations.LOG, Level.TRACE); } @Before diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java index 22cba6d8b11..9d5cde3e744 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java @@ -26,8 +26,8 @@ import java.net.InetSocketAddress; import java.util.Collections; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSClient; @@ -61,7 +61,8 @@ */ public class TestReadOnlySharedStorage { - public static final Log LOG = LogFactory.getLog(TestReadOnlySharedStorage.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestReadOnlySharedStorage.class); private static final short NUM_DATANODES = 3; private static final int RO_NODE_INDEX = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java index 5f62ddb084b..e2bae620305 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java @@ -20,8 +20,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -49,7 +49,8 @@ import static org.mockito.Matchers.anyLong; public class TestStorageReport { - public static final Log LOG = LogFactory.getLog(TestStorageReport.class); + public static final Logger LOG = + 
LoggerFactory.getLogger(TestStorageReport.class); private static final short REPL_FACTOR = 1; private static final StorageType storageType = StorageType.SSD; // pick non-default. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java index 708fbaf30af..357f1ec22ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java @@ -20,8 +20,8 @@ import java.util.Collection; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSClientAdapter; @@ -44,7 +44,8 @@ /** Test transferring RBW between datanodes */ public class TestTransferRbw { - private static final Log LOG = LogFactory.getLog(TestTransferRbw.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestTransferRbw.class); { GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java index baaed9f338a..c4d1e57e720 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java @@ -20,8 +20,8 @@ import com.google.common.base.Preconditions; import org.apache.commons.io.FileExistsException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.DF; @@ -68,8 +68,8 @@ @InterfaceStability.Unstable @InterfaceAudience.Private public class FsDatasetImplTestUtils implements FsDatasetTestUtils { - private static final Log LOG = - LogFactory.getLog(FsDatasetImplTestUtils.class); + private static final Logger LOG = + LoggerFactory.getLogger(FsDatasetImplTestUtils.class); private final FsDatasetImpl dataset; private static final DataChecksum DEFAULT_CHECKSUM = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/LazyPersistTestCase.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/LazyPersistTestCase.java index aae59ddc5e9..ece5739f88f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/LazyPersistTestCase.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/LazyPersistTestCase.java @@ -43,8 +43,8 @@ import com.google.common.base.Preconditions; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; @@ -89,7 +89,8 @@ private static final String JMX_SERVICE_NAME = "DataNode"; protected static final int LAZY_WRITE_FILE_SCRUBBER_INTERVAL_SEC = 3; protected static final int LAZY_WRITER_INTERVAL_SEC = 1; - protected static final Log LOG = LogFactory.getLog(LazyPersistTestCase.class); + protected static final Logger LOG = + LoggerFactory.getLogger(LazyPersistTestCase.class); protected static final short REPL_FACTOR = 1; protected final long osPageSize = NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java index 2daca863205..559828bda83 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java @@ -20,8 +20,8 @@ import com.google.common.base.Supplier; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.hamcrest.core.Is.is; @@ -69,7 +69,7 @@ * replica being written (RBW) & Replica being copied from another DN. */ public class TestSpaceReservation { - static final Log LOG = LogFactory.getLog(TestSpaceReservation.class); + static final Logger LOG = LoggerFactory.getLogger(TestSpaceReservation.class); private static final int DU_REFRESH_INTERVAL_MSEC = 500; private static final int STORAGES_PER_DATANODE = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDataModels.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDataModels.java index ace8212fd85..12fbcf1d0d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDataModels.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDataModels.java @@ -224,4 +224,20 @@ public void testClusterSerialize() throws Exception { Assert .assertEquals(cluster.getNodes().size(), newCluster.getNodes().size()); } + + @Test + public void testUsageLimitedToCapacity() throws Exception { + DiskBalancerTestUtil util = new DiskBalancerTestUtil(); + + // If usage is greater than capacity, then it should be set to capacity + DiskBalancerVolume v1 = util.createRandomVolume(StorageType.DISK); + v1.setCapacity(DiskBalancerTestUtil.GB); + v1.setUsed(2 * DiskBalancerTestUtil.GB); + Assert.assertEquals(v1.getUsed(),v1.getCapacity()); + // If usage is less than capacity, usage should be set to the real usage + DiskBalancerVolume v2 = util.createRandomVolume(StorageType.DISK); + v2.setCapacity(2*DiskBalancerTestUtil.GB); + v2.setUsed(DiskBalancerTestUtil.GB); + Assert.assertEquals(v1.getUsed(),DiskBalancerTestUtil.GB); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDiskBalancerWithMockMover.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDiskBalancerWithMockMover.java index 7df03333b4c..f6a5d9c4692 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDiskBalancerWithMockMover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/TestDiskBalancerWithMockMover.java @@ -22,8 +22,8 @@ import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import org.apache.commons.codec.digest.DigestUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -63,7 +63,8 @@ * Tests diskbalancer with a mock mover. */ public class TestDiskBalancerWithMockMover { - static final Log LOG = LogFactory.getLog(TestDiskBalancerWithMockMover.class); + static final Logger LOG = + LoggerFactory.getLogger(TestDiskBalancerWithMockMover.class); @Rule public ExpectedException thrown = ExpectedException.none(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java index 62c91bf9e33..200178d262c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java @@ -72,12 +72,14 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DBlock; import org.apache.hadoop.hdfs.server.balancer.ExitStatus; import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; import org.apache.hadoop.hdfs.server.balancer.TestBalancer; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.mover.Mover.MLocation; import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.http.HttpConfig; @@ -113,6 +115,8 @@ static void initConf(Configuration conf) { conf.setLong(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1L); conf.setLong(DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY, 2000L); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); } static Mover newMover(Configuration conf) throws IOException { @@ -123,15 +127,18 @@ static Mover newMover(Configuration conf) throws IOException { nnMap.put(nn, null); } - final List nncs = NameNodeConnector.newNameNodeConnectors( - nnMap, Mover.class.getSimpleName(), Mover.MOVER_ID_PATH, conf, - NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS); + final List nncs = NameNodeConnector. 
+ newNameNodeConnectors(nnMap, Mover.class.getSimpleName(), + HdfsServerConstants.MOVER_ID_PATH, conf, + NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS); return new Mover(nncs.get(0), conf, new AtomicInteger(0), new HashMap<>()); } @Test public void testScheduleSameBlock() throws IOException { final Configuration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(4).build(); try { @@ -454,8 +461,11 @@ private void checkMovePaths(List actual, Path... expected) { */ @Test public void testMoverCli() throws Exception { + final Configuration clusterConf = new HdfsConfiguration(); + clusterConf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); final MiniDFSCluster cluster = new MiniDFSCluster - .Builder(new HdfsConfiguration()).numDataNodes(0).build(); + .Builder(clusterConf).numDataNodes(0).build(); try { final Configuration conf = cluster.getConfiguration(0); try { @@ -487,8 +497,10 @@ public void testMoverCli() throws Exception { @Test public void testMoverCliWithHAConf() throws Exception { final Configuration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); final MiniDFSCluster cluster = new MiniDFSCluster - .Builder(new HdfsConfiguration()) + .Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0).build(); HATestUtil.setFailoverConfigurations(cluster, conf, "MyCluster"); @@ -509,11 +521,16 @@ public void testMoverCliWithHAConf() throws Exception { @Test public void testMoverCliWithFederation() throws Exception { + final Configuration clusterConf = new HdfsConfiguration(); + clusterConf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); final MiniDFSCluster cluster = new MiniDFSCluster - .Builder(new HdfsConfiguration()) + .Builder(clusterConf) .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(3)) .numDataNodes(0).build(); final Configuration conf = new HdfsConfiguration(); + clusterConf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); DFSTestUtil.setFederatedConfiguration(cluster, conf); try { Collection namenodes = DFSUtil.getInternalNsRpcUris(conf); @@ -557,11 +574,16 @@ public void testMoverCliWithFederation() throws Exception { @Test public void testMoverCliWithFederationHA() throws Exception { + final Configuration clusterConf = new HdfsConfiguration(); + clusterConf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); final MiniDFSCluster cluster = new MiniDFSCluster - .Builder(new HdfsConfiguration()) + .Builder(clusterConf) .nnTopology(MiniDFSNNTopology.simpleHAFederatedTopology(3)) .numDataNodes(0).build(); final Configuration conf = new HdfsConfiguration(); + clusterConf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); DFSTestUtil.setFederatedHAConfiguration(cluster, conf); try { Collection namenodes = DFSUtil.getInternalNsRpcUris(conf); @@ -625,6 +647,8 @@ public void testTwoReplicaSameStorageTypeShouldNotSelect() throws Exception { public void testMoveWhenStoragePolicyNotSatisfying() throws Exception { // HDFS-8147 final Configuration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, 
+ StoragePolicySatisfierMode.NONE.toString()); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(3) .storageTypes( @@ -650,6 +674,39 @@ public void testMoveWhenStoragePolicyNotSatisfying() throws Exception { } } + @Test(timeout = 300000) + public void testMoveWhenStoragePolicySatisfierIsRunning() throws Exception { + final Configuration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(3) + .storageTypes( + new StorageType[][] {{StorageType.DISK}, {StorageType.DISK}, + {StorageType.DISK}}).build(); + try { + cluster.waitActive(); + // Simulate External sps by creating #getNameNodeConnector instance. + DFSTestUtil.getNameNodeConnector(conf, HdfsServerConstants.MOVER_ID_PATH, + 1, true); + final DistributedFileSystem dfs = cluster.getFileSystem(); + final String file = "/testMoveWhenStoragePolicySatisfierIsRunning"; + // write to DISK + final FSDataOutputStream out = dfs.create(new Path(file)); + out.writeChars("testMoveWhenStoragePolicySatisfierIsRunning"); + out.close(); + + // move to ARCHIVE + dfs.setStoragePolicy(new Path(file), "COLD"); + int rc = ToolRunner.run(conf, new Mover.Cli(), + new String[] {"-p", file.toString()}); + int exitcode = ExitStatus.IO_EXCEPTION.getExitCode(); + Assert.assertEquals("Exit code should be " + exitcode, exitcode, rc); + } finally { + cluster.shutdown(); + } + } + @Test public void testMoverFailedRetry() throws Exception { // HDFS-8147 @@ -746,6 +803,8 @@ void initConfWithStripe(Configuration conf) { 1L); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); } @Test(timeout = 300000) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java index 356ae3a8b2b..1b3faa0eee9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java @@ -26,8 +26,8 @@ import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -61,7 +61,7 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Assert; import org.junit.Test; @@ -72,13 +72,13 @@ * Test the data migration tool (for Archival Storage) */ public class TestStorageMover { - static final Log LOG = LogFactory.getLog(TestStorageMover.class); + static final Logger LOG = LoggerFactory.getLogger(TestStorageMover.class); static { - GenericTestUtils.setLogLevel(LogFactory.getLog(BlockPlacementPolicy.class), - Level.ALL); - GenericTestUtils.setLogLevel(LogFactory.getLog(Dispatcher.class), - Level.ALL); - GenericTestUtils.setLogLevel(DataTransferProtocol.LOG, Level.ALL); + GenericTestUtils.setLogLevel( + 
LoggerFactory.getLogger(BlockPlacementPolicy.class), Level.TRACE); + GenericTestUtils.setLogLevel(LoggerFactory.getLogger(Dispatcher.class), + Level.TRACE); + GenericTestUtils.setLogLevel(DataTransferProtocol.LOG, Level.TRACE); } private static final int BLOCK_SIZE = 1024; @@ -96,6 +96,8 @@ DEFAULT_CONF.setLong( DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 2L); DEFAULT_CONF.setLong(DFSConfigKeys.DFS_MOVER_MOVEDWINWIDTH_KEY, 2000L); + DEFAULT_CONF.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT); DEFAULT_POLICIES = BlockStoragePolicySuite.createDefaultSuite(); HOT = DEFAULT_POLICIES.getPolicy(HdfsConstants.HOT_STORAGE_POLICY_NAME); @@ -568,7 +570,7 @@ public void testMigrateOpenFileToArchival() throws Exception { // make sure the writing can continue out.writeBytes("world!"); ((DFSOutputStream) out.getWrappedStream()).hsync(); - IOUtils.cleanup(LOG, out); + IOUtils.cleanupWithLogger(LOG, out); lbs = test.dfs.getClient().getLocatedBlocks( barFile.toString(), BLOCK_SIZE); @@ -583,7 +585,7 @@ public void testMigrateOpenFileToArchival() throws Exception { byte[] buf = new byte[13]; // read from offset 1024 in.readFully(BLOCK_SIZE, buf, 0, buf.length); - IOUtils.cleanup(LOG, in); + IOUtils.cleanupWithLogger(LOG, in); Assert.assertEquals("hello, world!", new String(buf)); } finally { test.shutdownCluster(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index f990c5eb6c2..883e43ca904 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -43,8 +43,8 @@ import java.util.Set; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.permission.FsPermission; @@ -75,7 +75,8 @@ */ public abstract class FSImageTestUtil { - public static final Log LOG = LogFactory.getLog(FSImageTestUtil.class); + public static final Logger LOG = + LoggerFactory.getLogger(FSImageTestUtil.class); /** * The position in the fsimage header where the txid is @@ -562,15 +563,15 @@ public static void assertReasonableNameCurrentDir(File curDir) assertNotNull(image); } - public static void logStorageContents(Log LOG, NNStorage storage) { - LOG.info("current storages and corresponding sizes:"); + public static void logStorageContents(Logger log, NNStorage storage) { + log.info("current storages and corresponding sizes:"); for (StorageDirectory sd : storage.dirIterable(null)) { File curDir = sd.getCurrentDir(); - LOG.info("In directory " + curDir); + log.info("In directory " + curDir); File[] files = curDir.listFiles(); Arrays.sort(files); for (File f : files) { - LOG.info(" file " + f.getAbsolutePath() + "; len = " + f.length()); + log.info(" file " + f.getAbsolutePath() + "; len = " + f.length()); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java index 
3a3c47177aa..a9c4578ec16 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java @@ -28,8 +28,8 @@ import com.google.common.base.Preconditions; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CryptoProtocolVersion; import org.apache.hadoop.fs.CreateFlag; @@ -103,7 +103,8 @@ * documentation accordingly. */ public class NNThroughputBenchmark implements Tool { - private static final Log LOG = LogFactory.getLog(NNThroughputBenchmark.class); + private static final Logger LOG = + LoggerFactory.getLogger(NNThroughputBenchmark.class); private static final int BLOCK_SIZE = 16; private static final String GENERAL_OPTIONS_USAGE = "[-keepResults] | [-logLevel L] | [-UGCacheRefreshCount G]"; @@ -145,7 +146,7 @@ void close() { } static void setNameNodeLoggingLevel(Level logLevel) { - LOG.fatal("Log level = " + logLevel.toString()); + LOG.error("Log level = " + logLevel.toString()); // change log level to NameNode logs DFSTestUtil.setNameNodeLogLevel(logLevel); GenericTestUtils.setLogLevel(LogManager.getLogger( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java index d637af5b497..e7f51ce2e94 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java @@ -22,8 +22,8 @@ import java.io.IOException; import java.util.Iterator; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -45,8 +45,8 @@ @InterfaceStability.Unstable public class OfflineEditsViewerHelper { - private static final Log LOG = - LogFactory.getLog(OfflineEditsViewerHelper.class); + private static final Logger LOG = + LoggerFactory.getLogger(OfflineEditsViewerHelper.class); final long blockSize = 512; MiniDFSCluster cluster = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java index 1aa77266f0c..13cd16f71a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java @@ -24,8 +24,8 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.EnumSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.permission.FsPermission; @@ -46,7 +46,8 @@ * 
FSNamesystem.getAdditionalBlock(). */ public class TestAddBlockRetry { - public static final Log LOG = LogFactory.getLog(TestAddBlockRetry.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestAddBlockRetry.class); private static final short REPLICATION = 3; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAllowFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAllowFormat.java index 7e3a030b60d..da4e71e3c5d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAllowFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAllowFormat.java @@ -29,8 +29,8 @@ import java.io.IOException; import java.net.InetSocketAddress; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -53,8 +53,8 @@ public class TestAllowFormat { public static final String NAME_NODE_HOST = "localhost:"; public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:"; - private static final Log LOG = - LogFactory.getLog(TestAllowFormat.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestAllowFormat.class.getName()); private static final File DFS_BASE_DIR = new File(PathUtils.getTestDir(TestAllowFormat.class), "dfs"); private static Configuration config; private static MiniDFSCluster cluster = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java index 9a4f98b0d1e..3db16e83bab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java @@ -20,8 +20,8 @@ import com.google.common.base.Joiner; import com.google.common.base.Optional; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -43,7 +43,7 @@ * Test that the HDFS Audit logger respects DFS_NAMENODE_AUDIT_LOG_DEBUG_CMDLIST. 
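The logging hunks above all follow one mechanical migration: the commons-logging Log/LogFactory pair is replaced by SLF4J's Logger/LoggerFactory, and verbose log levels are set through GenericTestUtils.setLogLevel with org.slf4j.event.Level instead of the log4j Level constants. A minimal illustrative sketch of the resulting shape (MyTest is a placeholder name, not a class touched by this patch):

    import org.apache.hadoop.test.GenericTestUtils;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.slf4j.event.Level;

    public class MyTest {
      // was: private static final Log LOG = LogFactory.getLog(MyTest.class);
      private static final Logger LOG = LoggerFactory.getLogger(MyTest.class);

      static {
        // was: GenericTestUtils.setLogLevel(..., org.apache.log4j.Level.ALL);
        GenericTestUtils.setLogLevel(LOG, Level.TRACE);
      }
    }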
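Separately, the TestMover and TestStorageMover hunks above pin the storage policy satisfier mode in the test configuration before a MiniDFSCluster is built, so the Mover under test does not compete with the satisfier service. A minimal sketch of that setup, using only classes the hunks themselves reference (the datanode count is arbitrary):

    Configuration conf = new HdfsConfiguration();
    // NONE for ordinary Mover tests; EXTERNAL in the one test that expects the
    // Mover to exit with ExitStatus.IO_EXCEPTION while an external SPS is active.
    conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY,
        StoragePolicySatisfierMode.NONE.toString());
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(3).build();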
*/ public class TestAuditLogAtDebug { - static final Log LOG = LogFactory.getLog(TestAuditLogAtDebug.class); + static final Logger LOG = LoggerFactory.getLogger(TestAuditLogAtDebug.class); @Rule public Timeout timeout = new Timeout(300000); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java index 267896464fd..5ae9a3eb977 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java @@ -29,8 +29,8 @@ import java.util.Collections; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; @@ -52,7 +52,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Before; import org.junit.Test; @@ -62,12 +62,13 @@ import com.google.common.collect.Lists; public class TestBackupNode { - public static final Log LOG = LogFactory.getLog(TestBackupNode.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestBackupNode.class); static { - GenericTestUtils.setLogLevel(Checkpointer.LOG, Level.ALL); - GenericTestUtils.setLogLevel(BackupImage.LOG, Level.ALL); + GenericTestUtils.setLogLevel(Checkpointer.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(BackupImage.LOG, Level.TRACE); } static final String BASE_DIR = MiniDFSCluster.getBaseDirectory(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java index 551670e1d78..b3e91e5072a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY; import static org.apache.hadoop.hdfs.protocol.CachePoolInfo.RELATIVE_EXPIRY_NEVER; import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; import static org.junit.Assert.assertEquals; @@ -42,8 +43,8 @@ import java.util.List; import org.apache.commons.lang3.time.DateUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CacheFlag; @@ -95,7 +96,7 @@ import com.google.common.base.Supplier; public class TestCacheDirectives { - static final Log LOG = LogFactory.getLog(TestCacheDirectives.class); + 
static final Logger LOG = LoggerFactory.getLogger(TestCacheDirectives.class); private static final UserGroupInformation unprivilegedUser = UserGroupInformation.createRemoteUser("unprivilegedUser"); @@ -1556,4 +1557,52 @@ public void testNoLookupsWhenNotUsed() throws Exception { cm.setCachedLocations(locations); Mockito.verifyZeroInteractions(locations); } + + @Test(timeout=120000) + public void testAddingCacheDirectiveInfosWhenCachingIsDisabled() + throws Exception { + cluster.shutdown(); + HdfsConfiguration config = createCachingConf(); + config.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, false); + cluster = new MiniDFSCluster.Builder(config) + .numDataNodes(NUM_DATANODES).build(); + + cluster.waitActive(); + dfs = cluster.getFileSystem(); + namenode = cluster.getNameNode(); + CacheManager cacheManager = namenode.getNamesystem().getCacheManager(); + assertFalse(cacheManager.isEnabled()); + assertNull(cacheManager.getCacheReplicationMonitor()); + // Create the pool + String pool = "pool1"; + namenode.getRpcServer().addCachePool(new CachePoolInfo(pool)); + // Create some test files + final int numFiles = 2; + final int numBlocksPerFile = 2; + final List paths = new ArrayList(numFiles); + for (int i=0; i() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + // flip the latch to unleash the spamming threads to congest + // the queue. + startSpamLatch.countDown(); + // wait until unblocked after a synchronized thread is started. + blockerSemaphore.acquire(); + invocation.callRealMethod(); + return null; + } + } + ).when(blockingOp).setTransactionId(Mockito.anyLong()); + // don't reset fields so instance can be reused. + Mockito.doNothing().when(blockingOp).reset(); + + // repeatedly overflow the queue and verify it doesn't deadlock. + for (int i = 0; i < 8; i++) { + // when the blockingOp is logged, it triggers the latch to unleash the + // spammers to overflow the edit queue, then waits for a permit + // from blockerSemaphore that will be released at the bottom of + // this loop. + Future blockingEdit = executor.submit(new Callable() { + @Override + public Void call() throws Exception { + Thread.currentThread().setName("Log blocker"); + editLog.logEdit(blockingOp); + editLog.logSync(); + return null; + } + }); + + // wait for spammers to seize up the edit log. + long startTxId = editLog.getLastWrittenTxIdWithoutLock(); + final long[] txIds = { startTxId, startTxId, startTxId }; + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + txIds[0] = txIds[1]; + txIds[1] = txIds[2]; + txIds[2] = editLog.getLastWrittenTxIdWithoutLock(); + return (txIds[0] == txIds[1] && + txIds[1] == txIds[2] && + txIds[2] > startTxId); + } + }, 100, 10000); + + // callers that synchronize on the edit log while the queue is full + // are prone to deadlock if the locking is incorrect. at this point: + // 1. the blocking edit is holding the log's monitor. + // 2. the spammers have filled the queue. + // 3. the spammers are blocked waiting to queue another edit. + // Now we'll start another thread to synchronize on the log (simulates + // what log rolling does), unblock the op currently holding the + // monitor, and ensure deadlock does not occur. + CountDownLatch readyLatch = new CountDownLatch(1); + Future synchedEdits = executor.submit(new Callable() { + @Override + public Void call() throws Exception { + Thread.currentThread().setName("Log synchronizer"); + // the sync is CRUCIAL for this test. 
it's what causes edit + // log rolling to deadlock when queue is full. + readyLatch.countDown(); + synchronized (editLog) { + editLog.logEdit(reuseOp); + editLog.logSync(); + } + return null; + } + }); + // unblock the edit jammed in setting its txid. queued edits should + // start flowing and the synced edits should complete. + readyLatch.await(); + blockerSemaphore.release(); + blockingEdit.get(); + synchedEdits.get(); + } + + // tell spammers to stop. + done.set(true); + for (int i=0; i < logSpammers.length; i++) { + logSpammers[i].get(); + } + // just make sure everything can be synced. + editLog.logSyncAll(); + } finally { + LOG.info("Closing nn"); + executor.shutdownNow(); + if (namesystem != null) { + namesystem.getFSImage().getStorage().close(); + namesystem.close(); + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java index bdd48e873cd..45a785be75e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.namenode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager; @@ -34,7 +34,8 @@ * Test {@link FSDirAttrOp}. */ public class TestFSDirAttrOp { - public static final Log LOG = LogFactory.getLog(TestFSDirAttrOp.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestFSDirAttrOp.class); private boolean unprotectedSetTimes(long atime, long atime0, long precision, long mtime, boolean force) throws QuotaExceededException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirWriteFileOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirWriteFileOp.java new file mode 100644 index 00000000000..762fa61dd6d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirWriteFileOp.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import static org.junit.Assert.assertNull; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyByte; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyList; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anySet; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.EnumSet; + +import org.apache.hadoop.hdfs.AddBlockFlag; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.ValidateAddBlockResult; +import org.apache.hadoop.net.Node; +import org.junit.Test; +import org.mockito.ArgumentCaptor; + +public class TestFSDirWriteFileOp { + + @Test + @SuppressWarnings("unchecked") + public void testIgnoreClientLocality() throws IOException { + ValidateAddBlockResult addBlockResult = + new ValidateAddBlockResult(1024L, 3, (byte) 0x01, null, null, null); + + EnumSet<AddBlockFlag> addBlockFlags = + EnumSet.of(AddBlockFlag.IGNORE_CLIENT_LOCALITY); + + BlockManager bmMock = mock(BlockManager.class); + + ArgumentCaptor<Node> nodeCaptor = ArgumentCaptor.forClass(Node.class); + + when(bmMock.chooseTarget4NewBlock(anyString(), anyInt(), any(), anySet(), + anyLong(), anyList(), anyByte(), any(), any(), any())).thenReturn(null); + + FSDirWriteFileOp.chooseTargetForNewBlock(bmMock, "localhost", null, null, + addBlockFlags, addBlockResult); + + // There should be no other interactions with the block manager when the + // IGNORE_CLIENT_LOCALITY is passed in because there is no need to discover + // the local node requesting the new block + verify(bmMock, times(1)).chooseTarget4NewBlock(anyString(), anyInt(), + nodeCaptor.capture(), anySet(), anyLong(), anyList(), anyByte(), any(), + any(), any()); + + verifyNoMoreInteractions(bmMock); + + assertNull( + "Source node was assigned a value. Expected 'null' value because " + + "chooseTarget was flagged to ignore source node locality", + nodeCaptor.getValue()); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirectory.java index c35c95a7353..d143013fdef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirectory.java @@ -28,8 +28,8 @@ import java.util.Random; import com.google.common.collect.ImmutableList; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Path; @@ -58,7 +58,8 @@ * Test {@link FSDirectory}, the in-memory namespace tree.
*/ public class TestFSDirectory { - public static final Log LOG = LogFactory.getLog(TestFSDirectory.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestFSDirectory.class); private static final long seed = 0; private static final short REPLICATION = 3; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index c074ae16816..daeeff29c31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -62,7 +62,7 @@ import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -94,8 +94,8 @@ private static Configuration getConf() { } static { - GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); - GenericTestUtils.setLogLevel(FSEditLogLoader.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.TRACE); + GenericTestUtils.setLogLevel(FSEditLogLoader.LOG, Level.TRACE); } private static final File TEST_DIR = PathUtils.getTestDir(TestFSEditLogLoader.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java index 58ecc8a7600..6f31b58d1e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java @@ -47,7 +47,7 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Assert; @@ -60,7 +60,7 @@ public class TestFSImageWithSnapshot { { SnapshotTestHelper.disableLogs(); - GenericTestUtils.setLogLevel(INode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(INode.LOG, Level.TRACE); } static final long seed = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java index 3352fd00ccf..f5a112c7acd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java @@ -29,7 +29,7 @@ import java.util.EnumSet; import java.util.Random; -import org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CreateFlag; @@ -43,7 +43,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.test.GenericTestUtils; -import 
org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -52,8 +52,8 @@ public class TestFavoredNodesEndToEnd { { - GenericTestUtils.setLogLevel(LogFactory.getLog(BlockPlacementPolicy.class), - Level.ALL); + GenericTestUtils.setLogLevel( + LoggerFactory.getLogger(BlockPlacementPolicy.class), Level.TRACE); } private static MiniDFSCluster cluster; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index 4e4c64bd10b..f258caea0d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ -34,8 +34,8 @@ import java.util.List; import java.util.PriorityQueue; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; @@ -53,7 +53,8 @@ import com.google.common.collect.ImmutableList; public class TestFileJournalManager { - static final Log LOG = LogFactory.getLog(TestFileJournalManager.class); + static final Logger LOG = + LoggerFactory.getLogger(TestFileJournalManager.class); private Configuration conf; @@ -107,11 +108,12 @@ static long getNumberOfTransactions(FileJournalManager jm, long fromTxId, numTransactions++; } } finally { - IOUtils.cleanup(LOG, elis); + IOUtils.cleanupWithLogger(LOG, elis); } } } finally { - IOUtils.cleanup(LOG, allStreams.toArray(new EditLogInputStream[0])); + IOUtils.cleanupWithLogger( + LOG, allStreams.toArray(new EditLogInputStream[0])); } return numTransactions; } @@ -420,8 +422,9 @@ private static EditLogInputStream getJournalInputStream(FileJournalManager jm, return ret; } } finally { - IOUtils.cleanup(LOG, allStreams.toArray(new EditLogInputStream[0])); - IOUtils.cleanup(LOG, elis); + IOUtils.cleanupWithLogger( + LOG, allStreams.toArray(new EditLogInputStream[0])); + IOUtils.cleanupWithLogger(LOG, elis); } return null; } @@ -445,7 +448,7 @@ public void testReadFromMiddleOfEditLog() throws CorruptionException, FSEditLogOp op = elis.readOp(); assertEquals("read unexpected op", op.getTransactionId(), 5); } finally { - IOUtils.cleanup(LOG, elis); + IOUtils.cleanupWithLogger(LOG, elis); } } @@ -475,7 +478,7 @@ public void testExcludeInProgressStreams() throws CorruptionException, assertTrue(lastReadOp.getTransactionId() <= 100); } } finally { - IOUtils.cleanup(LOG, elis); + IOUtils.cleanupWithLogger(LOG, elis); } } @@ -502,7 +505,7 @@ public void testDoPreUpgradeIOError() throws IOException { } jm.doPreUpgrade(); } finally { - IOUtils.cleanup(LOG, jm); + IOUtils.cleanupWithLogger(LOG, jm); // Restore permissions on storage directory and make sure we can delete. 
FileUtil.setWritable(storageDir, true); FileUtil.fullyDelete(storageDir); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java index 51a94e73f5e..f1083875e13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java @@ -33,8 +33,8 @@ import java.io.IOException; import java.util.concurrent.ThreadLocalRandom; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; @@ -65,17 +65,17 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; public class TestFileTruncate { static { - GenericTestUtils.setLogLevel(NameNode.stateChangeLog, Level.ALL); - GenericTestUtils.setLogLevel(FSEditLogLoader.LOG, Level.ALL); + GenericTestUtils.setLogLevel(NameNode.stateChangeLog, Level.TRACE); + GenericTestUtils.setLogLevel(FSEditLogLoader.LOG, Level.TRACE); } - static final Log LOG = LogFactory.getLog(TestFileTruncate.class); + static final Logger LOG = LoggerFactory.getLogger(TestFileTruncate.class); static final int BLOCK_SIZE = 4; static final short REPLICATION = 3; static final int DATANODE_NUM = 3; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index 1a392da93b5..0a2b53812f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -60,9 +60,8 @@ import java.util.regex.Pattern; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataOutputStream; @@ -136,8 +135,8 @@ * A JUnit test for doing fsck. 
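The IOUtils.cleanup(LOG, ...) calls rewritten above become IOUtils.cleanupWithLogger, whose first argument is an SLF4J Logger rather than a commons-logging Log; the behaviour is otherwise the same quiet close. A minimal sketch of the pattern (fs, path and LOG are assumed to exist in the surrounding test, and any Closeable is handled the same way):

    FSDataInputStream in = null;
    try {
      in = fs.open(path);
      // ... use the stream ...
    } finally {
      IOUtils.cleanupWithLogger(LOG, in);   // close quietly, logging failures to LOG
    }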
*/ public class TestFsck { - private static final Log LOG = - LogFactory.getLog(TestFsck.class.getName()); + private static final org.slf4j.Logger LOG = + LoggerFactory.getLogger(TestFsck.class.getName()); static final String AUDITLOG_FILE = GenericTestUtils.getTempPath("TestFsck-audit.log"); @@ -171,13 +170,15 @@ static String runFsck(Configuration conf, int expectedErrCode, throws Exception { ByteArrayOutputStream bStream = new ByteArrayOutputStream(); PrintStream out = new PrintStream(bStream, true); - GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.ALL); + GenericTestUtils.setLogLevel( + FSPermissionChecker.LOG, org.slf4j.event.Level.TRACE); int errCode = ToolRunner.run(new DFSck(conf, out), path); LOG.info("OUTPUT = " + bStream.toString()); if (checkErrorCode) { assertEquals(expectedErrCode, errCode); } - GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO); + GenericTestUtils.setLogLevel( + FSPermissionChecker.LOG, org.slf4j.event.Level.INFO); return bStream.toString(); } @@ -2488,4 +2489,22 @@ public void testFsckCorruptWhenOneReplicaIsCorrupt() runFsck(cluster.getConfiguration(0), 0, true, "/"); } } + + @Test + public void testFsckNonPrivilegedListCorrupt() throws Exception { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build(); + UserGroupInformation ugi = UserGroupInformation.createUserForTesting("systest", new String[]{""}); + ugi.doAs(new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + String path = "/"; + String outStr = runFsck(conf, -1, true, path, "-list-corruptfileblocks"); + + assertFalse(outStr.contains("The list of corrupt files under path '" + path + "' are:")); + assertFalse(outStr.contains("The filesystem under path '" + path + "' has ")); + assertTrue(outStr.contains("Failed to open path '" + path + "': Permission denied")); + return null; + } + }); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsckWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsckWithMultipleNameNodes.java index 124b301cb98..e414296796d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsckWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsckWithMultipleNameNodes.java @@ -22,8 +22,8 @@ import java.util.Random; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -44,7 +44,8 @@ * Test fsck with multiple NameNodes */ public class TestFsckWithMultipleNameNodes { - static final Log LOG = LogFactory.getLog(TestFsckWithMultipleNameNodes.class); + static final Logger LOG = + LoggerFactory.getLogger(TestFsckWithMultipleNameNodes.class); { DFSTestUtil.setNameNodeLogLevel(Level.ALL); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java index 020ecb56aa5..edcf9e15a91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java @@ -155,7 +155,7 @@ public DummyJournalManager(Configuration conf, URI u, } @Override - public void format(NamespaceInfo nsInfo) throws IOException { + public void format(NamespaceInfo nsInfo, boolean force) throws IOException { formatCalled = true; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHDFSConcat.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHDFSConcat.java index b5e0efe0649..4839783c952 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHDFSConcat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHDFSConcat.java @@ -27,8 +27,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FSDataInputStream; @@ -51,7 +51,8 @@ import org.junit.Test; public class TestHDFSConcat { - public static final Log LOG = LogFactory.getLog(TestHDFSConcat.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestHDFSConcat.class); private static final short REPL_FACTOR = 2; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java index 79e7acc2601..e86413d8bf3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java @@ -22,8 +22,8 @@ import java.lang.management.ManagementFactory; import java.util.Arrays; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; @@ -50,8 +50,8 @@ */ @RunWith(Parameterized.class) public class TestHostsFiles { - private static final Log LOG = - LogFactory.getLog(TestHostsFiles.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestHostsFiles.class.getName()); private Class hostFileMgrClass; public TestHostsFiles(Class hostFileMgrClass) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeAttributeProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeAttributeProvider.java index 9c7dcd33523..b3bab06e3f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeAttributeProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeAttributeProvider.java @@ -57,6 +57,11 @@ public static class MyAuthorizationProvider extends INodeAttributeProvider { public static class MyAccessControlEnforcer implements AccessControlEnforcer { + AccessControlEnforcer ace; + + public MyAccessControlEnforcer(AccessControlEnforcer defaultEnforcer) { + this.ace = defaultEnforcer; + } @Override public void checkPermission(String fsOwner, String 
supergroup, @@ -65,6 +70,13 @@ public void checkPermission(String fsOwner, String supergroup, int ancestorIndex, boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess, FsAction access, FsAction subAccess, boolean ignoreEmptyDir) throws AccessControlException { + if (ancestorIndex > 1 + && inodes[1].getLocalName().equals("user") + && inodes[2].getLocalName().equals("acl")) { + this.ace.checkPermission(fsOwner, supergroup, ugi, inodeAttrs, inodes, + pathByNameArr, snapshotId, path, ancestorIndex, doCheckOwner, + ancestorAccess, parentAccess, access, subAccess, ignoreEmptyDir); + } CALLED.add("checkPermission|" + ancestorAccess + "|" + parentAccess + "|" + access); } } @@ -84,6 +96,7 @@ public INodeAttributes getAttributes(String[] pathElements, final INodeAttributes inode) { CALLED.add("getAttributes"); final boolean useDefault = useDefault(pathElements); + final boolean useNullAcl = useNullAclFeature(pathElements); return new INodeAttributes() { @Override public boolean isDirectory() { @@ -126,7 +139,10 @@ public long getPermissionLong() { @Override public AclFeature getAclFeature() { AclFeature f; - if (useDefault) { + if (useNullAcl) { + int[] entries = new int[0]; + f = new AclFeature(entries); + } else if (useDefault) { f = inode.getAclFeature(); } else { AclEntry acl = new AclEntry.Builder().setType(AclEntryType.GROUP). @@ -167,8 +183,8 @@ public long getAccessTime() { @Override public AccessControlEnforcer getExternalAccessControlEnforcer( - AccessControlEnforcer deafultEnforcer) { - return new MyAccessControlEnforcer(); + AccessControlEnforcer defaultEnforcer) { + return new MyAccessControlEnforcer(defaultEnforcer); } private boolean useDefault(String[] pathElements) { @@ -176,6 +192,11 @@ private boolean useDefault(String[] pathElements) { !(pathElements[0].equals("user") && pathElements[1].equals("authz")); } + private boolean useNullAclFeature(String[] pathElements) { + return (pathElements.length > 2) + && pathElements[1].equals("user") + && pathElements[2].equals("acl"); + } } @Before @@ -368,4 +389,20 @@ public void testCustomProvider() throws Exception { }); } } + + @Test + public void testAclFeature() throws Exception { + UserGroupInformation ugi = UserGroupInformation.createUserForTesting( + "testuser", new String[]{"testgroup"}); + ugi.doAs((PrivilegedExceptionAction) () -> { + FileSystem fs = miniDFS.getFileSystem(); + Path aclDir = new Path("/user/acl"); + fs.mkdirs(aclDir); + Path aclChildDir = new Path(aclDir, "subdir"); + fs.mkdirs(aclChildDir); + AclStatus aclStatus = fs.getAclStatus(aclDir); + Assert.assertEquals(0, aclStatus.getEntries().size()); + return null; + }); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java index 22463573432..1392f9d9eb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java @@ -31,8 +31,8 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException; import org.apache.hadoop.fs.FSDataOutputStream; @@ -82,7 +82,7 @@ static { 
FileSystem.enableSymlinks(); } - public static final Log LOG = LogFactory.getLog(TestINodeFile.class); + public static final Logger LOG = LoggerFactory.getLogger(TestINodeFile.class); static final short BLOCKBITS = 48; static final long BLKSIZE_MAXVALUE = ~(0xffffL << BLOCKBITS); @@ -1066,7 +1066,7 @@ public void testDotdotInodePath() throws Exception { assertTrue(parentId == status.getFileId()); } finally { - IOUtils.cleanup(LOG, client); + IOUtils.cleanupWithLogger(LOG, client); if (cluster != null) { cluster.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java index 0882d18386c..df36322e9f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java @@ -20,8 +20,8 @@ import java.io.IOException; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -39,7 +39,8 @@ * deletion completes and handles new requests from other clients */ public class TestLargeDirectoryDelete { - private static final Log LOG = LogFactory.getLog(TestLargeDirectoryDelete.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestLargeDirectoryDelete.class); private static final Configuration CONF = new HdfsConfiguration(); private static final int TOTAL_BLOCKS = 10000; private MiniDFSCluster mc = null; @@ -49,6 +50,7 @@ static { CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1); CONF.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1); + CONF.setInt(DFSConfigKeys.DFS_NAMENODE_BLOCK_DELETION_INCREMENT_KEY, 1); } /** create a file with a length of filelen */ @@ -137,7 +139,6 @@ protected void execute() throws Throwable { threads[1].start(); final long start = Time.now(); - FSNamesystem.BLOCK_DELETION_INCREMENT = 1; mc.getFileSystem().delete(new Path("/root"), true); // recursive delete final long end = Time.now(); threads[0].endThread(); @@ -168,7 +169,7 @@ public void run() { try { execute(); } catch (Throwable throwable) { - LOG.warn(throwable); + LOG.warn("{}", throwable); setThrown(throwable); } finally { synchronized (this) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java index 1f31bdc88ef..e1c8ae3d243 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java @@ -452,7 +452,7 @@ public void testMaxCorruptFiles() throws Exception { cluster = new MiniDFSCluster.Builder(conf).build(); FileSystem fs = cluster.getFileSystem(); final int maxCorruptFileBlocks = - FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED; + conf.getInt(DFSConfigKeys.DFS_NAMENODE_MAX_CORRUPT_FILE_BLOCKS_RETURNED_KEY, 100); // create 110 files with one block each DFSTestUtil util = new 
DFSTestUtil.Builder().setName("testMaxCorruptFiles"). diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListOpenFiles.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListOpenFiles.java index 70550d5785c..337e3728a91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListOpenFiles.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListOpenFiles.java @@ -31,8 +31,8 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedEntries; import org.apache.hadoop.fs.FSDataOutputStream; @@ -66,7 +66,8 @@ private static MiniDFSCluster cluster = null; private static DistributedFileSystem fs = null; private static NamenodeProtocols nnRpc = null; - private static final Log LOG = LogFactory.getLog(TestListOpenFiles.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestListOpenFiles.class); @Before public void setUp() throws IOException { @@ -254,4 +255,4 @@ public void run() { } } } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java index b8dc44e89f1..baf999d6b7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java @@ -26,8 +26,8 @@ import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -49,7 +49,7 @@ private static final File TEST_ROOT_DIR = new File(MiniDFSCluster.getBaseDirectory()); - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestNNStorageRetentionFunctional.class); /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java index 1abca36b8c0..7a2fc9abef1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java @@ -26,8 +26,8 @@ import java.io.IOException; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -50,7 +50,7 @@ */ public class TestNameEditsConfigs { - private static final Log LOG = 
LogFactory.getLog(FSEditLog.class); + private static final Logger LOG = LoggerFactory.getLogger(FSEditLog.class); static final long SEED = 0xDEADBEEFL; static final int BLOCK_SIZE = 4096; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetadataConsistency.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetadataConsistency.java index fb9705d9533..ff49ddfaaf5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetadataConsistency.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetadataConsistency.java @@ -177,4 +177,4 @@ public Boolean get() { } }, SCAN_WAIT * 1000, 60000); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java index 9a0e67c3790..25642faffbf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java @@ -20,8 +20,9 @@ import com.google.common.base.Supplier; import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.metrics2.util.MBeans; @@ -48,7 +49,8 @@ * Test periodic logging of NameNode metrics. 
*/ public class TestNameNodeMetricsLogger { - static final Log LOG = LogFactory.getLog(TestNameNodeMetricsLogger.class); + static final Logger LOG = + LoggerFactory.getLogger(TestNameNodeMetricsLogger.class); @Rule public Timeout timeout = new Timeout(300000); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeReconfigure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeReconfigure.java index c0de63a8f5d..3265bed80c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeReconfigure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeReconfigure.java @@ -26,13 +26,20 @@ import static org.junit.Assert.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.ReconfigurationException; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfyManager; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.test.GenericTestUtils; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_DEFAULT; @@ -40,13 +47,15 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_BACKOFF_ENABLE_DEFAULT; public class TestNameNodeReconfigure { - public static final Log LOG = LogFactory - .getLog(TestNameNodeReconfigure.class); + public static final Logger LOG = LoggerFactory + .getLogger(TestNameNodeReconfigure.class); private MiniDFSCluster cluster; private final int customizedBlockInvalidateLimit = 500; @@ -216,6 +225,127 @@ public void testReconfigureHearbeatCheck() throws ReconfigurationException { datanodeManager.getHeartbeatRecheckInterval()); } + /** + * Tests enable/disable Storage Policy Satisfier dynamically when + * "dfs.storage.policy.enabled" feature is disabled. 
+ * + * @throws ReconfigurationException + * @throws IOException + */ + @Test(timeout = 30000) + public void testReconfigureSPSWithStoragePolicyDisabled() + throws ReconfigurationException, IOException { + // shutdown cluster + cluster.shutdown(); + Configuration conf = new HdfsConfiguration(); + conf.setBoolean(DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY, false); + cluster = new MiniDFSCluster.Builder(conf).build(); + cluster.waitActive(); + + final NameNode nameNode = cluster.getNameNode(); + verifySPSEnabled(nameNode, DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE, false); + + // enable SPS internally by keeping DFS_STORAGE_POLICY_ENABLED_KEY + nameNode.reconfigureProperty(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + + // Since DFS_STORAGE_POLICY_ENABLED_KEY is disabled, SPS can't be enabled. + assertNull("SPS shouldn't start as " + + DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY + " is disabled", + nameNode.getNamesystem().getBlockManager().getSPSManager()); + verifySPSEnabled(nameNode, DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL, false); + + assertEquals(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY + " has wrong value", + StoragePolicySatisfierMode.EXTERNAL.toString(), nameNode.getConf() + .get(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT)); + } + + /** + * Tests enable/disable Storage Policy Satisfier dynamically. + */ + @Test(timeout = 30000) + public void testReconfigureStoragePolicySatisfierEnabled() + throws ReconfigurationException { + final NameNode nameNode = cluster.getNameNode(); + + verifySPSEnabled(nameNode, DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE, false); + // try invalid values + try { + nameNode.reconfigureProperty(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + "text"); + fail("ReconfigurationException expected"); + } catch (ReconfigurationException e) { + GenericTestUtils.assertExceptionContains( + "For enabling or disabling storage policy satisfier, must " + + "pass either internal/external/none string value only", + e.getCause()); + } + + // disable SPS + nameNode.reconfigureProperty(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); + verifySPSEnabled(nameNode, DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE, false); + + // enable external SPS + nameNode.reconfigureProperty(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + assertEquals(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY + " has wrong value", + false, nameNode.getNamesystem().getBlockManager().getSPSManager() + .isSatisfierRunning()); + assertEquals(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY + " has wrong value", + StoragePolicySatisfierMode.EXTERNAL.toString(), + nameNode.getConf().get(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT)); + } + + /** + * Test to satisfy storage policy after disabled storage policy satisfier. 
+ */ + @Test(timeout = 30000) + public void testSatisfyStoragePolicyAfterSatisfierDisabled() + throws ReconfigurationException, IOException { + final NameNode nameNode = cluster.getNameNode(); + + // disable SPS + nameNode.reconfigureProperty(DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); + verifySPSEnabled(nameNode, DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE, false); + + Path filePath = new Path("/testSPS"); + DistributedFileSystem fileSystem = cluster.getFileSystem(); + fileSystem.create(filePath); + fileSystem.setStoragePolicy(filePath, "COLD"); + try { + fileSystem.satisfyStoragePolicy(filePath); + fail("Expected to fail, as storage policy feature has disabled."); + } catch (RemoteException e) { + GenericTestUtils + .assertExceptionContains("Cannot request to satisfy storage policy " + + "when storage policy satisfier feature has been disabled" + + " by admin. Seek for an admin help to enable it " + + "or use Mover tool.", e); + } + } + + void verifySPSEnabled(final NameNode nameNode, String property, + StoragePolicySatisfierMode expected, boolean isSatisfierRunning) { + StoragePolicySatisfyManager spsMgr = nameNode + .getNamesystem().getBlockManager().getSPSManager(); + boolean isSPSRunning = spsMgr != null ? spsMgr.isSatisfierRunning() + : false; + assertEquals(property + " has wrong value", isSPSRunning, isSPSRunning); + String actual = nameNode.getConf().get(property, + DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT); + assertEquals(property + " has wrong value", expected, + StoragePolicySatisfierMode.fromString(actual)); + } + @Test public void testBlockInvalidateLimitAfterReconfigured() throws ReconfigurationException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java index 877f43cde3e..26701a59d8a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java @@ -33,8 +33,8 @@ import java.util.Set; import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -85,7 +85,8 @@ private static Configuration getConf() { return conf; } - private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestNameNodeRecovery.class); private static final StartupOption recoverStartOpt = StartupOption.RECOVER; private static final File TEST_DIR = PathUtils.getTestDir(TestNameNodeRecovery.class); @@ -164,7 +165,7 @@ static void runEditLogTest(EditLogTestSetup elts) throws IOException { // We should have read every valid transaction. 
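The TestNameNodeReconfigure hunks above toggle the storage policy satisfier at runtime through NameNode#reconfigureProperty and then check both the observed satisfier state and the value persisted in the configuration. A condensed sketch of that verification, assuming the intent is to compare the caller-supplied expected running flag against the state reported by the StoragePolicySatisfyManager; the helper name is illustrative and the keys are the ones imported in the diff:

    /** Illustrative helper mirroring the intent of verifySPSEnabled above. */
    static void assertSpsState(NameNode nameNode, String property,
        StoragePolicySatisfierMode expectedMode, boolean expectedRunning) {
      StoragePolicySatisfyManager spsMgr =
          nameNode.getNamesystem().getBlockManager().getSPSManager();
      // The manager is null when SPS could not be started at all.
      boolean observedRunning = spsMgr != null && spsMgr.isSatisfierRunning();
      assertEquals(property + " running state is wrong",
          expectedRunning, observedRunning);
      String actual = nameNode.getConf().get(property,
          DFS_STORAGE_POLICY_SATISFIER_MODE_DEFAULT);
      assertEquals(property + " has wrong value", expectedMode,
          StoragePolicySatisfierMode.fromString(actual));
    }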
assertTrue(validTxIds.isEmpty()); } finally { - IOUtils.cleanup(LOG, elfos, elfis); + IOUtils.cleanupWithLogger(LOG, elfos, elfis); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRespectsBindHostKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRespectsBindHostKeys.java index 80856469c22..594b07b583e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRespectsBindHostKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRespectsBindHostKeys.java @@ -33,8 +33,8 @@ import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.assertTrue; @@ -52,7 +52,8 @@ * - DFS_NAMENODE_HTTPS_BIND_HOST_KEY */ public class TestNameNodeRespectsBindHostKeys { - public static final Log LOG = LogFactory.getLog(TestNameNodeRespectsBindHostKeys.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestNameNodeRespectsBindHostKeys.class); private static final String WILDCARD_ADDRESS = "0.0.0.0"; private static final String LOCALHOST_SERVER_ADDRESS = "127.0.0.1:0"; private static String keystoresDir; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java index f9bfc373749..1b0a671ff0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.namenode; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; import org.apache.hadoop.hdfs.server.datanode.DataNode; @@ -40,7 +40,7 @@ */ public class TestNameNodeStatusMXBean { - public static final Log LOG = LogFactory.getLog( + public static final Logger LOG = LoggerFactory.getLogger( TestNameNodeStatusMXBean.class); @Test(timeout = 120000L) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java index b7f0cfc1f63..4343b0acd03 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeCapacityReport.java @@ -25,8 +25,8 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import 
org.apache.hadoop.hdfs.DFSConfigKeys; @@ -52,7 +52,8 @@ * This tests InterDataNodeProtocol for block handling. */ public class TestNamenodeCapacityReport { - private static final Log LOG = LogFactory.getLog(TestNamenodeCapacityReport.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestNamenodeCapacityReport.class); /** * The following test first creates a file. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java index 0995f135d97..a1946c23f84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java @@ -463,7 +463,7 @@ public void testRetryCacheRebuild() throws Exception { assertTrue(namesystem.hasRetryCache()); cacheSet = (LightWeightCache) namesystem .getRetryCache().getCacheSet(); - assertEquals("Retry cache size is wrong", 39, cacheSet.size()); + assertEquals("Retry cache size is wrong", 38, cacheSet.size()); iter = cacheSet.iterator(); while (iter.hasNext()) { CacheEntry entry = iter.next(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestPersistentStoragePolicySatisfier.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestPersistentStoragePolicySatisfier.java new file mode 100644 index 00000000000..1ac9257a572 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestPersistentStoragePolicySatisfier.java @@ -0,0 +1,531 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.XAttr; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier; +import org.apache.hadoop.hdfs.server.sps.ExternalSPSContext; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +import com.google.common.base.Supplier; + +import java.io.IOException; +import java.util.List; + +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY; +import static org.junit.Assert.*; + +/** + * Test persistence of satisfying files/directories. + */ +public class TestPersistentStoragePolicySatisfier { + private static Configuration conf; + + private static MiniDFSCluster cluster; + private static DistributedFileSystem fs; + private NameNodeConnector nnc; + private StoragePolicySatisfier sps; + private ExternalSPSContext ctxt; + + private static Path testFile = + new Path("/testFile"); + private static String testFileName = testFile.toString(); + + private static Path parentDir = new Path("/parentDir"); + private static Path parentFile = new Path(parentDir, "parentFile"); + private static String parentFileName = parentFile.toString(); + private static Path childDir = new Path(parentDir, "childDir"); + private static Path childFile = new Path(childDir, "childFile"); + private static String childFileName = childFile.toString(); + + private static final String COLD = "COLD"; + private static final String WARM = "WARM"; + private static final String ONE_SSD = "ONE_SSD"; + + private static StorageType[][] storageTypes = new StorageType[][] { + {StorageType.DISK, StorageType.ARCHIVE, StorageType.SSD}, + {StorageType.DISK, StorageType.ARCHIVE, StorageType.SSD}, + {StorageType.DISK, StorageType.ARCHIVE, StorageType.SSD} + }; + + private final int timeout = 90000; + + /** + * Setup environment for every test case. + * @throws IOException + */ + public void clusterSetUp() throws Exception { + clusterSetUp(false, new HdfsConfiguration()); + } + + /** + * Setup environment for every test case. + * @param hdfsConf hdfs conf. + * @throws Exception + */ + public void clusterSetUp(Configuration hdfsConf) throws Exception { + clusterSetUp(false, hdfsConf); + } + + /** + * Setup cluster environment. + * @param isHAEnabled if true, enable simple HA. 
+ * @throws IOException + */ + private void clusterSetUp(boolean isHAEnabled, Configuration newConf) + throws Exception { + conf = newConf; + conf.set( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY, + "3000"); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + // Reduced refresh cycle to update latest datanodes. + conf.setLong(DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS, + 1000); + conf.setInt( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_KEY, 20); + final int dnNumber = storageTypes.length; + final short replication = 3; + MiniDFSCluster.Builder clusterBuilder = new MiniDFSCluster.Builder(conf) + .storageTypes(storageTypes).storagesPerDatanode(3) + .numDataNodes(dnNumber); + if (isHAEnabled) { + clusterBuilder.nnTopology(MiniDFSNNTopology.simpleHATopology()); + } + cluster = clusterBuilder.build(); + cluster.waitActive(); + if (isHAEnabled) { + cluster.transitionToActive(0); + fs = HATestUtil.configureFailoverFs(cluster, conf); + } else { + fs = cluster.getFileSystem(); + } + nnc = DFSTestUtil.getNameNodeConnector(conf, + HdfsServerConstants.MOVER_ID_PATH, 1, false); + + sps = new StoragePolicySatisfier(conf); + ctxt = new ExternalSPSContext(sps, nnc); + + sps.init(ctxt); + sps.start(StoragePolicySatisfierMode.EXTERNAL); + + createTestFiles(fs, replication); + } + + /** + * Setup test files for testing. + * @param dfs + * @param replication + * @throws Exception + */ + private void createTestFiles(DistributedFileSystem dfs, + short replication) throws Exception { + DFSTestUtil.createFile(dfs, testFile, 1024L, replication, 0L); + DFSTestUtil.createFile(dfs, parentFile, 1024L, replication, 0L); + DFSTestUtil.createFile(dfs, childFile, 1024L, replication, 0L); + + DFSTestUtil.waitReplication(dfs, testFile, replication); + DFSTestUtil.waitReplication(dfs, parentFile, replication); + DFSTestUtil.waitReplication(dfs, childFile, replication); + } + + /** + * Tear down environment for every test case. + * @throws IOException + */ + private void clusterShutdown() throws IOException{ + if(fs != null) { + fs.close(); + fs = null; + } + if(cluster != null) { + cluster.shutdown(true); + cluster = null; + } + if (sps != null) { + sps.stopGracefully(); + } + } + + /** + * While satisfying file/directory, trigger the cluster's checkpoint to + * make sure satisfier persistence work as expected. This test case runs + * as below: + * 1. use satisfyStoragePolicy and add xAttr to the file. + * 2. do the checkpoint by secondary NameNode. + * 3. restart the cluster immediately. + * 4. make sure all the storage policies are satisfied. + * @throws Exception + */ + @Test(timeout = 300000) + public void testWithCheckpoint() throws Exception { + SecondaryNameNode secondary = null; + try { + clusterSetUp(); + fs.setStoragePolicy(testFile, WARM); + fs.satisfyStoragePolicy(testFile); + + // Start the checkpoint. 
+ conf.set( + DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0"); + secondary = new SecondaryNameNode(conf); + secondary.doCheckpoint(); + restartCluster(); + + DFSTestUtil.waitExpectedStorageType( + testFileName, StorageType.DISK, 1, timeout, fs); + DFSTestUtil.waitExpectedStorageType( + testFileName, StorageType.ARCHIVE, 2, timeout, fs); + + fs.setStoragePolicy(parentDir, COLD); + fs.satisfyStoragePolicy(parentDir); + + DFSTestUtil.waitExpectedStorageType( + parentFileName, StorageType.ARCHIVE, 3, timeout, fs); + DFSTestUtil.waitExpectedStorageType( + childFileName, StorageType.ARCHIVE, 3, timeout, fs); + + } finally { + if (secondary != null) { + secondary.shutdown(); + } + clusterShutdown(); + } + } + + /** + * Tests to verify satisfier persistence working well with multiple + * restarts operations. This test case runs as below: + * 1. satisfy the storage policy of file1. + * 2. restart the cluster. + * 3. check whether all the blocks are satisfied. + * 4. satisfy the storage policy of file2. + * 5. restart the cluster. + * 6. check whether all the blocks are satisfied. + * @throws Exception + */ + @Test(timeout = 300000) + public void testWithRestarts() throws Exception { + try { + clusterSetUp(); + fs.setStoragePolicy(testFile, ONE_SSD); + fs.satisfyStoragePolicy(testFile); + restartCluster(); + DFSTestUtil.waitExpectedStorageType( + testFileName, StorageType.SSD, 1, timeout, fs); + DFSTestUtil.waitExpectedStorageType( + testFileName, StorageType.DISK, 2, timeout, fs); + + // test directory + fs.setStoragePolicy(parentDir, COLD); + fs.satisfyStoragePolicy(parentDir); + restartCluster(); + DFSTestUtil.waitExpectedStorageType( + parentFileName, StorageType.ARCHIVE, 3, timeout, fs); + DFSTestUtil.waitExpectedStorageType( + childFileName, StorageType.ARCHIVE, 3, timeout, fs); + } finally { + clusterShutdown(); + } + } + + /** + * Tests to verify SPS xattr will be removed if the satisfy work has + * been finished, expect that the method satisfyStoragePolicy can be + * invoked on the same file again after the block movement has been + * finished: + * 1. satisfy storage policy of file1. + * 2. wait until storage policy is satisfied. + * 3. satisfy storage policy of file1 again + * 4. make sure step 3 works as expected. + * @throws Exception + */ + @Test(timeout = 300000) + public void testMultipleSatisfyStoragePolicy() throws Exception { + try { + // Lower block movement check for testing. + conf = new HdfsConfiguration(); + final long minCheckTimeout = 500; // minimum value + conf.setLong( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY, + minCheckTimeout); + clusterSetUp(conf); + fs.setStoragePolicy(testFile, ONE_SSD); + fs.satisfyStoragePolicy(testFile); + DFSTestUtil.waitExpectedStorageType( + testFileName, StorageType.SSD, 1, timeout, fs); + DFSTestUtil.waitExpectedStorageType( + testFileName, StorageType.DISK, 2, timeout, fs); + + // Make sure satisfy xattr has been removed. + DFSTestUtil.waitForXattrRemoved(testFileName, + XATTR_SATISFY_STORAGE_POLICY, cluster.getNamesystem(), 30000); + + fs.setStoragePolicy(testFile, COLD); + fs.satisfyStoragePolicy(testFile); + DFSTestUtil.waitExpectedStorageType( + testFileName, StorageType.ARCHIVE, 3, timeout, fs); + } finally { + clusterShutdown(); + } + } + + /** + * Tests to verify SPS xattr is removed after SPS is dropped, + * expect that if the SPS is disabled/dropped, the SPS + * xattr should be removed accordingly: + * 1. satisfy storage policy of file1. + * 2. drop SPS thread in block manager. 
+ * 3. make sure sps xattr is removed. + * @throws Exception + */ + @Test(timeout = 300000000) + public void testDropSPS() throws Exception { + try { + clusterSetUp(); + fs.setStoragePolicy(testFile, ONE_SSD); + fs.satisfyStoragePolicy(testFile); + + cluster.getNamesystem().getBlockManager().getSPSManager() + .changeModeEvent(StoragePolicySatisfierMode.NONE); + + // Make sure satisfy xattr has been removed. + DFSTestUtil.waitForXattrRemoved(testFileName, + XATTR_SATISFY_STORAGE_POLICY, cluster.getNamesystem(), 30000); + + } finally { + clusterShutdown(); + } + } + + /** + * Tests that Xattrs should be cleaned if all blocks already satisfied. + * + * @throws Exception + */ + @Test(timeout = 300000) + public void testSPSShouldNotLeakXattrIfStorageAlreadySatisfied() + throws Exception { + try { + clusterSetUp(); + DFSTestUtil.waitExpectedStorageType(testFileName, StorageType.DISK, 3, + timeout, fs); + fs.satisfyStoragePolicy(testFile); + + DFSTestUtil.waitExpectedStorageType(testFileName, StorageType.DISK, 3, + timeout, fs); + + // Make sure satisfy xattr has been removed. + DFSTestUtil.waitForXattrRemoved(testFileName, + XATTR_SATISFY_STORAGE_POLICY, cluster.getNamesystem(), 30000); + + } finally { + clusterShutdown(); + } + } + + /** + * Test loading of SPS xAttrs from the edits log when satisfyStoragePolicy + * called on child file and parent directory. + * 1. Create one directory and create one child file. + * 2. Set storage policy for child file and call + * satisfyStoragePolicy. + * 3. wait for SPS to remove xAttr for file child file. + * 4. Set storage policy for parent directory and call + * satisfyStoragePolicy. + * 5. restart the namenode. + * NameNode should be started successfully. + */ + @Test(timeout = 300000) + public void testNameNodeRestartWhenSPSCalledOnChildFileAndParentDir() + throws Exception { + try { + clusterSetUp(); + fs.setStoragePolicy(childFile, "COLD"); + fs.satisfyStoragePolicy(childFile); + DFSTestUtil.waitExpectedStorageType(childFile.toUri().getPath(), + StorageType.ARCHIVE, 3, 30000, cluster.getFileSystem()); + // wait for SPS to remove Xattr from file + Thread.sleep(30000); + fs.setStoragePolicy(childDir, "COLD"); + fs.satisfyStoragePolicy(childDir); + try { + cluster.restartNameNodes(); + } catch (Exception e) { + assertFalse(e.getMessage().contains( + "Cannot request to call satisfy storage policy")); + } + } finally { + clusterShutdown(); + } + } + + /** + * Test SPS when satisfyStoragePolicy called on child file and + * parent directory. + * 1. Create one parent directory and child directory. + * 2. Create some file in both the directory. + * 3. Set storage policy for parent directory and call + * satisfyStoragePolicy. + * 4. Set storage policy for child directory and call + * satisfyStoragePolicy. + * 5. restart the namenode. + * All the file blocks should satisfy the policy. + */ + @Test(timeout = 300000) + public void testSPSOnChildAndParentDirectory() throws Exception { + try { + clusterSetUp(); + fs.setStoragePolicy(parentDir, "COLD"); + fs.satisfyStoragePolicy(childDir); + DFSTestUtil.waitExpectedStorageType(childFileName, StorageType.ARCHIVE, + 3, 30000, cluster.getFileSystem()); + fs.satisfyStoragePolicy(parentDir); + DFSTestUtil.waitExpectedStorageType(parentFileName, StorageType.ARCHIVE, + 3, 30000, cluster.getFileSystem()); + } finally { + clusterShutdown(); + } + } + + /** + * Test SPS xAttr on directory. xAttr should be removed from the directory + * once all the files blocks moved to specific storage. 
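The cases in the new TestPersistentStoragePolicySatisfier above all follow one verification pattern: set a storage policy, call satisfyStoragePolicy (which records the XATTR_SATISFY_STORAGE_POLICY marker), then wait for the replicas to reach the expected storage type and for the marker xattr to be removed. A condensed sketch of that pattern, using only calls that appear in the hunks above; the path and replica counts are illustrative:

    // Illustrative fragment; fs, cluster and timeout come from the test
    // setup shown earlier in the new file.
    Path file = new Path("/exampleFile");              // hypothetical test path
    DFSTestUtil.createFile(fs, file, 1024L, (short) 3, 0L);

    fs.setStoragePolicy(file, "COLD");
    fs.satisfyStoragePolicy(file);                     // records the SPS xattr

    // Block movement is asynchronous: wait for the replicas to land on
    // ARCHIVE, then for the satisfy-marker xattr to be cleaned up.
    DFSTestUtil.waitExpectedStorageType(
        file.toString(), StorageType.ARCHIVE, 3, timeout, fs);
    DFSTestUtil.waitForXattrRemoved(file.toString(),
        XATTR_SATISFY_STORAGE_POLICY, cluster.getNamesystem(), 30000);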
+ */ + @Test(timeout = 300000) + public void testSPSxAttrWhenSpsCalledForDir() throws Exception { + try { + clusterSetUp(); + Path parent = new Path("/parent"); + // create parent dir + fs.mkdirs(parent); + + // create 10 child files + for (int i = 0; i < 5; i++) { + DFSTestUtil.createFile(fs, new Path(parent, "f" + i), 1024, (short) 3, + 0); + } + + // Set storage policy for parent directory + fs.setStoragePolicy(parent, "COLD"); + + // Stop one DN so we can check the SPS xAttr for directory. + DataNodeProperties stopDataNode = cluster.stopDataNode(0); + + fs.satisfyStoragePolicy(parent); + + // Check xAttr for parent directory + FSNamesystem namesystem = cluster.getNamesystem(); + INode inode = namesystem.getFSDirectory().getINode("/parent"); + XAttrFeature f = inode.getXAttrFeature(); + assertTrue("SPS xAttr should be exist", + f.getXAttr(XATTR_SATISFY_STORAGE_POLICY) != null); + + // check for the child, SPS xAttr should not be there + for (int i = 0; i < 5; i++) { + inode = namesystem.getFSDirectory().getINode("/parent/f" + i); + f = inode.getXAttrFeature(); + assertTrue(f == null); + } + + cluster.restartDataNode(stopDataNode, false); + + // wait and check all the file block moved in ARCHIVE + for (int i = 0; i < 5; i++) { + DFSTestUtil.waitExpectedStorageType("/parent/f" + i, + StorageType.ARCHIVE, 3, 30000, cluster.getFileSystem()); + } + DFSTestUtil.waitForXattrRemoved("/parent", XATTR_SATISFY_STORAGE_POLICY, + namesystem, 10000); + } finally { + clusterShutdown(); + } + + } + + /** + * Test SPS xAttr on file. xAttr should be removed from the file + * once all the blocks moved to specific storage. + */ + @Test(timeout = 300000) + public void testSPSxAttrWhenSpsCalledForFile() throws Exception { + try { + clusterSetUp(); + Path file = new Path("/file"); + DFSTestUtil.createFile(fs, file, 1024, (short) 3, 0); + + // Set storage policy for file + fs.setStoragePolicy(file, "COLD"); + + // Stop one DN so we can check the SPS xAttr for file. + DataNodeProperties stopDataNode = cluster.stopDataNode(0); + + fs.satisfyStoragePolicy(file); + + // Check xAttr for parent directory + FSNamesystem namesystem = cluster.getNamesystem(); + INode inode = namesystem.getFSDirectory().getINode("/file"); + XAttrFeature f = inode.getXAttrFeature(); + assertTrue("SPS xAttr should be exist", + f.getXAttr(XATTR_SATISFY_STORAGE_POLICY) != null); + + cluster.restartDataNode(stopDataNode, false); + + // wait and check all the file block moved in ARCHIVE + DFSTestUtil.waitExpectedStorageType("/file", StorageType.ARCHIVE, 3, + 30000, cluster.getFileSystem()); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + List existingXAttrs = XAttrStorage.readINodeXAttrs(inode); + return !existingXAttrs.contains(XATTR_SATISFY_STORAGE_POLICY); + } + }, 100, 10000); + } finally { + clusterShutdown(); + } + } + + /** + * Restart the hole env and trigger the DataNode's heart beats. 
+ * @throws Exception + */ + private void restartCluster() throws Exception { + cluster.restartDataNodes(); + cluster.restartNameNodes(); + cluster.waitActive(); + cluster.triggerHeartbeats(); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaByStorageType.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaByStorageType.java index c0757e65201..a53b319eabd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaByStorageType.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaByStorageType.java @@ -21,26 +21,26 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; - import org.apache.commons.logging.Log; - import org.apache.commons.logging.LogFactory; - import org.apache.hadoop.conf.Configuration; - import org.apache.hadoop.fs.ContentSummary; - import org.apache.hadoop.fs.Path; - import org.apache.hadoop.fs.StorageType; - import org.apache.hadoop.hdfs.DFSConfigKeys; - import org.apache.hadoop.hdfs.DFSTestUtil; - import org.apache.hadoop.hdfs.DistributedFileSystem; - import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.QuotaByStorageTypeExceededException; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; - import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; - import org.junit.Before; - import org.junit.Test; +import org.junit.Before; +import org.junit.Test; - import java.io.IOException; +import java.io.IOException; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.allOf; import static org.hamcrest.CoreMatchers.is; @@ -58,7 +58,8 @@ private DistributedFileSystem dfs; private FSNamesystem fsn; - protected static final Log LOG = LogFactory.getLog(TestQuotaByStorageType.class); + protected static final Logger LOG = + LoggerFactory.getLogger(TestQuotaByStorageType.class); @Before public void setUp() throws Exception { @@ -944,4 +945,4 @@ public void testStorageSpaceQuotaPerQuotaClear() throws IOException { testDirNode.asDirectory().getDirectoryWithQuotaFeature().getQuota() .getTypeSpace(StorageType.SSD)); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java index 5409f0d483a..5d34d3cc75e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java @@ -68,6 +68,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static 
org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -207,8 +208,7 @@ public void testReencryptionBasic() throws Exception { ZoneReencryptionStatus zs = it.next(); assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertNotEquals(fei0.getEzKeyVersionName(), zs.getEzKeyVersionName()); assertEquals(fei1.getEzKeyVersionName(), zs.getEzKeyVersionName()); assertEquals(10, zs.getFilesReencrypted()); @@ -600,14 +600,27 @@ private void verifyZoneStatus(final Path zone, final FileEncryptionInfo fei, final ZoneReencryptionStatus zs = it.next(); assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); if (fei != null) { assertNotEquals(fei.getEzKeyVersionName(), zs.getEzKeyVersionName()); } assertEquals(expectedFiles, zs.getFilesReencrypted()); } + /** + * Verify the zone status' completion time is larger than 0, and is no less + * than submission time. + */ + private void verifyZoneCompletionTime(final ZoneReencryptionStatus zs) { + assertNotNull(zs); + assertTrue("Completion time should be positive. " + zs.getCompletionTime(), + zs.getCompletionTime() > 0); + assertTrue("Completion time " + zs.getCompletionTime() + + " should be no less than submission time " + + zs.getSubmissionTime(), + zs.getCompletionTime() >= zs.getSubmissionTime()); + } + @Test public void testReencryptLoadedFromFsimage() throws Exception { /* @@ -1476,7 +1489,7 @@ private void cancelFutureDuringReencryption(final Path zone) } @Override - public void reencryptEncryptedKeys() throws IOException { + public synchronized void reencryptEncryptedKeys() throws IOException { if (exceptionCount > 0) { exceptionCount--; try { @@ -1537,8 +1550,7 @@ public Boolean get() { assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); assertTrue(zs.isCanceled()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(0, zs.getFilesReencrypted()); assertTrue(getUpdater().isRunning()); @@ -1560,8 +1572,7 @@ public void testCancelFutureThenReencrypt() throws Exception { assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); assertFalse(zs.isCanceled()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(10, zs.getFilesReencrypted()); } @@ -1579,8 +1590,7 @@ public void testCancelFutureThenRestart() throws Exception { assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); assertTrue(zs.isCanceled()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(0, zs.getFilesReencrypted()); // verify re-encryption works after restart. 
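The TestReencryption hunks above fold the repeated completion-time assertions into a single verifyZoneCompletionTime helper, which accepts getCompletionTime() >= getSubmissionTime() where the inlined assertions previously required a strictly larger value. A short sketch of an equivalent check, with names taken from the hunks above:

    /** Illustrative check mirroring verifyZoneCompletionTime above. */
    private static void assertZoneCompleted(ZoneReencryptionStatus zs) {
      assertNotNull(zs);
      assertTrue("Completion time should be positive: " + zs.getCompletionTime(),
          zs.getCompletionTime() > 0);
      // ">=" rather than ">": completion may be stamped in the same
      // millisecond as submission.
      assertTrue("Completion should not precede submission",
          zs.getCompletionTime() >= zs.getSubmissionTime());
    }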
@@ -1592,8 +1602,7 @@ public void testCancelFutureThenRestart() throws Exception { assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); assertFalse(zs.isCanceled()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(10, zs.getFilesReencrypted()); } @@ -1679,8 +1688,7 @@ public void testReencryptionWithoutProvider() throws Exception { ZoneReencryptionStatus zs = it.next(); assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(10, zs.getFilesReencrypted()); } @@ -1736,7 +1744,7 @@ public void testReencryptionKMSDown() throws Exception { } @Override - public void reencryptEncryptedKeys() throws IOException { + public synchronized void reencryptEncryptedKeys() throws IOException { if (exceptionCount > 0) { --exceptionCount; throw new IOException("Injected KMS failure"); @@ -1772,8 +1780,7 @@ public void reencryptEncryptedKeys() throws IOException { ZoneReencryptionStatus zs = it.next(); assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(5, zs.getFilesReencrypted()); assertEquals(5, zs.getNumReencryptionFailures()); } @@ -1788,7 +1795,8 @@ public void testReencryptionUpdaterFaultOneTask() throws Exception { } @Override - public void reencryptUpdaterProcessOneTask() throws IOException { + public synchronized void reencryptUpdaterProcessOneTask() + throws IOException { if (exceptionCount > 0) { --exceptionCount; throw new IOException("Injected process task failure"); @@ -1824,8 +1832,7 @@ public void reencryptUpdaterProcessOneTask() throws IOException { ZoneReencryptionStatus zs = it.next(); assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(5, zs.getFilesReencrypted()); assertEquals(1, zs.getNumReencryptionFailures()); } @@ -1841,7 +1848,8 @@ public void testReencryptionUpdaterFaultCkpt() throws Exception { } @Override - public void reencryptUpdaterProcessCheckpoint() throws IOException { + public synchronized void reencryptUpdaterProcessCheckpoint() + throws IOException { if (exceptionCount > 0) { --exceptionCount; throw new IOException("Injected process checkpoint failure"); @@ -1877,8 +1885,7 @@ public void reencryptUpdaterProcessCheckpoint() throws IOException { ZoneReencryptionStatus zs = it.next(); assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(10, zs.getFilesReencrypted()); assertEquals(1, zs.getNumReencryptionFailures()); } @@ -1893,7 +1900,8 @@ public void testReencryptionUpdaterFaultRecover() throws Exception { } @Override - public void reencryptUpdaterProcessOneTask() throws IOException { + public synchronized void reencryptUpdaterProcessOneTask() + throws IOException { 
if (exceptionCount > 0) { --exceptionCount; throw new RetriableException("Injected process task failure"); @@ -1930,8 +1938,7 @@ public void reencryptUpdaterProcessOneTask() throws IOException { ZoneReencryptionStatus zs = it.next(); assertEquals(zone.toString(), zs.getZoneName()); assertEquals(ZoneReencryptionStatus.State.Completed, zs.getState()); - assertTrue(zs.getCompletionTime() > 0); - assertTrue(zs.getCompletionTime() > zs.getSubmissionTime()); + verifyZoneCompletionTime(zs); assertEquals(10, zs.getFilesReencrypted()); assertEquals(0, zs.getNumReencryptionFailures()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java index 74778247bc1..8fa870186c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java @@ -35,8 +35,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -59,7 +59,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.Whitebox; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -79,10 +79,10 @@ */ public class TestSaveNamespace { static { - GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.TRACE); } - private static final Log LOG = LogFactory.getLog(TestSaveNamespace.class); + private static final Logger LOG = LoggerFactory.getLogger(TestSaveNamespace.class); private static class FaultySaveImage implements Answer { private int count = 0; @@ -325,7 +325,7 @@ public void testReinsertnamedirsInSavenamespace() throws Exception { try { fsn.close(); } catch (Throwable t) { - LOG.fatal("Failed to shut down", t); + LOG.error("Failed to shut down", t); } } } @@ -625,7 +625,7 @@ public void testSaveNamespaceWithRenamedLease() throws Exception { cluster.getNameNodeRpc().saveNamespace(0, 0); fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); } finally { - IOUtils.cleanup(LOG, out, fs); + IOUtils.cleanupWithLogger(LOG, out, fs); cluster.shutdown(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecureNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecureNameNode.java index 6b6ce53d82a..c90a91c67d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecureNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecureNameNode.java @@ -22,8 +22,10 @@ import static org.junit.Assert.fail; import java.io.IOException; +import java.lang.management.ManagementFactory; import java.security.PrivilegedExceptionAction; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; @@ -33,10 +35,12 @@ 
import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferTestCase; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; - +import javax.management.MBeanServer; +import javax.management.ObjectName; public class TestSecureNameNode extends SaslDataTransferTestCase { final static private int NUM_OF_DATANODES = 0; @@ -117,4 +121,50 @@ public void testKerberosHdfsBlockTokenInconsistencyNNStartup() throws Exception return; } + /** + * Test NameNodeStatusMXBean with security enabled and disabled. + * + * @throws Exception + */ + @Test + public void testNameNodeStatusMXBeanSecurityEnabled() throws Exception { + Configuration simpleConf = new Configuration(); + Configuration secureConf = createSecureConfig("authentication"); + + // disabling security + UserGroupInformation.setConfiguration(simpleConf); + + // get attribute "SecurityEnabled" with simple configuration + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(simpleConf).build()) { + cluster.waitActive(); + NameNode namenode = cluster.getNameNode(); + + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName mxbeanName = new ObjectName( + "Hadoop:service=NameNode,name=NameNodeStatus"); + + boolean securityEnabled = (boolean) mbs.getAttribute(mxbeanName, + "SecurityEnabled"); + Assert.assertFalse(securityEnabled); + Assert.assertEquals(namenode.isSecurityEnabled(), securityEnabled); + } + + // get attribute "SecurityEnabled" with secure configuration + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(secureConf).build()) { + cluster.waitActive(); + NameNode namenode = cluster.getNameNode(); + + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName mxbeanName = new ObjectName( + "Hadoop:service=NameNode,name=NameNodeStatus"); + + boolean securityEnabled = (boolean) mbs.getAttribute(mxbeanName, + "SecurityEnabled"); + Assert.assertTrue(securityEnabled); + Assert.assertEquals(namenode.isSecurityEnabled(), securityEnabled); + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java index 24016087dab..3e5fe756404 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java @@ -38,8 +38,7 @@ import java.util.Iterator; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -88,8 +87,8 @@ public class TestStartup { public static final String NAME_NODE_HOST = "localhost:"; public static final String WILDCARD_HTTP_HOST = "0.0.0.0:"; - private static final Log LOG = - LogFactory.getLog(TestStartup.class.getName()); + private static final org.slf4j.Logger LOG = + LoggerFactory.getLogger(TestStartup.class.getName()); private Configuration config; private File hdfsDir=null; static final long seed = 0xAAAAEEFL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStoragePolicySatisfierWithHA.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStoragePolicySatisfierWithHA.java new file mode 100644 index 00000000000..cf04db0d401 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStoragePolicySatisfierWithHA.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.ReconfigurationException; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests that StoragePolicySatisfier is able to work with HA enabled. + */ +public class TestStoragePolicySatisfierWithHA { + private MiniDFSCluster cluster = null; + + private final Configuration config = new HdfsConfiguration(); + private static final int DEFAULT_BLOCK_SIZE = 1024; + private DistributedFileSystem dfs = null; + + private StorageType[][] allDiskTypes = + new StorageType[][]{{StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}}; + private int numOfDatanodes = 3; + private int storagesPerDatanode = 2; + private long capacity = 2 * 256 * 1024 * 1024; + private int nnIndex = 0; + + private void createCluster() throws IOException { + config.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE); + config.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + // Reduced refresh cycle to update latest datanodes. 
+ config.setLong(DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS, + 1000); + startCluster(config, allDiskTypes, numOfDatanodes, storagesPerDatanode, + capacity); + dfs = cluster.getFileSystem(nnIndex); + } + + private void startCluster(final Configuration conf, + StorageType[][] storageTypes, int numberOfDatanodes, int storagesPerDn, + long nodeCapacity) throws IOException { + long[][] capacities = new long[numberOfDatanodes][storagesPerDn]; + for (int i = 0; i < numberOfDatanodes; i++) { + for (int j = 0; j < storagesPerDn; j++) { + capacities[i][j] = nodeCapacity; + } + } + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(numberOfDatanodes).storagesPerDatanode(storagesPerDn) + .storageTypes(storageTypes).storageCapacities(capacities).build(); + cluster.waitActive(); + cluster.transitionToActive(0); + } + + /** + * Tests to verify that SPS should run/stop automatically when NN state + * changes between Standby and Active. + */ + @Test(timeout = 90000) + public void testWhenNNHAStateChanges() throws IOException { + try { + createCluster(); + // NN transits from Active to Standby + cluster.transitionToStandby(0); + cluster.waitActive(); + try { + cluster.getNameNode(0).reconfigurePropertyImpl( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.NONE.toString()); + Assert.fail("It's not allowed to enable or disable" + + " StoragePolicySatisfier on Standby NameNode"); + } catch (ReconfigurationException e) { + GenericTestUtils.assertExceptionContains("Could not change property " + + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY + + " from 'EXTERNAL' to 'NONE'", e); + GenericTestUtils.assertExceptionContains( + "Enabling or disabling storage policy satisfier service on " + + "standby NameNode is not allowed", e.getCause()); + } + } finally { + cluster.shutdown(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java index 6f4546db884..aa10291ef0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java @@ -33,8 +33,8 @@ import java.util.Iterator; import java.util.Set; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.cli.CLITestCmdDFS; import org.apache.hadoop.cli.util.CLICommandDFSAdmin; import org.apache.hadoop.cli.util.CommandExecutor; @@ -60,8 +60,8 @@ public class TestStorageRestore { public static final String NAME_NODE_HOST = "localhost:"; public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:"; - private static final Log LOG = - LogFactory.getLog(TestStorageRestore.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestStorageRestore.class.getName()); private Configuration config; private File hdfsDir=null; static final long seed = 0xAAAAEEFL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java index 468e47fd18a..59afd8e030e 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageType; @@ -63,7 +63,7 @@ * This class tests INodeFile with striped feature. */ public class TestStripedINodeFile { - public static final Log LOG = LogFactory.getLog(TestINodeFile.class); + public static final Logger LOG = LoggerFactory.getLogger(TestINodeFile.class); private static final PermissionStatus perm = new PermissionStatus( "userName", null, FsPermission.getDefault()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 169bbeef119..bbe29cf1348 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -32,8 +32,8 @@ import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.collect.Iterables; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -55,7 +55,7 @@ * Static utility functions useful for testing HA. 
*/ public abstract class HATestUtil { - private static final Log LOG = LogFactory.getLog(HATestUtil.class); + private static final Logger LOG = LoggerFactory.getLogger(HATestUtil.class); private static final String LOGICAL_HOSTNAME = "ha-nn-uri-%d"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java index 48d505c7481..defa6e53e70 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java @@ -28,8 +28,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.base.Supplier; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -51,7 +51,8 @@ import com.google.common.collect.ImmutableList; public class TestBootstrapStandby { - private static final Log LOG = LogFactory.getLog(TestBootstrapStandby.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestBootstrapStandby.class); private static final int maxNNCount = 3; private static final int STARTING_PORT = 20000; @@ -197,14 +198,14 @@ public void testSharedEditsMissingLogs() throws Exception { // Trying to bootstrap standby should now fail since the edit // logs aren't available in the shared dir. LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs( - LogFactory.getLog(BootstrapStandby.class)); + LoggerFactory.getLogger(BootstrapStandby.class)); try { assertEquals(BootstrapStandby.ERR_CODE_LOGS_UNAVAILABLE, forceBootstrap(1)); } finally { logs.stopCapturing(); } - GenericTestUtils.assertMatches(logs.getOutput(), - "FATAL.*Unable to read transaction ids 1-3 from the configured shared"); + assertTrue(logs.getOutput().contains( + "Unable to read transaction ids 1-3 from the configured shared")); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java index 7b0a0e19256..c470cc6f1d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java @@ -28,8 +28,8 @@ import java.net.URISyntaxException; import java.util.Collection; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; @@ -60,7 +60,8 @@ */ public class TestDFSUpgradeWithHA { - private static final Log LOG = LogFactory.getLog(TestDFSUpgradeWithHA.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDFSUpgradeWithHA.class); private Configuration conf; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index c1521bd5623..1166dae42cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -29,8 +29,8 @@ import com.google.common.base.Supplier; import com.google.common.collect.Lists; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -69,7 +69,7 @@ public class TestDNFencing { - protected static final Log LOG = LogFactory.getLog(TestDNFencing.class); + protected static final Logger LOG = LoggerFactory.getLogger(TestDNFencing.class); private static final String TEST_FILE = "/testStandbyIsHot"; private static final Path TEST_FILE_PATH = new Path(TEST_FILE); private static final int SMALL_BLOCK = 1024; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java index b900c8b875f..7076ec674bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import com.google.common.base.Joiner; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.AbstractFileSystem; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -65,8 +65,8 @@ **/ public class TestDelegationTokensWithHA { private static final Configuration conf = new Configuration(); - private static final Log LOG = - LogFactory.getLog(TestDelegationTokensWithHA.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDelegationTokensWithHA.class); private static MiniDFSCluster cluster; private static NameNode nn0; private static NameNode nn1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index c88ac57c27f..68b3e2ba879 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -48,7 +48,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.net.ServerSocketUtil; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -60,7 +60,7 @@ @RunWith(Parameterized.class) public class TestEditLogTailer { static { - GenericTestUtils.setLogLevel(FSEditLog.LOG, org.slf4j.event.Level.DEBUG); + GenericTestUtils.setLogLevel(FSEditLog.LOG, Level.DEBUG); 
} @Parameters diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java index 4405c5b7b69..4387a3372bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java @@ -27,8 +27,8 @@ import java.util.Collections; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.HAUtil; @@ -52,8 +52,8 @@ * and startup of the standby node. */ public class TestEditLogsDuringFailover { - private static final Log LOG = - LogFactory.getLog(TestEditLogsDuringFailover.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestEditLogsDuringFailover.class); private static final int NUM_DIRS_IN_LOG = 5; static { @@ -159,7 +159,7 @@ private void testFailoverFinalizesAndReadsInProgress( outs.write(new byte[] { 0x18, 0x00, 0x00, 0x00 } ); LOG.error("editLogFile = " + editLogFile); } finally { - IOUtils.cleanup(LOG, outs); + IOUtils.cleanupWithLogger(LOG, outs); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java index 284e2835155..87859938402 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java @@ -33,8 +33,8 @@ import java.util.LinkedList; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -66,8 +66,8 @@ @RunWith(Parameterized.class) public class TestFailureToReadEdits { - private static final Log LOG = - LogFactory.getLog(TestFailureToReadEdits.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestFailureToReadEdits.class); private static final String TEST_DIR1 = "/test1"; private static final String TEST_DIR2 = "/test2"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java index 8665e094699..cc8ead16874 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java @@ -32,13 +32,13 @@ import org.apache.hadoop.hdfs.tools.DFSck; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Test; public class TestHAFsck { static { - GenericTestUtils.setLogLevel(DFSUtil.LOG, Level.ALL); + 
GenericTestUtils.setLogLevel(DFSUtil.LOG, Level.TRACE); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java index 432f7df091b..8beba74c6fb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -44,7 +44,8 @@ */ public class TestHAMetrics { - private static final Log LOG = LogFactory.getLog(TestHAMetrics.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestHAMetrics.class); @Test(timeout = 300000) public void testHAMetrics() throws Exception { @@ -117,7 +118,7 @@ public void testHAMetrics() throws Exception { newMillisSinceLastLoadedEdits, millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits); } finally { - IOUtils.cleanup(LOG, fs); + IOUtils.cleanupWithLogger(LOG, fs); cluster.shutdown(); } } @@ -170,7 +171,7 @@ public void testHAInodeCount() throws Exception { nn0 = cluster.getNamesystem(0); assertEquals(5, nn0.getFilesTotal()); } finally { - IOUtils.cleanup(LOG, fs); + IOUtils.cleanupWithLogger(LOG, fs); cluster.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index c81f0583668..f9445fa12ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -30,8 +30,8 @@ import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -63,7 +63,7 @@ import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.Whitebox; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -75,7 +75,8 @@ * Tests that exercise safemode in an HA cluster. 
*/ public class TestHASafeMode { - private static final Log LOG = LogFactory.getLog(TestHASafeMode.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestHASafeMode.class); private static final int BLOCK_SIZE = 1024; private NameNode nn0; private NameNode nn1; @@ -83,8 +84,8 @@ private MiniDFSCluster cluster; static { - DFSTestUtil.setNameNodeLogLevel(Level.ALL); - GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); + DFSTestUtil.setNameNodeLogLevel(org.apache.log4j.Level.TRACE); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.TRACE); } @Before diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 6b84f1ee176..59c9695060a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import com.google.common.util.concurrent.Uninterruptibles; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -41,7 +41,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -62,7 +62,7 @@ * and failback between two namenodes. 
*/ public class TestHAStateTransitions { - protected static final Log LOG = LogFactory.getLog( + protected static final Logger LOG = LoggerFactory.getLogger( TestStandbyIsHot.class); private static final Path TEST_DIR = new Path("/test"); private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo"); @@ -73,7 +73,7 @@ RequestSource.REQUEST_BY_USER_FORCED); static { - GenericTestUtils.setLogLevel(EditLogTailer.LOG, Level.ALL); + GenericTestUtils.setLogLevel(EditLogTailer.LOG, Level.TRACE); } /** @@ -420,7 +420,7 @@ private static void testFailoverAfterCrashDuringLogRoll(boolean writeHeader) createEmptyInProgressEditLog(cluster, nn0, writeHeader); cluster.transitionToActive(1); } finally { - IOUtils.cleanup(LOG, fs); + IOUtils.cleanupWithLogger(LOG, fs); cluster.shutdown(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java index 5015f9e1836..0705c197f24 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java @@ -27,8 +27,8 @@ import java.io.IOException; import java.net.URISyntaxException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -50,7 +50,8 @@ public class TestInitializeSharedEdits { - private static final Log LOG = LogFactory.getLog(TestInitializeSharedEdits.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestInitializeSharedEdits.class); private static final Path TEST_PATH = new Path("/test"); private Configuration conf; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java index f9e1fd0b476..e1f00f17a15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java @@ -28,8 +28,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -70,12 +70,12 @@ */ public class TestPipelinesFailover { static { - GenericTestUtils.setLogLevel(LogFactory.getLog(RetryInvocationHandler - .class), Level.ALL); + GenericTestUtils.setLogLevel(LoggerFactory.getLogger(RetryInvocationHandler + .class), org.slf4j.event.Level.DEBUG); DFSTestUtil.setNameNodeLogLevel(Level.ALL); } - protected static final Log LOG = LogFactory.getLog( + protected static final Logger LOG = LoggerFactory.getLogger( TestPipelinesFailover.class); private static final Path TEST_PATH = new Path("/test-file"); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java index 43aaa927037..bbcbaaa796e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java @@ -36,8 +36,8 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CryptoProtocolVersion; import org.apache.hadoop.fs.CacheFlag; @@ -92,7 +92,8 @@ import org.junit.Test; public class TestRetryCacheWithHA { - private static final Log LOG = LogFactory.getLog(TestRetryCacheWithHA.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestRetryCacheWithHA.class); private static final int BlockSize = 1024; private static ErasureCodingPolicy defaultEcPolicy = @@ -194,7 +195,7 @@ public void testRetryCacheOnStandbyNN() throws Exception { FSNamesystem fsn1 = cluster.getNamesystem(1); cacheSet = (LightWeightCache) fsn1 .getRetryCache().getCacheSet(); - assertEquals("Retry cache size is wrong", 39, cacheSet.size()); + assertEquals("Retry cache size is wrong", 38, cacheSet.size()); iter = cacheSet.iterator(); while (iter.hasNext()) { CacheEntry entry = iter.next(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java index 537c18902e9..6eda1e35505 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java @@ -25,8 +25,8 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -42,7 +42,8 @@ */ public class TestSeveralNameNodes { - private static final Log LOG = LogFactory.getLog(TestSeveralNameNodes.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestSeveralNameNodes.class); /** ms between failovers between NNs */ private static final int TIME_BETWEEN_FAILOVERS = 1000; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyBlockManagement.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyBlockManagement.java index 9042f8a8665..0e60ee03218 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyBlockManagement.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyBlockManagement.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import org.apache.commons.logging.Log; -import 
org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -39,7 +39,7 @@ * invalidate block, etc. */ public class TestStandbyBlockManagement { - protected static final Log LOG = LogFactory.getLog( + protected static final Logger LOG = LoggerFactory.getLogger( TestStandbyBlockManagement.class); private static final String TEST_FILE_DATA = "hello world"; private static final String TEST_FILE = "/TestStandbyBlockManagement"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 4849b54d6ed..b9880f03966 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -21,8 +21,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataOutputStream; @@ -72,7 +72,7 @@ private final Random random = new Random(); protected File tmpOivImgDir; - private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class); + private static final Logger LOG = LoggerFactory.getLogger(TestStandbyCheckpoints.class); @SuppressWarnings("rawtypes") @Before diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java index 5da81787166..2bdada45fdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyInProgressTail.java @@ -25,8 +25,8 @@ import java.net.URI; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -50,8 +50,8 @@ * the standby node. 
*/ public class TestStandbyInProgressTail { - private static final Log LOG = - LogFactory.getLog(TestStandbyInProgressTail.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestStandbyInProgressTail.class); private Configuration conf; private MiniQJMHACluster qjmhaCluster; private MiniDFSCluster cluster; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 14c9dc264d8..04eae6f23ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -21,8 +21,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -50,7 +50,7 @@ * has namespace information, but also has the correct block reports, etc. */ public class TestStandbyIsHot { - protected static final Log LOG = LogFactory.getLog( + protected static final Logger LOG = LoggerFactory.getLogger( TestStandbyIsHot.class); private static final String TEST_FILE_DATA = "hello highly available world"; private static final String TEST_FILE = "/testStandbyIsHot"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index 05cf2ea622f..64d6c19c7b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -49,8 +49,8 @@ import java.util.Random; import com.google.common.collect.ImmutableList; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -88,7 +88,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MetricsAsserts; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -108,7 +108,8 @@ SystemErasureCodingPolicies.getByID( SystemErasureCodingPolicies.XOR_2_1_POLICY_ID); - public static final Log LOG = LogFactory.getLog(TestNameNodeMetrics.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestNameNodeMetrics.class); // Number of datanodes in the cluster private static final int DATANODE_COUNT = EC_POLICY.getNumDataUnits() + @@ -139,7 +140,7 @@ // Enable stale DataNodes checking CONF.setBoolean( DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true); - GenericTestUtils.setLogLevel(LogFactory.getLog(MetricsAsserts.class), + GenericTestUtils.setLogLevel(LoggerFactory.getLogger(MetricsAsserts.class), Level.DEBUG); } @@ -412,10 +413,12 @@ private void verifyZeroMetrics() throws Exception { // Verify replica metrics 
assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped block groups metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); } /** @@ -492,9 +495,11 @@ public void testCorruptBlock() throws Exception { // Verify replicated blocks metrics assertGauge("LowRedundancyReplicatedBlocks", 1L, rb); assertGauge("CorruptReplicatedBlocks", 1L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 1L, rb); // Verify striped blocks metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); verifyAggregatedMetricsTally(); @@ -517,9 +522,11 @@ public void testCorruptBlock() throws Exception { // Verify replicated blocks metrics assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped blocks metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); verifyAggregatedMetricsTally(); @@ -580,9 +587,11 @@ public void testStripedFileCorruptBlocks() throws Exception { // Verify replica metrics assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped block groups metrics assertGauge("LowRedundancyECBlockGroups", 1L, rb); assertGauge("CorruptECBlockGroups", 1L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 1L, rb); verifyAggregatedMetricsTally(); @@ -602,9 +611,11 @@ public void testStripedFileCorruptBlocks() throws Exception { // Verify replicated blocks metrics assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped blocks metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); verifyAggregatedMetricsTally(); @@ -666,6 +677,8 @@ public void testMissingBlock() throws Exception { assertGauge("UnderReplicatedBlocks", 1L, rb); assertGauge("MissingBlocks", 1L, rb); assertGauge("MissingReplOneBlocks", 1L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); fs.delete(file, true); waitForDnMetricValue(NS_METRICS, "UnderReplicatedBlocks", 0L); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java index 65b2f8c3f93..77c9ecc3a0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java @@ -48,7 +48,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import 
org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -58,7 +58,7 @@ */ public class TestINodeFileUnderConstructionWithSnapshot { { - GenericTestUtils.setLogLevel(INode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(INode.LOG, Level.TRACE); SnapshotTestHelper.disableLogs(); } @@ -307,4 +307,4 @@ public void testLease() throws Exception { HdfsConstants.LEASE_HARDLIMIT_PERIOD); } } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java index 38cd5f434f3..04a34160fac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java @@ -26,8 +26,8 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileChecksum; @@ -50,8 +50,8 @@ import org.junit.Test; public class TestOpenFilesWithSnapshot { - private static final Log LOG = - LogFactory.getLog(TestOpenFilesWithSnapshot.class.getName()); + private static final Logger LOG = + LoggerFactory.getLogger(TestOpenFilesWithSnapshot.class.getName()); private final Configuration conf = new Configuration(); MiniDFSCluster cluster = null; DistributedFileSystem fs = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java index bd72dfd8029..987fd505afd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode.snapshot; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -64,7 +64,8 @@ static { SnapshotTestHelper.disableLogs(); } - private static final Log LOG = LogFactory.getLog(TestRenameWithSnapshots.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestRenameWithSnapshots.class); private static final long SEED = 0; private static final short REPL = 3; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java index 1c01eceab22..0f78d98817d 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java @@ -59,7 +59,7 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -73,7 +73,7 @@ */ public class TestSnapshot { { - GenericTestUtils.setLogLevel(INode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(INode.LOG, Level.TRACE); SnapshotTestHelper.disableLogs(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/sps/TestBlockStorageMovementAttemptedItems.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/sps/TestBlockStorageMovementAttemptedItems.java new file mode 100644 index 00000000000..f48521b1c1a --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/sps/TestBlockStorageMovementAttemptedItems.java @@ -0,0 +1,220 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import static org.apache.hadoop.util.Time.monotonicNow; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier.StorageTypeNodePair; +import org.apache.hadoop.hdfs.server.sps.ExternalSPSContext; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +/** + * Tests that block storage movement attempt failures are reported from DN and + * processed them correctly or not. 
+ */
+public class TestBlockStorageMovementAttemptedItems {
+
+  private BlockStorageMovementAttemptedItems bsmAttemptedItems;
+  private BlockStorageMovementNeeded unsatisfiedStorageMovementFiles;
+  private final int selfRetryTimeout = 500;
+
+  @Before
+  public void setup() throws Exception {
+    Configuration config = new HdfsConfiguration();
+    Context ctxt = Mockito.mock(ExternalSPSContext.class);
+    SPSService sps = new StoragePolicySatisfier(config);
+    Mockito.when(ctxt.isRunning()).thenReturn(true);
+    Mockito.when(ctxt.isInSafeMode()).thenReturn(false);
+    Mockito.when(ctxt.isFileExist(Mockito.anyLong())).thenReturn(true);
+    unsatisfiedStorageMovementFiles =
+        new BlockStorageMovementNeeded(ctxt);
+    bsmAttemptedItems = new BlockStorageMovementAttemptedItems(sps,
+        unsatisfiedStorageMovementFiles, ctxt);
+  }
+
+  @After
+  public void teardown() {
+    if (bsmAttemptedItems != null) {
+      bsmAttemptedItems.stop();
+      bsmAttemptedItems.stopGracefully();
+    }
+  }
+
+  private boolean checkItemMovedForRetry(Long item, long retryTimeout)
+      throws InterruptedException {
+    long stopTime = monotonicNow() + (retryTimeout * 2);
+    boolean isItemFound = false;
+    while (monotonicNow() < (stopTime)) {
+      ItemInfo ele = null;
+      while ((ele = unsatisfiedStorageMovementFiles.get()) != null) {
+        if (item == ele.getFile()) {
+          isItemFound = true;
+          break;
+        }
+      }
+      if (!isItemFound) {
+        Thread.sleep(100);
+      } else {
+        break;
+      }
+    }
+    return isItemFound;
+  }
+
+  /**
+   * Verify that moved blocks reporting queues up the block info.
+   */
+  @Test(timeout = 30000)
+  public void testAddReportedMoveAttemptFinishedBlocks() throws Exception {
+    Long item = new Long(1234);
+    Block block = new Block(item);
+    DatanodeInfo dnInfo = DFSTestUtil.getLocalDatanodeInfo(9867);
+    Set<StorageTypeNodePair> locs = new HashSet<>();
+    locs.add(new StorageTypeNodePair(StorageType.ARCHIVE, dnInfo));
+    Map<Block, Set<StorageTypeNodePair>> blocksMap = new HashMap<>();
+    blocksMap.put(block, locs);
+    bsmAttemptedItems.add(0L, 0L, 0L, blocksMap, 0);
+    bsmAttemptedItems.notifyReportedBlock(dnInfo, StorageType.ARCHIVE,
+        block);
+    assertEquals("Failed to receive result!", 1,
+        bsmAttemptedItems.getMovementFinishedBlocksCount());
+  }
+
+  /**
+   * Verify empty moved blocks reporting queue.
+   */
+  @Test(timeout = 30000)
+  public void testNoBlockMovementAttemptFinishedReportAdded() throws Exception {
+    bsmAttemptedItems.start(); // start block movement report monitor thread
+    Long item = new Long(1234);
+    Block block = new Block(item);
+    DatanodeInfo dnInfo = DFSTestUtil.getLocalDatanodeInfo(9867);
+    Set<StorageTypeNodePair> locs = new HashSet<>();
+    locs.add(new StorageTypeNodePair(StorageType.ARCHIVE, dnInfo));
+    Map<Block, Set<StorageTypeNodePair>> blocksMap = new HashMap<>();
+    blocksMap.put(block, locs);
+    bsmAttemptedItems.add(0L, 0L, 0L, blocksMap, 0);
+    assertEquals("Shouldn't receive result", 0,
+        bsmAttemptedItems.getMovementFinishedBlocksCount());
+    assertEquals("Item doesn't exist in the attempted list", 1,
+        bsmAttemptedItems.getAttemptedItemsCount());
+  }
+
+  /**
+   * Partial block movement with
+   * BlockMovementStatus#DN_BLK_STORAGE_MOVEMENT_SUCCESS. Here, first occurrence
+   * is #blockStorageMovementReportedItemsCheck() and then
+   * #blocksStorageMovementUnReportedItemsCheck().
+   */
+  @Test(timeout = 30000)
+  public void testPartialBlockMovementShouldBeRetried1() throws Exception {
+    Long item = new Long(1234);
+    Block block1 = new Block(item);
+    Block block2 = new Block(5678L);
+    Long trackID = 0L;
+    DatanodeInfo dnInfo = DFSTestUtil.getLocalDatanodeInfo(9867);
+    Set<StorageTypeNodePair> locs = new HashSet<>();
+    locs.add(new StorageTypeNodePair(StorageType.ARCHIVE, dnInfo));
+    Map<Block, Set<StorageTypeNodePair>> blocksMap = new HashMap<>();
+    blocksMap.put(block1, locs);
+    blocksMap.put(block2, locs);
+    bsmAttemptedItems.add(trackID, trackID, 0L, blocksMap, 0);
+    bsmAttemptedItems.notifyReportedBlock(dnInfo, StorageType.ARCHIVE,
+        block1);
+
+    // start block movement report monitor thread
+    bsmAttemptedItems.start();
+    assertTrue("Failed to add to the retry list",
+        checkItemMovedForRetry(trackID, 5000));
+    assertEquals("Failed to remove from the attempted list", 0,
+        bsmAttemptedItems.getAttemptedItemsCount());
+  }
+
+  /**
+   * Partial block movement. Here, first occurrence is
+   * #blocksStorageMovementUnReportedItemsCheck() and then
+   * #blockStorageMovementReportedItemsCheck().
+   */
+  @Test(timeout = 30000)
+  public void testPartialBlockMovementShouldBeRetried2() throws Exception {
+    Long item = new Long(1234);
+    Block block = new Block(item);
+    Long trackID = 0L;
+    DatanodeInfo dnInfo = DFSTestUtil.getLocalDatanodeInfo(9867);
+    Set<StorageTypeNodePair> locs = new HashSet<>();
+    locs.add(new StorageTypeNodePair(StorageType.ARCHIVE, dnInfo));
+    Map<Block, Set<StorageTypeNodePair>> blocksMap = new HashMap<>();
+    blocksMap.put(block, locs);
+    bsmAttemptedItems.add(trackID, trackID, 0L, blocksMap, 0);
+    bsmAttemptedItems.notifyReportedBlock(dnInfo, StorageType.ARCHIVE,
+        block);
+
+    Thread.sleep(selfRetryTimeout * 2); // Waiting to get timed out
+
+    bsmAttemptedItems.blocksStorageMovementUnReportedItemsCheck();
+    bsmAttemptedItems.blockStorageMovementReportedItemsCheck();
+
+    assertTrue("Failed to add to the retry list",
+        checkItemMovedForRetry(trackID, 5000));
+    assertEquals("Failed to remove from the attempted list", 0,
+        bsmAttemptedItems.getAttemptedItemsCount());
+  }
+
+  /**
+   * Partial block movement with only BlocksStorageMoveAttemptFinished report
+   * and storageMovementAttemptedItems list is empty.
+   */
+  @Test(timeout = 30000)
+  public void testPartialBlockMovementWithEmptyAttemptedQueue()
+      throws Exception {
+    Long item = new Long(1234);
+    Block block = new Block(item);
+    Long trackID = 0L;
+    DatanodeInfo dnInfo = DFSTestUtil.getLocalDatanodeInfo(9867);
+    Set<StorageTypeNodePair> locs = new HashSet<>();
+    locs.add(new StorageTypeNodePair(StorageType.ARCHIVE, dnInfo));
+    Map<Block, Set<StorageTypeNodePair>> blocksMap = new HashMap<>();
+    blocksMap.put(block, locs);
+    bsmAttemptedItems.add(trackID, trackID, 0L, blocksMap, 0);
+    bsmAttemptedItems.notifyReportedBlock(dnInfo, StorageType.ARCHIVE,
+        block);
+    assertFalse(
+        "Should not add in queue again if it is not there in"
+            + " storageMovementAttemptedItems",
+        checkItemMovedForRetry(trackID, 5000));
+    assertEquals("Failed to remove from the attempted list", 1,
+        bsmAttemptedItems.getAttemptedItemsCount());
+  }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/sps/TestStoragePolicySatisfierWithStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/sps/TestStoragePolicySatisfierWithStripedFile.java
new file mode 100644
index 00000000000..018a5dc69d3
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/sps/TestStoragePolicySatisfierWithStripedFile.java
@@ -0,0 +1,581 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hdfs.server.namenode.sps; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeoutException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; +import org.apache.hadoop.hdfs.NameNodeProxies; +import org.apache.hadoop.hdfs.StripedFileTestUtil; +import org.apache.hadoop.hdfs.client.HdfsAdmin; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.sps.ExternalSPSContext; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Supplier; + +/** + * Tests that StoragePolicySatisfier daemon is able to check the striped blocks + * to be moved and finding its expected target locations in order to satisfy the + * storage policy. + */ +public class TestStoragePolicySatisfierWithStripedFile { + + private static final Logger LOG = LoggerFactory + .getLogger(TestStoragePolicySatisfierWithStripedFile.class); + + private final int stripesPerBlock = 2; + + private ErasureCodingPolicy ecPolicy; + private int dataBlocks; + private int parityBlocks; + private int cellSize; + private int defaultStripeBlockSize; + private Configuration conf; + private StoragePolicySatisfier sps; + private ExternalSPSContext ctxt; + private NameNodeConnector nnc; + + private ErasureCodingPolicy getEcPolicy() { + return StripedFileTestUtil.getDefaultECPolicy(); + } + + /** + * Initialize erasure coding policy. + */ + @Before + public void init(){ + ecPolicy = getEcPolicy(); + dataBlocks = ecPolicy.getNumDataUnits(); + parityBlocks = ecPolicy.getNumParityUnits(); + cellSize = ecPolicy.getCellSize(); + defaultStripeBlockSize = cellSize * stripesPerBlock; + conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + // Reduced refresh cycle to update latest datanodes. + conf.setLong(DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS, + 1000); + conf.setInt( + DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_KEY, 30); + initConfWithStripe(conf, defaultStripeBlockSize); + } + + /** + * Tests to verify that all the striped blocks(data + parity blocks) are + * moving to satisfy the storage policy. 
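The init() method above doubles as a recipe for pointing a test cluster at an external SPS. Pulled out on its own it looks roughly like this (same keys and values as in init(); the wrapper class and method name are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode;

class ExternalSpsConfExample {
  static Configuration newExternalSpsConf() {
    Configuration conf = new HdfsConfiguration();
    // Run the satisfier as an external service instead of inside the NameNode.
    conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY,
        StoragePolicySatisfierMode.EXTERNAL.toString());
    // Refresh the datanode cache often so newly started datanodes are seen quickly.
    conf.setLong(DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS, 1000);
    // Allow more retry attempts before a pending movement is given up.
    conf.setInt(
        DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MAX_RETRY_ATTEMPTS_KEY, 30);
    return conf;
  }
}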
+ */ + @Test(timeout = 300000) + public void testMoverWithFullStripe() throws Exception { + // start 11 datanodes + int numOfDatanodes = 11; + int storagesPerDatanode = 2; + long capacity = 20 * defaultStripeBlockSize; + long[][] capacities = new long[numOfDatanodes][storagesPerDatanode]; + for (int i = 0; i < numOfDatanodes; i++) { + for (int j = 0; j < storagesPerDatanode; j++) { + capacities[i][j] = capacity; + } + } + + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numOfDatanodes) + .storagesPerDatanode(storagesPerDatanode) + .storageTypes(new StorageType[][]{ + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}}) + .storageCapacities(capacities) + .build(); + + HdfsAdmin hdfsAdmin = new HdfsAdmin(FileSystem.getDefaultUri(conf), conf); + try { + cluster.waitActive(); + startSPS(); + DistributedFileSystem dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // set "/bar" directory with HOT storage policy. + ClientProtocol client = NameNodeProxies.createProxy(conf, + cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy(); + String barDir = "/bar"; + client.mkdirs(barDir, new FsPermission((short) 777), true); + client.setStoragePolicy(barDir, HdfsConstants.HOT_STORAGE_POLICY_NAME); + // set an EC policy on "/bar" directory + client.setErasureCodingPolicy(barDir, + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // write file to barDir + final String fooFile = "/bar/foo"; + long fileLen = cellSize * dataBlocks; + DFSTestUtil.createFile(cluster.getFileSystem(), new Path(fooFile), + fileLen, (short) 3, 0); + + // verify storage types and locations + LocatedBlocks locatedBlocks = client.getBlockLocations(fooFile, 0, + fileLen); + for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) { + for (StorageType type : lb.getStorageTypes()) { + Assert.assertEquals(StorageType.DISK, type); + } + } + StripedFileTestUtil.verifyLocatedStripedBlocks(locatedBlocks, + dataBlocks + parityBlocks); + + // start 5 more datanodes + int numOfNewDatanodes = 5; + capacities = new long[numOfNewDatanodes][storagesPerDatanode]; + for (int i = 0; i < numOfNewDatanodes; i++) { + for (int j = 0; j < storagesPerDatanode; j++) { + capacities[i][j] = capacity; + } + } + cluster.startDataNodes(conf, 5, + new StorageType[][]{ + {StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}}, + true, null, null, null, capacities, null, false, false, false, null); + cluster.triggerHeartbeats(); + + // move file to ARCHIVE + client.setStoragePolicy(barDir, "COLD"); + hdfsAdmin.satisfyStoragePolicy(new Path(fooFile)); + LOG.info("Sets storage policy to COLD and invoked satisfyStoragePolicy"); + cluster.triggerHeartbeats(); + + // verify storage types and locations + waitExpectedStorageType(cluster, fooFile, fileLen, StorageType.ARCHIVE, 9, + 9, 60000); + } finally { + cluster.shutdown(); + sps.stopGracefully(); + } + } + + /** + * Tests to verify 
that only few datanodes are available and few striped + * blocks are able to move. Others are still trying to find available nodes. + * + * For example, we have 3 nodes A(disk, disk), B(disk, disk), C(disk, archive) + * + * Assume a block with storage locations A(disk), B(disk), C(disk). Now, set + * policy as COLD and invoked {@link HdfsAdmin#satisfyStoragePolicy(Path)}, + * while choosing the target node for A, it shouldn't choose C. For C, it + * should do local block movement as it has ARCHIVE storage type. + */ + @Test(timeout = 300000) + public void testWhenOnlyFewTargetNodesAreAvailableToSatisfyStoragePolicy() + throws Exception { + // start 10 datanodes + int numOfDatanodes = 11; + int storagesPerDatanode = 2; + long capacity = 20 * defaultStripeBlockSize; + long[][] capacities = new long[numOfDatanodes][storagesPerDatanode]; + for (int i = 0; i < numOfDatanodes; i++) { + for (int j = 0; j < storagesPerDatanode; j++) { + capacities[i][j] = capacity; + } + } + + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numOfDatanodes) + .storagesPerDatanode(storagesPerDatanode) + .storageTypes(new StorageType[][]{ + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}}) + .storageCapacities(capacities) + .build(); + + HdfsAdmin hdfsAdmin = new HdfsAdmin(FileSystem.getDefaultUri(conf), conf); + try { + cluster.waitActive(); + startSPS(); + DistributedFileSystem dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + // set "/bar" directory with HOT storage policy. + ClientProtocol client = NameNodeProxies.createProxy(conf, + cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy(); + String barDir = "/bar"; + client.mkdirs(barDir, new FsPermission((short) 777), true); + client.setStoragePolicy(barDir, HdfsConstants.HOT_STORAGE_POLICY_NAME); + // set an EC policy on "/bar" directory + client.setErasureCodingPolicy(barDir, + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // write file to barDir + final String fooFile = "/bar/foo"; + long fileLen = cellSize * dataBlocks; + DFSTestUtil.createFile(cluster.getFileSystem(), new Path(fooFile), + fileLen, (short) 3, 0); + + // verify storage types and locations + LocatedBlocks locatedBlocks = client.getBlockLocations(fooFile, 0, + fileLen); + for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) { + for (StorageType type : lb.getStorageTypes()) { + Assert.assertEquals(StorageType.DISK, type); + } + } + Thread.sleep(5000); + StripedFileTestUtil.verifyLocatedStripedBlocks(locatedBlocks, + dataBlocks + parityBlocks); + + // start 2 more datanodes + int numOfNewDatanodes = 2; + capacities = new long[numOfNewDatanodes][storagesPerDatanode]; + for (int i = 0; i < numOfNewDatanodes; i++) { + for (int j = 0; j < storagesPerDatanode; j++) { + capacities[i][j] = capacity; + } + } + cluster.startDataNodes(conf, 2, + new StorageType[][]{ + {StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}}, + true, null, null, null, capacities, null, false, false, false, null); + cluster.triggerHeartbeats(); + + // Move file to ARCHIVE. 
Only 5 datanodes are available with ARCHIVE + // storage type. + client.setStoragePolicy(barDir, "COLD"); + hdfsAdmin.satisfyStoragePolicy(new Path(fooFile)); + LOG.info("Sets storage policy to COLD and invoked satisfyStoragePolicy"); + cluster.triggerHeartbeats(); + + waitForAttemptedItems(1, 30000); + // verify storage types and locations. + waitExpectedStorageType(cluster, fooFile, fileLen, StorageType.ARCHIVE, 5, + 9, 60000); + } finally { + cluster.shutdown(); + sps.stopGracefully(); + } + } + + /** + * Test SPS for low redundant file blocks. + * 1. Create cluster with 10 datanode. + * 1. Create one striped file with default EC Policy. + * 2. Set policy and call satisfyStoragePolicy for file. + * 3. Stop NameNode and Datanodes. + * 4. Start NameNode with 5 datanode and wait for block movement. + * 5. Start remaining 5 datanode. + * 6. All replica should be moved in proper storage based on policy. + */ + @Test(timeout = 300000) + public void testSPSWhenFileHasLowRedundancyBlocks() throws Exception { + // start 9 datanodes + int numOfDatanodes = 9; + int storagesPerDatanode = 2; + long capacity = 20 * defaultStripeBlockSize; + long[][] capacities = new long[numOfDatanodes][storagesPerDatanode]; + for (int i = 0; i < numOfDatanodes; i++) { + for (int j = 0; j < storagesPerDatanode; j++) { + capacities[i][j] = capacity; + } + } + + conf.set(DFSConfigKeys + .DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY, + "3000"); + conf.set(DFSConfigKeys + .DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY, + "5000"); + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numOfDatanodes) + .storagesPerDatanode(storagesPerDatanode) + .storageTypes(new StorageType[][]{ + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}}) + .storageCapacities(capacities) + .build(); + try { + cluster.waitActive(); + startSPS(); + DistributedFileSystem fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + Path barDir = new Path("/bar"); + fs.mkdirs(barDir); + // set an EC policy on "/bar" directory + fs.setErasureCodingPolicy(barDir, + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // write file to barDir + final Path fooFile = new Path("/bar/foo"); + long fileLen = cellSize * dataBlocks; + DFSTestUtil.createFile(cluster.getFileSystem(), fooFile, + fileLen, (short) 3, 0); + + // Move file to ARCHIVE. + fs.setStoragePolicy(barDir, "COLD"); + //Stop DataNodes and restart namenode + List list = new ArrayList<>(numOfDatanodes); + for (int i = 0; i < numOfDatanodes; i++) { + list.add(cluster.stopDataNode(0)); + } + cluster.restartNameNodes(); + // Restart half datanodes + for (int i = 0; i < 5; i++) { + cluster.restartDataNode(list.get(i), false); + } + cluster.waitActive(); + fs.satisfyStoragePolicy(fooFile); + DFSTestUtil.waitExpectedStorageType(fooFile.toString(), + StorageType.ARCHIVE, 5, 30000, cluster.getFileSystem()); + //Start remaining datanodes + for (int i = numOfDatanodes - 1; i >= 5; i--) { + cluster.restartDataNode(list.get(i), false); + } + cluster.waitActive(); + // verify storage types and locations. 
+ waitExpectedStorageType(cluster, fooFile.toString(), fileLen, + StorageType.ARCHIVE, 9, 9, 60000); + } finally { + cluster.shutdown(); + sps.stopGracefully(); + } + } + + + /** + * Tests to verify that for the given path, no blocks under the given path + * will be scheduled for block movement as there are no available datanode + * with required storage type. + * + * For example, there are two block for a file: + * + * File1 => blk_1[locations=A(DISK),B(DISK),C(DISK)], + * blk_2[locations=A(DISK),B(DISK),C(DISK)]. Now, set storage policy to COLD. + * No datanode is available with storage type ARCHIVE. + * + * SPS won't schedule any block movement for this path. + */ + @Test(timeout = 300000) + public void testWhenNoTargetDatanodeToSatisfyStoragePolicy() + throws Exception { + // start 10 datanodes + int numOfDatanodes = 10; + int storagesPerDatanode = 2; + long capacity = 20 * defaultStripeBlockSize; + long[][] capacities = new long[numOfDatanodes][storagesPerDatanode]; + for (int i = 0; i < numOfDatanodes; i++) { + for (int j = 0; j < storagesPerDatanode; j++) { + capacities[i][j] = capacity; + } + } + + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numOfDatanodes) + .storagesPerDatanode(storagesPerDatanode) + .storageTypes(new StorageType[][]{ + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}}) + .storageCapacities(capacities) + .build(); + + HdfsAdmin hdfsAdmin = new HdfsAdmin(FileSystem.getDefaultUri(conf), conf); + try { + cluster.waitActive(); + startSPS(); + DistributedFileSystem dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + // set "/bar" directory with HOT storage policy. + ClientProtocol client = NameNodeProxies.createProxy(conf, + cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy(); + String barDir = "/bar"; + client.mkdirs(barDir, new FsPermission((short) 777), true); + client.setStoragePolicy(barDir, HdfsConstants.HOT_STORAGE_POLICY_NAME); + // set an EC policy on "/bar" directory + client.setErasureCodingPolicy(barDir, + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // write file to barDir + final String fooFile = "/bar/foo"; + long fileLen = cellSize * dataBlocks; + DFSTestUtil.createFile(cluster.getFileSystem(), new Path(fooFile), + fileLen, (short) 3, 0); + + // verify storage types and locations + LocatedBlocks locatedBlocks = client.getBlockLocations(fooFile, 0, + fileLen); + for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) { + for (StorageType type : lb.getStorageTypes()) { + Assert.assertEquals(StorageType.DISK, type); + } + } + StripedFileTestUtil.verifyLocatedStripedBlocks(locatedBlocks, + dataBlocks + parityBlocks); + + // Move file to ARCHIVE. Only 5 datanodes are available with ARCHIVE + // storage type. + client.setStoragePolicy(barDir, "COLD"); + hdfsAdmin.satisfyStoragePolicy(new Path(fooFile)); + LOG.info("Sets storage policy to COLD and invoked satisfyStoragePolicy"); + cluster.triggerHeartbeats(); + + waitForAttemptedItems(1, 30000); + // verify storage types and locations. 
+ waitExpectedStorageType(cluster, fooFile, fileLen, StorageType.DISK, 9, 9, + 60000); + waitForAttemptedItems(1, 30000); + } finally { + cluster.shutdown(); + sps.stopGracefully(); + } + } + + private void startSPS() throws IOException { + nnc = DFSTestUtil.getNameNodeConnector(conf, + HdfsServerConstants.MOVER_ID_PATH, 1, false); + + sps = new StoragePolicySatisfier(conf); + ctxt = new ExternalSPSContext(sps, nnc); + sps.init(ctxt); + sps.start(StoragePolicySatisfierMode.EXTERNAL); + } + + private static void initConfWithStripe(Configuration conf, + int stripeBlockSize) { + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, stripeBlockSize); + conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, + 1L); + conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, + false); + } + + // Check whether the Block movement has been successfully completed to satisfy + // the storage policy for the given file. + private void waitExpectedStorageType(MiniDFSCluster cluster, + final String fileName, long fileLen, + final StorageType expectedStorageType, int expectedStorageCount, + int expectedBlkLocationCount, int timeout) throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + int actualStorageCount = 0; + try { + LocatedBlocks locatedBlocks = cluster.getFileSystem().getClient() + .getLocatedBlocks(fileName, 0, fileLen); + for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) { + LOG.info("LocatedBlocks => Size {}, locs {}", + lb.getLocations().length, lb); + if (lb.getLocations().length > expectedBlkLocationCount) { + return false; + } + for (StorageType storageType : lb.getStorageTypes()) { + if (expectedStorageType == storageType) { + actualStorageCount++; + } else { + LOG.info("Expected storage type {} and actual {}", + expectedStorageType, storageType); + } + } + } + LOG.info( + expectedStorageType + " replica count, expected={} and actual={}", + expectedStorageCount, actualStorageCount); + } catch (IOException e) { + LOG.error("Exception while getting located blocks", e); + return false; + } + return expectedStorageCount == actualStorageCount; + } + }, 100, timeout); + } + + private void waitForAttemptedItems(long expectedBlkMovAttemptedCount, + int timeout) throws TimeoutException, InterruptedException { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + LOG.info("expectedAttemptedItemsCount={} actualAttemptedItemsCount={}", + expectedBlkMovAttemptedCount, + ((BlockStorageMovementAttemptedItems) (sps + .getAttemptedItemsMonitor())).getAttemptedItemsCount()); + return ((BlockStorageMovementAttemptedItems) (sps + .getAttemptedItemsMonitor())) + .getAttemptedItemsCount() == expectedBlkMovAttemptedCount; + } + }, 100, timeout); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsCreatePermissions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsCreatePermissions.java index 68fc26f99ee..dcb6ddc00a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsCreatePermissions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsCreatePermissions.java @@ -20,8 +20,8 @@ import java.net.HttpURLConnection; import java.net.URL; -import 
org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSTestUtil; @@ -39,7 +39,8 @@ * from dfs CLI for specifying files/directories permissions. */ public class TestWebHdfsCreatePermissions { - static final Log LOG = LogFactory.getLog(TestWebHdfsCreatePermissions.class); + static final Logger LOG = + LoggerFactory.getLogger(TestWebHdfsCreatePermissions.class); { DFSTestUtil.setNameNodeLogLevel(Level.ALL); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java index d00ed62a0a5..028e18c3461 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java @@ -24,8 +24,8 @@ import java.util.Arrays; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; @@ -55,7 +55,8 @@ * Test WebHDFS which provides data locality using HTTP redirection. */ public class TestWebHdfsDataLocality { - static final Log LOG = LogFactory.getLog(TestWebHdfsDataLocality.class); + static final Logger LOG = + LoggerFactory.getLogger(TestWebHdfsDataLocality.class); { DFSTestUtil.setNameNodeLogLevel(Level.ALL); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/sps/TestExternalStoragePolicySatisfier.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/sps/TestExternalStoragePolicySatisfier.java new file mode 100644 index 00000000000..8b9e7ce710c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/sps/TestExternalStoragePolicySatisfier.java @@ -0,0 +1,1649 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
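The two WebHDFS test hunks above apply the same mechanical commons-logging to slf4j switch that runs through this patch; the pattern, shown on a hypothetical class:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class LoggerMigrationExample {
  // Before: static final Log LOG = LogFactory.getLog(LoggerMigrationExample.class);
  // After:  the commons-logging Log/LogFactory pair is replaced by slf4j's
  //         Logger/LoggerFactory, with no change needed at call sites that only
  //         use the common info/warn/error/debug methods.
  static final Logger LOG =
      LoggerFactory.getLogger(LoggerMigrationExample.class);
}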
+ */ +package org.apache.hadoop.hdfs.server.sps; + +import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_HTTPS_KEYSTORE_RESOURCE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HTTP_POLICY_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SPS_ADDRESS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SPS_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SPS_KEYTAB_FILE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SPS_MAX_OUTSTANDING_PATHS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.XATTR_SATISFY_STORAGE_POLICY; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.TimeoutException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.NameNodeProxies; +import org.apache.hadoop.hdfs.StripedFileTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; +import org.apache.hadoop.hdfs.client.HdfsAdmin; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; +import 
org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils; +import org.apache.hadoop.hdfs.server.namenode.FSEditLog; +import org.apache.hadoop.hdfs.server.namenode.INode; +import org.apache.hadoop.hdfs.server.namenode.sps.BlockMovementListener; +import org.apache.hadoop.hdfs.server.namenode.sps.BlockStorageMovementAttemptedItems; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier; +import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.minikdc.MiniKdc; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.util.KerberosName; +import org.apache.hadoop.security.ssl.KeyStoreTestUtil; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.LogCapturer; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Supplier; + +/** + * Tests the external sps service plugins. + */ +public class TestExternalStoragePolicySatisfier { + private static final String ONE_SSD = "ONE_SSD"; + private static final String COLD = "COLD"; + private StorageType[][] allDiskTypes = + new StorageType[][]{{StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}}; + private File keytabFile; + private String principal; + private MiniKdc kdc; + private File baseDir; + private NameNodeConnector nnc; + private StoragePolicySatisfier externalSps; + private ExternalSPSContext externalCtxt; + private DistributedFileSystem dfs = null; + private MiniDFSCluster hdfsCluster = null; + private Configuration config = null; + private static final int NUM_OF_DATANODES = 3; + private static final int STORAGES_PER_DATANODE = 2; + private static final long CAPACITY = 2 * 256 * 1024 * 1024; + private static final String FILE = "/testMoveToSatisfyStoragePolicy"; + private static final int DEFAULT_BLOCK_SIZE = 1024; + private static final Logger LOG = + LoggerFactory.getLogger(TestExternalStoragePolicySatisfier.class); + + @Before + public void setUp() { + config = new HdfsConfiguration(); + config.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + // Most of the tests are restarting DNs and NN. So, reduced refresh cycle to + // update latest datanodes. + config.setLong(DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS, + 3000); + config.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + } + + @After + public void destroy() throws Exception { + if (kdc != null) { + kdc.stop(); + FileUtil.fullyDelete(baseDir); + } + if (hdfsCluster != null) { + hdfsCluster.shutdown(); + } + } + + /** + * Sets hdfs cluster. + */ + private void setCluster(MiniDFSCluster cluster) { + this.hdfsCluster = cluster; + } + + /** + * @return conf. + */ + private Configuration getConf() { + return this.config; + } + + /** + * @return hdfs cluster. + */ + private MiniDFSCluster getCluster() { + return hdfsCluster; + } + + /** + * Gets distributed file system. 
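Every test in this class brings the external satisfier up the same way (see startCluster() and restartNamenode() below); condensed into one hypothetical helper that uses only the calls appearing in those methods:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode;
import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier;
import org.apache.hadoop.hdfs.server.sps.ExternalSPSContext;

class ExternalSpsBootstrapExample {
  static StoragePolicySatisfier startExternalSps(Configuration conf)
      throws IOException {
    // Grab the Mover ID path lease so only one mover/SPS instance is active.
    NameNodeConnector nnc = DFSTestUtil.getNameNodeConnector(conf,
        HdfsServerConstants.MOVER_ID_PATH, 1, false);
    StoragePolicySatisfier sps = new StoragePolicySatisfier(conf);
    // The external context is the bridge between the satisfier and the NameNode.
    sps.init(new ExternalSPSContext(sps, nnc));
    sps.start(StoragePolicySatisfierMode.EXTERNAL);
    return sps;
  }
}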
+ * + * @throws IOException + */ + private DistributedFileSystem getFS() throws IOException { + this.dfs = hdfsCluster.getFileSystem(); + return this.dfs; + } + + private void shutdownCluster() { + if (externalSps != null) { + externalSps.stopGracefully(); + } + } + + private void createCluster() throws IOException { + getConf().setLong("dfs.block.size", DEFAULT_BLOCK_SIZE); + setCluster(startCluster(getConf(), allDiskTypes, NUM_OF_DATANODES, + STORAGES_PER_DATANODE, CAPACITY)); + getFS(); + writeContent(FILE); + } + + private MiniDFSCluster startCluster(final Configuration conf, + StorageType[][] storageTypes, int numberOfDatanodes, int storagesPerDn, + long nodeCapacity) throws IOException { + long[][] capacities = new long[numberOfDatanodes][storagesPerDn]; + for (int i = 0; i < numberOfDatanodes; i++) { + for (int j = 0; j < storagesPerDn; j++) { + capacities[i][j] = nodeCapacity; + } + } + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numberOfDatanodes).storagesPerDatanode(storagesPerDn) + .storageTypes(storageTypes).storageCapacities(capacities).build(); + cluster.waitActive(); + + nnc = DFSTestUtil.getNameNodeConnector(getConf(), + HdfsServerConstants.MOVER_ID_PATH, 1, false); + + externalSps = new StoragePolicySatisfier(getConf()); + externalCtxt = new ExternalSPSContext(externalSps, nnc); + + externalSps.init(externalCtxt); + externalSps.start(StoragePolicySatisfierMode.EXTERNAL); + return cluster; + } + + private void restartNamenode() throws IOException{ + if (externalSps != null) { + externalSps.stopGracefully(); + } + + getCluster().restartNameNodes(); + getCluster().waitActive(); + externalSps = new StoragePolicySatisfier(getConf()); + + externalCtxt = new ExternalSPSContext(externalSps, nnc); + externalSps.init(externalCtxt); + externalSps.start(StoragePolicySatisfierMode.EXTERNAL); + } + + private void initSecureConf(Configuration conf) throws Exception { + String username = "externalSPS"; + baseDir = GenericTestUtils + .getTestDir(TestExternalStoragePolicySatisfier.class.getSimpleName()); + FileUtil.fullyDelete(baseDir); + Assert.assertTrue(baseDir.mkdirs()); + + Properties kdcConf = MiniKdc.createConf(); + kdc = new MiniKdc(kdcConf, baseDir); + kdc.start(); + + SecurityUtil.setAuthenticationMethod( + UserGroupInformation.AuthenticationMethod.KERBEROS, conf); + UserGroupInformation.setConfiguration(conf); + KerberosName.resetDefaultRealm(); + Assert.assertTrue("Expected configuration to enable security", + UserGroupInformation.isSecurityEnabled()); + + keytabFile = new File(baseDir, username + ".keytab"); + String keytab = keytabFile.getAbsolutePath(); + // Windows will not reverse name lookup "127.0.0.1" to "localhost". + String krbInstance = Path.WINDOWS ? 
"127.0.0.1" : "localhost"; + principal = username + "/" + krbInstance + "@" + kdc.getRealm(); + String spnegoPrincipal = "HTTP/" + krbInstance + "@" + kdc.getRealm(); + kdc.createPrincipal(keytabFile, username, username + "/" + krbInstance, + "HTTP/" + krbInstance); + + conf.set(DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, principal); + conf.set(DFS_NAMENODE_KEYTAB_FILE_KEY, keytab); + conf.set(DFS_DATANODE_KERBEROS_PRINCIPAL_KEY, principal); + conf.set(DFS_DATANODE_KEYTAB_FILE_KEY, keytab); + conf.set(DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY, spnegoPrincipal); + conf.setBoolean(DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, true); + conf.set(DFS_DATA_TRANSFER_PROTECTION_KEY, "authentication"); + conf.set(DFS_HTTP_POLICY_KEY, HttpConfig.Policy.HTTPS_ONLY.name()); + conf.set(DFS_NAMENODE_HTTPS_ADDRESS_KEY, "localhost:0"); + conf.set(DFS_DATANODE_HTTPS_ADDRESS_KEY, "localhost:0"); + conf.setInt(IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY, 10); + + conf.set(DFS_SPS_ADDRESS_KEY, "localhost:0"); + conf.set(DFS_SPS_KEYTAB_FILE_KEY, keytab); + conf.set(DFS_SPS_KERBEROS_PRINCIPAL_KEY, principal); + + String keystoresDir = baseDir.getAbsolutePath(); + String sslConfDir = KeyStoreTestUtil + .getClasspathDir(TestExternalStoragePolicySatisfier.class); + KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfDir, conf, false); + + conf.set(DFS_CLIENT_HTTPS_KEYSTORE_RESOURCE_KEY, + KeyStoreTestUtil.getClientSSLConfigFileName()); + conf.set(DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY, + KeyStoreTestUtil.getServerSSLConfigFileName()); + + conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, DEFAULT_BLOCK_SIZE); + conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L); + } + + /** + * Test SPS runs fine when logging in with a keytab in kerberized env. Reusing + * testWhenStoragePolicySetToALLSSD here for basic functionality testing. + */ + @Test(timeout = 300000) + public void testWithKeytabs() throws Exception { + try { + initSecureConf(getConf()); + final UserGroupInformation ugi = UserGroupInformation + .loginUserFromKeytabAndReturnUGI(principal, + keytabFile.getAbsolutePath()); + ugi.doAs(new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + // verify that sps runs Ok. + testWhenStoragePolicySetToALLSSD(); + // verify that UGI was logged in using keytab. + Assert.assertTrue(UserGroupInformation.isLoginKeytabBased()); + return null; + } + }); + } finally { + // Reset UGI so that other tests are not affected. + UserGroupInformation.reset(); + UserGroupInformation.setConfiguration(new Configuration()); + } + } + + /** + * Test verifies that SPS call will throw exception if the call Q exceeds + * OutstandingQueueLimit value. + * + * @throws Exception + */ + @Test(timeout = 300000) + public void testOutstandingQueueLimitExceeds() throws Exception { + try { + getConf().setInt(DFS_SPS_MAX_OUTSTANDING_PATHS_KEY, 3); + createCluster(); + List files = new ArrayList<>(); + files.add(FILE); + DistributedFileSystem fs = getFS(); + + // stops sps to make the SPS Q with many outstanding requests. + externalSps.stopGracefully(); + // Creates 4 more files. Send all of them for satisfying the storage + // policy together. 
+ for (int i = 0; i < 3; i++) { + String file1 = "/testOutstandingQueueLimitExceeds_" + i; + files.add(file1); + writeContent(file1); + fs.satisfyStoragePolicy(new Path(file1)); + } + String fileExceeds = "/testOutstandingQueueLimitExceeds_" + 4; + files.add(fileExceeds); + writeContent(fileExceeds); + try { + fs.satisfyStoragePolicy(new Path(fileExceeds)); + Assert.fail("Should throw exception as it exceeds " + + "outstanding SPS call Q limit"); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "Outstanding satisfier queue limit: 3 exceeded, try later!", ioe); + } + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify that SPS should be able to start when the Mover ID file + * is not being hold by a Mover. This can be the case when Mover exits + * ungracefully without deleting the ID file from HDFS. + */ + @Test(timeout = 300000) + public void testWhenMoverExitsWithoutDeleteMoverIDFile() + throws IOException { + try { + createCluster(); + // Simulate the case by creating MOVER_ID file + DFSTestUtil.createFile(getCluster().getFileSystem(), + HdfsServerConstants.MOVER_ID_PATH, 0, (short) 1, 0); + restartNamenode(); + boolean running = externalCtxt.isRunning(); + Assert.assertTrue("SPS should be running as " + + "no Mover really running", running); + } finally { + shutdownCluster(); + } + } + + /** + * This test need not run as external scan is not a batch based scanning right + * now. + */ + @Ignore("ExternalFileIdCollector is not batch based right now." + + " So, ignoring it.") + public void testBatchProcessingForSPSDirectory() throws Exception { + } + + /** + * This test case is more specific to internal. + */ + @Ignore("This test is specific to internal, so skipping here.") + public void testWhenMoverIsAlreadyRunningBeforeStoragePolicySatisfier() + throws Exception { + } + + /** + * This test is specific to internal SPS. So, ignoring it. + */ + @Ignore("This test is specific to internal SPS. So, ignoring it.") + public void testTraverseWhenParentDeleted() throws Exception { + } + + /** + * This test is specific to internal SPS. So, ignoring it. + */ + @Ignore("This test is specific to internal SPS. 
So, ignoring it.") + public void testTraverseWhenRootParentDeleted() throws Exception { + } + + + @Test(timeout = 300000) + public void testWhenStoragePolicySetToCOLD() + throws Exception { + + try { + createCluster(); + doTestWhenStoragePolicySetToCOLD(); + } finally { + shutdownCluster(); + } + } + + private void doTestWhenStoragePolicySetToCOLD() throws Exception { + // Change policy to COLD + dfs.setStoragePolicy(new Path(FILE), COLD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}}; + startAdditionalDNs(config, 3, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + + hdfsCluster.triggerHeartbeats(); + dfs.satisfyStoragePolicy(new Path(FILE)); + // Wait till namenode notified about the block location details + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.ARCHIVE, 3, 35000, + dfs); + } + + @Test(timeout = 300000) + public void testWhenStoragePolicySetToALLSSD() + throws Exception { + try { + createCluster(); + // Change policy to ALL_SSD + dfs.setStoragePolicy(new Path(FILE), "ALL_SSD"); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.SSD, StorageType.DISK}, + {StorageType.SSD, StorageType.DISK}, + {StorageType.SSD, StorageType.DISK}}; + + // Making sure SDD based nodes added to cluster. Adding SSD based + // datanodes. + startAdditionalDNs(config, 3, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + dfs.satisfyStoragePolicy(new Path(FILE)); + hdfsCluster.triggerHeartbeats(); + // Wait till StorgePolicySatisfier Identified that block to move to SSD + // areas + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.SSD, 3, 30000, dfs); + } finally { + shutdownCluster(); + } + } + + @Test(timeout = 300000) + public void testWhenStoragePolicySetToONESSD() + throws Exception { + try { + createCluster(); + // Change policy to ONE_SSD + dfs.setStoragePolicy(new Path(FILE), ONE_SSD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.SSD, StorageType.DISK}}; + + // Making sure SDD based nodes added to cluster. Adding SSD based + // datanodes. + startAdditionalDNs(config, 1, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + dfs.satisfyStoragePolicy(new Path(FILE)); + hdfsCluster.triggerHeartbeats(); + // Wait till StorgePolicySatisfier Identified that block to move to SSD + // areas + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.SSD, 1, 30000, dfs); + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 2, 30000, + dfs); + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify that the block storage movement report will be propagated + * to Namenode via datanode heartbeat. + */ + @Test(timeout = 300000) + public void testBlksStorageMovementAttemptFinishedReport() throws Exception { + try { + createCluster(); + // Change policy to ONE_SSD + dfs.setStoragePolicy(new Path(FILE), ONE_SSD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.SSD, StorageType.DISK}}; + + // Making sure SDD based nodes added to cluster. Adding SSD based + // datanodes. 
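Setup aside, the COLD, ALL_SSD and ONE_SSD tests in this class all drive the same three-step client flow; condensed into a hypothetical helper for the COLD case (a three-replica file is assumed):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;

class SatisfyColdExample {
  static void moveToArchive(DistributedFileSystem dfs, String file)
      throws Exception {
    // 1. Retarget the policy; existing replicas are not moved by this call alone.
    dfs.setStoragePolicy(new Path(file), "COLD");
    // 2. Ask the SPS to reconcile existing block locations with the new policy.
    dfs.satisfyStoragePolicy(new Path(file));
    // 3. Poll block locations until all three replicas report ARCHIVE storage.
    DFSTestUtil.waitExpectedStorageType(file, StorageType.ARCHIVE, 3, 30000, dfs);
  }
}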
+ startAdditionalDNs(config, 1, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + dfs.satisfyStoragePolicy(new Path(FILE)); + hdfsCluster.triggerHeartbeats(); + + // Wait till the block is moved to SSD areas + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.SSD, 1, 30000, dfs); + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 2, 30000, + dfs); + + waitForBlocksMovementAttemptReport(1, 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify that multiple files are giving to satisfy storage policy + * and should work well altogether. + */ + @Test(timeout = 300000) + public void testMultipleFilesForSatisfyStoragePolicy() throws Exception { + try { + createCluster(); + List files = new ArrayList<>(); + files.add(FILE); + + // Creates 4 more files. Send all of them for satisfying the storage + // policy together. + for (int i = 0; i < 4; i++) { + String file1 = "/testMoveWhenStoragePolicyNotSatisfying_" + i; + files.add(file1); + writeContent(file1); + } + // Change policy to ONE_SSD + for (String fileName : files) { + dfs.setStoragePolicy(new Path(fileName), ONE_SSD); + dfs.satisfyStoragePolicy(new Path(fileName)); + } + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.SSD, StorageType.DISK}}; + + // Making sure SDD based nodes added to cluster. Adding SSD based + // datanodes. + startAdditionalDNs(config, 1, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + hdfsCluster.triggerHeartbeats(); + + for (String fileName : files) { + // Wait till the block is moved to SSD areas + DFSTestUtil.waitExpectedStorageType( + fileName, StorageType.SSD, 1, 30000, dfs); + DFSTestUtil.waitExpectedStorageType( + fileName, StorageType.DISK, 2, 30000, dfs); + } + + waitForBlocksMovementAttemptReport(files.size(), 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify hdfsAdmin.satisfyStoragePolicy works well for file. + * @throws Exception + */ + @Test(timeout = 300000) + public void testSatisfyFileWithHdfsAdmin() throws Exception { + try { + createCluster(); + HdfsAdmin hdfsAdmin = + new HdfsAdmin(FileSystem.getDefaultUri(config), config); + // Change policy to COLD + dfs.setStoragePolicy(new Path(FILE), COLD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.ARCHIVE}}; + startAdditionalDNs(config, 3, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + + hdfsAdmin.satisfyStoragePolicy(new Path(FILE)); + + hdfsCluster.triggerHeartbeats(); + // Wait till namenode notified about the block location details + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.ARCHIVE, 3, 30000, + dfs); + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify hdfsAdmin.satisfyStoragePolicy works well for dir. 
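Besides DistributedFileSystem, these tests also drive SPS through HdfsAdmin; the minimal call sequence, as a hypothetical helper:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsAdmin;

class HdfsAdminSatisfyExample {
  static void satisfy(Configuration conf, String path) throws IOException {
    // HdfsAdmin exposes the same satisfyStoragePolicy entry point that the
    // DistributedFileSystem-based tests in this class use.
    HdfsAdmin admin = new HdfsAdmin(FileSystem.getDefaultUri(conf), conf);
    admin.satisfyStoragePolicy(new Path(path));
  }
}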
+ * @throws Exception + */ + @Test(timeout = 300000) + public void testSatisfyDirWithHdfsAdmin() throws Exception { + try { + createCluster(); + HdfsAdmin hdfsAdmin = + new HdfsAdmin(FileSystem.getDefaultUri(config), config); + final String subDir = "/subDir"; + final String subFile1 = subDir + "/subFile1"; + final String subDir2 = subDir + "/subDir2"; + final String subFile2 = subDir2 + "/subFile2"; + dfs.mkdirs(new Path(subDir)); + writeContent(subFile1); + dfs.mkdirs(new Path(subDir2)); + writeContent(subFile2); + + // Change policy to COLD + dfs.setStoragePolicy(new Path(subDir), ONE_SSD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.SSD, StorageType.DISK}}; + startAdditionalDNs(config, 1, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + + hdfsAdmin.satisfyStoragePolicy(new Path(subDir)); + + hdfsCluster.triggerHeartbeats(); + + // take effect for the file in the directory. + DFSTestUtil.waitExpectedStorageType( + subFile1, StorageType.SSD, 1, 30000, dfs); + DFSTestUtil.waitExpectedStorageType( + subFile1, StorageType.DISK, 2, 30000, dfs); + + // take no effect for the sub-dir's file in the directory. + DFSTestUtil.waitExpectedStorageType( + subFile2, StorageType.SSD, 1, 30000, dfs); + DFSTestUtil.waitExpectedStorageType( + subFile2, StorageType.DISK, 2, 30000, dfs); + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify hdfsAdmin.satisfyStoragePolicy exceptions. + * @throws Exception + */ + @Test(timeout = 300000) + public void testSatisfyWithExceptions() throws Exception { + try { + createCluster(); + final String nonExistingFile = "/noneExistingFile"; + hdfsCluster.getConfiguration(0). + setBoolean(DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY, false); + restartNamenode(); + HdfsAdmin hdfsAdmin = + new HdfsAdmin(FileSystem.getDefaultUri(config), config); + + try { + hdfsAdmin.satisfyStoragePolicy(new Path(FILE)); + Assert.fail(String.format( + "Should failed to satisfy storage policy " + + "for %s since %s is set to false.", + FILE, DFS_STORAGE_POLICY_ENABLED_KEY)); + } catch (IOException e) { + GenericTestUtils.assertExceptionContains(String.format( + "Failed to satisfy storage policy since %s is set to false.", + DFS_STORAGE_POLICY_ENABLED_KEY), e); + } + + hdfsCluster.getConfiguration(0). + setBoolean(DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY, true); + restartNamenode(); + + hdfsAdmin = new HdfsAdmin(FileSystem.getDefaultUri(config), config); + try { + hdfsAdmin.satisfyStoragePolicy(new Path(nonExistingFile)); + Assert.fail("Should throw FileNotFoundException for " + + nonExistingFile); + } catch (FileNotFoundException e) { + + } + + try { + hdfsAdmin.satisfyStoragePolicy(new Path(FILE)); + hdfsAdmin.satisfyStoragePolicy(new Path(FILE)); + } catch (Exception e) { + Assert.fail(String.format("Allow to invoke mutlipe times " + + "#satisfyStoragePolicy() api for a path %s , internally just " + + "skipping addtion to satisfy movement queue.", FILE)); + } + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify that for the given path, some of the blocks or block src + * locations(src nodes) under the given path will be scheduled for block + * movement. + * + * For example, there are two block for a file: + * + * File1 => blk_1[locations=A(DISK),B(DISK),C(DISK)], + * blk_2[locations=A(DISK),B(DISK),C(DISK)]. Now, set storage policy to COLD. + * Only one datanode is available with storage type ARCHIVE, say D. 
+ * + * SPS will schedule block movement to the coordinator node with the details, + * blk_1[move A(DISK) -> D(ARCHIVE)], blk_2[move A(DISK) -> D(ARCHIVE)]. + */ + @Test(timeout = 300000) + public void testWhenOnlyFewTargetDatanodeAreAvailableToSatisfyStoragePolicy() + throws Exception { + try { + createCluster(); + // Change policy to COLD + dfs.setStoragePolicy(new Path(FILE), COLD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.ARCHIVE, StorageType.ARCHIVE}}; + + // Adding ARCHIVE based datanodes. + startAdditionalDNs(config, 1, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + + dfs.satisfyStoragePolicy(new Path(FILE)); + hdfsCluster.triggerHeartbeats(); + // Wait till StorgePolicySatisfier identified that block to move to + // ARCHIVE area. + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.ARCHIVE, 1, 30000, + dfs); + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 2, 30000, + dfs); + + waitForBlocksMovementAttemptReport(1, 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify that for the given path, no blocks or block src + * locations(src nodes) under the given path will be scheduled for block + * movement as there are no available datanode with required storage type. + * + * For example, there are two block for a file: + * + * File1 => blk_1[locations=A(DISK),B(DISK),C(DISK)], + * blk_2[locations=A(DISK),B(DISK),C(DISK)]. Now, set storage policy to COLD. + * No datanode is available with storage type ARCHIVE. + * + * SPS won't schedule any block movement for this path. + */ + @Test(timeout = 300000) + public void testWhenNoTargetDatanodeToSatisfyStoragePolicy() + throws Exception { + try { + createCluster(); + // Change policy to COLD + dfs.setStoragePolicy(new Path(FILE), COLD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.DISK, StorageType.DISK}}; + // Adding DISK based datanodes + startAdditionalDNs(config, 1, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + + dfs.satisfyStoragePolicy(new Path(FILE)); + hdfsCluster.triggerHeartbeats(); + + // No block movement will be scheduled as there is no target node + // available with the required storage type. + waitForAttemptedItems(1, 30000); + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 3, 30000, + dfs); + // Since there is no target node the item will get timed out and then + // re-attempted. + waitForAttemptedItems(1, 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Test to verify that satisfy worker can't move blocks. If the given block is + * pinned it shouldn't be considered for retries. + */ + @Test(timeout = 120000) + public void testMoveWithBlockPinning() throws Exception { + try{ + config.setBoolean(DFSConfigKeys.DFS_DATANODE_BLOCK_PINNING_ENABLED, true); + hdfsCluster = startCluster(config, allDiskTypes, 3, 2, CAPACITY); + + hdfsCluster.waitActive(); + dfs = hdfsCluster.getFileSystem(); + + // create a file with replication factor 3 and mark 2 pinned block + // locations. 
+ final String file1 = createFileAndSimulateFavoredNodes(2); + + // Change policy to COLD + dfs.setStoragePolicy(new Path(file1), COLD); + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.ARCHIVE}}; + // Adding DISK based datanodes + startAdditionalDNs(config, 3, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + + dfs.satisfyStoragePolicy(new Path(file1)); + hdfsCluster.triggerHeartbeats(); + + // No block movement will be scheduled as there is no target node + // available with the required storage type. + waitForAttemptedItems(1, 30000); + waitForBlocksMovementAttemptReport(1, 30000); + DFSTestUtil.waitExpectedStorageType( + file1, StorageType.ARCHIVE, 1, 30000, dfs); + DFSTestUtil.waitExpectedStorageType( + file1, StorageType.DISK, 2, 30000, dfs); + } finally { + shutdownCluster(); + } + } + + /** + * Tests to verify that for the given path, only few of the blocks or block + * src locations(src nodes) under the given path will be scheduled for block + * movement. + * + * For example, there are two block for a file: + * + * File1 => two blocks and default storage policy(HOT). + * blk_1[locations=A(DISK),B(DISK),C(DISK),D(DISK),E(DISK)], + * blk_2[locations=A(DISK),B(DISK),C(DISK),D(DISK),E(DISK)]. + * + * Now, set storage policy to COLD. + * Only two Dns are available with expected storage type ARCHIVE, say A, E. + * + * SPS will schedule block movement to the coordinator node with the details, + * blk_1[move A(DISK) -> A(ARCHIVE), move E(DISK) -> E(ARCHIVE)], + * blk_2[move A(DISK) -> A(ARCHIVE), move E(DISK) -> E(ARCHIVE)]. + */ + @Test(timeout = 300000) + public void testWhenOnlyFewSourceNodesHaveMatchingTargetNodes() + throws Exception { + try { + int numOfDns = 5; + config.setLong("dfs.block.size", 1024); + allDiskTypes = + new StorageType[][]{{StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.ARCHIVE}}; + hdfsCluster = startCluster(config, allDiskTypes, numOfDns, + STORAGES_PER_DATANODE, CAPACITY); + dfs = hdfsCluster.getFileSystem(); + writeContent(FILE, (short) 5); + + // Change policy to COLD + dfs.setStoragePolicy(new Path(FILE), COLD); + + dfs.satisfyStoragePolicy(new Path(FILE)); + hdfsCluster.triggerHeartbeats(); + // Wait till StorgePolicySatisfier identified that block to move to + // ARCHIVE area. + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.ARCHIVE, 2, 30000, + dfs); + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 3, 30000, + dfs); + + waitForBlocksMovementAttemptReport(1, 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Tests that moving block storage with in the same datanode. Let's say we + * have DN1[DISK,ARCHIVE], DN2[DISK, SSD], DN3[DISK,RAM_DISK] when + * storagepolicy set to ONE_SSD and request satisfyStoragePolicy, then block + * should move to DN2[SSD] successfully. 
+ */
+ @Test(timeout = 300000)
+ public void testBlockMoveInSameDatanodeWithONESSD() throws Exception {
+ StorageType[][] diskTypes =
+ new StorageType[][]{{StorageType.DISK, StorageType.ARCHIVE},
+ {StorageType.DISK, StorageType.SSD},
+ {StorageType.DISK, StorageType.RAM_DISK}};
+ config.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
+ try {
+ hdfsCluster = startCluster(config, diskTypes, NUM_OF_DATANODES,
+ STORAGES_PER_DATANODE, CAPACITY);
+ dfs = hdfsCluster.getFileSystem();
+ writeContent(FILE);
+
+ // Change policy to ONE_SSD
+ dfs.setStoragePolicy(new Path(FILE), ONE_SSD);
+
+ dfs.satisfyStoragePolicy(new Path(FILE));
+ hdfsCluster.triggerHeartbeats();
+ DFSTestUtil.waitExpectedStorageType(FILE, StorageType.SSD, 1, 30000, dfs);
+ DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 2, 30000,
+ dfs);
+
+ } finally {
+ shutdownCluster();
+ }
+ }
+
+ /**
+ * Tests moving block storage within the same datanode and to a remote node.
+ * Let's say we have DN1[DISK,ARCHIVE], DN2[ARCHIVE, SSD], DN3[DISK,DISK],
+ * DN4[DISK,DISK]; when the storage policy is set to WARM and
+ * satisfyStoragePolicy is requested, then the block should move to
+ * DN1[ARCHIVE] and DN2[ARCHIVE] successfully.
+ */
+ @Test(timeout = 300000)
+ public void testBlockMoveInSameAndRemoteDatanodesWithWARM() throws Exception {
+ StorageType[][] diskTypes =
+ new StorageType[][]{{StorageType.DISK, StorageType.ARCHIVE},
+ {StorageType.ARCHIVE, StorageType.SSD},
+ {StorageType.DISK, StorageType.DISK},
+ {StorageType.DISK, StorageType.DISK}};
+
+ config.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
+ try {
+ hdfsCluster = startCluster(config, diskTypes, diskTypes.length,
+ STORAGES_PER_DATANODE, CAPACITY);
+ dfs = hdfsCluster.getFileSystem();
+ writeContent(FILE);
+
+ // Change policy to WARM
+ dfs.setStoragePolicy(new Path(FILE), "WARM");
+ dfs.satisfyStoragePolicy(new Path(FILE));
+ hdfsCluster.triggerHeartbeats();
+
+ DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 1, 30000,
+ dfs);
+ DFSTestUtil.waitExpectedStorageType(FILE, StorageType.ARCHIVE, 2, 30000,
+ dfs);
+ } finally {
+ shutdownCluster();
+ }
+ }
+
+ /**
+ * If a replica with the expected storage type already exists in the source
+ * DN, then that DN should be skipped.
+ */
+ @Test(timeout = 300000)
+ public void testSPSWhenReplicaWithExpectedStorageAlreadyAvailableInSource()
+ throws Exception {
+ StorageType[][] diskTypes = new StorageType[][] {
+ {StorageType.DISK, StorageType.ARCHIVE},
+ {StorageType.DISK, StorageType.ARCHIVE},
+ {StorageType.DISK, StorageType.ARCHIVE}};
+
+ try {
+ hdfsCluster = startCluster(config, diskTypes, diskTypes.length,
+ STORAGES_PER_DATANODE, CAPACITY);
+ dfs = hdfsCluster.getFileSystem();
+ // 1. Write two replicas on DISK.
+ DFSTestUtil.createFile(dfs, new Path(FILE), DEFAULT_BLOCK_SIZE,
+ (short) 2, 0);
+ // 2. Change policy to COLD, so the third replica will be written to ARCHIVE.
+ dfs.setStoragePolicy(new Path(FILE), "COLD");
+
+ // 3. Change replication factor to 3.
+ dfs.setReplication(new Path(FILE), (short) 3);
+
+ DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 2, 30000,
+ dfs);
+ DFSTestUtil.waitExpectedStorageType(FILE, StorageType.ARCHIVE, 1, 30000,
+ dfs);
+
+ // 4. Change policy to HOT, so we can move all the blocks to DISK.
+ dfs.setStoragePolicy(new Path(FILE), "HOT");
+
+ // 5. Satisfy the policy.
+ dfs.satisfyStoragePolicy(new Path(FILE));
+
+ // 6. Blocks should move successfully.
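+ // Only the ARCHIVE replica needs to move; the two replicas that are
+ // already on DISK should be skipped as sources.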
+ DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 3, 30000, + dfs); + } finally { + shutdownCluster(); + } + } + + /** + * Tests that movements should not be assigned when there is no space in + * target DN. + */ + @Test(timeout = 300000) + public void testChooseInSameDatanodeWithONESSDShouldNotChooseIfNoSpace() + throws Exception { + StorageType[][] diskTypes = + new StorageType[][]{{StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.SSD}, + {StorageType.DISK, StorageType.DISK}}; + config.setLong("dfs.block.size", 2 * DEFAULT_BLOCK_SIZE); + long dnCapacity = 1024 * DEFAULT_BLOCK_SIZE + (2 * DEFAULT_BLOCK_SIZE - 1); + try { + hdfsCluster = startCluster(config, diskTypes, NUM_OF_DATANODES, + STORAGES_PER_DATANODE, dnCapacity); + dfs = hdfsCluster.getFileSystem(); + writeContent(FILE); + + // Change policy to ONE_SSD + dfs.setStoragePolicy(new Path(FILE), ONE_SSD); + Path filePath = new Path("/testChooseInSameDatanode"); + final FSDataOutputStream out = + dfs.create(filePath, false, 100, (short) 1, 2 * DEFAULT_BLOCK_SIZE); + try { + dfs.setStoragePolicy(filePath, ONE_SSD); + // Try to fill up SSD part by writing content + long remaining = dfs.getStatus().getRemaining() / (3 * 2); + for (int i = 0; i < remaining; i++) { + out.write(i); + } + } finally { + out.close(); + } + hdfsCluster.triggerHeartbeats(); + ArrayList dataNodes = hdfsCluster.getDataNodes(); + // Temporarily disable heart beats, so that we can assert whether any + // items schedules for DNs even though DN's does not have space to write. + // Disabling heart beats can keep scheduled items on DatanodeDescriptor + // itself. + for (DataNode dataNode : dataNodes) { + DataNodeTestUtils.setHeartbeatsDisabledForTests(dataNode, true); + } + dfs.satisfyStoragePolicy(new Path(FILE)); + + // Wait for items to be processed + waitForAttemptedItems(1, 30000); + + // Enable heart beats now + for (DataNode dataNode : dataNodes) { + DataNodeTestUtils.setHeartbeatsDisabledForTests(dataNode, false); + } + hdfsCluster.triggerHeartbeats(); + + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.DISK, 3, 30000, + dfs); + DFSTestUtil.waitExpectedStorageType(FILE, StorageType.SSD, 0, 30000, dfs); + } finally { + shutdownCluster(); + } + } + + /** + * Tests that Xattrs should be cleaned if satisfy storage policy called on EC + * file with unsuitable storage policy set. 
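+ * The expectation is that the striped blocks stay on DISK and the satisfy
+ * xattr is still removed, so it does not leak in the namespace.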
+ * + * @throws Exception + */ + @Test(timeout = 300000) + public void testSPSShouldNotLeakXattrIfSatisfyStoragePolicyCallOnECFiles() + throws Exception { + StorageType[][] diskTypes = + new StorageType[][]{{StorageType.SSD, StorageType.DISK}, + {StorageType.SSD, StorageType.DISK}, + {StorageType.SSD, StorageType.DISK}, + {StorageType.SSD, StorageType.DISK}, + {StorageType.SSD, StorageType.DISK}, + {StorageType.DISK, StorageType.SSD}, + {StorageType.DISK, StorageType.SSD}, + {StorageType.DISK, StorageType.SSD}, + {StorageType.DISK, StorageType.SSD}, + {StorageType.DISK, StorageType.SSD}}; + + int defaultStripedBlockSize = + StripedFileTestUtil.getDefaultECPolicy().getCellSize() * 4; + config.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, defaultStripedBlockSize); + config.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L); + config.setLong(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, + 1L); + config.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, + false); + try { + hdfsCluster = startCluster(config, diskTypes, diskTypes.length, + STORAGES_PER_DATANODE, CAPACITY); + dfs = hdfsCluster.getFileSystem(); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // set "/foo" directory with ONE_SSD storage policy. + ClientProtocol client = NameNodeProxies.createProxy(config, + hdfsCluster.getFileSystem(0).getUri(), ClientProtocol.class) + .getProxy(); + String fooDir = "/foo"; + client.mkdirs(fooDir, new FsPermission((short) 777), true); + // set an EC policy on "/foo" directory + client.setErasureCodingPolicy(fooDir, + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // write file to fooDir + final String testFile = "/foo/bar"; + long fileLen = 20 * defaultStripedBlockSize; + DFSTestUtil.createFile(dfs, new Path(testFile), fileLen, (short) 3, 0); + + // ONESSD is unsuitable storage policy on EC files + client.setStoragePolicy(fooDir, HdfsConstants.ONESSD_STORAGE_POLICY_NAME); + dfs.satisfyStoragePolicy(new Path(testFile)); + + // Thread.sleep(9000); // To make sure SPS triggered + // verify storage types and locations + LocatedBlocks locatedBlocks = + client.getBlockLocations(testFile, 0, fileLen); + for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) { + for (StorageType type : lb.getStorageTypes()) { + Assert.assertEquals(StorageType.DISK, type); + } + } + + // Make sure satisfy xattr has been removed. + DFSTestUtil.waitForXattrRemoved(testFile, XATTR_SATISFY_STORAGE_POLICY, + hdfsCluster.getNamesystem(), 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Test SPS with empty file. + * 1. Create one empty file. + * 2. Call satisfyStoragePolicy for empty file. + * 3. SPS should skip this file and xattr should not be added for empty file. 
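+ * (Verified by checking that no new edit-log transaction is written and
+ * that the inode has no XAttrFeature.)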
+ */
+ @Test(timeout = 300000)
+ public void testSPSWhenFileLengthIsZero() throws Exception {
+ try {
+ hdfsCluster = startCluster(config, allDiskTypes, NUM_OF_DATANODES,
+ STORAGES_PER_DATANODE, CAPACITY);
+ hdfsCluster.waitActive();
+ DistributedFileSystem fs = hdfsCluster.getFileSystem();
+ Path filePath = new Path("/zeroSizeFile");
+ DFSTestUtil.createFile(fs, filePath, 0, (short) 1, 0);
+ FSEditLog editlog = hdfsCluster.getNameNode().getNamesystem()
+ .getEditLog();
+ long lastWrittenTxId = editlog.getLastWrittenTxId();
+ fs.satisfyStoragePolicy(filePath);
+ Assert.assertEquals("Xattr should not be added for the file",
+ lastWrittenTxId, editlog.getLastWrittenTxId());
+ INode inode = hdfsCluster.getNameNode().getNamesystem().getFSDirectory()
+ .getINode(filePath.toString());
+ Assert.assertTrue("XAttrFeature should be null for file",
+ inode.getXAttrFeature() == null);
+ } finally {
+ shutdownCluster();
+ }
+ }
+
+ /**
+ * Test SPS for a file with low-redundancy blocks.
+ * 1. Create a cluster with 3 datanodes.
+ * 2. Create one file with 3 replicas.
+ * 3. Set the policy and call satisfyStoragePolicy for the file.
+ * 4. Stop the NameNode and Datanodes.
+ * 5. Start the NameNode with 2 datanodes and wait for block movement.
+ * 6. Start the third datanode.
+ * 7. The third datanode's replica should also be moved to the proper
+ * storage based on the policy.
+ */
+ @Test(timeout = 300000)
+ public void testSPSWhenFileHasLowRedundancyBlocks() throws Exception {
+ try {
+ config.set(DFSConfigKeys
+ .DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY,
+ "3000");
+ config.set(DFSConfigKeys
+ .DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY,
+ "5000");
+ StorageType[][] newtypes = new StorageType[][] {
+ {StorageType.ARCHIVE, StorageType.DISK},
+ {StorageType.ARCHIVE, StorageType.DISK},
+ {StorageType.ARCHIVE, StorageType.DISK}};
+ hdfsCluster = startCluster(config, newtypes, 3, 2, CAPACITY);
+ hdfsCluster.waitActive();
+ DistributedFileSystem fs = hdfsCluster.getFileSystem();
+ Path filePath = new Path("/zeroSizeFile");
+ DFSTestUtil.createFile(fs, filePath, 1024, (short) 3, 0);
+ fs.setStoragePolicy(filePath, "COLD");
+ List list = new ArrayList<>();
+ list.add(hdfsCluster.stopDataNode(0));
+ list.add(hdfsCluster.stopDataNode(0));
+ list.add(hdfsCluster.stopDataNode(0));
+ restartNamenode();
+ hdfsCluster.restartDataNode(list.get(0), false);
+ hdfsCluster.restartDataNode(list.get(1), false);
+ hdfsCluster.waitActive();
+ fs.satisfyStoragePolicy(filePath);
+ DFSTestUtil.waitExpectedStorageType(filePath.toString(),
+ StorageType.ARCHIVE, 2, 30000, hdfsCluster.getFileSystem());
+ hdfsCluster.restartDataNode(list.get(2), false);
+ DFSTestUtil.waitExpectedStorageType(filePath.toString(),
+ StorageType.ARCHIVE, 3, 30000, hdfsCluster.getFileSystem());
+ } finally {
+ shutdownCluster();
+ }
+ }
+
+ /**
+ * Test SPS for a file with excess-redundancy blocks.
+ * 1. Create a cluster with 5 datanodes.
+ * 2. Create one file with 5 replicas.
+ * 3. Set file replication to 3.
+ * 4. Set the policy and call satisfyStoragePolicy for the file.
+ * 5. Blocks should be moved successfully.
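+ * The BlockStorageMovementAttemptedItems log is captured to make sure the
+ * excess replicas are not reported as low-redundancy blocks.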
+ */ + @Test(timeout = 300000) + public void testSPSWhenFileHasExcessRedundancyBlocks() throws Exception { + try { + config.set(DFSConfigKeys + .DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY, + "3000"); + config.set(DFSConfigKeys + .DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY, + "5000"); + StorageType[][] newtypes = new StorageType[][] { + {StorageType.ARCHIVE, StorageType.DISK}, + {StorageType.ARCHIVE, StorageType.DISK}, + {StorageType.ARCHIVE, StorageType.DISK}, + {StorageType.ARCHIVE, StorageType.DISK}, + {StorageType.ARCHIVE, StorageType.DISK}}; + hdfsCluster = startCluster(config, newtypes, 5, 2, CAPACITY); + hdfsCluster.waitActive(); + DistributedFileSystem fs = hdfsCluster.getFileSystem(); + Path filePath = new Path("/zeroSizeFile"); + DFSTestUtil.createFile(fs, filePath, 1024, (short) 5, 0); + fs.setReplication(filePath, (short) 3); + LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs( + LoggerFactory.getLogger(BlockStorageMovementAttemptedItems.class)); + fs.setStoragePolicy(filePath, "COLD"); + fs.satisfyStoragePolicy(filePath); + DFSTestUtil.waitExpectedStorageType(filePath.toString(), + StorageType.ARCHIVE, 3, 60000, hdfsCluster.getFileSystem()); + assertFalse("Log output does not contain expected log message: ", + logs.getOutput().contains("some of the blocks are low redundant")); + } finally { + shutdownCluster(); + } + } + + /** + * Test SPS for empty directory, xAttr should be removed. + */ + @Test(timeout = 300000) + public void testSPSForEmptyDirectory() throws IOException, TimeoutException, + InterruptedException { + try { + hdfsCluster = startCluster(config, allDiskTypes, NUM_OF_DATANODES, + STORAGES_PER_DATANODE, CAPACITY); + hdfsCluster.waitActive(); + DistributedFileSystem fs = hdfsCluster.getFileSystem(); + Path emptyDir = new Path("/emptyDir"); + fs.mkdirs(emptyDir); + fs.satisfyStoragePolicy(emptyDir); + // Make sure satisfy xattr has been removed. + DFSTestUtil.waitForXattrRemoved("/emptyDir", + XATTR_SATISFY_STORAGE_POLICY, hdfsCluster.getNamesystem(), 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Test SPS for not exist directory. + */ + @Test(timeout = 300000) + public void testSPSForNonExistDirectory() throws Exception { + try { + hdfsCluster = startCluster(config, allDiskTypes, NUM_OF_DATANODES, + STORAGES_PER_DATANODE, CAPACITY); + hdfsCluster.waitActive(); + DistributedFileSystem fs = hdfsCluster.getFileSystem(); + Path emptyDir = new Path("/emptyDir"); + try { + fs.satisfyStoragePolicy(emptyDir); + fail("FileNotFoundException should throw"); + } catch (FileNotFoundException e) { + // nothing to do + } + } finally { + shutdownCluster(); + } + } + + /** + * Test SPS for directory tree which doesn't have files. + */ + @Test(timeout = 300000) + public void testSPSWithDirectoryTreeWithoutFile() throws Exception { + try { + hdfsCluster = startCluster(config, allDiskTypes, NUM_OF_DATANODES, + STORAGES_PER_DATANODE, CAPACITY); + hdfsCluster.waitActive(); + // Create directories + /* + * root + * | + * A--------C--------D + * | + * G----H----I + * | + * O + */ + DistributedFileSystem fs = hdfsCluster.getFileSystem(); + fs.mkdirs(new Path("/root/C/H/O")); + fs.mkdirs(new Path("/root/A")); + fs.mkdirs(new Path("/root/D")); + fs.mkdirs(new Path("/root/C/G")); + fs.mkdirs(new Path("/root/C/I")); + fs.satisfyStoragePolicy(new Path("/root")); + // Make sure satisfy xattr has been removed. 
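+ // There are no files under /root, so SPS has nothing to schedule and
+ // should simply clean up the xattr.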
+ DFSTestUtil.waitForXattrRemoved("/root", + XATTR_SATISFY_STORAGE_POLICY, hdfsCluster.getNamesystem(), 30000); + } finally { + shutdownCluster(); + } + } + + /** + * Test SPS for directory which has multilevel directories. + */ + @Test(timeout = 300000) + public void testMultipleLevelDirectoryForSatisfyStoragePolicy() + throws Exception { + try { + StorageType[][] diskTypes = new StorageType[][] { + {StorageType.DISK, StorageType.ARCHIVE}, + {StorageType.ARCHIVE, StorageType.SSD}, + {StorageType.DISK, StorageType.DISK}}; + config.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE); + hdfsCluster = startCluster(config, diskTypes, diskTypes.length, + STORAGES_PER_DATANODE, CAPACITY); + dfs = hdfsCluster.getFileSystem(); + createDirectoryTree(dfs); + + List files = getDFSListOfTree(); + dfs.setStoragePolicy(new Path("/root"), COLD); + dfs.satisfyStoragePolicy(new Path("/root")); + for (String fileName : files) { + // Wait till the block is moved to ARCHIVE + DFSTestUtil.waitExpectedStorageType(fileName, StorageType.ARCHIVE, 2, + 30000, dfs); + } + } finally { + shutdownCluster(); + } + } + + /** + * Test storage move blocks while under replication block tasks exists in the + * system. So, both will share the max transfer streams. + * + * 1. Create cluster with 3 datanode. + * 2. Create 20 files with 2 replica. + * 3. Start 2 more DNs with DISK & SSD types + * 4. SetReplication factor for the 1st 10 files to 4 to trigger replica task + * 5. Set policy to SSD to the 2nd set of files from 11-20 + * 6. Call SPS for 11-20 files to trigger move block tasks to new DNs + * 7. Wait for the under replica and SPS tasks completion + */ + @Test(timeout = 300000) + public void testMoveBlocksWithUnderReplicatedBlocks() throws Exception { + try { + config.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 3); + config.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE); + config.set(DFSConfigKeys + .DFS_STORAGE_POLICY_SATISFIER_RECHECK_TIMEOUT_MILLIS_KEY, + "3000"); + config.set(DFSConfigKeys + .DFS_STORAGE_POLICY_SATISFIER_SELF_RETRY_TIMEOUT_MILLIS_KEY, + "5000"); + + StorageType[][] storagetypes = new StorageType[][] { + {StorageType.ARCHIVE, StorageType.DISK}, + {StorageType.ARCHIVE, StorageType.DISK}}; + + hdfsCluster = startCluster(config, storagetypes, 2, 2, CAPACITY); + hdfsCluster.waitActive(); + dfs = hdfsCluster.getFileSystem(); + + // Below files will be used for pending replication block tasks. 
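+ // Each file is DEFAULT_BLOCK_SIZE * 5 with replication 2, giving enough
+ // blocks for the replication work and the SPS moves to compete for the
+ // configured transfer streams.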
+ for (int i=1; i<=20; i++){ + Path filePath = new Path("/file" + i); + DFSTestUtil.createFile(dfs, filePath, DEFAULT_BLOCK_SIZE * 5, (short) 2, + 0); + } + + StorageType[][] newtypes = + new StorageType[][]{{StorageType.DISK, StorageType.SSD}, + {StorageType.DISK, StorageType.SSD}}; + startAdditionalDNs(config, 2, NUM_OF_DATANODES, newtypes, + STORAGES_PER_DATANODE, CAPACITY, hdfsCluster); + + // increase replication factor to 4 for the first 10 files and thus + // initiate replica tasks + for (int i=1; i<=10; i++){ + Path filePath = new Path("/file" + i); + dfs.setReplication(filePath, (short) 4); + } + + // invoke SPS for 11-20 files + for (int i = 11; i <= 20; i++) { + Path filePath = new Path("/file" + i); + dfs.setStoragePolicy(filePath, "ALL_SSD"); + dfs.satisfyStoragePolicy(filePath); + } + + for (int i = 1; i <= 10; i++) { + Path filePath = new Path("/file" + i); + DFSTestUtil.waitExpectedStorageType(filePath.toString(), + StorageType.DISK, 4, 60000, hdfsCluster.getFileSystem()); + } + for (int i = 11; i <= 20; i++) { + Path filePath = new Path("/file" + i); + DFSTestUtil.waitExpectedStorageType(filePath.toString(), + StorageType.SSD, 2, 30000, hdfsCluster.getFileSystem()); + } + } finally { + shutdownCluster(); + } + } + + private static void createDirectoryTree(DistributedFileSystem dfs) + throws Exception { + // tree structure + /* + * root + * | + * A--------B--------C--------D--------E + * | | + * F----G----H----I J----K----L----M + * | | + * N----O----P Q----R----S + * | | + * T U + */ + // create root Node and child + dfs.mkdirs(new Path("/root")); + DFSTestUtil.createFile(dfs, new Path("/root/A"), 1024, (short) 3, 0); + dfs.mkdirs(new Path("/root/B")); + DFSTestUtil.createFile(dfs, new Path("/root/C"), 1024, (short) 3, 0); + dfs.mkdirs(new Path("/root/D")); + DFSTestUtil.createFile(dfs, new Path("/root/E"), 1024, (short) 3, 0); + + // Create /root/B child + DFSTestUtil.createFile(dfs, new Path("/root/B/F"), 1024, (short) 3, 0); + dfs.mkdirs(new Path("/root/B/G")); + DFSTestUtil.createFile(dfs, new Path("/root/B/H"), 1024, (short) 3, 0); + DFSTestUtil.createFile(dfs, new Path("/root/B/I"), 1024, (short) 3, 0); + + // Create /root/D child + DFSTestUtil.createFile(dfs, new Path("/root/D/J"), 1024, (short) 3, 0); + DFSTestUtil.createFile(dfs, new Path("/root/D/K"), 1024, (short) 3, 0); + dfs.mkdirs(new Path("/root/D/L")); + DFSTestUtil.createFile(dfs, new Path("/root/D/M"), 1024, (short) 3, 0); + + // Create /root/B/G child + DFSTestUtil.createFile(dfs, new Path("/root/B/G/N"), 1024, (short) 3, 0); + DFSTestUtil.createFile(dfs, new Path("/root/B/G/O"), 1024, (short) 3, 0); + dfs.mkdirs(new Path("/root/B/G/P")); + + // Create /root/D/L child + dfs.mkdirs(new Path("/root/D/L/Q")); + DFSTestUtil.createFile(dfs, new Path("/root/D/L/R"), 1024, (short) 3, 0); + DFSTestUtil.createFile(dfs, new Path("/root/D/L/S"), 1024, (short) 3, 0); + + // Create /root/B/G/P child + DFSTestUtil.createFile(dfs, new Path("/root/B/G/P/T"), 1024, (short) 3, 0); + + // Create /root/D/L/Q child + DFSTestUtil.createFile(dfs, new Path("/root/D/L/Q/U"), 1024, (short) 3, 0); + } + + private List getDFSListOfTree() { + List dfsList = new ArrayList<>(); + dfsList.add("/root/A"); + dfsList.add("/root/B/F"); + dfsList.add("/root/B/G/N"); + dfsList.add("/root/B/G/O"); + dfsList.add("/root/B/G/P/T"); + dfsList.add("/root/B/H"); + dfsList.add("/root/B/I"); + dfsList.add("/root/C"); + dfsList.add("/root/D/J"); + dfsList.add("/root/D/K"); + dfsList.add("/root/D/L/Q/U"); + dfsList.add("/root/D/L/R"); + 
dfsList.add("/root/D/L/S"); + dfsList.add("/root/D/M"); + dfsList.add("/root/E"); + return dfsList; + } + + private String createFileAndSimulateFavoredNodes(int favoredNodesCount) + throws IOException { + ArrayList dns = hdfsCluster.getDataNodes(); + final String file1 = "/testMoveWithBlockPinning"; + // replication factor 3 + InetSocketAddress[] favoredNodes = new InetSocketAddress[favoredNodesCount]; + for (int i = 0; i < favoredNodesCount; i++) { + favoredNodes[i] = dns.get(i).getXferAddress(); + } + DFSTestUtil.createFile(dfs, new Path(file1), false, 1024, 100, + DEFAULT_BLOCK_SIZE, (short) 3, 0, false, favoredNodes); + + LocatedBlocks locatedBlocks = dfs.getClient().getLocatedBlocks(file1, 0); + Assert.assertEquals("Wrong block count", 1, + locatedBlocks.locatedBlockCount()); + + // verify storage type before movement + LocatedBlock lb = locatedBlocks.get(0); + StorageType[] storageTypes = lb.getStorageTypes(); + for (StorageType storageType : storageTypes) { + Assert.assertTrue(StorageType.DISK == storageType); + } + + // Mock FsDatasetSpi#getPinning to show that the block is pinned. + DatanodeInfo[] locations = lb.getLocations(); + Assert.assertEquals(3, locations.length); + Assert.assertTrue(favoredNodesCount < locations.length); + for(DatanodeInfo dnInfo: locations){ + LOG.info("Simulate block pinning in datanode {}", + locations[favoredNodesCount]); + DataNode dn = hdfsCluster.getDataNode(dnInfo.getIpcPort()); + InternalDataNodeTestUtils.mockDatanodeBlkPinning(dn, true); + favoredNodesCount--; + if (favoredNodesCount <= 0) { + break; // marked favoredNodesCount number of pinned block location + } + } + return file1; + } + + public void waitForAttemptedItems(long expectedBlkMovAttemptedCount, + int timeout) throws TimeoutException, InterruptedException { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + LOG.info("expectedAttemptedItemsCount={} actualAttemptedItemsCount={}", + expectedBlkMovAttemptedCount, + ((BlockStorageMovementAttemptedItems) (externalSps + .getAttemptedItemsMonitor())).getAttemptedItemsCount()); + return ((BlockStorageMovementAttemptedItems) (externalSps + .getAttemptedItemsMonitor())) + .getAttemptedItemsCount() == expectedBlkMovAttemptedCount; + } + }, 100, timeout); + } + + public void waitForBlocksMovementAttemptReport( + long expectedMovementFinishedBlocksCount, int timeout) + throws TimeoutException, InterruptedException { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + int actualCount = externalSps.getAttemptedItemsMonitor() + .getAttemptedItemsCount(); + LOG.info("MovementFinishedBlocks: expectedCount={} actualCount={}", + expectedMovementFinishedBlocksCount, actualCount); + return actualCount + >= expectedMovementFinishedBlocksCount; + } + }, 100, timeout); + } + + public void writeContent(final String fileName) throws IOException { + writeContent(fileName, (short) 3); + } + + private void writeContent(final String fileName, short replicatonFactor) + throws IOException { + // write to DISK + final FSDataOutputStream out = dfs.create(new Path(fileName), + replicatonFactor); + for (int i = 0; i < 1024; i++) { + out.write(i); + } + out.close(); + } + + private void startAdditionalDNs(final Configuration conf, + int newNodesRequired, int existingNodesNum, StorageType[][] newTypes, + int storagesPerDn, long nodeCapacity, final MiniDFSCluster cluster) + throws IOException { + long[][] capacities; + existingNodesNum += newNodesRequired; + capacities = new 
long[newNodesRequired][storagesPerDn]; + for (int i = 0; i < newNodesRequired; i++) { + for (int j = 0; j < storagesPerDn; j++) { + capacities[i][j] = nodeCapacity; + } + } + + cluster.startDataNodes(conf, newNodesRequired, newTypes, true, null, null, + null, capacities, null, false, false, false, null); + cluster.triggerHeartbeats(); + } + + /** + * Implementation of listener callback, where it collects all the sps move + * attempted blocks for assertion. + */ + public static final class ExternalBlockMovementListener + implements BlockMovementListener { + + private List actualBlockMovements = new ArrayList<>(); + + @Override + public void notifyMovementTriedBlocks(Block[] moveAttemptFinishedBlks) { + for (Block block : moveAttemptFinishedBlks) { + actualBlockMovements.add(block); + } + LOG.info("Movement attempted blocks:{}", actualBlockMovements); + } + + public List getActualBlockMovements() { + return actualBlockMovements; + } + + public void clear() { + actualBlockMovements.clear(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java index ac29c3c33f7..b2da6a2fca3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java @@ -37,8 +37,8 @@ import net.jcip.annotations.NotThreadSafe; import org.apache.commons.collections.map.LinkedMap; import org.apache.commons.lang3.mutable.MutableBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; @@ -93,7 +93,8 @@ @NotThreadSafe public class TestShortCircuitCache { - static final Log LOG = LogFactory.getLog(TestShortCircuitCache.class); + static final Logger LOG = + LoggerFactory.getLogger(TestShortCircuitCache.class); private static class TestFileDescriptorPair { final TemporarySocketDirectory dir = new TemporarySocketDirectory(); @@ -126,7 +127,7 @@ public TestFileDescriptorPair() throws IOException { } public void close() throws IOException { - IOUtils.cleanup(LOG, fis); + IOUtils.cleanupWithLogger(LOG, fis); dir.close(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java index 12452473d0f..015c9a45c99 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java @@ -27,9 +27,9 @@ import com.google.common.collect.Lists; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.text.StrBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.commons.text.TextStringBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.ReconfigurationUtil; import org.apache.hadoop.fs.ChecksumException; @@ -99,7 +99,7 @@ * set/clrSpaceQuote are tested in {@link org.apache.hadoop.hdfs.TestQuota}. 
*/ public class TestDFSAdmin { - private static final Log LOG = LogFactory.getLog(TestDFSAdmin.class); + private static final Logger LOG = LoggerFactory.getLogger(TestDFSAdmin.class); private Configuration conf = null; private MiniDFSCluster cluster; private DFSAdmin admin; @@ -394,7 +394,7 @@ public void testNameNodeGetReconfigurableProperties() throws IOException { final List outs = Lists.newArrayList(); final List errs = Lists.newArrayList(); getReconfigurableProperties("namenode", address, outs, errs); - assertEquals(6, outs.size()); + assertEquals(7, outs.size()); assertEquals(DFS_HEARTBEAT_INTERVAL_KEY, outs.get(1)); assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, outs.get(2)); assertEquals(errs.size(), 0); @@ -518,7 +518,7 @@ public void testNameNodeGetReconfigurationStatus() throws IOException, } private static String scanIntoString(final ByteArrayOutputStream baos) { - final StrBuilder sb = new StrBuilder(); + final TextStringBuilder sb = new TextStringBuilder(); final Scanner scanner = new Scanner(baos.toString()); while (scanner.hasNextLine()) { sb.appendln(scanner.nextLine()); @@ -579,7 +579,7 @@ public void testReportCommand() throws Exception { // Verify report command for all counts to be zero resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client, 0L, 0L); final short replFactor = 1; final long fileLength = 512L; @@ -614,7 +614,7 @@ public void testReportCommand() throws Exception { // Verify report command for all counts to be zero resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client, 0L, 0L); // Choose a DataNode to shutdown final List datanodes = miniCluster.getDataNodes(); @@ -636,7 +636,7 @@ public void testReportCommand() throws Exception { // Verify report command to show dead DataNode assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, 0, client, 0L, 1L); // Corrupt the replicated block final int blockFilesCorrupted = miniCluster @@ -664,7 +664,7 @@ public void testReportCommand() throws Exception { // verify report command for corrupt replicated block resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 0, client, 0L, 1L); lbs = miniCluster.getFileSystem().getClient(). 
getNamenode().getBlockLocations( @@ -689,7 +689,7 @@ public void testReportCommand() throws Exception { // and EC block group resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 1, client); + verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 1, client, 0L, 0L); } } @@ -834,7 +834,10 @@ private void verifyNodesAndCorruptBlocks( final int numLiveDn, final int numCorruptBlocks, final int numCorruptECBlockGroups, - final DFSClient client) throws IOException { + final DFSClient client, + final Long highestPriorityLowRedundancyReplicatedBlocks, + final Long highestPriorityLowRedundancyECBlocks) + throws IOException { /* init vars */ final String outStr = scanIntoString(out); @@ -847,12 +850,23 @@ private void verifyNodesAndCorruptBlocks( final String expectedCorruptedECBlockGroupsStr = String.format( "Block groups with corrupt internal blocks: %d", numCorruptECBlockGroups); + final String highestPriorityLowRedundancyReplicatedBlocksStr + = String.format( + "\tLow redundancy blocks with highest priority " + + "to recover: %d", + highestPriorityLowRedundancyReplicatedBlocks); + final String highestPriorityLowRedundancyECBlocksStr = String.format( + "\tLow redundancy blocks with highest priority " + + "to recover: %d", + highestPriorityLowRedundancyReplicatedBlocks); // verify nodes and corrupt blocks assertThat(outStr, is(allOf( containsString(expectedLiveNodesStr), containsString(expectedCorruptedBlocksStr), - containsString(expectedCorruptedECBlockGroupsStr)))); + containsString(expectedCorruptedECBlockGroupsStr), + containsString(highestPriorityLowRedundancyReplicatedBlocksStr), + containsString(highestPriorityLowRedundancyECBlocksStr)))); assertEquals( numDn, @@ -867,8 +881,12 @@ private void verifyNodesAndCorruptBlocks( client.getCorruptBlocksCount()); assertEquals(numCorruptBlocks, client.getNamenode() .getReplicatedBlockStats().getCorruptBlocks()); + assertEquals(highestPriorityLowRedundancyReplicatedBlocks, client.getNamenode() + .getReplicatedBlockStats().getHighestPriorityLowRedundancyBlocks()); assertEquals(numCorruptECBlockGroups, client.getNamenode() .getECBlockGroupStats().getCorruptBlockGroups()); + assertEquals(highestPriorityLowRedundancyECBlocks, client.getNamenode() + .getECBlockGroupStats().getHighestPriorityLowRedundancyBlocks()); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java index f1f5793438f..528ac4b5dfa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java @@ -27,8 +27,8 @@ import java.io.IOException; import java.io.PrintStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; @@ -52,7 +52,8 @@ import com.google.common.base.Joiner; public class TestDFSHAAdmin { - private static final Log LOG = LogFactory.getLog(TestDFSHAAdmin.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDFSHAAdmin.class); private DFSHAAdmin tool; private final ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream(); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java index a21a31d9e0f..74b5af16d88 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java @@ -26,8 +26,8 @@ import java.io.IOException; import java.io.PrintStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAAdmin; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; @@ -38,7 +38,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Shell; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -52,10 +52,11 @@ */ public class TestDFSHAAdminMiniCluster { static { - GenericTestUtils.setLogLevel(LogFactory.getLog(HAAdmin.class), - Level.ALL); + GenericTestUtils.setLogLevel(LoggerFactory.getLogger(HAAdmin.class), + Level.TRACE); } - private static final Log LOG = LogFactory.getLog(TestDFSHAAdminMiniCluster.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDFSHAAdminMiniCluster.class); private MiniDFSCluster cluster; private Configuration conf; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestStoragePolicyCommands.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestStoragePolicyCommands.java index f31c7399503..ad776844599 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestStoragePolicyCommands.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestStoragePolicyCommands.java @@ -23,10 +23,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; import org.junit.After; import org.junit.Before; @@ -46,7 +49,12 @@ @Before public void clusterSetUp() throws IOException, URISyntaxException { conf = new HdfsConfiguration(); - cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPL).build(); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + StorageType[][] newtypes = new StorageType[][] { + {StorageType.ARCHIVE, StorageType.DISK}}; + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPL) + .storageTypes(newtypes).build(); cluster.waitActive(); fs = cluster.getFileSystem(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestStoragePolicySatisfyAdminCommands.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestStoragePolicySatisfyAdminCommands.java new file mode 100644 index 00000000000..61fccfac544 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestStoragePolicySatisfyAdminCommands.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools; + +import java.io.IOException; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.namenode.sps.Context; +import org.apache.hadoop.hdfs.server.namenode.sps.StoragePolicySatisfier; +import org.apache.hadoop.hdfs.server.sps.ExternalSPSContext; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test StoragePolicySatisfy admin commands. + */ +public class TestStoragePolicySatisfyAdminCommands { + private static final short REPL = 1; + private static final int SIZE = 128; + + private Configuration conf = null; + private MiniDFSCluster cluster = null; + private DistributedFileSystem dfs = null; + private StoragePolicySatisfier externalSps = null; + + @Before + public void clusterSetUp() throws IOException, URISyntaxException { + conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_STORAGE_POLICY_SATISFIER_MODE_KEY, + StoragePolicySatisfierMode.EXTERNAL.toString()); + // Reduced refresh cycle to update latest datanodes. 
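+ // (The satisfier runs as an external service in this test, so it relies on
+ // this cache refresh to pick up newly reported datanodes.)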
+ conf.setLong(DFSConfigKeys.DFS_SPS_DATANODE_CACHE_REFRESH_INTERVAL_MS, + 1000); + StorageType[][] newtypes = new StorageType[][] { + {StorageType.ARCHIVE, StorageType.DISK}}; + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPL) + .storageTypes(newtypes).build(); + cluster.waitActive(); + dfs = cluster.getFileSystem(); + NameNodeConnector nnc = DFSTestUtil.getNameNodeConnector(conf, + HdfsServerConstants.MOVER_ID_PATH, 1, false); + + StoragePolicySatisfier externalSps = new StoragePolicySatisfier(conf); + Context externalCtxt = new ExternalSPSContext(externalSps, nnc); + + externalSps.init(externalCtxt); + externalSps.start(StoragePolicySatisfierMode.EXTERNAL); + } + + @After + public void clusterShutdown() throws IOException{ + if(dfs != null) { + dfs.close(); + dfs = null; + } + if(cluster != null) { + cluster.shutdown(); + cluster = null; + } + if (externalSps != null) { + externalSps.stopGracefully(); + } + } + + @Test(timeout = 30000) + public void testStoragePolicySatisfierCommand() throws Exception { + final String file = "/testStoragePolicySatisfierCommand"; + DFSTestUtil.createFile(dfs, new Path(file), SIZE, REPL, 0); + + final StoragePolicyAdmin admin = new StoragePolicyAdmin(conf); + DFSTestUtil.toolRun(admin, "-getStoragePolicy -path " + file, 0, + "The storage policy of " + file + " is unspecified"); + + DFSTestUtil.toolRun(admin, + "-setStoragePolicy -path " + file + " -policy COLD", 0, + "Set storage policy COLD on " + file.toString()); + + DFSTestUtil.toolRun(admin, "-satisfyStoragePolicy -path " + file, 0, + "Scheduled blocks to move based on the current storage policy on " + + file.toString()); + + DFSTestUtil.waitExpectedStorageType(file, StorageType.ARCHIVE, 1, 30000, + dfs); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java index 9e1fa79a52f..80155691b05 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java @@ -31,8 +31,8 @@ import java.util.Map; import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes; import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion; @@ -50,8 +50,8 @@ import com.google.common.collect.ImmutableSet; public class TestOfflineEditsViewer { - private static final Log LOG = LogFactory - .getLog(TestOfflineEditsViewer.class); + private static final Logger LOG = LoggerFactory + .getLogger(TestOfflineEditsViewer.class); private static final String buildDir = PathUtils .getTestDirName(TestOfflineEditsViewer.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java index c84237cb836..16b35dc6a28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java @@ -27,6 +27,8 @@ import static org.apache.hadoop.fs.permission.FsAction.ALL; import static org.apache.hadoop.fs.permission.FsAction.EXECUTE; import static org.apache.hadoop.fs.permission.FsAction.READ_EXECUTE; + +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState; import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.aclEntry; @@ -77,8 +79,8 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.io.output.NullOutputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataOutputStream; @@ -106,7 +108,7 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.LambdaTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -124,7 +126,8 @@ import com.google.common.collect.Maps; public class TestOfflineImageViewer { - private static final Log LOG = LogFactory.getLog(OfflineImageViewerPB.class); + private static final Logger LOG = + LoggerFactory.getLogger(OfflineImageViewerPB.class); private static final int NUM_DIRS = 3; private static final int FILES_PER_DIR = 4; private static final String TEST_RENEWER = "JobTracker"; @@ -208,6 +211,21 @@ public static void createOriginalFSImage() throws IOException { writtenFiles.put(entityRefXMLDir.toString(), hdfs.getFileStatus(entityRefXMLDir)); + //Create directories with new line characters + Path newLFDir = new Path("/dirContainingNewLineChar" + + StringUtils.LF + "here"); + hdfs.mkdirs(newLFDir); + dirCount++; + writtenFiles.put("\"/dirContainingNewLineChar%x0Ahere\"", + hdfs.getFileStatus(newLFDir)); + + Path newCRLFDir = new Path("/dirContainingNewLineChar" + + PBImageDelimitedTextWriter.CRLF + "here"); + hdfs.mkdirs(newCRLFDir); + dirCount++; + writtenFiles.put("\"/dirContainingNewLineChar%x0D%x0Ahere\"", + hdfs.getFileStatus(newCRLFDir)); + //Create a directory with sticky bits Path stickyBitDir = new Path("/stickyBit"); hdfs.mkdirs(stickyBitDir); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForAcl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForAcl.java index c66c2deb575..5ecec2db3ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForAcl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForAcl.java @@ -37,8 +37,8 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -79,8 +79,8 @@ */ public class TestOfflineImageViewerForAcl { - private static final 
Log LOG = - LogFactory.getLog(TestOfflineImageViewerForAcl.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestOfflineImageViewerForAcl.class); private static File originalFsimage = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForContentSummary.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForContentSummary.java index b758c26726e..360ed56e1a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForContentSummary.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForContentSummary.java @@ -23,8 +23,8 @@ import java.net.HttpURLConnection; import java.net.URI; import java.net.URL; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FSDataOutputStream; @@ -45,8 +45,8 @@ */ public class TestOfflineImageViewerForContentSummary { - private static final Log LOG = LogFactory - .getLog(TestOfflineImageViewerForContentSummary.class); + private static final Logger LOG = LoggerFactory + .getLogger(TestOfflineImageViewerForContentSummary.class); private static File originalFsimage = null; private static ContentSummary summaryFromDFS = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForXAttr.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForXAttr.java index 6c82101c665..74069b0e48c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForXAttr.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerForXAttr.java @@ -30,8 +30,8 @@ import java.util.Map; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -53,8 +53,8 @@ */ public class TestOfflineImageViewerForXAttr { - private static final Log LOG = LogFactory - .getLog(TestOfflineImageViewerForXAttr.class); + private static final Logger LOG = LoggerFactory + .getLogger(TestOfflineImageViewerForXAttr.class); private static File originalFsimage = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightHashSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightHashSet.java index 50af25582a7..6507bf32c91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightHashSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightHashSet.java @@ -30,16 +30,16 @@ import java.util.List; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.util.Time; 
import org.junit.Before; import org.junit.Test; public class TestLightWeightHashSet{ - private static final Log LOG = LogFactory - .getLog("org.apache.hadoop.hdfs.TestLightWeightHashSet"); + private static final Logger LOG = LoggerFactory + .getLogger("org.apache.hadoop.hdfs.TestLightWeightHashSet"); private final ArrayList list = new ArrayList(); private final int NUM = 100; private LightWeightHashSet set; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightLinkedSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightLinkedSet.java index f923920946f..6c55f28637a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightLinkedSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestLightWeightLinkedSet.java @@ -27,16 +27,16 @@ import java.util.List; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.util.Time; import org.junit.Before; import org.junit.Test; public class TestLightWeightLinkedSet { - private static final Log LOG = LogFactory - .getLog("org.apache.hadoop.hdfs.TestLightWeightLinkedSet"); + private static final Logger LOG = LoggerFactory + .getLogger("org.apache.hadoop.hdfs.TestLightWeightLinkedSet"); private final ArrayList list = new ArrayList(); private final int NUM = 100; private LightWeightLinkedSet set; @@ -432,4 +432,4 @@ public void testResetBookmarkPlacesBookmarkAtHead() { it = set.getBookmark(); assertEquals(it.next(), list.get(0)); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java index dd2174e555f..e88937a6297 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java @@ -44,7 +44,7 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -52,7 +52,7 @@ public class TestFSMainOperationsWebHdfs extends FSMainOperationsBaseTest { { - GenericTestUtils.setLogLevel(ExceptionHandler.LOG, Level.ALL); + GenericTestUtils.setLogLevel(ExceptionHandler.LOG, Level.TRACE); } private static MiniDFSCluster cluster = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index cbc428a4692..5d332203ce2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -51,8 +51,8 @@ import com.google.common.collect.ImmutableList; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStoragePolicySpi; @@ -111,7 +111,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.test.Whitebox; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.codehaus.jettison.json.JSONArray; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -130,7 +130,7 @@ /** Test WebHDFS */ public class TestWebHDFS { - static final Log LOG = LogFactory.getLog(TestWebHDFS.class); + static final Logger LOG = LoggerFactory.getLogger(TestWebHDFS.class); static final Random RANDOM = new Random(); @@ -296,7 +296,7 @@ static void verifyPread(FileSystem fs, Path p, long offset, long length, /** Test client retry with namenode restarting. */ @Test(timeout=300000) public void testNamenodeRestart() throws Exception { - GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.ALL); + GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.TRACE); final Configuration conf = WebHdfsTestUtil.createConf(); TestDFSClientRetries.namenodeRestartTest(conf, true); } @@ -482,6 +482,9 @@ public void testWebHdfsAllowandDisallowSnapshots() throws Exception { // allow snapshots on /bar using webhdfs webHdfs.allowSnapshot(bar); + // check if snapshot status is enabled + assertTrue(dfs.getFileStatus(bar).isSnapshotEnabled()); + assertTrue(webHdfs.getFileStatus(bar).isSnapshotEnabled()); webHdfs.createSnapshot(bar, "s1"); final Path s1path = SnapshotTestHelper.getSnapshotRoot(bar, "s1"); Assert.assertTrue(webHdfs.exists(s1path)); @@ -491,15 +494,24 @@ public void testWebHdfsAllowandDisallowSnapshots() throws Exception { assertEquals(bar, snapshottableDirs[0].getFullPath()); dfs.deleteSnapshot(bar, "s1"); dfs.disallowSnapshot(bar); + // check if snapshot status is disabled + assertFalse(dfs.getFileStatus(bar).isSnapshotEnabled()); + assertFalse(webHdfs.getFileStatus(bar).isSnapshotEnabled()); snapshottableDirs = dfs.getSnapshottableDirListing(); assertNull(snapshottableDirs); // disallow snapshots on /bar using webhdfs dfs.allowSnapshot(bar); + // check if snapshot status is enabled, again + assertTrue(dfs.getFileStatus(bar).isSnapshotEnabled()); + assertTrue(webHdfs.getFileStatus(bar).isSnapshotEnabled()); snapshottableDirs = dfs.getSnapshottableDirListing(); assertEquals(1, snapshottableDirs.length); assertEquals(bar, snapshottableDirs[0].getFullPath()); webHdfs.disallowSnapshot(bar); + // check if snapshot status is disabled, again + assertFalse(dfs.getFileStatus(bar).isSnapshotEnabled()); + assertFalse(webHdfs.getFileStatus(bar).isSnapshotEnabled()); snapshottableDirs = dfs.getSnapshottableDirListing(); assertNull(snapshottableDirs); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java index 47438217ada..7b445153884 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java @@ -36,8 +36,8 @@ import java.util.Collection; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; @@ -64,7 +64,8 @@ @RunWith(Parameterized.class) public class TestWebHdfsTimeouts { - private static final Log LOG = LogFactory.getLog(TestWebHdfsTimeouts.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestWebHdfsTimeouts.class); private static final int CLIENTS_TO_CONSUME_BACKLOG = 129; private static final int CONNECTION_BACKLOG = 1; @@ -126,8 +127,9 @@ public void setUp() throws Exception { @After public void tearDown() throws Exception { - IOUtils.cleanup(LOG, clients.toArray(new SocketChannel[clients.size()])); - IOUtils.cleanup(LOG, fs); + IOUtils.cleanupWithLogger( + LOG, clients.toArray(new SocketChannel[clients.size()])); + IOUtils.cleanupWithLogger(LOG, fs); if (serverSocket != null) { try { serverSocket.close(); @@ -247,7 +249,7 @@ public void testTwoStepWriteConnectTimeout() throws Exception { GenericTestUtils.assertExceptionContains( fs.getUri().getAuthority() + ": connect timed out", e); } finally { - IOUtils.cleanup(LOG, os); + IOUtils.cleanupWithLogger(LOG, os); } } @@ -267,7 +269,7 @@ public void testTwoStepWriteReadTimeout() throws Exception { } catch (SocketTimeoutException e) { GenericTestUtils.assertExceptionContains("Read timed out", e); } finally { - IOUtils.cleanup(LOG, os); + IOUtils.cleanupWithLogger(LOG, os); } } @@ -331,7 +333,7 @@ public void run() { fail("unexpected IOException in server thread: " + e); } finally { // Clean it all up. - IOUtils.cleanup(LOG, br, isr, in, out); + IOUtils.cleanupWithLogger(LOG, br, isr, in, out); IOUtils.closeSocket(clientSocket); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java index 7bb6db99306..570123d8aa4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java @@ -20,7 +20,7 @@ import java.net.InetSocketAddress; import java.net.URI; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -32,7 +32,7 @@ import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.log4j.Level; +import org.slf4j.event.Level; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -42,13 +42,13 @@ * Test WebHDFS with multiple NameNodes */ public class TestWebHdfsWithMultipleNameNodes { - static final Log LOG = WebHdfsTestUtil.LOG; + static final Logger LOG = WebHdfsTestUtil.LOG; static private void setLogLevel() { - GenericTestUtils.setLogLevel(LOG, Level.ALL); - GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.TRACE); + GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.TRACE); - DFSTestUtil.setNameNodeLogLevel(Level.ALL); + DFSTestUtil.setNameNodeLogLevel(org.apache.log4j.Level.TRACE); } private static final Configuration conf = new HdfsConfiguration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/WebHdfsTestUtil.java 
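The tearDown changes above move from the deprecated IOUtils.cleanup(Log, ...) to IOUtils.cleanupWithLogger, which takes an SLF4J Logger instead of a commons-logging Log. A sketch of the helper pattern, with an invented class name:

    import java.io.Closeable;
    import org.apache.hadoop.io.IOUtils;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class CleanupExample {
      private static final Logger LOG = LoggerFactory.getLogger(CleanupExample.class);

      static void closeQuietly(Closeable... resources) {
        // Closes each non-null resource, logging rather than propagating any
        // IOException raised on close.
        IOUtils.cleanupWithLogger(LOG, resources);
      }
    }
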
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/WebHdfsTestUtil.java index 58de14ba917..3dae82a345f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/WebHdfsTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/WebHdfsTestUtil.java @@ -25,8 +25,8 @@ import java.security.PrivilegedExceptionAction; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -37,7 +37,8 @@ import org.junit.Assert; public class WebHdfsTestUtil { - public static final Log LOG = LogFactory.getLog(WebHdfsTestUtil.class); + public static final Logger LOG = + LoggerFactory.getLogger(WebHdfsTestUtil.class); public static Configuration createConf() { final Configuration conf = new Configuration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java index 6c145a4b6c0..9851ede7afe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java @@ -25,8 +25,8 @@ import java.util.EnumSet; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.Options; @@ -42,7 +42,7 @@ import org.junit.Test; public class TestParam { - public static final Log LOG = LogFactory.getLog(TestParam.class); + public static final Logger LOG = LoggerFactory.getLogger(TestParam.class); final Configuration conf = new Configuration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermission.java index 388e7f23a69..188476f0247 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermission.java @@ -29,8 +29,8 @@ import java.io.IOException; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -48,7 +48,8 @@ /** Unit tests for permission */ public class TestPermission { - public static final Log LOG = LogFactory.getLog(TestPermission.class); + public static final Logger LOG = + LoggerFactory.getLogger(TestPermission.class); final private static Path ROOT_PATH = new Path("/data"); final private static Path CHILD_DIR1 = new Path(ROOT_PATH, "child1"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermissionSymlinks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermissionSymlinks.java index 7bd29d21ddf..ad5b86c837e 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermissionSymlinks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestPermissionSymlinks.java @@ -30,8 +30,8 @@ import java.security.PrivilegedExceptionAction; import java.util.Arrays; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileStatus; @@ -54,7 +54,8 @@ public class TestPermissionSymlinks { - private static final Log LOG = LogFactory.getLog(TestPermissionSymlinks.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestPermissionSymlinks.class); private static final Configuration conf = new HdfsConfiguration(); // Non-super user to run commands with private static final UserGroupInformation user = UserGroupInformation diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestRefreshUserMappings.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestRefreshUserMappings.java index f511eb18579..2d7410a405c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestRefreshUserMappings.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/security/TestRefreshUserMappings.java @@ -34,7 +34,6 @@ import java.net.URL; import java.net.URLDecoder; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -45,12 +44,18 @@ import org.apache.hadoop.security.authorize.AuthorizationException; import org.apache.hadoop.security.authorize.DefaultImpersonationProvider; import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.test.GenericTestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.event.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; public class TestRefreshUserMappings { + private static final Logger LOG = LoggerFactory.getLogger( + TestRefreshUserMappings.class); private MiniDFSCluster cluster; Configuration config; private static final long groupRefreshTimeoutSec = 1; @@ -93,6 +98,8 @@ public void setUp() throws Exception { FileSystem.setDefaultUri(config, "hdfs://localhost:" + "0"); cluster = new MiniDFSCluster.Builder(config).build(); cluster.waitActive(); + + GenericTestUtils.setLogLevel(Groups.LOG, Level.DEBUG); } @After @@ -114,40 +121,43 @@ public void testGroupMappingRefresh() throws Exception { String [] args = new String[]{"-refreshUserToGroupsMappings"}; Groups groups = Groups.getUserToGroupsMappingService(config); String user = UserGroupInformation.getCurrentUser().getUserName(); - System.out.println("first attempt:"); + + LOG.debug("First attempt:"); List g1 = groups.getGroups(user); - String [] str_groups = new String [g1.size()]; - g1.toArray(str_groups); - System.out.println(Arrays.toString(str_groups)); - - System.out.println("second attempt, should be same:"); + LOG.debug(g1.toString()); + + LOG.debug("Second attempt, should be the same:"); List g2 = groups.getGroups(user); - g2.toArray(str_groups); - System.out.println(Arrays.toString(str_groups)); + LOG.debug(g2.toString()); for(int i=0; i g3 = groups.getGroups(user); - g3.toArray(str_groups); - System.out.println(Arrays.toString(str_groups)); + LOG.debug(g3.toString()); for(int i=0; i g4 = groups.getGroups(user); - 
g4.toArray(str_groups); - System.out.println(Arrays.toString(str_groups)); - for(int i=0; i { + List g4; + try { + g4 = groups.getGroups(user); + } catch (IOException e) { + return false; + } + LOG.debug(g4.toString()); + // if g4 is the same as g3, wait and retry + return !g3.equals(g4); + }, 50, Math.toIntExact(groupRefreshTimeoutSec * 1000 * 30)); } - + @Test public void testRefreshSuperUserGroupsConfiguration() throws Exception { final String SUPER_USER = "super_user"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml index 2f7a6a7fdeb..34f51764399 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testErasureCodingConf.xml @@ -154,7 +154,7 @@ SubstringComparator - Add a list of erasure coding policies + Add a list of user defined erasure coding policies SubstringComparator @@ -163,6 +163,26 @@ + + help: removePolicy command + + -fs NAMENODE -help removePolicy + + + + + + + SubstringComparator + Remove an user defined erasure coding policy + + + SubstringComparator + [-removePolicy -policy <policy>] + + + + help: enablePolicy command @@ -339,6 +359,30 @@ + + unsetPolicy : unset on non EC directory + + -fs NAMENODE -mkdir /ecdir + -fs NAMENODE -mkdir /ecdir/child + -fs NAMENODE -unsetPolicy -path /ecdir/child + + + -fs NAMENODE -rm /ecdir/child/ecfile + -fs NAMENODE -rmdir /ecdir/child + -fs NAMENODE -rmdir /ecdir + + + + SubstringComparator + NoECPolicySetException: No erasure coding policy explicitly set on /ecdir/child + + + SubstringComparator + Use '-setPolicy -path <PATH> -replicate' to enforce default replication policy irrespective of EC policy defined on parent. 
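The TestRefreshUserMappings hunk above replaces a fixed sleep-and-compare loop with GenericTestUtils.waitFor, which polls until the refreshed group list differs from the previous one or the timeout expires. A self-contained sketch of that polling idiom; the counter-based condition is invented:

    import java.util.concurrent.atomic.AtomicInteger;
    import org.apache.hadoop.test.GenericTestUtils;

    public class WaitForExample {
      public static void main(String[] args) throws Exception {
        AtomicInteger attempts = new AtomicInteger();
        // Re-evaluates the condition every 50 ms and throws a TimeoutException
        // if it is still false after 5 seconds.
        GenericTestUtils.waitFor(() -> attempts.incrementAndGet() >= 3, 50, 5000);
      }
    }
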
+ + + + unsetPolicy : unset policy on non-empty directory @@ -690,7 +734,7 @@ SubstringComparator - Set default erasure coding policy on /ecdir + Set RS-6-3-1024k erasure coding policy on /ecdir @@ -708,11 +752,11 @@ SubstringComparator - Set default erasure coding policy on /ecdir + Set RS-6-3-1024k erasure coding policy on /ecdir SubstringComparator - Warning: setting erasure coding policy on a non-empty directory will not automatically convert existing files to default erasure coding policy + Warning: setting erasure coding policy on a non-empty directory will not automatically convert existing files to RS-6-3-1024k erasure coding policy diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml index a13c4410520..4ab093bd15d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml @@ -6183,11 +6183,11 @@ RegexpComparator - mkdir: `dir0/dir1': No such file or directory + .*mkdir:.*dir0': No such file or directory$ - + mkdir: Test recreate of existing directory fails diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java index fa653833c55..65736877b51 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java @@ -100,8 +100,6 @@ private AppContext context; private Thread speculationBackgroundThread = null; private volatile boolean stopped = false; - private BlockingQueue eventQueue - = new LinkedBlockingQueue(); private TaskRuntimeEstimator estimator; private BlockingQueue scanControl = new LinkedBlockingQueue(); @@ -247,7 +245,7 @@ public void handleAttempt(TaskAttemptStatus status) { // This section is not part of the Speculator interface; it's used only for // testing public boolean eventQueueEmpty() { - return eventQueue.isEmpty(); + return scanControl.isEmpty(); } // This interface is intended to be used only for test cases. 
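The testHDFSConf.xml expectation above is loosened from an exact string to a regular expression so the comparison tolerates differences in how the missing parent path is reported. A quick, purely illustrative check of the new pattern against one plausible shell message:

    import java.util.regex.Pattern;

    public class MkdirRegexpExample {
      public static void main(String[] args) {
        String pattern = ".*mkdir:.*dir0': No such file or directory$";
        String sample = "mkdir: `dir0': No such file or directory";
        // Prints true: the leading .* and the unanchored path segment make the
        // expectation independent of the exact prefix in the error output.
        System.out.println(Pattern.matches(pattern, sample));
      }
    }
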
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java index f477d314c76..fe3ace8beff 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java @@ -65,6 +65,7 @@ import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskAttemptState; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskCounterInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobsInfo; +import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.ReduceTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptsInfo; @@ -396,9 +397,9 @@ public TaskAttemptsInfo getJobTaskAttempts(@Context HttpServletRequest hsr, for (TaskAttempt ta : task.getAttempts().values()) { if (ta != null) { if (task.getType() == TaskType.REDUCE) { - attempts.add(new ReduceTaskAttemptInfo(ta, task.getType())); + attempts.add(new ReduceTaskAttemptInfo(ta)); } else { - attempts.add(new TaskAttemptInfo(ta, task.getType(), true)); + attempts.add(new MapTaskAttemptInfo(ta, true)); } } } @@ -419,9 +420,9 @@ public TaskAttemptInfo getJobTaskAttemptId(@Context HttpServletRequest hsr, Task task = getTaskFromTaskIdString(tid, job); TaskAttempt ta = getTaskAttemptFromTaskAttemptString(attId, task); if (task.getType() == TaskType.REDUCE) { - return new ReduceTaskAttemptInfo(ta, task.getType()); + return new ReduceTaskAttemptInfo(ta); } else { - return new TaskAttemptInfo(ta, task.getType(), true); + return new MapTaskAttemptInfo(ta, true); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JAXBContextResolver.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JAXBContextResolver.java index 88c7d861104..625eb4ef32e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JAXBContextResolver.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JAXBContextResolver.java @@ -42,8 +42,8 @@ import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobsInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskAttemptCounterInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskCounterInfo; +import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.ReduceTaskAttemptInfo; -import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptsInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskCounterGroupInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskCounterInfo; @@ -62,9 +62,8 @@ AppInfo.class, CounterInfo.class, JobTaskAttemptCounterInfo.class, 
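With the DAO refactoring above, the web services construct the concrete subclass (MapTaskAttemptInfo or ReduceTaskAttemptInfo) directly instead of passing a TaskType flag to a shared constructor. A hypothetical helper, not part of the patch, showing the selection the endpoints now perform inline:

    import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
    import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
    import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo;
    import org.apache.hadoop.mapreduce.v2.app.webapp.dao.ReduceTaskAttemptInfo;
    import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo;

    final class TaskAttemptInfoFactory {
      private TaskAttemptInfoFactory() {
      }

      static TaskAttemptInfo newInfo(TaskAttempt ta, TaskType type, boolean isRunning) {
        // Reduce attempts carry extra shuffle/merge timings; all other
        // attempts are reported through the map-specific subclass.
        return type == TaskType.REDUCE
            ? new ReduceTaskAttemptInfo(ta, isRunning)
            : new MapTaskAttemptInfo(ta, isRunning);
      }
    }
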
JobTaskCounterInfo.class, TaskCounterGroupInfo.class, ConfInfo.class, JobCounterInfo.class, TaskCounterInfo.class, CounterGroupInfo.class, - JobInfo.class, JobsInfo.class, ReduceTaskAttemptInfo.class, - TaskAttemptInfo.class, TaskInfo.class, TasksInfo.class, - TaskAttemptsInfo.class, ConfEntryInfo.class, RemoteExceptionData.class}; + JobInfo.class, JobsInfo.class, MapTaskAttemptInfo.class, ReduceTaskAttemptInfo.class, + TaskInfo.class, TasksInfo.class, TaskAttemptsInfo.class, ConfEntryInfo.class, RemoteExceptionData.class}; // these dao classes need root unwrapping private final Class[] rootUnwrappedTypes = {JobTaskAttemptState.class}; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java index 944f65e9800..5a4f4cc202c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java @@ -27,13 +27,14 @@ import java.util.EnumSet; import java.util.Collection; -import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; import org.apache.hadoop.util.StringUtils; @@ -124,7 +125,7 @@ protected void render(Block html) { StringBuilder attemptsTableData = new StringBuilder("[\n"); for (TaskAttempt attempt : getTaskAttempts()) { - TaskAttemptInfo ta = new TaskAttemptInfo(attempt, true); + TaskAttemptInfo ta = new MapTaskAttemptInfo(attempt, true); String progress = StringUtils.format("%.2f", ta.getProgress()); String nodeHttpAddr = ta.getNode(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TasksBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TasksBlock.java index a2d8fa94096..a6d9f52c83f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TasksBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TasksBlock.java @@ -24,7 +24,7 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR_VALUE; -import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo; diff 
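The web UI blocks above switch from org.apache.commons.lang3.StringEscapeUtils to the Apache Commons Text copy, the maintained replacement; only the package changes, the entry points keep their names. A minimal sketch with an invented input string:

    import org.apache.commons.text.StringEscapeUtils;

    public class EscapeExample {
      public static void main(String[] args) {
        String diagnostics = "<b>task \"diag\" & notes</b>";
        // Same escapeEcmaScript/escapeHtml4 methods as the deprecated
        // commons-lang3 class.
        System.out.println(StringEscapeUtils.escapeEcmaScript(diagnostics));
        System.out.println(StringEscapeUtils.escapeHtml4(diagnostics));
      }
    }
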
--git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/MapTaskAttemptInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/MapTaskAttemptInfo.java new file mode 100644 index 00000000000..bcd790a9c31 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/MapTaskAttemptInfo.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.v2.app.webapp.dao; + +import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; + +import javax.xml.bind.annotation.XmlRootElement; + +@XmlRootElement(name = "taskAttempt") +public class MapTaskAttemptInfo extends TaskAttemptInfo { + + public MapTaskAttemptInfo() { + } + + public MapTaskAttemptInfo(TaskAttempt ta) { + this(ta, false); + } + + public MapTaskAttemptInfo(TaskAttempt ta, Boolean isRunning) { + super(ta, TaskType.MAP, isRunning); + } +} \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ReduceTaskAttemptInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ReduceTaskAttemptInfo.java index 4d44d11b41e..dbc76ec9d0b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ReduceTaskAttemptInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ReduceTaskAttemptInfo.java @@ -18,8 +18,6 @@ package org.apache.hadoop.mapreduce.v2.app.webapp.dao; -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlRootElement; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; @@ -27,7 +25,6 @@ import org.apache.hadoop.yarn.util.Times; @XmlRootElement(name = "taskAttempt") -@XmlAccessorType(XmlAccessType.FIELD) public class ReduceTaskAttemptInfo extends TaskAttemptInfo { protected long shuffleFinishTime; @@ -39,8 +36,12 @@ public ReduceTaskAttemptInfo() { } - public ReduceTaskAttemptInfo(TaskAttempt ta, TaskType type) { - super(ta, type, false); + public ReduceTaskAttemptInfo(TaskAttempt ta) { + this(ta, false); + } + + public ReduceTaskAttemptInfo(TaskAttempt ta, Boolean isRunning) { + super(ta, TaskType.REDUCE, isRunning); this.shuffleFinishTime = ta.getShuffleFinishTime(); this.mergeFinishTime = ta.getSortFinishTime(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptInfo.java index 892c6269619..140202ef4c8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptInfo.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.mapreduce.v2.app.webapp.dao; -import java.util.List; - import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlRootElement; @@ -31,13 +29,12 @@ import 
org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.Times; @XmlRootElement(name = "taskAttempt") -@XmlSeeAlso({ ReduceTaskAttemptInfo.class }) +@XmlSeeAlso({MapTaskAttemptInfo.class, ReduceTaskAttemptInfo.class}) @XmlAccessorType(XmlAccessType.FIELD) -public class TaskAttemptInfo { +public abstract class TaskAttemptInfo { protected long startTime; protected long finishTime; @@ -58,10 +55,6 @@ public TaskAttemptInfo() { } - public TaskAttemptInfo(TaskAttempt ta, Boolean isRunning) { - this(ta, TaskType.MAP, isRunning); - } - public TaskAttemptInfo(TaskAttempt ta, TaskType type, Boolean isRunning) { final TaskAttemptReport report = ta.getReport(); this.type = type.toString(); @@ -133,4 +126,7 @@ public String getNote() { return this.diagnostics; } + public String getType() { + return type; + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptsInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptsInfo.java index c92488fe81b..866b2fe90f4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptsInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskAttemptsInfo.java @@ -18,26 +18,28 @@ package org.apache.hadoop.mapreduce.v2.app.webapp.dao; import java.util.ArrayList; +import java.util.List; -import javax.xml.bind.annotation.XmlAccessType; -import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElementRef; import javax.xml.bind.annotation.XmlRootElement; @XmlRootElement(name = "taskAttempts") -@XmlAccessorType(XmlAccessType.FIELD) public class TaskAttemptsInfo { - protected ArrayList taskAttempt = new ArrayList(); + protected List taskAttempts = new ArrayList<>(); public TaskAttemptsInfo() { } // JAXB needs this public void add(TaskAttemptInfo taskattemptInfo) { - taskAttempt.add(taskattemptInfo); + taskAttempts.add(taskattemptInfo); } - public ArrayList getTaskAttempts() { - return taskAttempt; + // XmlElementRef annotation should be used to identify the exact type of a list element + // otherwise metadata will be added to XML attributes, + // it can lead to incorrect JSON marshaling + @XmlElementRef + public List getTaskAttempts() { + return taskAttempts; } - } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java index 025a8fac197..efe150fad19 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java @@ -33,10 +33,12 @@ import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; 
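The TaskAttemptsInfo change above relies on standard JAXB behaviour: with @XmlElementRef on the list property, each entry is marshalled under the @XmlRootElement name of its runtime class instead of as a base-type element carrying xsi:type metadata in attributes. A generic, self-contained sketch; the Shapes/Shape/Circle names are invented:

    import java.io.StringWriter;
    import java.util.ArrayList;
    import java.util.List;
    import javax.xml.bind.JAXBContext;
    import javax.xml.bind.annotation.XmlElementRef;
    import javax.xml.bind.annotation.XmlRootElement;
    import javax.xml.bind.annotation.XmlSeeAlso;

    @XmlRootElement(name = "shapes")
    class Shapes {
      private final List<Shape> shapes = new ArrayList<>();

      // Marshalled as <shapes><circle/></shapes>; without @XmlElementRef the
      // entry would be a base-type element with an xsi:type attribute.
      @XmlElementRef
      public List<Shape> getShapes() {
        return shapes;
      }
    }

    @XmlRootElement(name = "shape")
    @XmlSeeAlso({Circle.class})
    abstract class Shape {
    }

    @XmlRootElement(name = "circle")
    class Circle extends Shape {
    }

    public class XmlElementRefExample {
      public static void main(String[] args) throws Exception {
        Shapes shapes = new Shapes();
        shapes.getShapes().add(new Circle());
        StringWriter out = new StringWriter();
        JAXBContext.newInstance(Shapes.class).createMarshaller().marshal(shapes, out);
        System.out.println(out);
      }
    }
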
import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator; +import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy; import org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; @@ -47,6 +49,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -115,7 +118,7 @@ protected ContainerAllocator createContainerAllocator( } class ThrottledContainerAllocator extends AbstractService - implements ContainerAllocator { + implements ContainerAllocator, RMHeartbeatHandler { private int containerCount; private Thread thread; private BlockingQueue eventQueue = @@ -181,10 +184,19 @@ protected void serviceStop() throws Exception { } super.serviceStop(); } + + @Override + public long getLastHeartbeatTime() { + return Time.now(); + } + + @Override + public void runOnNextHeartbeat(Runnable callback) { + } } } - @Test + @Test(timeout = 60000) public void benchmark1() throws Exception { int maps = 100; // Adjust for benchmarking. Start with thousands. int reduces = 0; @@ -211,6 +223,7 @@ protected ApplicationMasterProtocol createSchedulerProxy() { Records.newRecord(RegisterApplicationMasterResponse.class); response.setMaximumResourceCapability(Resource.newInstance( 10240, 1)); + response.setQueue("queue1"); return response; } @@ -252,6 +265,7 @@ public AllocateResponse allocate(AllocateRequest request) response.setAllocatedContainers(containers); response.setResponseId(request.getResponseId() + 1); response.setNumClusterNodes(350); + response.setApplicationPriority(Priority.newInstance(100)); return response; } }; @@ -261,7 +275,7 @@ public AllocateResponse allocate(AllocateRequest request) }); } - @Test + @Test(timeout = 60000) public void benchmark2() throws Exception { int maps = 100; // Adjust for benchmarking, start with a couple of thousands int reduces = 50; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java index b151c8afb08..585b949d7f9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java @@ -27,6 +27,7 @@ import java.util.HashMap; import java.util.Map; +import org.junit.After; import org.junit.Assert; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -62,12 +63,10 @@ @SuppressWarnings({"rawtypes"}) public class 
TestTaskAttemptContainerRequest { - //WARNING: This test must be the only test in this file. This is because - // there is an optimization where the credentials passed in are cached - // statically so they do not need to be recomputed when creating a new - // ContainerLaunchContext. if other tests run first this code will cache - // their credentials and this test will fail trying to look for the - // credentials it inserted in. + @After + public void cleanup() { + UserGroupInformation.reset(); + } @Test public void testAttemptContainerRequest() throws Exception { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/metrics/TestMRAppMetrics.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/metrics/TestMRAppMetrics.java index 02552bce897..3fd4cb028a5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/metrics/TestMRAppMetrics.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/metrics/TestMRAppMetrics.java @@ -24,12 +24,19 @@ import static org.apache.hadoop.test.MetricsAsserts.*; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.junit.After; import org.junit.Test; import static org.mockito.Mockito.*; public class TestMRAppMetrics { + @After + public void tearDown() { + DefaultMetricsSystem.shutdown(); + } + @Test public void testNames() { Job job = mock(Job.class); Task mapTask = mock(Task.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java index d92c275135f..32d054ff5c5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.yarn.webapp.WebServicesTestUtils.assertResponseStatusCode; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -483,6 +484,8 @@ public void verifyAMTaskAttemptsXML(NodeList nodes, Task task) { Boolean found = false; for (int i = 0; i < nodes.getLength(); i++) { Element element = (Element) nodes.item(i); + assertFalse("task attempt should not contain any attributes, it can lead to incorrect JSON marshaling", + element.hasAttributes()); if (attid.matches(WebServicesTestUtils.getXmlString(element, "id"))) { found = true; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/LocalResourceBuilder.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/LocalResourceBuilder.java index 48b157ee26a..48cc29e7028 100644 --- 
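The two test changes above replace an ordering assumption ("this must be the only test in the file") with explicit cleanup of JVM-wide state after each case. A sketch of the pattern, assuming a hypothetical test class:

    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.junit.After;
    import org.junit.Test;

    public class StaticStateCleanupTest {
      @After
      public void cleanup() {
        // Both calls reset process-wide singletons so later tests in the same
        // JVM do not observe cached credentials or a stale metrics system.
        UserGroupInformation.reset();
        DefaultMetricsSystem.shutdown();
      }

      @Test
      public void testSomething() {
        // body omitted
      }
    }
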
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/LocalResourceBuilder.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/LocalResourceBuilder.java @@ -27,7 +27,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.InvalidJobConfException; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.filecache.DistributedCache; import org.apache.hadoop.yarn.api.records.LocalResource; @@ -144,10 +143,9 @@ void createLocalResources(Map localResources) LocalResource orig = localResources.get(linkName); if(orig != null && !orig.getResource().equals(URL.fromURI(p.toUri()))) { - throw new InvalidJobConfException( - getResourceDescription(orig.getType()) + orig.getResource() - + - " conflicts with " + getResourceDescription(type) + u); + LOG.warn(getResourceDescription(orig.getType()) + orig.getResource() + + " conflicts with " + getResourceDescription(type) + u); + continue; } Boolean sharedCachePolicy = sharedCacheUploadPolicies.get(u.toString()); sharedCachePolicy = diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java index 3aadd637bf2..c6a287439da 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java @@ -360,7 +360,7 @@ public void testSetupDistributedCacheEmpty() throws IOException { } @SuppressWarnings("deprecation") - @Test(timeout = 120000, expected = InvalidJobConfException.class) + @Test(timeout = 120000) public void testSetupDistributedCacheConflicts() throws Exception { Configuration conf = new Configuration(); conf.setClass("fs.mockfs.impl", MockFileSystem.class, FileSystem.class); @@ -388,10 +388,18 @@ public void testSetupDistributedCacheConflicts() throws Exception { Map localResources = new HashMap(); MRApps.setupDistributedCache(conf, localResources); + + assertEquals(1, localResources.size()); + LocalResource lr = localResources.get("something"); + //Archive wins + assertNotNull(lr); + assertEquals(10l, lr.getSize()); + assertEquals(10l, lr.getTimestamp()); + assertEquals(LocalResourceType.ARCHIVE, lr.getType()); } @SuppressWarnings("deprecation") - @Test(timeout = 120000, expected = InvalidJobConfException.class) + @Test(timeout = 120000) public void testSetupDistributedCacheConflictsFiles() throws Exception { Configuration conf = new Configuration(); conf.setClass("fs.mockfs.impl", MockFileSystem.class, FileSystem.class); @@ -416,6 +424,14 @@ public void testSetupDistributedCacheConflictsFiles() throws Exception { Map localResources = new HashMap(); MRApps.setupDistributedCache(conf, localResources); + + assertEquals(1, localResources.size()); + LocalResource lr = localResources.get("something"); + //First one wins + assertNotNull(lr); + assertEquals(10l, lr.getSize()); + assertEquals(10l, lr.getTimestamp()); + assertEquals(LocalResourceType.FILE, lr.getType()); } 
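LocalResourceBuilder above now logs and skips a distributed-cache entry whose link name collides with one registered earlier, instead of throwing InvalidJobConfException, and the updated TestMRApps cases assert that the entry registered first is kept. A plain-Java sketch of that first-one-wins behaviour; the link names and values are made up:

    import java.util.HashMap;
    import java.util.Map;

    public class CacheConflictExample {
      public static void main(String[] args) {
        Map<String, String> localResources = new HashMap<>();
        register(localResources, "something", "ARCHIVE mockfs://file1");
        register(localResources, "something", "FILE mockfs://file2");
        System.out.println(localResources); // {something=ARCHIVE mockfs://file1}
      }

      static void register(Map<String, String> resources, String link, String value) {
        String existing = resources.putIfAbsent(link, value);
        if (existing != null) {
          // Conflict: keep the earlier entry, only warn about the later one.
          System.err.println("WARN: " + value + " conflicts with " + existing);
        }
      }
    }
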
@SuppressWarnings("deprecation") diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/PathOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/PathOutputCommitter.java index 3679d9f4d60..5e25f50fefa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/PathOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/PathOutputCommitter.java @@ -57,8 +57,8 @@ protected PathOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException { this.context = Preconditions.checkNotNull(context, "Null context"); - LOG.debug("Creating committer with output path {} and task context" - + " {}", outputPath, context); + LOG.debug("Instantiating committer {} with output path {} and task context" + + " {}", this, outputPath, context); } /** @@ -71,8 +71,8 @@ protected PathOutputCommitter(Path outputPath, protected PathOutputCommitter(Path outputPath, JobContext context) throws IOException { this.context = Preconditions.checkNotNull(context, "Null context"); - LOG.debug("Creating committer with output path {} and job context" - + " {}", outputPath, context); + LOG.debug("Instantiating committer {} with output path {} and job context" + + " {}", this, outputPath, context); } /** @@ -103,6 +103,8 @@ public boolean hasOutputPath() { @Override public String toString() { - return "PathOutputCommitter{context=" + context + '}'; + return "PathOutputCommitter{context=" + context + + "; " + super.toString() + + '}'; } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java index 3382bbf8436..1aad71d7db4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java @@ -37,7 +37,8 @@ @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable @SuppressWarnings({"unchecked", "rawtypes"}) -public class Shuffle implements ShuffleConsumerPlugin, ExceptionReporter { +public class Shuffle implements ShuffleConsumerPlugin, + ExceptionReporter { private static final int PROGRESS_FREQUENCY = 2000; private static final int MAX_EVENTS_TO_FETCH = 10000; private static final int MIN_EVENTS_TO_FETCH = 100; @@ -51,7 +52,7 @@ private ShuffleClientMetrics metrics; private TaskUmbilicalProtocol umbilical; - private ShuffleSchedulerImpl scheduler; + private ShuffleSchedulerImpl scheduler; private MergeManager merger; private Throwable throwable = null; private String throwingThreadName = null; @@ -68,7 +69,8 @@ public void init(ShuffleConsumerPlugin.Context context) { this.jobConf = context.getJobConf(); this.umbilical = context.getUmbilical(); this.reporter = context.getReporter(); - this.metrics = ShuffleClientMetrics.create(); + this.metrics = ShuffleClientMetrics.create(context.getReduceId(), + this.jobConf); this.copyPhase = 
context.getCopyPhase(); this.taskStatus = context.getStatus(); this.reduceTask = context.getReduceTask(); @@ -101,16 +103,16 @@ public RawKeyValueIterator run() throws IOException, InterruptedException { int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer); // Start the map-completion events fetcher thread - final EventFetcher eventFetcher = - new EventFetcher(reduceId, umbilical, scheduler, this, - maxEventsToFetch); + final EventFetcher eventFetcher = + new EventFetcher(reduceId, umbilical, scheduler, this, + maxEventsToFetch); eventFetcher.start(); // Start the map-output fetcher threads boolean isLocal = localMapFiles != null; final int numFetchers = isLocal ? 1 : - jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5); - Fetcher[] fetchers = new Fetcher[numFetchers]; + jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5); + Fetcher[] fetchers = new Fetcher[numFetchers]; if (isLocal) { fetchers[0] = new LocalFetcher(jobConf, reduceId, scheduler, merger, reporter, metrics, this, reduceTask.getShuffleSecret(), @@ -118,7 +120,7 @@ public RawKeyValueIterator run() throws IOException, InterruptedException { fetchers[0].start(); } else { for (int i=0; i < numFetchers; ++i) { - fetchers[i] = new Fetcher(jobConf, reduceId, scheduler, merger, + fetchers[i] = new Fetcher(jobConf, reduceId, scheduler, merger, reporter, metrics, this, reduceTask.getShuffleSecret()); fetchers[i].start(); @@ -141,7 +143,7 @@ public RawKeyValueIterator run() throws IOException, InterruptedException { eventFetcher.shutDown(); // Stop the map-output fetcher threads - for (Fetcher fetcher : fetchers) { + for (Fetcher fetcher : fetchers) { fetcher.shutDown(); } @@ -157,7 +159,7 @@ public RawKeyValueIterator run() throws IOException, InterruptedException { try { kvIter = merger.close(); } catch (Throwable e) { - throw new ShuffleError("Error while doing final merge " , e); + throw new ShuffleError("Error while doing final merge ", e); } // Sanity check diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java index d4e185df6f3..d5e97aabc2f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleClientMetrics.java @@ -17,24 +17,42 @@ */ package org.apache.hadoop.mapreduce.task.reduce; +import com.google.common.annotations.VisibleForTesting; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.TaskAttemptID; + +import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; + import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; + import 
java.util.concurrent.ThreadLocalRandom; +import static org.apache.hadoop.metrics2.lib.Interns.info; + +/** + * Metric for Shuffle client. + */ +@SuppressWarnings("checkstyle:finalclass") @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable @Metrics(name="ShuffleClientMetrics", context="mapred") public class ShuffleClientMetrics { + private static final MetricsInfo RECORD_INFO = + info("ShuffleClientMetrics", "Metrics for Shuffle client"); + @Metric private MutableCounterInt numFailedFetches; @Metric @@ -44,14 +62,23 @@ @Metric private MutableGaugeInt numThreadsBusy; + private final MetricsRegistry metricsRegistry = + new MetricsRegistry(RECORD_INFO); + private ShuffleClientMetrics() { } - public static ShuffleClientMetrics create() { + public static ShuffleClientMetrics create( + TaskAttemptID reduceId, + JobConf jobConf) { MetricsSystem ms = DefaultMetricsSystem.initialize("JobTracker"); + + ShuffleClientMetrics shuffleClientMetrics = new ShuffleClientMetrics(); + shuffleClientMetrics.addTags(reduceId, jobConf); + return ms.register("ShuffleClientMetrics-" + ThreadLocalRandom.current().nextInt(), null, - new ShuffleClientMetrics()); + shuffleClientMetrics); } public void inputBytes(long bytes) { @@ -69,4 +96,16 @@ public void threadBusy() { public void threadFree() { numThreadsBusy.decr(); } + + private void addTags(TaskAttemptID reduceId, JobConf jobConf) { + metricsRegistry.tag("user", "", jobConf.getUser()) + .tag("jobName", "", jobConf.getJobName()) + .tag("jobId", "", reduceId.getJobID().toString()) + .tag("taskId", "", reduceId.toString()); + } + + @VisibleForTesting + MetricsRegistry getMetricsRegistry() { + return metricsRegistry; + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredAppMasterRest.md b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredAppMasterRest.md index be4a8dabe06..7d942caba7e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredAppMasterRest.md +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredAppMasterRest.md @@ -1875,7 +1875,7 @@ Response Body: - + 1326238777460 0 0 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestShuffleClientMetrics.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestShuffleClientMetrics.java new file mode 100644 index 00000000000..0baf52fb19a --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestShuffleClientMetrics.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
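ShuffleClientMetrics.create now takes the reduce attempt id and job configuration so the metrics record can be tagged with user, jobName, jobId and taskId. A usage sketch with concrete, made-up identifiers rather than the mocks used in the new unit test; only methods visible in this patch are called:

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.TaskID;
    import org.apache.hadoop.mapreduce.TaskType;
    import org.apache.hadoop.mapreduce.task.reduce.ShuffleClientMetrics;

    public class ShuffleMetricsExample {
      public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setJobName("word count");
        conf.setUser("alice");
        TaskAttemptID reduceId = new TaskAttemptID(
            new TaskID("201812010000", 1, TaskType.REDUCE, 0), 0);
        // The returned instance is registered with the metrics system and its
        // registry carries the four identifying tags added by addTags().
        ShuffleClientMetrics metrics = ShuffleClientMetrics.create(reduceId, conf);
        metrics.threadBusy();
        metrics.inputBytes(1024);
        metrics.threadFree();
      }
    }
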
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.task.reduce; + +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobID; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.metrics2.MetricsTag; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Unit test for {@link TestShuffleClientMetrics}. + */ +public class TestShuffleClientMetrics { + + private static final String TEST_JOB_NAME = "Test job name"; + private static final String TEST_JOB_ID = "Test job id"; + private static final String TEST_TASK_ID = "Test task id"; + private static final String TEST_USER_NAME = "Test user name"; + + @Test + public void testShuffleMetricsTags() { + // Set up + JobID jobID = mock(JobID.class); + when(jobID.toString()).thenReturn(TEST_JOB_ID); + + TaskAttemptID reduceId = mock(TaskAttemptID.class); + when(reduceId.getJobID()).thenReturn(jobID); + when(reduceId.toString()).thenReturn(TEST_TASK_ID); + + JobConf jobConf = mock(JobConf.class); + when(jobConf.getUser()).thenReturn(TEST_USER_NAME); + when(jobConf.getJobName()).thenReturn(TEST_JOB_NAME); + + // Act + ShuffleClientMetrics shuffleClientMetrics = + ShuffleClientMetrics.create(reduceId, jobConf); + + // Assert + MetricsTag userMetrics = shuffleClientMetrics.getMetricsRegistry() + .getTag("user"); + assertEquals(TEST_USER_NAME, userMetrics.value()); + + MetricsTag jobNameMetrics = shuffleClientMetrics.getMetricsRegistry() + .getTag("jobName"); + assertEquals(TEST_JOB_NAME, jobNameMetrics.value()); + + MetricsTag jobIdMetrics = shuffleClientMetrics.getMetricsRegistry() + .getTag("jobId"); + assertEquals(TEST_JOB_ID, jobIdMetrics.value()); + + MetricsTag taskIdMetrics = shuffleClientMetrics.getMetricsRegistry() + .getTag("taskId"); + assertEquals(TEST_TASK_ID, taskIdMetrics.value()); + } +} \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java index 7fe99a28b9e..825fb259dc9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java @@ -1089,7 +1089,17 @@ public HistoryFileInfo getFileInfo(JobId jobId) throws IOException { private void moveToDoneNow(final Path src, final Path target) throws IOException { LOG.info("Moving " + src.toString() + " to " + target.toString()); - intermediateDoneDirFc.rename(src, target, Options.Rename.NONE); + try { + intermediateDoneDirFc.rename(src, target, Options.Rename.NONE); + } catch (FileNotFoundException e) { + if (doneDirFc.util().exists(target)) { + LOG.info("Source file " + src.toString() + " not found, but target " + 
+ "file " + target.toString() + " already exists. Move already " + + "happened."); + } else { + throw e; + } + } } private String getJobSummary(FileContext fc, Path path) throws IOException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java index 216bdcee983..3f4daf937d1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobsBlock.java @@ -21,7 +21,7 @@ import java.text.SimpleDateFormat; import java.util.Date; -import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.v2.app.AppContext; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java index e8e76d1a558..5b0c59f51a5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTaskPage.java @@ -29,11 +29,12 @@ import java.util.Collection; -import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.webapp.App; +import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRWebAppUtil; @@ -115,7 +116,7 @@ protected void render(Block html) { StringBuilder attemptsTableData = new StringBuilder("[\n"); for (TaskAttempt attempt : getTaskAttempts()) { - final TaskAttemptInfo ta = new TaskAttemptInfo(attempt, false); + final TaskAttemptInfo ta = new MapTaskAttemptInfo(attempt, false); String taid = ta.getId(); String nodeHttpAddr = ta.getNode(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTasksBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTasksBlock.java index 702c13c01de..a00146e680b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTasksBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsTasksBlock.java @@ -24,6 +24,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task; 
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.webapp.App; +import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.ReduceTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskInfo; @@ -129,7 +130,7 @@ if(successful != null) { TaskAttemptInfo ta; if(type == TaskType.REDUCE) { - ReduceTaskAttemptInfo rta = new ReduceTaskAttemptInfo(successful, type); + ReduceTaskAttemptInfo rta = new ReduceTaskAttemptInfo(successful); shuffleFinishTime = rta.getShuffleFinishTime(); sortFinishTime = rta.getMergeFinishTime(); elapsedShuffleTime = rta.getElapsedShuffleTime(); @@ -137,7 +138,7 @@ elapsedReduceTime = rta.getElapsedReduceTime(); ta = rta; } else { - ta = new TaskAttemptInfo(successful, type, false); + ta = new MapTaskAttemptInfo(successful, false); } attemptStartTime = ta.getStartTime(); attemptFinishTime = ta.getFinishTime(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java index dda237db833..dabb760d1ce 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java @@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobCounterInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskAttemptCounterInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskCounterInfo; +import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.ReduceTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptsInfo; @@ -361,9 +362,9 @@ public TaskAttemptsInfo getJobTaskAttempts(@Context HttpServletRequest hsr, for (TaskAttempt ta : task.getAttempts().values()) { if (ta != null) { if (task.getType() == TaskType.REDUCE) { - attempts.add(new ReduceTaskAttemptInfo(ta, task.getType())); + attempts.add(new ReduceTaskAttemptInfo(ta)); } else { - attempts.add(new TaskAttemptInfo(ta, task.getType(), false)); + attempts.add(new MapTaskAttemptInfo(ta, false)); } } } @@ -385,9 +386,9 @@ public TaskAttemptInfo getJobTaskAttemptId(@Context HttpServletRequest hsr, TaskAttempt ta = AMWebServices.getTaskAttemptFromTaskAttemptString(attId, task); if (task.getType() == TaskType.REDUCE) { - return new ReduceTaskAttemptInfo(ta, task.getType()); + return new ReduceTaskAttemptInfo(ta); } else { - return new TaskAttemptInfo(ta, task.getType(), false); + return new MapTaskAttemptInfo(ta, false); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/JAXBContextResolver.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/JAXBContextResolver.java index e03d27fe0bc..ad1c7a9fad2 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/JAXBContextResolver.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/JAXBContextResolver.java @@ -36,8 +36,8 @@ import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobCounterInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskAttemptCounterInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.JobTaskCounterInfo; +import org.apache.hadoop.mapreduce.v2.app.webapp.dao.MapTaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.ReduceTaskAttemptInfo; -import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptsInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskCounterGroupInfo; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskCounterInfo; @@ -63,7 +63,7 @@ JobsInfo.class, TaskInfo.class, TasksInfo.class, TaskAttemptsInfo.class, ConfInfo.class, CounterInfo.class, JobTaskCounterInfo.class, JobTaskAttemptCounterInfo.class, TaskCounterInfo.class, - JobCounterInfo.class, ReduceTaskAttemptInfo.class, TaskAttemptInfo.class, + JobCounterInfo.class, MapTaskAttemptInfo.class, ReduceTaskAttemptInfo.class, TaskAttemptsInfo.class, CounterGroupInfo.class, TaskCounterGroupInfo.class, AMAttemptInfo.class, AMAttemptsInfo.class, RemoteExceptionData.class }; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java index b7a367226e6..f09329b668e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java @@ -26,6 +26,7 @@ import java.util.List; import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.junit.Assert; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -341,6 +342,57 @@ public void testHistoryFileInfoShouldReturnCompletedJobIfMaxNotConfiged() } + /** + * This test sets up a scenario where the history files have already been + * moved to the "done" directory (so the "intermediate" directory is empty), + * but then moveToDone() is called again on the same history file. It + * validates that the second moveToDone() still succeeds rather than throws a + * FileNotFoundException. 
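The guarded rename this test exercises is the one introduced in the HistoryFileManager hunk earlier in this patch; as a readable sketch (names taken from that hunk, shown here only for orientation), it behaves roughly as follows:

    // Sketch of the idempotent move in moveToDoneNow(): if the rename fails
    // because the source is already gone but the target is in place, the move
    // is treated as having already happened instead of failing the job.
    private void moveToDoneNow(final Path src, final Path target)
        throws IOException {
      try {
        intermediateDoneDirFc.rename(src, target, Options.Rename.NONE);
      } catch (FileNotFoundException e) {
        if (doneDirFc.util().exists(target)) {
          LOG.info("Source file " + src + " not found, but target file "
              + target + " already exists. Move already happened.");
        } else {
          throw e;
        }
      }
    }
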
+ */ + @Test + public void testMoveToDoneAlreadyMovedSucceeds() throws Exception { + HistoryFileManagerTest historyFileManager = new HistoryFileManagerTest(); + long jobTimestamp = 1535436603000L; + String job = "job_" + jobTimestamp + "_123456789"; + + String intermediateDirectory = "/" + UUID.randomUUID(); + String doneDirectory = "/" + UUID.randomUUID(); + Configuration conf = dfsCluster.getConfiguration(0); + conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, + intermediateDirectory); + conf.set(JHAdminConfig.MR_HISTORY_DONE_DIR, doneDirectory); + + Path intermediateHistoryFilePath = new Path(intermediateDirectory + "/" + + job + ".jhist"); + Path intermediateConfFilePath = new Path(intermediateDirectory + "/" + + job + "_conf.xml"); + Path doneHistoryFilePath = new Path(doneDirectory + "/" + + JobHistoryUtils.timestampDirectoryComponent(jobTimestamp) + "/123456/" + + job + ".jhist"); + Path doneConfFilePath = new Path(doneDirectory + "/" + + JobHistoryUtils.timestampDirectoryComponent(jobTimestamp) + + "/123456/" + job + "_conf.xml"); + + dfsCluster.getFileSystem().createNewFile(doneHistoryFilePath); + dfsCluster.getFileSystem().createNewFile(doneConfFilePath); + + historyFileManager.serviceInit(conf); + + JobIndexInfo jobIndexInfo = new JobIndexInfo(); + jobIndexInfo.setJobId(TypeConverter.toYarn(JobID.forName(job))); + jobIndexInfo.setFinishTime(jobTimestamp); + HistoryFileInfo info = historyFileManager.getHistoryFileInfo( + intermediateHistoryFilePath, intermediateConfFilePath, null, + jobIndexInfo, false); + info.moveToDone(); + + Assert.assertFalse(info.isMovePending()); + Assert.assertEquals(doneHistoryFilePath.toString(), + info.getHistoryFile().toUri().getPath()); + Assert.assertEquals(doneConfFilePath.toString(), + info.getConfFile().toUri().getPath()); + } + static class HistoryFileManagerTest extends HistoryFileManager { public HistoryFileManagerTest() { super(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index ac4b73b2417..2cb37166c9b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -58,10 +58,14 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerReport; +import org.apache.hadoop.yarn.api.records.NodeAttribute; +import org.apache.hadoop.yarn.api.records.NodeAttributeKey; +import org.apache.hadoop.yarn.api.records.NodeAttributeInfo; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.NodeToAttributeValue; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; @@ -538,4 +542,22 @@ public Resource getResourceProfile(String profile) throws YarnException, IOException { return client.getResourceTypeInfo(); } + + 
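The three delegation methods that follow are rendered here without their generic type parameters. Going by the record types imported in this hunk (NodeAttributeKey, NodeAttributeInfo, NodeToAttributeValue, NodeAttribute), the intended signatures are presumably along these lines; treat this as an inferred sketch, not the authoritative YARN API:

    // Presumed full signatures for the node-attribute delegation methods below.
    // Generic parameters are inferred from the imports added in this hunk;
    // java.util.Set, Map and List are assumed to be imported already.
    @Override
    public Set<NodeAttributeInfo> getClusterAttributes()
        throws YarnException, IOException {
      return client.getClusterAttributes();
    }

    @Override
    public Map<NodeAttributeKey, List<NodeToAttributeValue>> getAttributesToNodes(
        Set<NodeAttributeKey> attributes) throws YarnException, IOException {
      return client.getAttributesToNodes(attributes);
    }

    @Override
    public Map<String, Set<NodeAttribute>> getNodeToAttributes(
        Set<String> hostNames) throws YarnException, IOException {
      return client.getNodeToAttributes(hostNames);
    }
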
@Override + public Set getClusterAttributes() + throws YarnException, IOException { + return client.getClusterAttributes(); + } + + @Override + public Map> getAttributesToNodes( + Set attributes) throws YarnException, IOException { + return client.getAttributesToNodes(attributes); + } + + @Override + public Map> getNodeToAttributes( + Set hostNames) throws YarnException, IOException { + return client.getNodeToAttributes(hostNames); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index f97d0a48f72..5972f65e961 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -82,8 +82,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeLabelsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeLabelsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; @@ -100,6 +104,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; @@ -521,6 +527,25 @@ public GetAllResourceTypeInfoResponse getResourceTypeInfo( throws YarnException, IOException { return null; } + + @Override + public GetAttributesToNodesResponse getAttributesToNodes( + GetAttributesToNodesRequest request) throws YarnException, IOException { + return null; + } + + @Override + public GetClusterNodeAttributesResponse getClusterNodeAttributes( + GetClusterNodeAttributesRequest request) + throws YarnException, IOException { + return null; + } + + @Override + public GetNodesToAttributesResponse getNodesToAttributes( + GetNodesToAttributesRequest request) throws YarnException, IOException { + return null; + } } class HistoryService extends AMService implements HSClientProtocol { diff --git a/hadoop-mapreduce-project/pom.xml b/hadoop-mapreduce-project/pom.xml index e75461ab666..fe8972908d3 100644 --- 
a/hadoop-mapreduce-project/pom.xml +++ b/hadoop-mapreduce-project/pom.xml @@ -25,7 +25,7 @@ 3.2.0-SNAPSHOT pom Apache Hadoop MapReduce - http://hadoop.apache.org/mapreduce/ + http://hadoop.apache.org/ true diff --git a/hadoop-ozone/acceptance-test/README.md b/hadoop-ozone/acceptance-test/README.md deleted file mode 100644 index 2714e0adcd2..00000000000 --- a/hadoop-ozone/acceptance-test/README.md +++ /dev/null @@ -1,48 +0,0 @@ - - -# Acceptance test suite for Ozone/Hdds - -This project contains acceptance tests for ozone/hdds using docker-compose and [robot framework](http://robotframework.org/). - -## Run - -To run the acceptance tests, please activate the `ozone-acceptance-test` profile and do a full build. - -``` -mvn clean install -Pdist -Phdds -cd hadoop-ozone/acceptance-test -mvn integration-test -Phdds,ozone-acceptance-test,dist -DskipTests -``` - -Notes: - - 1. You need a hadoop build in hadoop-dist/target directory. - 2. The `ozone-acceptance-test` could be activated with profile even if the unit tests are disabled. - 3. This method does not require the robot framework on path as jpython is used. - -## Development - -You can also run manually the robot tests with `robot` cli. - (See robotframework docs to install it: http://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#installation-instructions) - -In the dev-support directory we have two wrapper scripts to run robot framework with local robot cli -instead of calling it from maven. - -It's useful during the development of the robot files as any robotframework cli -arguments could be used. - - 1. `dev-support/bin/robot.sh` is the simple wrapper. The .robot file should be used as an argument. - 2. `dev-support/bin/robot-all.sh` will call the robot.sh with the main acceptance test directory, - which means all the acceptance tests will be executed. diff --git a/hadoop-ozone/acceptance-test/dev-support/bin/robot-dnd-all.sh b/hadoop-ozone/acceptance-test/dev-support/bin/robot-dnd-all.sh deleted file mode 100755 index 9f1d367141e..00000000000 --- a/hadoop-ozone/acceptance-test/dev-support/bin/robot-dnd-all.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -x - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -#Dir od the definition of the dind based test exeucution container -DOCKERDIR="$DIR/../docker" - -#Dir to save the results -TARGETDIR="$DIR/../../target/dnd" - -#Dir to mount the distribution from -OZONEDIST="$DIR/../../../../hadoop-dist/target/ozone" - -#Name and imagename of the temporary, dind based test containers -DOCKER_IMAGE_NAME=ozoneacceptance -DOCKER_INSTANCE_NAME="${DOCKER_INSTANCE_NAME:-ozoneacceptance}" - -teardown() { - docker stop "$DOCKER_INSTANCE_NAME" -} - -trap teardown EXIT - -#Make sure it will work even if the ozone is built by an other user. We -# eneable to run the distribution by an other user -mkdir -p "$TARGETDIR" -mkdir -p "$OZONEDIST/logs" -chmod o+w "$OZONEDIST/logs" || true -chmod -R o+w "$OZONEDIST/etc/hadoop" || true -chmod o+w "$OZONEDIST" || true - -rm "$TARGETDIR/docker-compose.log" -docker rm "$DOCKER_INSTANCE_NAME" || true -docker build -t "$DOCKER_IMAGE_NAME" $DIR/../docker - -#Starting the dind based environment -docker run --rm -v $DIR/../../../..:/opt/hadoop --privileged -d --name "$DOCKER_INSTANCE_NAME" $DOCKER_IMAGE_NAME -sleep 5 - -#Starting the tests -docker exec "$DOCKER_INSTANCE_NAME" /opt/hadoop/hadoop-ozone/acceptance-test/dev-support/bin/robot-all.sh -RESULT=$? - -docker cp "$DOCKER_INSTANCE_NAME:/root/log.html" "$TARGETDIR/" -docker cp "$DOCKER_INSTANCE_NAME:/root/junit-results.xml" "$TARGETDIR/" -docker cp "$DOCKER_INSTANCE_NAME:/root/docker-compose.log" "$TARGETDIR/" -exit $RESULT diff --git a/hadoop-ozone/acceptance-test/dev-support/bin/robot.sh b/hadoop-ozone/acceptance-test/dev-support/bin/robot.sh deleted file mode 100755 index ef2a1110668..00000000000 --- a/hadoop-ozone/acceptance-test/dev-support/bin/robot.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -x - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -if [ ! "$(which robot)" ] ; then - echo "" - echo "robot is not on your PATH." - echo "" - echo "Please install it according to the documentation:" - echo " http://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#installation-instructions" - echo " (TLDR; most of the time you need: 'pip install robotframework')" - exit -1 -fi - -OZONEDISTDIR="$DIR/../../../../hadoop-dist/target/ozone" -if [ ! -d "$OZONEDISTDIR" ]; then - echo "Ozone can't be found in the $OZONEDISTDIR." 
- echo "You may need a full build with -Phdds and -Pdist profiles" - exit -1 -fi -robot -x junit-results.xml "$@" diff --git a/hadoop-ozone/acceptance-test/pom.xml b/hadoop-ozone/acceptance-test/pom.xml deleted file mode 100644 index fee41f1d490..00000000000 --- a/hadoop-ozone/acceptance-test/pom.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - - 4.0.0 - - org.apache.hadoop - hadoop-project - 3.2.0-SNAPSHOT - ../../hadoop-project - - hadoop-ozone-acceptance-test - 3.2.0-SNAPSHOT - Apache Hadoop Ozone Acceptance Tests - Apache Hadoop Ozone Acceptance Tests - pom - - - ozone-acceptance-test - - - - org.robotframework - robotframework-maven-plugin - 1.4.7 - - - - run - - - src/test/acceptance - - basedir:${project.basedir}/../.. - - false - false - - - - - - - - - diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/ozone-shell.robot b/hadoop-ozone/acceptance-test/src/test/acceptance/basic/ozone-shell.robot deleted file mode 100644 index f4be3e0f6a0..00000000000 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/basic/ozone-shell.robot +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -*** Settings *** -Documentation Test ozone shell CLI usage -Library OperatingSystem -Suite Setup Startup Ozone cluster with size 5 -Suite Teardown Teardown Ozone cluster -Resource ../commonlib.robot -Test Timeout 2 minute - -*** Variables *** -${basedir} -${COMPOSEFILE} ${CURDIR}/docker-compose.yaml -${PROJECTDIR} ${CURDIR}/../../../../../.. - -*** Test Cases *** -RestClient without http port - Test ozone shell http:// ozoneManager restwoport True - -RestClient with http port - Test ozone shell http:// ozoneManager:9874 restwport True - -RestClient without host name - Test ozone shell http:// ${EMPTY} restwohost True - -RpcClient with port - Test ozone shell o3:// ozoneManager:9862 rpcwoport False - -RpcClient without host - Test ozone shell o3:// ${EMPTY} rpcwport False - -RpcClient without scheme - Test ozone shell ${EMPTY} ${EMPTY} rpcwoscheme False - - -*** Keywords *** -Test ozone shell - [arguments] ${protocol} ${server} ${volume} ${withkeytest} - ${result} = Execute on datanode ozone oz -createVolume ${protocol}${server}/${volume} -user bilbo -quota 100TB -root - Should not contain ${result} Failed - Should contain ${result} Creating Volume: ${volume} - ${result} = Execute on datanode ozone oz -listVolume o3://ozoneManager -user bilbo | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '.[] | select(.volumeName=="${volume}")' - Should contain ${result} createdOn - Execute on datanode ozone oz -updateVolume ${protocol}${server}/${volume} -user bill -quota 10TB - ${result} = Execute on datanode ozone oz -infoVolume ${protocol}${server}/${volume} | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. 
| select(.volumeName=="${volume}") | .owner | .name' - Should Be Equal ${result} bill - ${result} = Execute on datanode ozone oz -infoVolume ${protocol}${server}/${volume} | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.volumeName=="${volume}") | .quota | .size' - Should Be Equal ${result} 10 - Execute on datanode ozone oz -createBucket ${protocol}${server}/${volume}/bb1 - ${result} = Execute on datanode ozone oz -infoBucket ${protocol}${server}/${volume}/bb1 | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.bucketName=="bb1") | .storageType' - Should Be Equal ${result} DISK - ${result} = Execute on datanode ozone oz -updateBucket ${protocol}${server}/${volume}/bb1 -addAcl user:frodo:rw,group:samwise:r | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.bucketName=="bb1") | .acls | .[] | select(.name=="samwise") | .type' - Should Be Equal ${result} GROUP - ${result} = Execute on datanode ozone oz -updateBucket ${protocol}${server}/${volume}/bb1 -removeAcl group:samwise:r | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.bucketName=="bb1") | .acls | .[] | select(.name=="frodo") | .type' - Should Be Equal ${result} USER - ${result} = Execute on datanode ozone oz -listBucket o3://ozoneManager/${volume}/ | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '.[] | select(.bucketName=="bb1") | .volumeName' - Should Be Equal ${result} ${volume} - Run Keyword and Return If ${withkeytest} Test key handling ${protocol} ${server} ${volume} - Execute on datanode ozone oz -deleteBucket ${protocol}${server}/${volume}/bb1 - Execute on datanode ozone oz -deleteVolume ${protocol}${server}/${volume} -user bilbo - -Test key handling - [arguments] ${protocol} ${server} ${volume} - Execute on datanode ozone oz -putKey ${protocol}${server}/${volume}/bb1/key1 -file NOTICE.txt - Execute on datanode rm -f NOTICE.txt.1 - Execute on datanode ozone oz -getKey ${protocol}${server}/${volume}/bb1/key1 -file NOTICE.txt.1 - Execute on datanode ls -l NOTICE.txt.1 - ${result} = Execute on datanode ozone oz -infoKey ${protocol}${server}/${volume}/bb1/key1 | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '. | select(.keyName=="key1")' - Should contain ${result} createdOn - ${result} = Execute on datanode ozone oz -listKey o3://ozoneManager/${volume}/bb1 | grep -Ev 'Removed|WARN|DEBUG|ERROR|INFO|TRACE' | jq -r '.[] | select(.keyName=="key1") | .keyName' - Should Be Equal ${result} key1 - Execute on datanode ozone oz -deleteKey ${protocol}${server}/${volume}/bb1/key1 -v diff --git a/hadoop-ozone/acceptance-test/src/test/acceptance/commonlib.robot b/hadoop-ozone/acceptance-test/src/test/acceptance/commonlib.robot deleted file mode 100644 index 9235cd917ae..00000000000 --- a/hadoop-ozone/acceptance-test/src/test/acceptance/commonlib.robot +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -*** Keywords *** - -Startup Ozone cluster with size - [arguments] ${datanodeno} - ${rc} ${output} = Run docker compose down - Run echo "Starting new docker-compose environment" >> docker-compose.log - ${rc} ${output} = Run docker compose up -d - Should Be Equal As Integers ${rc} 0 - Wait Until Keyword Succeeds 1min 5sec Is Daemon started ozoneManager HTTP server of OZONEMANAGER is listening - Daemons are running without error - Scale datanodes up 5 - -Daemons are running without error - Is daemon running without error ozoneManager - Is daemon running without error scm - Is daemon running without error datanode - -Check if datanode is connected to the scm - Wait Until Keyword Succeeds 3min 5sec Have healthy datanodes 1 - -Scale it up to 5 datanodes - Scale datanodes up 5 - Wait Until Keyword Succeeds 3min 5sec Have healthy datanodes 5 - -Scale datanodes up - [arguments] ${datanodeno} - Run docker compose scale datanode=${datanodeno} - Wait Until Keyword Succeeds 3min 5sec Have healthy datanodes ${datanodeno} - -Teardown Ozone cluster - Run docker compose down - Run docker compose logs >> docker-compose.log - -Is daemon running without error - [arguments] ${name} - ${result} = Run docker ps - Should contain ${result} _${name}_1 - ${rc} ${result} = Run docker compose logs ${name} - Should not contain ${result} ERROR - -Is Daemon started - [arguments] ${name} ${expression} - ${rc} ${result} = Run docker compose logs - Should contain ${result} ${expression} - -Have healthy datanodes - [arguments] ${requirednodes} - ${result} = Execute on scm curl -s 'http://localhost:9876/jmx?qry=Hadoop:service=SCMNodeManager,name=SCMNodeManagerInfo' | jq -r '.beans[0].NodeCount[] | select(.key=="HEALTHY") | .value' - Should Be Equal ${result} ${requirednodes} - -Execute on - [arguments] ${componentname} ${command} - ${rc} ${return} = Run docker compose exec -T ${componentname} ${command} - [return] ${return} - -Run docker compose - [arguments] ${command} - Set Environment Variable COMPOSE_INTERACTIVE_NO_CLI 1 - Set Environment Variable OZONEDIR ${PROJECTDIR}/hadoop-dist/target/ozone - ${rc} ${output} = Run And Return Rc And Output docker-compose -f ${COMPOSEFILE} ${command} - Log ${output} - Should Be Equal As Integers ${rc} 0 - [return] ${rc} ${output} diff --git a/hadoop-ozone/client/pom.xml b/hadoop-ozone/client/pom.xml index fae630f5442..bbd075967e4 100644 --- a/hadoop-ozone/client/pom.xml +++ b/hadoop-ozone/client/pom.xml @@ -20,10 +20,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-ozone - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-ozone-client - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Ozone Client Apache Hadoop Ozone Client jar diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/ObjectStore.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/ObjectStore.java index c5f0689818d..b6005d18617 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/ObjectStore.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/ObjectStore.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.client; -import com.google.common.base.Preconditions; import com.google.common.base.Strings; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.scm.client.HddsClientUtils; @@ -158,9 +157,9 @@ public void deleteVolume(String volumeName) throws IOException { private OzoneVolume 
currentValue; /** - * Creates an Iterator to iterate over all volumes after prevVolume of the user. - * If prevVolume is null it iterates from the first volume. The returned volumes - * match volume prefix. + * Creates an Iterator to iterate over all volumes after + * prevVolume of the user. If prevVolume is null it iterates from the + * first volume. The returned volumes match volume prefix. * @param user user name * @param volPrefix volume prefix to match */ diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java index 2f3cff6c368..97bd682975b 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneBucket.java @@ -258,10 +258,10 @@ public OzoneInputStream readKey(String key) throws IOException { /** * Returns information about the key. * @param key Name of the key. - * @return OzoneKey Information about the key. + * @return OzoneKeyDetails Information about the key. * @throws IOException */ - public OzoneKey getKey(String key) throws IOException { + public OzoneKeyDetails getKey(String key) throws IOException { return proxy.getKeyDetails(volumeName, name, key); } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java index 0aaee31ffb9..40e4d83113e 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java @@ -17,13 +17,22 @@ */ package org.apache.hadoop.ozone.client; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.client.OzoneQuota; import org.apache.hadoop.hdds.scm.client.HddsClientUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.BlockNotCommittedException; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.client.rest.response.BucketInfo; -import org.apache.hadoop.ozone.client.rest.response.KeyInfo; -import org.apache.hadoop.ozone.client.rest.response.VolumeInfo; -import org.apache.hadoop.ozone.client.rest.response.VolumeOwner; +import org.apache.hadoop.ozone.client.rest.response.*; + +import java.util.ArrayList; +import java.util.List; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; /** A utility class for OzoneClient. */ public final class OzoneClientUtils { @@ -84,4 +93,44 @@ public static KeyInfo asKeyInfo(OzoneKey key) { keyInfo.setSize(key.getDataSize()); return keyInfo; } + + public static RetryPolicy createRetryPolicy(Configuration conf) { + int maxRetryCount = + conf.getInt(OzoneConfigKeys.OZONE_CLIENT_MAX_RETRIES, OzoneConfigKeys. + OZONE_CLIENT_MAX_RETRIES_DEFAULT); + long retryInterval = conf.getTimeDuration(OzoneConfigKeys. + OZONE_CLIENT_RETRY_INTERVAL, OzoneConfigKeys. 
+ OZONE_CLIENT_RETRY_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + RetryPolicy basePolicy = RetryPolicies + .retryUpToMaximumCountWithFixedSleep(maxRetryCount, retryInterval, + TimeUnit.MILLISECONDS); + Map, RetryPolicy> exceptionToPolicyMap = + new HashMap, RetryPolicy>(); + exceptionToPolicyMap.put(BlockNotCommittedException.class, basePolicy); + RetryPolicy retryPolicy = RetryPolicies + .retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, + exceptionToPolicyMap); + return retryPolicy; + } + /** + * Returns a KeyInfoDetails object constructed using fields of the input + * OzoneKeyDetails object. + * + * @param key OzoneKeyDetails instance from which KeyInfo object needs to + * be created. + * @return KeyInfoDetails instance + */ + public static KeyInfoDetails asKeyInfoDetails(OzoneKeyDetails key) { + KeyInfoDetails keyInfo = new KeyInfoDetails(); + keyInfo.setKeyName(key.getName()); + keyInfo.setCreatedOn(HddsClientUtils.formatDateTime(key.getCreationTime())); + keyInfo.setModifiedOn( + HddsClientUtils.formatDateTime(key.getModificationTime())); + keyInfo.setSize(key.getDataSize()); + List keyLocations = new ArrayList<>(); + key.getOzoneKeyLocations().forEach((a) -> keyLocations.add(new KeyLocation( + a.getContainerID(), a.getLocalID(), a.getLength(), a.getOffset()))); + keyInfo.setKeyLocation(keyLocations); + return keyInfo; + } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneKeyDetails.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneKeyDetails.java new file mode 100644 index 00000000000..e7709ddfb89 --- /dev/null +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneKeyDetails.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client; + +import java.util.List; + +/** + * A class that encapsulates OzoneKeyLocation. + */ +public class OzoneKeyDetails extends OzoneKey { + + /** + * A list of block location information to specify replica locations. + */ + private List ozoneKeyLocations; + + /** + * Constructs OzoneKeyDetails from OmKeyInfo. + */ + public OzoneKeyDetails(String volumeName, String bucketName, String keyName, + long size, long creationTime, long modificationTime, + List ozoneKeyLocations) { + super(volumeName, bucketName, keyName, size, creationTime, + modificationTime); + this.ozoneKeyLocations = ozoneKeyLocations; + } + + /** + * Returns the location detail information of the specific Key. + */ + public List getOzoneKeyLocations() { + return ozoneKeyLocations; + } + + /** + * Set details of key location. 
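In OzoneClientUtils.createRetryPolicy() above, the exception-to-policy map is likewise shown without its type parameters; it presumably maps exception classes to retry policies. A minimal sketch of the intended body, assuming the stock RetryPolicies API:

    // Sketch of createRetryPolicy() with the elided generics restored
    // (assumed to be Map<Class<? extends Exception>, RetryPolicy>).
    RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
        maxRetryCount, retryInterval, TimeUnit.MILLISECONDS);
    Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
        new HashMap<>();
    // Only BlockNotCommittedException is retried; everything else fails fast.
    exceptionToPolicyMap.put(BlockNotCommittedException.class, basePolicy);
    return RetryPolicies.retryByException(
        RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
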
+ * @param ozoneKeyLocations - details of key location + */ + public void setOzoneKeyLocations(List ozoneKeyLocations) { + this.ozoneKeyLocations = ozoneKeyLocations; + } +} diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneKeyLocation.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneKeyLocation.java new file mode 100644 index 00000000000..0ff8ba749b6 --- /dev/null +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneKeyLocation.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client; + +/** + * One key can be stored in one or more containers as one or more blocks. + * This class represents one such block instance. + */ +public class OzoneKeyLocation { + /** + * Which container this key stored. + */ + private final long containerID; + /** + * Which block this key stored inside a container. + */ + private final long localID; + /** + * Data length of this key replica. + */ + private final long length; + /** + * Offset of this key. + */ + private final long offset; + + /** + * Constructs OzoneKeyLocation. + */ + public OzoneKeyLocation(long containerID, long localID, + long length, long offset) { + this.containerID = containerID; + this.localID = localID; + this.length = length; + this.offset = offset; + } + + /** + * Returns the containerID of this Key. + */ + public long getContainerID() { + return containerID; + } + + /** + * Returns the localID of this Key. + */ + public long getLocalID() { + return localID; + } + + /** + * Returns the length of this Key. + */ + public long getLength() { + return length; + } + + /** + * Returns the offset of this Key. + */ + public long getOffset() { + return offset; + } + +} diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupInputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupInputStream.java index b3a566e43f3..94966f66ef6 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupInputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupInputStream.java @@ -71,7 +71,7 @@ public synchronized int getCurrentStreamIndex() { } @VisibleForTesting - public long getRemainingOfIndex(int index) { + public long getRemainingOfIndex(int index) throws IOException { return streamEntries.get(index).getRemaining(); } @@ -115,19 +115,20 @@ public synchronized int read(byte[] b, int off, int len) throws IOException { return totalReadLen == 0 ? 
EOF : totalReadLen; } ChunkInputStreamEntry current = streamEntries.get(currentStreamIndex); - int readLen = Math.min(len, (int)current.getRemaining()); - int actualLen = current.read(b, off, readLen); - // this means the underlying stream has nothing at all, return - if (actualLen == EOF) { - return totalReadLen > 0 ? totalReadLen : EOF; + int numBytesToRead = Math.min(len, (int)current.getRemaining()); + int numBytesRead = current.read(b, off, numBytesToRead); + if (numBytesRead != numBytesToRead) { + // This implies that there is either data loss or corruption in the + // chunk entries. Even EOF in the current stream would be covered in + // this case. + throw new IOException(String.format( + "Inconsistent read for blockID=%s length=%d numBytesRead=%d", + current.chunkInputStream.getBlockID(), current.length, + numBytesRead)); } - totalReadLen += actualLen; - // this means there is no more data to read beyond this point, return - if (actualLen != readLen) { - return totalReadLen; - } - off += readLen; - len -= readLen; + totalReadLen += numBytesRead; + off += numBytesRead; + len -= numBytesRead; if (current.getRemaining() <= 0) { currentStreamIndex += 1; } @@ -206,31 +207,27 @@ public void close() throws IOException { private final ChunkInputStream chunkInputStream; private final long length; - private long currentPosition; public ChunkInputStreamEntry(ChunkInputStream chunkInputStream, long length) { this.chunkInputStream = chunkInputStream; this.length = length; - this.currentPosition = 0; } - synchronized long getRemaining() { - return length - currentPosition; + synchronized long getRemaining() throws IOException { + return length - getPos(); } @Override public synchronized int read(byte[] b, int off, int len) throws IOException { int readLen = chunkInputStream.read(b, off, len); - currentPosition += readLen; return readLen; } @Override public synchronized int read() throws IOException { int data = chunkInputStream.read(); - currentPosition += 1; return data; } @@ -282,10 +279,10 @@ public static LengthInputStream getFromOmKeyInfo( LOG.debug("get key accessing {} {}", containerID, containerKey); groupInputStream.streamOffset[i] = length; - ContainerProtos.KeyData containerKeyData = OzoneContainerTranslation - .containerKeyDataForRead(blockID); + ContainerProtos.DatanodeBlockID datanodeBlockID = blockID + .getDatanodeBlockIDProtobuf(); ContainerProtos.GetKeyResponseProto response = ContainerProtocolCalls - .getKey(xceiverClient, containerKeyData, requestId); + .getKey(xceiverClient, datanodeBlockID, requestId); List chunks = response.getKeyData().getChunksList(); for (ContainerProtos.ChunkInfo chunk : chunks) { diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java index 94433179f99..3742a9a5d18 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ChunkGroupOutputStream.java @@ -24,9 +24,11 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ObjectStageChangeRequestProto; import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; @@ -45,9 +47,13 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InterruptedIOException; import java.io.OutputStream; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; +import java.util.Optional; +import java.util.ListIterator; /** * Maintaining a list of ChunkInputStream. Write based on offset. @@ -60,7 +66,7 @@ */ public class ChunkGroupOutputStream extends OutputStream { - private static final Logger LOG = + public static final Logger LOG = LoggerFactory.getLogger(ChunkGroupOutputStream.class); // array list's get(index) is O(1) @@ -71,12 +77,12 @@ private final StorageContainerLocationProtocolClientSideTranslatorPB scmClient; private final OmKeyArgs keyArgs; - private final int openID; + private final long openID; private final XceiverClientManager xceiverClientManager; private final int chunkSize; private final String requestID; private boolean closed; - + private final RetryPolicy retryPolicy; /** * A constructor for testing purpose only. */ @@ -91,6 +97,7 @@ public ChunkGroupOutputStream() { chunkSize = 0; requestID = null; closed = false; + retryPolicy = null; } /** @@ -101,7 +108,7 @@ public ChunkGroupOutputStream() { * @param length */ @VisibleForTesting - public synchronized void addStream(OutputStream outputStream, long length) { + public void addStream(OutputStream outputStream, long length) { streamEntries.add(new ChunkOutputStreamEntry(outputStream, length)); } @@ -110,12 +117,24 @@ public synchronized void addStream(OutputStream outputStream, long length) { return streamEntries; } + public List getLocationInfoList() { + List locationInfoList = new ArrayList<>(); + for (ChunkOutputStreamEntry streamEntry : streamEntries) { + OmKeyLocationInfo info = + new OmKeyLocationInfo.Builder().setBlockID(streamEntry.blockID) + .setShouldCreateContainer(false) + .setLength(streamEntry.currentPosition).setOffset(0).build(); + locationInfoList.add(info); + } + return locationInfoList; + } + public ChunkGroupOutputStream( OpenKeySession handler, XceiverClientManager xceiverClientManager, StorageContainerLocationProtocolClientSideTranslatorPB scmClient, OzoneManagerProtocolClientSideTranslatorPB omClient, int chunkSize, String requestId, ReplicationFactor factor, - ReplicationType type) throws IOException { + ReplicationType type, RetryPolicy retryPolicy) throws IOException { this.streamEntries = new ArrayList<>(); this.currentStreamIndex = 0; this.byteOffset = 0; @@ -133,6 +152,7 @@ public ChunkGroupOutputStream( this.xceiverClientManager = xceiverClientManager; this.chunkSize = chunkSize; this.requestID = requestId; + this.retryPolicy = retryPolicy; LOG.debug("Expecting open key with one block, but got" + info.getKeyLocationVersions().size()); } @@ -198,7 +218,6 @@ private void checkKeyLocationInfo(OmKeyLocationInfo subKeyInfo) chunkSize, subKeyInfo.getLength())); } - @VisibleForTesting public long getByteOffset() { return byteOffset; @@ -206,26 +225,10 @@ public long getByteOffset() { @Override - public synchronized void write(int b) throws IOException { - checkNotClosed(); - - if (streamEntries.size() <= currentStreamIndex) { - Preconditions.checkNotNull(omClient); - // 
allocate a new block, if a exception happens, log an error and - // throw exception to the caller directly, and the write fails. - try { - allocateNewBlock(currentStreamIndex); - } catch (IOException ioe) { - LOG.error("Allocate block fail when writing."); - throw ioe; - } - } - ChunkOutputStreamEntry entry = streamEntries.get(currentStreamIndex); - entry.write(b); - if (entry.getRemaining() <= 0) { - currentStreamIndex += 1; - } - byteOffset += 1; + public void write(int b) throws IOException { + byte[] buf = new byte[1]; + buf[0] = (byte) b; + write(buf, 0, 1); } /** @@ -241,10 +244,13 @@ public synchronized void write(int b) throws IOException { * @throws IOException */ @Override - public synchronized void write(byte[] b, int off, int len) + public void write(byte[] b, int off, int len) throws IOException { checkNotClosed(); + handleWrite(b, off, len); + } + private void handleWrite(byte[] b, int off, int len) throws IOException { if (b == null) { throw new NullPointerException(); } @@ -274,9 +280,20 @@ public synchronized void write(byte[] b, int off, int len) // still do a sanity check. Preconditions.checkArgument(currentStreamIndex < streamEntries.size()); ChunkOutputStreamEntry current = streamEntries.get(currentStreamIndex); - int writeLen = Math.min(len, (int)current.getRemaining()); - current.write(b, off, writeLen); + int writeLen = Math.min(len, (int) current.getRemaining()); + try { + current.write(b, off, writeLen); + } catch (IOException ioe) { + if (checkIfContainerIsClosed(ioe)) { + handleCloseContainerException(current, currentStreamIndex); + continue; + } else { + throw ioe; + } + } if (current.getRemaining() <= 0) { + // since the current block is already written close the stream. + handleFlushOrClose(true); currentStreamIndex += 1; } len -= writeLen; @@ -285,6 +302,166 @@ public synchronized void write(byte[] b, int off, int len) } } + private long getCommittedBlockLength(ChunkOutputStreamEntry streamEntry) + throws IOException { + long blockLength; + ContainerProtos.GetCommittedBlockLengthResponseProto responseProto; + RetryPolicy.RetryAction action; + int numRetries = 0; + while (true) { + try { + responseProto = ContainerProtocolCalls + .getCommittedBlockLength(streamEntry.xceiverClient, + streamEntry.blockID, requestID); + blockLength = responseProto.getBlockLength(); + return blockLength; + } catch (StorageContainerException sce) { + try { + action = retryPolicy.shouldRetry(sce, numRetries, 0, true); + } catch (Exception e) { + throw e instanceof IOException ? (IOException) e : new IOException(e); + } + if (action.action == RetryPolicy.RetryAction.RetryDecision.FAIL) { + if (action.reason != null) { + LOG.error( + "GetCommittedBlockLength request failed. " + action.reason, + sce); + } + throw sce; + } + + // Throw the exception if the thread is interrupted + if (Thread.currentThread().isInterrupted()) { + LOG.warn("Interrupted while trying for connection"); + throw sce; + } + Preconditions.checkArgument( + action.action == RetryPolicy.RetryAction.RetryDecision.RETRY); + try { + Thread.sleep(action.delayMillis); + } catch (InterruptedException e) { + throw (IOException) new InterruptedIOException( + "Interrupted: action=" + action + ", retry policy=" + retryPolicy) + .initCause(e); + } + numRetries++; + LOG.trace("Retrying GetCommittedBlockLength request. 
Already tried " + + numRetries + " time(s); retry policy is " + retryPolicy); + continue; + } + } + } + + /** + * Discards the subsequent pre allocated blocks and removes the streamEntries + * from the streamEntries list for the container which is closed. + * @param containerID id of the closed container + */ + private void discardPreallocatedBlocks(long containerID) { + // currentStreamIndex < streamEntries.size() signifies that, there are still + // pre allocated blocks available. + if (currentStreamIndex < streamEntries.size()) { + ListIterator streamEntryIterator = + streamEntries.listIterator(currentStreamIndex); + while (streamEntryIterator.hasNext()) { + if (streamEntryIterator.next().blockID.getContainerID() + == containerID) { + streamEntryIterator.remove(); + } + } + } + } + + /** + * It might be possible that the blocks pre allocated might never get written + * while the stream gets closed normally. In such cases, it would be a good + * idea to trim down the locationInfoList by removing the unused blocks if any + * so as only the used block info gets updated on OzoneManager during close. + */ + private void removeEmptyBlocks() { + if (currentStreamIndex < streamEntries.size()) { + ListIterator streamEntryIterator = + streamEntries.listIterator(currentStreamIndex); + while (streamEntryIterator.hasNext()) { + if (streamEntryIterator.next().currentPosition == 0) { + streamEntryIterator.remove(); + } + } + } + } + /** + * It performs following actions : + * a. Updates the committed length at datanode for the current stream in + * datanode. + * b. Reads the data from the underlying buffer and writes it the next stream. + * + * @param streamEntry StreamEntry + * @param streamIndex Index of the entry + * @throws IOException Throws IOexception if Write fails + */ + private void handleCloseContainerException(ChunkOutputStreamEntry streamEntry, + int streamIndex) throws IOException { + long committedLength = 0; + ByteBuffer buffer = streamEntry.getBuffer(); + if (buffer == null) { + // the buffer here will be null only when closeContainerException is + // hit while calling putKey during close on chunkOutputStream. + // Since closeContainer auto commit pending keys, no need to do + // anything here. + return; + } + + // In case where not a single chunk of data has been written to the Datanode + // yet. This block does not yet exist on the datanode but cached on the + // outputStream buffer. No need to call GetCommittedBlockLength here + // for this block associated with the stream here. + if (streamEntry.currentPosition >= chunkSize + || streamEntry.currentPosition != buffer.position()) { + committedLength = getCommittedBlockLength(streamEntry); + // update the length of the current stream + streamEntry.currentPosition = committedLength; + } + + if (buffer.position() > 0) { + // If the data is still cached in the underlying stream, we need to + // allocate new block and write this data in the datanode. The cached + // data in the buffer does not exceed chunkSize. + Preconditions.checkState(buffer.position() < chunkSize); + currentStreamIndex += 1; + // readjust the byteOffset value to the length actually been written. + byteOffset -= buffer.position(); + handleWrite(buffer.array(), 0, buffer.position()); + } + + // just clean up the current stream. Since the container is already closed, + // it will be auto committed. No need to call close again here. + streamEntry.cleanup(); + // This case will arise when while writing the first chunk itself fails. 
+ // In such case, the current block associated with the stream has no data + // written. Remove it from the current stream list. + if (committedLength == 0) { + streamEntries.remove(streamIndex); + Preconditions.checkArgument(currentStreamIndex != 0); + currentStreamIndex -= 1; + } + // discard subsequent pre allocated blocks from the streamEntries list + // from the closed container + discardPreallocatedBlocks(streamEntry.blockID.getContainerID()); + } + + private boolean checkIfContainerIsClosed(IOException ioe) { + return Optional.of(ioe.getCause()) + .filter(e -> e instanceof StorageContainerException) + .map(e -> (StorageContainerException) e) + .filter(sce -> sce.getResult() == Result.CLOSED_CONTAINER_IO) + .isPresent(); + } + + private long getKeyLength() { + return streamEntries.parallelStream().mapToLong(e -> e.currentPosition) + .sum(); + } + /** * Contact OM to get a new block. Set the new block with the index (e.g. * first block has index = 0, second has index = 1 etc.) @@ -300,13 +477,43 @@ private void allocateNewBlock(int index) throws IOException { } @Override - public synchronized void flush() throws IOException { + public void flush() throws IOException { checkNotClosed(); + handleFlushOrClose(false); + } + + /** + * Close or Flush the latest outputStream. + * @param close Flag which decides whether to call close or flush on the + * outputStream. + * @throws IOException In case, flush or close fails with exception. + */ + private void handleFlushOrClose(boolean close) throws IOException { if (streamEntries.size() == 0) { return; } - for (int i = 0; i <= currentStreamIndex; i++) { - streamEntries.get(i).flush(); + int size = streamEntries.size(); + int streamIndex = + currentStreamIndex >= size ? size - 1 : currentStreamIndex; + ChunkOutputStreamEntry entry = streamEntries.get(streamIndex); + if (entry != null) { + try { + if (close) { + entry.close(); + } else { + entry.flush(); + } + } catch (IOException ioe) { + if (checkIfContainerIsClosed(ioe)) { + // This call will allocate a new streamEntry and write the Data. + // Close needs to be retried on the newly allocated streamEntry as + // as well. 
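For orientation, the closed-container recovery wired through handleWrite() and handleFlushOrClose() in this hunk reduces to the following sketch (illustrative only; variable names as in the patch):

    // Condensed view of the recovery path: on CLOSED_CONTAINER_IO,
    // 1. ask the datanode how much of the block was actually committed,
    // 2. re-write any bytes still buffered locally into a newly allocated block,
    // 3. drop unused pre-allocated blocks that live in the closed container,
    // then retry the original write/flush/close.
    try {
      entry.write(b, off, writeLen);            // or entry.flush() / entry.close()
    } catch (IOException ioe) {
      if (checkIfContainerIsClosed(ioe)) {
        handleCloseContainerException(entry, streamIndex);
      } else {
        throw ioe;
      }
    }
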
+ handleCloseContainerException(entry, streamIndex); + handleFlushOrClose(close); + } else { + throw ioe; + } + } } } @@ -316,19 +523,18 @@ public synchronized void flush() throws IOException { * @throws IOException */ @Override - public synchronized void close() throws IOException { + public void close() throws IOException { if (closed) { return; } closed = true; - for (ChunkOutputStreamEntry entry : streamEntries) { - if (entry != null) { - entry.close(); - } - } + handleFlushOrClose(true); if (keyArgs != null) { // in test, this could be null + removeEmptyBlocks(); + Preconditions.checkState(byteOffset == getKeyLength()); keyArgs.setDataSize(byteOffset); + keyArgs.setLocationInfoList(getLocationInfoList()); omClient.commitKey(keyArgs, openID); } else { LOG.warn("Closing ChunkGroupOutputStream, but key args is null"); @@ -347,6 +553,7 @@ public synchronized void close() throws IOException { private String requestID; private ReplicationType type; private ReplicationFactor factor; + private RetryPolicy retryPolicy; public Builder setHandler(OpenKeySession handler) { this.openHandler = handler; @@ -392,8 +599,14 @@ public Builder setFactor(ReplicationFactor replicationFactor) { public ChunkGroupOutputStream build() throws IOException { return new ChunkGroupOutputStream(openHandler, xceiverManager, scmClient, - omClient, chunkSize, requestID, factor, type); + omClient, chunkSize, requestID, factor, type, retryPolicy); } + + public Builder setRetryPolicy(RetryPolicy rPolicy) { + this.retryPolicy = rPolicy; + return this; + } + } private static class ChunkOutputStreamEntry extends OutputStream { @@ -451,7 +664,7 @@ long getRemaining() { return length - currentPosition; } - private synchronized void checkStream() { + private void checkStream() { if (this.outputStream == null) { this.outputStream = new ChunkOutputStream(blockID, key, xceiverClientManager, xceiverClient, @@ -486,6 +699,23 @@ public void close() throws IOException { this.outputStream.close(); } } + + ByteBuffer getBuffer() throws IOException { + if (this.outputStream instanceof ChunkOutputStream) { + ChunkOutputStream out = (ChunkOutputStream) this.outputStream; + return out.getBuffer(); + } + throw new IOException("Invalid Output Stream for Key: " + key); + } + + public void cleanup() { + checkStream(); + if (this.outputStream instanceof ChunkOutputStream) { + ChunkOutputStream out = (ChunkOutputStream) this.outputStream; + out.cleanup(); + } + } + } /** diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneInputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneInputStream.java index c2ff979a2d5..e1f65e69a86 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneInputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/OzoneInputStream.java @@ -44,6 +44,11 @@ public int read() throws IOException { return inputStream.read(); } + @Override + public int read(byte[] b, int off, int len) throws IOException { + return inputStream.read(b, off, len); + } + @Override public synchronized void close() throws IOException { inputStream.close(); diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java index 94cc257e1ae..008b69d2d16 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java +++ 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java @@ -20,14 +20,10 @@ import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.ozone.OzoneAcl; -import org.apache.hadoop.ozone.client.BucketArgs; -import org.apache.hadoop.ozone.client.OzoneBucket; -import org.apache.hadoop.ozone.client.OzoneKey; +import org.apache.hadoop.ozone.client.*; import org.apache.hadoop.hdds.client.OzoneQuota; -import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; -import org.apache.hadoop.ozone.client.VolumeArgs; import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; @@ -321,8 +317,8 @@ void renameKey(String volumeName, String bucketName, String fromKeyName, * @return {@link OzoneKey} * @throws IOException */ - OzoneKey getKeyDetails(String volumeName, String bucketName, - String keyName) + OzoneKeyDetails getKeyDetails(String volumeName, String bucketName, + String keyName) throws IOException; /** diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rest/RestClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rest/RestClient.java index 78fbe8d1f7d..fdd049a5271 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rest/RestClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rest/RestClient.java @@ -30,12 +30,8 @@ import org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.client.BucketArgs; -import org.apache.hadoop.ozone.client.VolumeArgs; -import org.apache.hadoop.ozone.client.OzoneBucket; -import org.apache.hadoop.ozone.client.OzoneKey; +import org.apache.hadoop.ozone.client.*; import org.apache.hadoop.hdds.client.OzoneQuota; -import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.ozone.client.io.OzoneInputStream; @@ -43,7 +39,7 @@ import org.apache.hadoop.ozone.client.protocol.ClientProtocol; import org.apache.hadoop.ozone.client.rest.headers.Header; import org.apache.hadoop.ozone.client.rest.response.BucketInfo; -import org.apache.hadoop.ozone.client.rest.response.KeyInfo; +import org.apache.hadoop.ozone.client.rest.response.KeyInfoDetails; import org.apache.hadoop.ozone.client.rest.response.VolumeInfo; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.helpers.ServiceInfo; @@ -80,6 +76,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.text.ParseException; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.FutureTask; @@ -788,7 +785,7 @@ public void renameKey(String volumeName, String bucketName, } @Override - public OzoneKey getKeyDetails( + public OzoneKeyDetails getKeyDetails( String volumeName, String bucketName, String keyName) throws IOException { try { @@ -798,18 +795,24 @@ public OzoneKey getKeyDetails( builder.setPath(PATH_SEPARATOR + volumeName + PATH_SEPARATOR + bucketName + PATH_SEPARATOR + keyName); builder.setParameter(Header.OZONE_INFO_QUERY_TAG, - Header.OZONE_INFO_QUERY_KEY); + Header.OZONE_INFO_QUERY_KEY_DETAIL); HttpGet httpGet = new HttpGet(builder.build()); addOzoneHeaders(httpGet); HttpEntity response 
= executeHttpRequest(httpGet); - KeyInfo keyInfo = - KeyInfo.parse(EntityUtils.toString(response)); - OzoneKey key = new OzoneKey(volumeName, + KeyInfoDetails keyInfo = + KeyInfoDetails.parse(EntityUtils.toString(response)); + + List ozoneKeyLocations = new ArrayList<>(); + keyInfo.getKeyLocations().forEach((a) -> ozoneKeyLocations.add( + new OzoneKeyLocation(a.getContainerID(), a.getLocalID(), + a.getLength(), a.getOffset()))); + OzoneKeyDetails key = new OzoneKeyDetails(volumeName, bucketName, keyInfo.getKeyName(), keyInfo.getSize(), HddsClientUtils.formatDateTime(keyInfo.getCreatedOn()), - HddsClientUtils.formatDateTime(keyInfo.getModifiedOn())); + HddsClientUtils.formatDateTime(keyInfo.getModifiedOn()), + ozoneKeyLocations); EntityUtils.consume(response); return key; } catch (URISyntaxException | ParseException e) { diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index fc705144532..330eba878bf 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -24,19 +24,18 @@ import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdds.scm.client.HddsClientUtils; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.ipc.Client; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.client.BucketArgs; -import org.apache.hadoop.ozone.client.OzoneBucket; -import org.apache.hadoop.ozone.client.OzoneKey; +import org.apache.hadoop.ozone.client.*; import org.apache.hadoop.hdds.client.OzoneQuota; -import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.ozone.client.VolumeArgs; +import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.io.ChunkGroupInputStream; import org.apache.hadoop.ozone.client.io.ChunkGroupOutputStream; import org.apache.hadoop.ozone.client.io.LengthInputStream; @@ -71,10 +70,7 @@ import java.io.IOException; import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.UUID; +import java.util.*; import java.util.stream.Collectors; /** @@ -97,6 +93,7 @@ private final UserGroupInformation ugi; private final OzoneAcl.OzoneACLRights userRights; private final OzoneAcl.OzoneACLRights groupRights; + private final RetryPolicy retryPolicy; /** * Creates RpcClient instance with the given configuration. 
@@ -137,6 +134,7 @@ public RpcClient(Configuration conf) throws IOException { Client.getRpcTimeout(conf))); this.xceiverClientManager = new XceiverClientManager(conf); + retryPolicy = OzoneClientUtils.createRetryPolicy(conf); int configuredChunkSize = conf.getInt( ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, @@ -469,6 +467,7 @@ public OzoneOutputStream createKey( .setRequestID(requestId) .setType(HddsProtos.ReplicationType.valueOf(type.toString())) .setFactor(HddsProtos.ReplicationFactor.valueOf(factor.getValue())) + .setRetryPolicy(retryPolicy) .build(); groupOutputStream.addPreallocateBlocks( openKey.getKeyInfo().getLatestVersionLocations(), @@ -493,8 +492,7 @@ public OzoneInputStream getKey( ChunkGroupInputStream.getFromOmKeyInfo( keyInfo, xceiverClientManager, storageContainerLocationClient, requestId); - return new OzoneInputStream( - (ChunkGroupInputStream)lengthInputStream.getWrappedStream()); + return new OzoneInputStream(lengthInputStream.getWrappedStream()); } @Override @@ -543,7 +541,7 @@ public void renameKey(String volumeName, String bucketName, } @Override - public OzoneKey getKeyDetails( + public OzoneKeyDetails getKeyDetails( String volumeName, String bucketName, String keyName) throws IOException { Preconditions.checkNotNull(volumeName); @@ -555,12 +553,18 @@ public OzoneKey getKeyDetails( .setKeyName(keyName) .build(); OmKeyInfo keyInfo = ozoneManagerClient.lookupKey(keyArgs); - return new OzoneKey(keyInfo.getVolumeName(), + + List ozoneKeyLocations = new ArrayList<>(); + keyInfo.getLatestVersionLocations().getBlocksLatestVersionOnly().forEach( + (a) -> ozoneKeyLocations.add(new OzoneKeyLocation(a.getContainerID(), + a.getLocalID(), a.getLength(), a.getOffset()))); + return new OzoneKeyDetails(keyInfo.getVolumeName(), keyInfo.getBucketName(), keyInfo.getKeyName(), keyInfo.getDataSize(), keyInfo.getCreationTime(), - keyInfo.getModificationTime()); + keyInfo.getModificationTime(), + ozoneKeyLocations); } @Override diff --git a/hadoop-ozone/common/pom.xml b/hadoop-ozone/common/pom.xml index 83d023e9fdb..37a2cb962bd 100644 --- a/hadoop-ozone/common/pom.xml +++ b/hadoop-ozone/common/pom.xml @@ -20,10 +20,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-ozone - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-ozone-common - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Ozone Common Apache Hadoop Ozone Common jar @@ -38,11 +38,43 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + + ${basedir}/src/main/resources + + ozone-version-info.properties + + false + + + ${basedir}/src/main/resources + + ozone-version-info.properties + + true + + org.apache.hadoop hadoop-maven-plugins + + version-info + generate-resources + + version-info + + + + ${basedir}/../ + + */src/main/java/**/*.java + */src/main/proto/*.proto + + + + compile-protoc @@ -85,4 +117,4 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> - \ No newline at end of file + diff --git a/hadoop-ozone/common/src/main/bin/ozone b/hadoop-ozone/common/src/main/bin/ozone index 9495eff0a9f..6bf8b012f2b 100755 --- a/hadoop-ozone/common/src/main/bin/ozone +++ b/hadoop-ozone/common/src/main/bin/ozone @@ -28,25 +28,25 @@ function hadoop_usage hadoop_add_option "--buildpaths" "attempt to add class files from build tree" hadoop_add_option "--daemon (start|status|stop)" "operate on a daemon" hadoop_add_option "--hostnames list[,of,host,names]" "hosts to use in worker mode" - hadoop_add_option "--loglevel level" "set the log4j level for this command" hadoop_add_option "--hosts filename" "list of hosts to use in worker mode" + 
hadoop_add_option "--loglevel level" "set the log4j level for this command" hadoop_add_option "--workers" "turn on worker mode" - hadoop_add_subcommand "classpath" client "prints the class path needed to get the hadoop jar and the required libraries" hadoop_add_subcommand "datanode" daemon "run a HDDS datanode" hadoop_add_subcommand "envvars" client "display computed Hadoop environment variables" hadoop_add_subcommand "freon" client "runs an ozone data generator" + hadoop_add_subcommand "fs" client "run a filesystem command on Ozone file system. Equivalent to 'hadoop fs'" + hadoop_add_subcommand "genconf" client "generate minimally required ozone configs and output to ozone-site.xml in specified path" hadoop_add_subcommand "genesis" client "runs a collection of ozone benchmarks to help with tuning." hadoop_add_subcommand "getozoneconf" client "get ozone config values from configuration" hadoop_add_subcommand "jmxget" admin "get JMX exported values from NameNode or DataNode." - hadoop_add_subcommand "om" daemon "Ozone Manager" - hadoop_add_subcommand "o3" client "command line interface for ozone" hadoop_add_subcommand "noz" client "ozone debug tool, convert ozone metadata into relational data" + hadoop_add_subcommand "om" daemon "Ozone Manager" hadoop_add_subcommand "scm" daemon "run the Storage Container Manager service" hadoop_add_subcommand "scmcli" client "run the CLI of the Storage Container Manager" + hadoop_add_subcommand "sh" client "command line interface for object store operations" hadoop_add_subcommand "version" client "print the version" - hadoop_add_subcommand "genconf" client "generate minimally required ozone configs and output to ozone-site.xml in specified path" hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" false } @@ -96,8 +96,10 @@ function ozonecmd_case om) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME=org.apache.hadoop.ozone.om.OzoneManager + HDFS_OM_OPTS="${HDFS_OM_OPTS} -Dlog4j.configurationFile=${HADOOP_CONF_DIR}/om-audit-log4j2.properties" + HADOOP_OPTS="${HADOOP_OPTS} ${HDFS_OM_OPTS}" ;; - oz) + sh | shell) HADOOP_CLASSNAME=org.apache.hadoop.ozone.web.ozShell.Shell ;; noz) @@ -109,11 +111,14 @@ function ozonecmd_case hadoop_debug "Appending HDFS_STORAGECONTAINERMANAGER_OPTS onto HADOOP_OPTS" HADOOP_OPTS="${HADOOP_OPTS} ${HDFS_STORAGECONTAINERMANAGER_OPTS}" ;; + fs) + HADOOP_CLASSNAME=org.apache.hadoop.fs.FsShell + ;; scmcli) - HADOOP_CLASSNAME=org.apache.hadoop.ozone.scm.cli.SCMCLI + HADOOP_CLASSNAME=org.apache.hadoop.hdds.scm.cli.SCMCLI ;; version) - HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo + HADOOP_CLASSNAME=org.apache.hadoop.ozone.util.OzoneVersionInfo ;; genconf) HADOOP_CLASSNAME=org.apache.hadoop.ozone.genconf.GenerateOzoneRequiredConfigurations @@ -138,11 +143,11 @@ fi HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}" # shellcheck disable=SC2034 HADOOP_NEW_CONFIG=true -if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then - # shellcheck source=./hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh - . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" +if [[ -f "${HADOOP_LIBEXEC_DIR}/ozone-config.sh" ]]; then + # shellcheck source=./hadoop-ozone/common/src/main/bin/ozone-config.sh + . "${HADOOP_LIBEXEC_DIR}/ozone-config.sh" else - echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1 + echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/ozone-config.sh." 
2>&1 exit 1 fi diff --git a/hadoop-ozone/common/src/main/bin/ozone-config.sh b/hadoop-ozone/common/src/main/bin/ozone-config.sh new file mode 100755 index 00000000000..d179a331ae9 --- /dev/null +++ b/hadoop-ozone/common/src/main/bin/ozone-config.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# included in all the ozone scripts with source command +# should not be executed directly + +function hadoop_subproject_init +{ + if [[ -z "${HADOOP_OZONE_ENV_PROCESSED}" ]]; then + if [[ -e "${HADOOP_CONF_DIR}/ozone-env.sh" ]]; then + . "${HADOOP_CONF_DIR}/ozone-env.sh" + export HADOOP_OZONE_ENV_PROCESSED=true + fi + fi + HADOOP_OZONE_HOME="${HADOOP_OZONE_HOME:-$HADOOP_HOME}" + +} + +if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then + _hd_this="${BASH_SOURCE-$0}" + HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hd_this}")" >/dev/null && pwd -P) +fi + +# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh + +if [[ -n "${HADOOP_COMMON_HOME}" ]] && + [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then + . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" +elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then + . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" +elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then + . "${HADOOP_HOME}/libexec/hadoop-config.sh" +else + echo "ERROR: Hadoop common not found." 2>&1 + exit 1 +fi + diff --git a/hadoop-ozone/common/src/main/bin/start-ozone.sh b/hadoop-ozone/common/src/main/bin/start-ozone.sh old mode 100644 new mode 100755 index 29c36743369..cfb54e03325 --- a/hadoop-ozone/common/src/main/bin/start-ozone.sh +++ b/hadoop-ozone/common/src/main/bin/start-ozone.sh @@ -39,11 +39,11 @@ fi HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}" # shellcheck disable=SC2034 HADOOP_NEW_CONFIG=true -if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then +if [[ -f "${HADOOP_LIBEXEC_DIR}/ozone-config.sh" ]]; then # shellcheck disable=SC1090 - . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" + . "${HADOOP_LIBEXEC_DIR}/ozone-config.sh" else - echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1 + echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/ozone-config.sh." 
2>&1 exit 1 fi @@ -83,28 +83,6 @@ if [[ "${OZONE_ENABLED}" != "true" ]]; then exit -1 fi -#--------------------------------------------------------- -# Start hdfs before starting ozone daemons - -#--------------------------------------------------------- -# namenodes - -NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes 2>/dev/null) - -if [[ -z "${NAMENODES}" ]]; then - NAMENODES=$(hostname) -fi - -echo "Starting namenodes on [${NAMENODES}]" -hadoop_uservar_su hdfs namenode "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --workers \ - --config "${HADOOP_CONF_DIR}" \ - --hostnames "${NAMENODES}" \ - --daemon start \ - namenode ${nameStartOpt} - -HADOOP_JUMBO_RETCOUNTER=$? - #--------------------------------------------------------- # datanodes (using default workers file) @@ -116,68 +94,6 @@ hadoop_uservar_su hdfs datanode "${HADOOP_HDFS_HOME}/bin/ozone" \ datanode ${dataStartOpt} (( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? )) -#--------------------------------------------------------- -# secondary namenodes (if any) - -SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null) - -if [[ -n "${SECONDARY_NAMENODES}" ]]; then - - if [[ "${NAMENODES}" =~ , ]]; then - - hadoop_error "WARNING: Highly available NameNode is configured." - hadoop_error "WARNING: Skipping SecondaryNameNode." - - else - - if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then - SECONDARY_NAMENODES=$(hostname) - fi - - echo "Starting secondary namenodes [${SECONDARY_NAMENODES}]" - - hadoop_uservar_su hdfs secondarynamenode "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --workers \ - --config "${HADOOP_CONF_DIR}" \ - --hostnames "${SECONDARY_NAMENODES}" \ - --daemon start \ - secondarynamenode - (( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? )) - fi -fi - -#--------------------------------------------------------- -# quorumjournal nodes (if any) - -JOURNAL_NODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -journalNodes 2>&-) - -if [[ "${#JOURNAL_NODES}" != 0 ]]; then - echo "Starting journal nodes [${JOURNAL_NODES}]" - - hadoop_uservar_su hdfs journalnode "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --workers \ - --config "${HADOOP_CONF_DIR}" \ - --hostnames "${JOURNAL_NODES}" \ - --daemon start \ - journalnode - (( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? )) -fi - -#--------------------------------------------------------- -# ZK Failover controllers, if auto-HA is enabled -AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]') -if [[ "${AUTOHA_ENABLED}" = "true" ]]; then - echo "Starting ZK Failover Controllers on NN hosts [${NAMENODES}]" - - hadoop_uservar_su hdfs zkfc "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --workers \ - --config "${HADOOP_CONF_DIR}" \ - --hostnames "${NAMENODES}" \ - --daemon start \ - zkfc - (( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? 
)) -fi - #--------------------------------------------------------- # Ozone ozonemanager nodes OM_NODES=$("${HADOOP_HDFS_HOME}/bin/ozone" getozoneconf -ozonemanagers 2>/dev/null) diff --git a/hadoop-ozone/common/src/main/bin/stop-ozone.sh b/hadoop-ozone/common/src/main/bin/stop-ozone.sh old mode 100644 new mode 100755 index 5f5faf0153a..97e1df4df36 --- a/hadoop-ozone/common/src/main/bin/stop-ozone.sh +++ b/hadoop-ozone/common/src/main/bin/stop-ozone.sh @@ -39,11 +39,11 @@ fi HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}" # shellcheck disable=SC2034 HADOOP_NEW_CONFIG=true -if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then +if [[ -f "${HADOOP_LIBEXEC_DIR}/ozone-config.sh" ]]; then # shellcheck disable=SC1090 - . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" + . "${HADOOP_LIBEXEC_DIR}/ozone-config.sh" else - echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1 + echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/ozone-config.sh." 2>&1 exit 1 fi @@ -64,13 +64,15 @@ if [[ "${OZONE_ENABLED}" != "true" ]]; then fi #--------------------------------------------------------- -# Start hdfs before starting ozone daemons -if [[ -f "${bin}/stop-dfs.sh" ]]; then - "${bin}/stop-dfs.sh" -else - echo "ERROR: Cannot execute ${bin}/stop-dfs.sh." 2>&1 - exit 1 -fi +# datanodes (using default workers file) + +echo "Stopping datanodes" + +hadoop_uservar_su ozone datanode "${HADOOP_HDFS_HOME}/bin/ozone" \ + --workers \ + --config "${HADOOP_CONF_DIR}" \ + --daemon stop \ + datanode #--------------------------------------------------------- # Ozone Manager nodes diff --git a/hadoop-ozone/common/src/main/conf/om-audit-log4j2.properties b/hadoop-ozone/common/src/main/conf/om-audit-log4j2.properties new file mode 100644 index 00000000000..7d097a081a2 --- /dev/null +++ b/hadoop-ozone/common/src/main/conf/om-audit-log4j2.properties @@ -0,0 +1,86 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with this +# work for additional information regarding copyright ownership. The ASF +# licenses this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +#
+# http://www.apache.org/licenses/LICENSE-2.0 +#
+# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS,WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# +name=PropertiesConfig + +# Checks for config change periodically and reloads +monitorInterval=30 + +filter=read,write +# filter.read.onMatch=DENY avoids logging all READ events +# filter.read.onMatch=ACCEPT permits logging all READ events +# The above two settings ignore the log levels in configuration +# filter.read.onMatch=NEUTRAL permits logging of only those READ events +# which are attempted at log level equal or greater than log level specified +# in the configuration +filter.read.type=MarkerFilter +filter.read.marker=READ +filter.read.onMatch=DENY +filter.read.onMismatch=NEUTRAL + +# filter.write.onMatch=DENY avoids logging all WRITE events +# filter.write.onMatch=ACCEPT permits logging all WRITE events +# The above two settings ignore the log levels in configuration +# filter.write.onMatch=NEUTRAL permits logging of only those WRITE events +# which are attempted at log level equal or greater than log level specified +# in the configuration +filter.write.type=MarkerFilter +filter.write.marker=WRITE +filter.write.onMatch=NEUTRAL +filter.write.onMismatch=NEUTRAL + +# Log Levels are organized from most specific to least: +# OFF (most specific, no logging) +# FATAL (most specific, little data) +# ERROR +# WARN +# INFO +# DEBUG +# TRACE (least specific, a lot of data) +# ALL (least specific, all data) + +appenders=console, rolling +appender.console.type=Console +appender.console.name=STDOUT +appender.console.layout.type=PatternLayout +appender.console.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n + +#Rolling File Appender with size & time thresholds. +#Rolling is triggered when either threshold is breached. +#The rolled over file is compressed by default +#Time interval is specified in seconds 86400s=1 day +appender.rolling.type=RollingFile +appender.rolling.name=RollingFile +appender.rolling.fileName =${sys:hadoop.log.dir}/om-audit-${hostName}.log +appender.rolling.filePattern=${sys:hadoop.log.dir}/om-audit-${hostName}-%d{yyyy-MM-dd-HH-mm-ss}-%i.log.gz +appender.rolling.layout.type=PatternLayout +appender.rolling.layout.pattern=%d{DEFAULT} | %-5level | %c{1} | %msg | %throwable{3} %n +appender.rolling.policies.type=Policies +appender.rolling.policies.time.type=TimeBasedTriggeringPolicy +appender.rolling.policies.time.interval=86400 +appender.rolling.policies.size.type=SizeBasedTriggeringPolicy +appender.rolling.policies.size.size=64MB + +loggers=audit +logger.audit.type=AsyncLogger +logger.audit.name=OMAudit +logger.audit.level=INFO +logger.audit.appenderRefs=rolling +logger.audit.appenderRef.file.ref=RollingFile + +rootLogger.level=INFO +rootLogger.appenderRefs=stdout +rootLogger.appenderRef.stdout.ref=STDOUT diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/audit/OMAction.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/audit/OMAction.java index d780ea2c93b..a0ae455303e 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/audit/OMAction.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/audit/OMAction.java @@ -18,24 +18,34 @@ package org.apache.hadoop.ozone.audit; /** - * Enum to define OM Action types for Audit. + * Enum to define Audit Action types for OzoneManager. 
*/ public enum OMAction implements AuditAction { + // WRITE Actions + ALLOCATE_BLOCK("ALLOCATE_BLOCK"), + ALLOCATE_KEY("ALLOCATE_KEY"), + COMMIT_KEY("COMMIT_KEY"), CREATE_VOLUME("CREATE_VOLUME"), CREATE_BUCKET("CREATE_BUCKET"), CREATE_KEY("CREATE_KEY"), - READ_VOLUME("READ_VOLUME"), - READ_BUCKET("READ_BUCKET"), - READ_KEY("READ_BUCKET"), - UPDATE_VOLUME("UPDATE_VOLUME"), - UPDATE_BUCKET("UPDATE_BUCKET"), - UPDATE_KEY("UPDATE_KEY"), DELETE_VOLUME("DELETE_VOLUME"), DELETE_BUCKET("DELETE_BUCKET"), DELETE_KEY("DELETE_KEY"), + RENAME_KEY("RENAME_KEY"), SET_OWNER("SET_OWNER"), - SET_QUOTA("SET_QUOTA"); + SET_QUOTA("SET_QUOTA"), + UPDATE_VOLUME("UPDATE_VOLUME"), + UPDATE_BUCKET("UPDATE_BUCKET"), + UPDATE_KEY("UPDATE_KEY"), + // READ Actions + CHECK_VOLUME_ACCESS("CHECK_VOLUME_ACCESS"), + LIST_BUCKETS("LIST_BUCKETS"), + LIST_VOLUMES("LIST_VOLUMES"), + LIST_KEYS("LIST_KEYS"), + READ_VOLUME("READ_VOLUME"), + READ_BUCKET("READ_BUCKET"), + READ_KEY("READ_BUCKET"); private String action; diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/headers/Header.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/headers/Header.java index ebfc0a9bd3e..3e404937061 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/headers/Header.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/headers/Header.java @@ -44,6 +44,7 @@ public static final String OZONE_INFO_QUERY_VOLUME = "volume"; public static final String OZONE_INFO_QUERY_BUCKET = "bucket"; public static final String OZONE_INFO_QUERY_KEY = "key"; + public static final String OZONE_INFO_QUERY_KEY_DETAIL = "key-detail"; public static final String OZONE_REQUEST_ID = "x-ozone-request-id"; public static final String OZONE_SERVER_NAME = "x-ozone-server-name"; diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/response/KeyInfoDetails.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/response/KeyInfoDetails.java new file mode 100644 index 00000000000..98506f06a89 --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/response/KeyInfoDetails.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client.rest.response; + +import java.io.IOException; +import java.util.List; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; + +/** + * KeyInfoDetails class is used for parsing json response + * when KeyInfoDetails Call is made. 
+ */ +public class KeyInfoDetails extends KeyInfo { + + private static final ObjectReader READER = + new ObjectMapper().readerFor(KeyInfoDetails.class); + + /** + * a list of Map which maps localID to ContainerID + * to specify replica locations. + */ + private List keyLocations; + + /** + * Constructor needed for json serialization. + */ + public KeyInfoDetails() { + } + + /** + * Set details of key location. + * + * @param locations - details of key location + */ + public void setKeyLocation(List locations) { + this.keyLocations = locations; + } + + /** + * Returns details of key location. + * + * @return volumeName + */ + public List getKeyLocations() { + return keyLocations; + } + + /** + * Parse a string to return KeyInfoDetails Object. + * + * @param jsonString Json String + * @return KeyInfoDetails + * @throws IOException + */ + public static KeyInfoDetails parse(String jsonString) throws IOException { + return READER.readValue(jsonString); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + KeyInfoDetails that = (KeyInfoDetails) o; + + return new EqualsBuilder() + .append(getVersion(), that.getVersion()) + .append(getKeyName(), that.getKeyName()) + .append(keyLocations, that.keyLocations) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(21, 33) + .append(getVersion()) + .append(getKeyName()) + .append(keyLocations) + .toHashCode(); + } +} + diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/response/KeyLocation.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/response/KeyLocation.java new file mode 100644 index 00000000000..e5f46980ab1 --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/client/rest/response/KeyLocation.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.client.rest.response; + +/** + * KeyLocation class is used used for parsing json response + * when KeyInfoDetails Call is made. + */ +public class KeyLocation { + /** + * Which container this key stored. + */ + private long containerID; + /** + * Which block this key stored inside a container. + */ + private long localID; + /** + * Data length of this key replica. + */ + private long length; + /** + * Offset of this key. + */ + private long offset; + + /** + * Empty constructor for Json serialization. + */ + public KeyLocation() { + + } + + /** + * Constructs KeyLocation. 
+ */ + public KeyLocation(long containerID, long localID, + long length, long offset) { + this.containerID = containerID; + this.localID = localID; + this.length = length; + this.offset = offset; + } + + /** + * Returns the containerID of this Key. + */ + public long getContainerID() { + return containerID; + } + + /** + * Returns the localID of this Key. + */ + public long getLocalID() { + return localID; + } + + /** + * Returns the length of this Key. + */ + public long getLength() { + return length; + } + + /** + * Returns the offset of this Key. + */ + public long getOffset() { + return offset; + } + +} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java index 6aabfef6b45..1bd258e7429 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketArgs.java @@ -17,13 +17,17 @@ */ package org.apache.hadoop.ozone.om.helpers; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import com.google.common.base.Preconditions; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; import org.apache.hadoop.ozone.OzoneAcl; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.audit.Auditable; import org.apache.hadoop.ozone.protocol.proto .OzoneManagerProtocolProtos.BucketArgs; import org.apache.hadoop.ozone.protocolPB.OMPBHelper; @@ -31,7 +35,7 @@ /** * A class that encapsulates Bucket Arguments. */ -public final class OmBucketArgs { +public final class OmBucketArgs implements Auditable { /** * Name of the volume in which the bucket belongs to. */ @@ -135,6 +139,25 @@ public static Builder newBuilder() { return new Builder(); } + @Override + public Map toAuditMap() { + Map auditMap = new LinkedHashMap<>(); + auditMap.put(OzoneConsts.VOLUME, this.volumeName); + auditMap.put(OzoneConsts.BUCKET, this.bucketName); + if(this.addAcls != null){ + auditMap.put(OzoneConsts.ADD_ACLS, this.addAcls.toString()); + } + if(this.removeAcls != null){ + auditMap.put(OzoneConsts.REMOVE_ACLS, this.removeAcls.toString()); + } + auditMap.put(OzoneConsts.IS_VERSION_ENABLED, + String.valueOf(this.isVersionEnabled)); + if(this.storageType != null){ + auditMap.put(OzoneConsts.STORAGE_TYPE, this.storageType.name()); + } + return auditMap; + } + /** * Builder for OmBucketArgs. 
*/ diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java index bf5abddc431..5199ce3b58f 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmBucketInfo.java @@ -21,18 +21,22 @@ import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; import org.apache.hadoop.ozone.OzoneAcl; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.audit.Auditable; import org.apache.hadoop.ozone.protocol.proto .OzoneManagerProtocolProtos.BucketInfo; import org.apache.hadoop.ozone.protocolPB.OMPBHelper; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; /** * A class that encapsulates Bucket Info. */ -public final class OmBucketInfo { +public final class OmBucketInfo implements Auditable { /** * Name of the volume in which the bucket belongs to. */ @@ -137,6 +141,21 @@ public static Builder newBuilder() { return new Builder(); } + @Override + public Map toAuditMap() { + Map auditMap = new LinkedHashMap<>(); + auditMap.put(OzoneConsts.VOLUME, this.volumeName); + auditMap.put(OzoneConsts.BUCKET, this.bucketName); + auditMap.put(OzoneConsts.ACLS, + (this.acls != null) ? this.acls.toString() : null); + auditMap.put(OzoneConsts.IS_VERSION_ENABLED, + String.valueOf(this.isVersionEnabled)); + auditMap.put(OzoneConsts.STORAGE_TYPE, + (this.storageType != null) ? this.storageType.name() : null); + auditMap.put(OzoneConsts.CREATION_TIME, String.valueOf(this.creationTime)); + return auditMap; + } + /** * Builder for OmBucketInfo. */ @@ -148,7 +167,7 @@ public static Builder newBuilder() { private StorageType storageType; private long creationTime; - Builder() { + public Builder() { //Default values this.acls = new LinkedList<>(); this.isVersionEnabled = false; diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyArgs.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyArgs.java index 1f8ed5fb1e7..e56ad7f161b 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyArgs.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyArgs.java @@ -16,29 +16,40 @@ * limitations under the License. */ package org.apache.hadoop.ozone.om.helpers; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.audit.Auditable; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; /** * Args for key. Client use this to specify key's attributes on key creation * (putKey()). 
*/ -public final class OmKeyArgs { +public final class OmKeyArgs implements Auditable { private final String volumeName; private final String bucketName; private final String keyName; private long dataSize; private final ReplicationType type; private final ReplicationFactor factor; + private List locationInfoList; private OmKeyArgs(String volumeName, String bucketName, String keyName, - long dataSize, ReplicationType type, ReplicationFactor factor) { + long dataSize, ReplicationType type, ReplicationFactor factor, + List locationInfoList) { this.volumeName = volumeName; this.bucketName = bucketName; this.keyName = keyName; this.dataSize = dataSize; this.type = type; this.factor = factor; + this.locationInfoList = locationInfoList; } public ReplicationType getType() { @@ -69,6 +80,38 @@ public void setDataSize(long size) { dataSize = size; } + public void setLocationInfoList(List locationInfoList) { + this.locationInfoList = locationInfoList; + } + + public List getLocationInfoList() { + return locationInfoList; + } + + @Override + public Map toAuditMap() { + Map auditMap = new LinkedHashMap<>(); + auditMap.put(OzoneConsts.VOLUME, this.volumeName); + auditMap.put(OzoneConsts.BUCKET, this.bucketName); + auditMap.put(OzoneConsts.KEY, this.keyName); + auditMap.put(OzoneConsts.DATA_SIZE, String.valueOf(this.dataSize)); + auditMap.put(OzoneConsts.REPLICATION_TYPE, + (this.type != null) ? this.type.name() : null); + auditMap.put(OzoneConsts.REPLICATION_FACTOR, + (this.factor != null) ? this.factor.name() : null); + auditMap.put(OzoneConsts.KEY_LOCATION_INFO, + (this.locationInfoList != null) ? locationInfoList.toString() : null); + return auditMap; + } + + @VisibleForTesting + public void addLocationInfo(OmKeyLocationInfo locationInfo) { + if (this.locationInfoList == null) { + locationInfoList = new ArrayList<>(); + } + locationInfoList.add(locationInfo); + } + /** * Builder class of OmKeyArgs. */ @@ -79,7 +122,7 @@ public void setDataSize(long size) { private long dataSize; private ReplicationType type; private ReplicationFactor factor; - + private List locationInfoList; public Builder setVolumeName(String volume) { this.volumeName = volume; @@ -111,9 +154,14 @@ public Builder setFactor(ReplicationFactor replicationFactor) { return this; } + public Builder setLocationInfoList(List locationInfos) { + this.locationInfoList = locationInfos; + return this; + } + public OmKeyArgs build() { - return new OmKeyArgs(volumeName, bucketName, keyName, dataSize, - type, factor); + return new OmKeyArgs(volumeName, bucketName, keyName, dataSize, type, + factor, locationInfoList); } } } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyInfo.java index 05c8d45fbf9..50f4b17508a 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyInfo.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyInfo.java @@ -101,8 +101,7 @@ public void setDataSize(long size) { this.dataSize = size; } - public synchronized OmKeyLocationInfoGroup getLatestVersionLocations() - throws IOException { + public synchronized OmKeyLocationInfoGroup getLatestVersionLocations() { return keyLocationVersions.size() == 0? null : keyLocationVersions.get(keyLocationVersions.size() - 1); } @@ -115,6 +114,33 @@ public void updateModifcationTime() { this.modificationTime = Time.monotonicNow(); } + /** + * updates the length of the each block in the list given. 
+ * This will be called when the key is being committed to OzoneManager. + * + * @param locationInfoList list of locationInfo + * @throws IOException + */ + public void updateLocationInfoList(List locationInfoList) { + long latestVersion = getLatestVersionLocations().getVersion(); + OmKeyLocationInfoGroup keyLocationInfoGroup = getLatestVersionLocations(); + List currentList = + keyLocationInfoGroup.getLocationList(); + List latestVersionList = + keyLocationInfoGroup.getBlocksLatestVersionOnly(); + // Updates the latest locationList in the latest version only with + // given locationInfoList here. + // TODO : The original allocated list and the updated list here may vary + // as the containers on the Datanode on which the blocks were pre allocated + // might get closed. The diff of blocks between these two lists here + // need to be garbage collected in case the ozone client dies. + currentList.removeAll(latestVersionList); + // set each of the locationInfo object to the latest version + locationInfoList.stream().forEach(omKeyLocationInfo -> omKeyLocationInfo + .setCreateVersion(latestVersion)); + currentList.addAll(locationInfoList); + } + /** * Append a set of blocks to the latest version. Note that these blocks are * part of the latest version, not a new version. diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyLocationInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyLocationInfo.java index 3f6666df814..79b3c82b2dc 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyLocationInfo.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyLocationInfo.java @@ -27,7 +27,7 @@ private final BlockID blockID; private final boolean shouldCreateContainer; // the id of this subkey in all the subkeys. 
- private final long length; + private long length; private final long offset; // the version number indicating when this block was added private long createVersion; @@ -68,6 +68,10 @@ public long getLength() { return length; } + public void setLength(long length) { + this.length = length; + } + public long getOffset() { return offset; } @@ -126,4 +130,14 @@ public static OmKeyLocationInfo getFromProtobuf(KeyLocation keyLocation) { info.setCreateVersion(keyLocation.getCreateVersion()); return info; } + + @Override + public String toString() { + return "{blockID={containerID=" + blockID.getContainerID() + + ", localID=" + blockID.getLocalID() + "}" + + ", shouldCreateContainer=" + shouldCreateContainer + + ", length=" + length + + ", offset=" + offset + + ", createVersion=" + createVersion + '}'; + } } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmVolumeArgs.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmVolumeArgs.java index c8b59b682db..165d9aba783 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmVolumeArgs.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmVolumeArgs.java @@ -18,6 +18,8 @@ package org.apache.hadoop.ozone.om.helpers; import com.google.common.base.Preconditions; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.audit.Auditable; import org.apache.hadoop.ozone.protocol.proto .OzoneManagerProtocolProtos.OzoneAclInfo; import org.apache.hadoop.ozone.protocol.proto @@ -26,6 +28,7 @@ import java.io.IOException; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -35,7 +38,7 @@ /** * A class that encapsulates the OmVolumeArgs Args. */ -public final class OmVolumeArgs { +public final class OmVolumeArgs implements Auditable{ private final String adminName; private final String ownerName; private final String volume; @@ -122,6 +125,17 @@ public static Builder newBuilder() { return new Builder(); } + @Override + public Map toAuditMap() { + Map auditMap = new LinkedHashMap<>(); + auditMap.put(OzoneConsts.ADMIN, this.adminName); + auditMap.put(OzoneConsts.OWNER, this.ownerName); + auditMap.put(OzoneConsts.VOLUME, this.volume); + auditMap.put(OzoneConsts.CREATION_TIME, String.valueOf(this.creationTime)); + auditMap.put(OzoneConsts.QUOTA_IN_BYTES, String.valueOf(this.quotaInBytes)); + return auditMap; + } + /** * Builder for OmVolumeArgs. */ @@ -137,7 +151,7 @@ public static Builder newBuilder() { /** * Constructs a builder. */ - Builder() { + public Builder() { keyValueMap = new HashMap<>(); aclMap = new OmOzoneAclMap(); } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OpenKeySession.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OpenKeySession.java index bc364e665ff..11ee622494d 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OpenKeySession.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OpenKeySession.java @@ -23,14 +23,14 @@ * that servers can recognize this client, and thus know how to close the key. */ public class OpenKeySession { - private final int id; + private final long id; private final OmKeyInfo keyInfo; // the version of the key when it is being opened in this session. // a block that has a create version equals to open version means it will // be committed only when this open session is closed. 
private long openVersion; - public OpenKeySession(int id, OmKeyInfo info, long version) { + public OpenKeySession(long id, OmKeyInfo info, long version) { this.id = id; this.keyInfo = info; this.openVersion = version; @@ -44,7 +44,7 @@ public OmKeyInfo getKeyInfo() { return keyInfo; } - public int getId() { + public long getId() { return id; } } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java index b7a099d0286..edb260a1084 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java @@ -148,7 +148,7 @@ OmBucketInfo getBucketInfo(String volumeName, String bucketName) * @param clientID the client identification * @throws IOException */ - void commitKey(OmKeyArgs args, int clientID) throws IOException; + void commitKey(OmKeyArgs args, long clientID) throws IOException; /** * Allocate a new block, it is assumed that the client is having an open key @@ -159,7 +159,7 @@ OmBucketInfo getBucketInfo(String volumeName, String bucketName) * @return an allocated block * @throws IOException */ - OmKeyLocationInfo allocateBlock(OmKeyArgs args, int clientID) + OmKeyLocationInfo allocateBlock(OmKeyArgs args, long clientID) throws IOException; /** @@ -172,9 +172,10 @@ OmKeyLocationInfo allocateBlock(OmKeyArgs args, int clientID) OmKeyInfo lookupKey(OmKeyArgs args) throws IOException; /** - * Rename an existing key within a bucket + * Rename an existing key within a bucket. * @param args the args of the key. * @param toKeyName New name to be used for the Key + * @throws IOException */ void renameKey(OmKeyArgs args, String toKeyName) throws IOException; @@ -214,7 +215,7 @@ OmKeyLocationInfo allocateBlock(OmKeyArgs args, int clientID) * @throws IOException */ List listBuckets(String volumeName, - String startBucketName, String bucketPrefix, int maxNumOfBuckets) + String startBucketName, String bucketPrefix, int maxNumOfBuckets) throws IOException; /** @@ -239,7 +240,7 @@ OmKeyLocationInfo allocateBlock(OmKeyArgs args, int clientID) * @throws IOException */ List listKeys(String volumeName, - String bucketName, String startKeyName, String keyPrefix, int maxKeys) + String bucketName, String startKeyName, String keyPrefix, int maxKeys) throws IOException; /** diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java index 37151fb659e..c0829fabb98 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.ozone.om.protocolPB; +import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.protobuf.RpcController; @@ -487,7 +488,7 @@ public void setBucketProperty(OmBucketArgs args) */ @Override public List listBuckets(String volumeName, - String startKey, String prefix, int count) throws IOException { + String startKey, String prefix, int count) throws IOException { List buckets = new ArrayList<>(); 
ListBucketsRequest.Builder reqBuilder = ListBucketsRequest.newBuilder(); reqBuilder.setVolumeName(volumeName); @@ -553,7 +554,7 @@ public OpenKeySession openKey(OmKeyArgs args) throws IOException { } @Override - public OmKeyLocationInfo allocateBlock(OmKeyArgs args, int clientID) + public OmKeyLocationInfo allocateBlock(OmKeyArgs args, long clientID) throws IOException { AllocateBlockRequest.Builder req = AllocateBlockRequest.newBuilder(); KeyArgs keyArgs = KeyArgs.newBuilder() @@ -578,14 +579,19 @@ public OmKeyLocationInfo allocateBlock(OmKeyArgs args, int clientID) } @Override - public void commitKey(OmKeyArgs args, int clientID) + public void commitKey(OmKeyArgs args, long clientID) throws IOException { CommitKeyRequest.Builder req = CommitKeyRequest.newBuilder(); + List locationInfoList = args.getLocationInfoList(); + Preconditions.checkNotNull(locationInfoList); KeyArgs keyArgs = KeyArgs.newBuilder() .setVolumeName(args.getVolumeName()) .setBucketName(args.getBucketName()) .setKeyName(args.getKeyName()) - .setDataSize(args.getDataSize()).build(); + .setDataSize(args.getDataSize()) + .addAllKeyLocations( + locationInfoList.stream().map(OmKeyLocationInfo::getProtobuf) + .collect(Collectors.toList())).build(); req.setKeyArgs(keyArgs); req.setClientID(clientID); @@ -702,7 +708,7 @@ public void deleteBucket(String volume, String bucket) throws IOException { */ @Override public List listKeys(String volumeName, String bucketName, - String startKey, String prefix, int maxKeys) throws IOException { + String startKey, String prefix, int maxKeys) throws IOException { List keys = new ArrayList<>(); ListKeysRequest.Builder reqBuilder = ListKeysRequest.newBuilder(); reqBuilder.setVolumeName(volumeName); diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/util/OzoneVersionInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/util/OzoneVersionInfo.java new file mode 100644 index 00000000000..d4767482084 --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/util/OzoneVersionInfo.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.util; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.ClassUtil; +import org.apache.hadoop.util.ThreadUtil; +import org.apache.hadoop.utils.HddsVersionInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + +/** + * This class returns build information about Hadoop components. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Stable +public class OzoneVersionInfo { + private static final Logger LOG = LoggerFactory.getLogger(OzoneVersionInfo.class); + + private Properties info; + + protected OzoneVersionInfo(String component) { + info = new Properties(); + String versionInfoFile = component + "-version-info.properties"; + InputStream is = null; + try { + is = ThreadUtil.getResourceAsStream(OzoneVersionInfo.class.getClassLoader(), + versionInfoFile); + info.load(is); + } catch (IOException ex) { + LoggerFactory.getLogger(getClass()).warn("Could not read '" + + versionInfoFile + "', " + ex.toString(), ex); + } finally { + IOUtils.closeStream(is); + } + } + + protected String _getVersion() { + return info.getProperty("version", "Unknown"); + } + + protected String _getRelease() { + return info.getProperty("release", "Unknown"); + } + + protected String _getRevision() { + return info.getProperty("revision", "Unknown"); + } + + protected String _getBranch() { + return info.getProperty("branch", "Unknown"); + } + + protected String _getDate() { + return info.getProperty("date", "Unknown"); + } + + protected String _getUser() { + return info.getProperty("user", "Unknown"); + } + + protected String _getUrl() { + return info.getProperty("url", "Unknown"); + } + + protected String _getSrcChecksum() { + return info.getProperty("srcChecksum", "Unknown"); + } + + protected String _getBuildVersion(){ + return _getVersion() + + " from " + _getRevision() + + " by " + _getUser() + + " source checksum " + _getSrcChecksum(); + } + + protected String _getProtocVersion() { + return info.getProperty("protocVersion", "Unknown"); + } + + private static OzoneVersionInfo OZONE_VERSION_INFO = new OzoneVersionInfo("ozone"); + /** + * Get the Ozone version. + * @return the Ozone version string, eg. "0.6.3-dev" + */ + public static String getVersion() { + return OZONE_VERSION_INFO._getVersion(); + } + + /** + * Get the Ozone release name. + * @return the Ozone release string, eg. "Acadia" + */ + public static String getRelease() { + return OZONE_VERSION_INFO._getRelease(); + } + + /** + * Get the Git commit hash of the repository when compiled. + * @return the commit hash, eg. "18f64065d5db6208daf50b02c1b5ed4ee3ce547a" + */ + public static String getRevision() { + return OZONE_VERSION_INFO._getRevision(); + } + + /** + * Get the branch on which this originated. + * @return The branch name, e.g. "trunk" or "branches/branch-0.20" + */ + public static String getBranch() { + return OZONE_VERSION_INFO._getBranch(); + } + + /** + * The date that Ozone was compiled. + * @return the compilation date in unix date format + */ + public static String getDate() { + return OZONE_VERSION_INFO._getDate(); + } + + /** + * The user that compiled Ozone. + * @return the username of the user + */ + public static String getUser() { + return OZONE_VERSION_INFO._getUser(); + } + + /** + * Get the URL for the Ozone repository. + * @return the URL of the Ozone repository + */ + public static String getUrl() { + return OZONE_VERSION_INFO._getUrl(); + } + + /** + * Get the checksum of the source files from which Ozone was built. + * @return the checksum of the source files + */ + public static String getSrcChecksum() { + return OZONE_VERSION_INFO._getSrcChecksum(); + } + + /** + * Returns the buildVersion which includes version, + * revision, user and date. 
+ * @return the buildVersion + */ + public static String getBuildVersion(){ + return OZONE_VERSION_INFO._getBuildVersion(); + } + + /** + * Returns the protoc version used for the build. + * @return the protoc version + */ + public static String getProtocVersion(){ + return OZONE_VERSION_INFO._getProtocVersion(); + } + + public static void main(String[] args) { + System.out.println( + " ////////////// \n" + + " //////////////////// \n" + + " //////// //////////////// \n" + + " ////// //////////////// \n" + + " ///// //////////////// / \n" + + " ///// //////// /// \n" + + " //// //////// ///// \n" + + " ///// //////////////// \n" + + " ///// //////////////// // \n" + + " //// /////////////// ///// \n" + + " ///// /////////////// //// \n" + + " ///// ////// ///// \n" + + " ////// ////// ///// \n" + + " /////////// //////// \n" + + " ////// //////////// \n" + + " /// ////////// \n" + + " / "+ getVersion() + "("+ getRelease() +")\n"); + System.out.println("Source code repository " + getUrl() + " -r " + + getRevision()); + System.out.println("Compiled by " + getUser() + " on " + getDate()); + System.out.println("Compiled with protoc " + getProtocVersion()); + System.out.println("From source with checksum " + getSrcChecksum() + "\n"); + LOG.debug("This command was run using " + + ClassUtil.findContainingJar(OzoneVersionInfo.class)); + HddsVersionInfo.main(args); + } +} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/response/KeyInfoDetails.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/response/KeyInfoDetails.java new file mode 100644 index 00000000000..7f2ba098d79 --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/response/KeyInfoDetails.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.web.response; + +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; + +import java.util.List; + +/** + * Represents an Ozone key Object with detail information of location. + */ +public class KeyInfoDetails extends KeyInfo { + /** + * a list of Map which maps localID to ContainerID + * to specify replica locations. + */ + private List keyLocations; + + /** + * Set details of key location. + * + * @param keyLocations - details of key location + */ + public void setKeyLocations(List keyLocations) { + this.keyLocations = keyLocations; + } + + /** + * Returns details of key location. 
+ * + * @return volumeName + */ + public List getKeyLocations() { + return keyLocations; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + KeyInfoDetails that = (KeyInfoDetails) o; + + return new EqualsBuilder() + .append(getVersion(), that.getVersion()) + .append(getKeyName(), that.getKeyName()) + .append(keyLocations, that.getKeyLocations()) + .isEquals(); + } + + @Override + public int hashCode() { + return new HashCodeBuilder(17, 37) + .append(getVersion()) + .append(getKeyName()) + .append(keyLocations) + .toHashCode(); + } +} diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/response/KeyLocation.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/response/KeyLocation.java new file mode 100644 index 00000000000..d03eff74753 --- /dev/null +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/response/KeyLocation.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.web.response; + +/** + * KeyLocation class is used used for parsing json response + * when KeyInfoDetails Call is made. + */ +public class KeyLocation { + /** + * Which container this key stored. + */ + private final long containerID; + /** + * Which block this key stored inside a container. + */ + private final long localID; + /** + * Data length of this key replica. + */ + private final long length; + /** + * Offset of this key. + */ + private final long offset; + + /** + * Constructs KeyLocation. + */ + public KeyLocation(long containerID, long localID, + long length, long offset) { + this.containerID = containerID; + this.localID = localID; + this.length = length; + this.offset = offset; + } + + /** + * Returns the containerID of this Key. + */ + public long getContainerID() { + return containerID; + } + + /** + * Returns the localID of this Key. + */ + public long getLocalID() { + return localID; + } + + /** + * Returns the length of this Key. + */ + public long getLength() { + return length; + } + + /** + * Returns the offset of this Key. 
+ */ + public long getOffset() { + return offset; + } + +} diff --git a/hadoop-ozone/common/src/main/proto/OzoneManagerProtocol.proto b/hadoop-ozone/common/src/main/proto/OzoneManagerProtocol.proto index 36b1c83efb1..975c790f784 100644 --- a/hadoop-ozone/common/src/main/proto/OzoneManagerProtocol.proto +++ b/hadoop-ozone/common/src/main/proto/OzoneManagerProtocol.proto @@ -234,6 +234,7 @@ message KeyArgs { optional uint64 dataSize = 4; optional hadoop.hdds.ReplicationType type = 5; optional hadoop.hdds.ReplicationFactor factor = 6; + repeated KeyLocation keyLocations = 7; } message KeyLocation { @@ -272,7 +273,7 @@ message LocateKeyResponse { optional KeyInfo keyInfo = 2; // clients' followup request may carry this ID for stateful operations (similar // to a cookie). - optional uint32 ID = 3; + optional uint64 ID = 3; // TODO : allow specifiying a particular version to read. optional uint64 openVersion = 4; } @@ -318,17 +319,17 @@ message ListKeysResponse { message AllocateBlockRequest { required KeyArgs keyArgs = 1; - required uint32 clientID = 2; + required uint64 clientID = 2; } message AllocateBlockResponse { required Status status = 1; - required KeyLocation keyLocation = 2; + optional KeyLocation keyLocation = 2; } message CommitKeyRequest { required KeyArgs keyArgs = 1; - required uint32 clientID = 2; + required uint64 clientID = 2; } message CommitKeyResponse { diff --git a/hadoop-ozone/common/src/main/resources/ozone-version-info.properties b/hadoop-ozone/common/src/main/resources/ozone-version-info.properties new file mode 100644 index 00000000000..599f14d5eca --- /dev/null +++ b/hadoop-ozone/common/src/main/resources/ozone-version-info.properties @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +version=${declared.ozone.version} +release=${ozone.release} +revision=${version-info.scm.commit} +branch=${version-info.scm.branch} +user=${user.name} +date=${version-info.build.time} +url=${version-info.scm.uri} +srcChecksum=${version-info.source.md5} +protocVersion=${protobuf.version} diff --git a/hadoop-ozone/common/src/main/shellprofile.d/hadoop-ozone.sh b/hadoop-ozone/common/src/main/shellprofile.d/hadoop-ozone.sh index 2cd2bb317d2..99885abe226 100644 --- a/hadoop-ozone/common/src/main/shellprofile.d/hadoop-ozone.sh +++ b/hadoop-ozone/common/src/main/shellprofile.d/hadoop-ozone.sh @@ -40,5 +40,6 @@ function _ozone_hadoop_classpath hadoop_add_classpath "${HADOOP_HDFS_HOME}/${HDDS_DIR}"'/*' hadoop_add_classpath "${HADOOP_HDFS_HOME}/${OZONE_LIB_JARS_DIR}"'/*' hadoop_add_classpath "${HADOOP_HDFS_HOME}/${OZONE_DIR}"'/*' + hadoop_add_classpath "${HADOOP_HDFS_HOME}/${OZONEFS_DIR}"'/*' } diff --git a/hadoop-ozone/docs/README.md b/hadoop-ozone/docs/README.md index 426789fdbae..85817a79d06 100644 --- a/hadoop-ozone/docs/README.md +++ b/hadoop-ozone/docs/README.md @@ -1,15 +1,18 @@ # Hadoop Ozone/HDDS docs diff --git a/hadoop-ozone/docs/archetypes/default.md b/hadoop-ozone/docs/archetypes/default.md index e67e68a7ef6..f4cc9998dc6 100644 --- a/hadoop-ozone/docs/archetypes/default.md +++ b/hadoop-ozone/docs/archetypes/default.md @@ -3,15 +3,18 @@ title: "{{ replace .Name "-" " " | title }}" menu: main --- diff --git a/hadoop-ozone/acceptance-test/dev-support/bin/robot-all.sh b/hadoop-ozone/docs/config.yaml old mode 100755 new mode 100644 similarity index 62% rename from hadoop-ozone/acceptance-test/dev-support/bin/robot-all.sh rename to hadoop-ozone/docs/config.yaml index ee9c6b80a52..e86b59970cd --- a/hadoop-ozone/acceptance-test/dev-support/bin/robot-all.sh +++ b/hadoop-ozone/docs/config.yaml @@ -1,4 +1,3 @@ -#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -14,5 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -$DIR/robot.sh $DIR/../../src/test/acceptance + +languageCode: "en-us" +DefaultContentLanguage: "en" +title: "Ozone" +theme: "ozonedoc" +pygmentsCodeFences: true +uglyurls: true +relativeURLs: true + +menu: + main: + - identifier: Starting + name: "Getting Started" + title: "Getting Started" + url: runningviadocker.html + weight: 1 + - identifier: Client + name: Client + title: Client + url: commandshell.html + weight: 2 + - identifier: Tools + name: Tools + title: Tools + url: dozone.html + weight: 3 diff --git a/hadoop-ozone/docs/content/BucketCommands.md b/hadoop-ozone/docs/content/BucketCommands.md new file mode 100644 index 00000000000..3ab35053028 --- /dev/null +++ b/hadoop-ozone/docs/content/BucketCommands.md @@ -0,0 +1,122 @@ +--- +title: Bucket Commands +menu: + main: + parent: Client + weight: 3 +--- + + +Ozone shell supports the following bucket commands. + + * [create](#create) + * [delete](#delete) + * [info](#info) + * [list](#list) + * [update](#update) + +### Create + +The bucket create command allows a user to create a bucket. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the bucket in **/volume/bucket** format. 
+
+{{< highlight bash >}}
+ozone sh bucket create /hive/jan
+{{< /highlight >}}
+
+The above command will create a bucket called _jan_ in the _hive_ volume.
+Since no scheme was specified, this command defaults to the O3 (RPC) protocol.
+
+### Delete
+
+The bucket delete command allows a user to delete a bucket. If the
+bucket is not empty then this command will fail.
+
+***Params:***
+
+| Arguments | Comment |
+|--------------------------------|-----------------------------------------|
+| Uri | The name of the bucket.
+
+{{< highlight bash >}}
+ozone sh bucket delete /hive/jan
+{{< /highlight >}}
+
+The above command will delete the _jan_ bucket if it is empty.
+
+### Info
+
+The bucket info command returns information about the bucket.
+
+***Params:***
+
+| Arguments | Comment |
+|--------------------------------|-----------------------------------------|
+| Uri | The name of the bucket.
+
+{{< highlight bash >}}
+ozone sh bucket info /hive/jan
+{{< /highlight >}}
+
+The above command will print out the information about the _jan_ bucket.
+
+### List
+
+The bucket list command allows a user to list the buckets in a volume.
+
+***Params:***
+
+| Arguments | Comment |
+|--------------------------------|-----------------------------------------|
+| -l, --length | Maximum number of results to return. Default: 100
+| -p, --prefix | Optional. Only buckets that match this prefix will be returned.
+| -s, --start | The listing will start from the key after the start key.
+| Uri | The name of the _volume_.
+
+{{< highlight bash >}}
+ozone sh bucket list /hive
+{{< /highlight >}}
+
+This command will list all buckets on the volume _hive_.
+
+### Update
+
+The bucket update command allows changing access permissions on a bucket.
+
+***Params:***
+
+| Arguments | Comment |
+|--------------------------------|-----------------------------------------|
+| --addAcl | Optional. Comma separated list of ACLs that will be added to the bucket.
+| --removeAcl | Optional. Comma separated list of ACLs to remove.
+| Uri | The name of the bucket.
+
+{{< highlight bash >}}
+ozone sh bucket update --addAcl=user:bilbo:rw /hive/jan
+{{< /highlight >}}
+
+The above command gives user bilbo read/write permission to the bucket.
+
+You can try out these commands from the docker instance of the [Alpha
+Cluster](runningviadocker.html).
diff --git a/hadoop-ozone/docs/content/BuildingSources.md b/hadoop-ozone/docs/content/BuildingSources.md
new file mode 100644
index 00000000000..1953f47f59b
--- /dev/null
+++ b/hadoop-ozone/docs/content/BuildingSources.md
@@ -0,0 +1,54 @@
+---
+title: Building from Sources
+weight: 1
+menu:
+   main:
+      parent: Starting
+      weight: 5
+---
+
+***This is a guide on how to build the ozone sources. If you are not
+planning to build sources yourself, you can safely skip this page.***
+
+If you are a Hadoop ninja, and wise in the ways of Apache, you already know
+that a real Apache release is a source release.
+
+If you want to build from sources, please untar the source tarball and run
+the ozone build command. This instruction assumes that you have all the
+dependencies to build Hadoop on your build machine. If you need instructions
+on how to build Hadoop, please look at the Apache Hadoop Website.
+
+{{< highlight bash >}}
+mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true -Phdds -Pdist -Dtar -DskipShade
+{{< /highlight >}}
+
+This will build an ozone-\.tar.gz in your target directory.
+
+You can copy this tarball and use this instead of binary artifacts that are
+provided along with the official release.
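+
+To pick up the result, look for the tarball under the build's output directory.
+The snippet below is only a sketch: it assumes the default `hadoop-dist/target`
+output location and unpacks the archive under `/opt`, so adjust both paths to
+your environment.
+
+{{< highlight bash >}}
+# The dist build places the Ozone tarball next to the other distribution artifacts.
+ls hadoop-dist/target/ozone-*.tar.gz
+
+# Unpack it to get the same directory layout as the binary release.
+tar -xzf hadoop-dist/target/ozone-*.tar.gz -C /opt
+{{< /highlight >}}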
+ +## How to test the build +You can run the acceptance tests in the hadoop-ozone directory to make sure +that your build is functional. To launch the acceptance tests, please follow + the instructions in the **README.md** in the + ```$hadoop_src/hadoop-ozone/acceptance-test``` directory. Acceptance tests + will start a small ozone cluster and verify that ozone shell and ozone file + system is fully functional. diff --git a/hadoop-ozone/docs/content/CommandShell.md b/hadoop-ozone/docs/content/CommandShell.md index 95820e99be1..74072a5218d 100644 --- a/hadoop-ozone/docs/content/CommandShell.md +++ b/hadoop-ozone/docs/content/CommandShell.md @@ -1,158 +1,109 @@ --- -title: Command Shell -menu: main +title: Ozone CLI +menu: + main: + parent: Client + weight: 1 --- -# Ozone Command Shell - - -Ozone command shell gives a command shell interface to work against Ozone. -Please note that this document assumes that cluster is deployed -with simple authentication. - -The Ozone commands take the following format. -``` -ozone oz --command_ /volume/bucket/key -user [-root] -``` - -The `port` specified in command should match the port mentioned in the config -property `hdds.rest.http-address`. This property can be set in `ozone-site.xml`. -The default value for the port is `9880` and is used in below commands. - -The `-root` option is a command line short cut that allows *ozone oz* -commands to be run as the user that started the cluster. This is useful to -indicate that you want the commands to be run as some admin user. The only -reason for this option is that it makes the life of a lazy developer more -easier. - -## Volume Commands - - -The volume commands allow users to create, delete and list the volumes in the -ozone cluster. - -### Create Volume -Volumes can be created only by administrators. Here is an example of creating a volume. -``` -ozone oz -createVolume hive -user bilbo -quota 100TB -root -``` -The above command creates a volume called `hive` owned by user `bilbo`. The -`-root` option allows the command to be executed as user `hdfs` which is an -admin in the cluster. - -### Update Volume -Updates information like ownership and quota on an existing volume. -``` -ozone oz -updateVolume hive -quota 500TB -root -``` - -The above command changes the volume quota of hive from 100TB to 500TB. - -### Delete Volume -Deletes a Volume if it is empty. -``` -ozone oz -deleteVolume /hive -root -``` - -### Info Volume -Info volume command allows the owner or the administrator of the cluster -to read meta-data about a specific volume. -``` -ozone oz -infoVolume /hive -root -``` - -### List Volumes -List volume command can be used by administrator to list volumes of any -user. It can also be used by any user to list their own volumes. -``` -ozone oz -listVolume / -user bilbo -``` - -The above command lists all volumes owned by user bilbo. - -## Bucket Commands - -Bucket commands follow a similar pattern as volume commands. However bucket -commands are designed to be run by the owner of the volume. -Following examples assume that these commands are run by the owner of the -volume or bucket. - -### Create Bucket -Create bucket call allows the owner of a volume to create a bucket. -``` -ozone oz -createBucket /hive/january -``` - -This call creates a bucket called `january` in the volume called `hive`. If -the volume does not exist, then this call will fail. - -### Update Bucket -Updates bucket meta-data, like ACLs. 
-``` -ozone oz -updateBucket /hive/january -addAcl user:spark:rw -``` -### Delete Bucket -Deletes a bucket if it is empty. -``` -ozone oz -deleteBucket /hive/january -``` - -### Info Bucket -Returns information about a given bucket. -``` -ozone oz -infoBucket /hive/january -``` - -### List Buckets -List buckets in a given volume. -``` -ozone oz -listBucket /hive -``` - -## Ozone Key Commands - -Ozone key commands allows users to put, delete and get keys from Ozone buckets. - -### Put Key -Creates or overwrites a key in Ozone store, -file points to the file you want -to upload. -``` -ozone oz -putKey /hive/january/processed.orc -file processed.orc -``` - -### Get Key -Downloads a file from the Ozone bucket. -``` -ozone oz -getKey /hive/january/processed.orc -file processed.orc.copy -``` - -### Delete Key -Deletes a key from the Ozone store. -``` -ozone oz -deleteKey /hive/january/processed.orc -``` - -### Info Key -Reads key metadata from the Ozone store. -``` -ozone oz -infoKey /hive/january/processed.orc -``` - -### List Keys -List all keys in an Ozone bucket. -``` -ozone oz -listKey /hive/january -``` + +Ozone has a set of command line tools that can be used to manage ozone. + +All these commands are invoked via the ```ozone``` script. + +The commands supported by ozone are: + + * **classpath** - Prints the class path needed to get the hadoop jar and the + required libraries. + * **fs** - Runs a command on ozone file system. + * **datanode** - Via daemon command, the HDDS data nodes can be started or + stopped. + * **envvars** - Display computed Hadoop environment variables. + * **freon** - Runs the ozone load generator. + * **genesis** - Developer Only, Ozone micro-benchmark application. + * **getozoneconf** - Reads ozone config values from configuration. + * **jmxget** - Get JMX exported values from NameNode or DataNode. + * **om** - Ozone Manager, via daemon command can be started or stopped. + * **sh** - Primary command line interface for ozone. + * **scm** - Storage Container Manager service, via daemon can be + stated or stopped. + * **scmcli** - Developer only, Command Line Interface for the Storage + Container Manager. + * **version** - Prints the version of Ozone and HDDS. + * **genconf** - Generate minimally required ozone configs and output to + ozone-site.xml. + +## Understanding Ozone command shell +The most used command when working with Ozone is the Ozone command shell. +Ozone command shell gives a command shell interface to work against +Ozone. + +The Ozone shell commands take the following format. + +> _ozone sh object action url_ + +**ozone** script is used to invoke all Ozone sub-commands. The ozone shell is +invoked via ```sh``` command. + +The object can be a volume, bucket or a key. The action is various verbs like + create, list, delete etc. + + +Ozone URL can point to a volume, bucket or keys in the following format: + +_\[scheme\]\[server:port\]/volume/bucket/key_ + + +Where, + +1. Scheme - Can be one of the following + * o3 - Ozone's native RPC protocol. If you specify this scheme, the + native RPC protocol is used while communicating with Ozone Manager and + data nodes. + * http/https - If an HTTP protocol is specified, then Ozone shell assumes + that you are interested in using the Ozone Rest protocol and falls back + to using the REST protocol instead of RPC. + If no protocol is specified, the Ozone shell defaults to the native RPC + protocol. + +2. Server:Port - This is the address of the Ozone Manager. 
This can be server + only, in that case, the default port is used. If this value is omitted +then the defaults specified in the ozone-site.xml will be used for Ozone +Manager address. + +Depending on the call, the volume/bucket/key names will be part of the URL. +Please see volume commands, bucket commands, and key commands section for more +detail. + +## Invoking help + +Ozone shell help can be invoked at _object_ level or at _action_ level. +For example: + +{{< highlight bash >}} +ozone sh volume --help +{{< /highlight >}} + +This will show all possible actions for volumes. + +or it can be invoked to explain a specific action like +{{< highlight bash >}} +ozone sh volume create --help +{{< /highlight >}} +This command will give you command line options of the create command. diff --git a/hadoop-ozone/docs/content/Concepts.md b/hadoop-ozone/docs/content/Concepts.md new file mode 100644 index 00000000000..7f7dd3b4899 --- /dev/null +++ b/hadoop-ozone/docs/content/Concepts.md @@ -0,0 +1,108 @@ +--- +title: Architecture +date: "2017-10-10" +menu: main +--- + + + +Ozone is a redundant, distributed object store build by +leveraging primitives present in HDFS. The primary design point of ozone is scalability, and it aims to scale to billions of objects. + +Ozone consists of volumes, buckets, and keys. A volume is similar to a home directory in the ozone world. Only an administrator can create it. Volumes are used to store buckets. Once a volume is created users can create as many buckets as needed. Ozone stores data as keys which live inside these buckets. + +Ozone namespace is composed of many storage volumes. Storage volumes are also used as the basis for storage accounting. + +To access a key, an Ozone URL has the following format: + +``` +http://servername:port/volume/bucket/key +``` + +Where the server name is the name of a data node, the port is the data node HTTP port. The volume represents the name of the ozone volume; bucket is an ozone bucket created by the user and key represents the file. + +Please look at the [command line interface]({{< ref "CommandShell.md#shell" >}}) for more info. + +Ozone supports both REST and RPC protocols. Clients can choose either of these protocols to communicate with Ozone. Please see the [client documentation]({{< ref "JavaApi.md" >}}) for more details. + +Ozone separates namespace management and block space management; this helps +ozone to scale much better. The namespace is managed by a daemon called +[Ozone Manager ]({{< ref "OzoneManager.md" >}}) (OM), and block space is +managed by [Storage Container Manager] ({{< ref "Hdds.md" >}}) (SCM). + +The data nodes provide replication and ability to store blocks; these blocks are stored in groups to reduce the metadata pressure on SCM. This groups of blocks are called storage containers. Hence the block manager is called storage container +manager. + +Ozone Overview +-------------- + +
The following diagram is a high-level overview of the core components of Ozone.

 + +![Architecture diagram](../../OzoneOverview.svg) + +The main elements of Ozone are
: + +### Ozone Manager

[Ozone Manager]({{< ref "OzoneManager.md" >}}) (OM) takes care of Ozone's namespace.
+All Ozone objects like volumes, buckets, and keys are managed by OM. In short, OM is the metadata manager for Ozone.
+OM talks to the block manager (SCM) to get blocks and passes them on to the Ozone
+client. The Ozone client writes data to these blocks.
+OM will eventually be replicated via Apache Ratis for High Availability.
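+
+As a quick illustration of the namespace OM maintains, the shell commands below
+create a volume, a bucket and a key; each of these objects becomes a metadata
+entry managed by OM. The names are only examples, and the full command reference
+is in the client pages of this documentation:
+
+{{< highlight bash >}}
+# Volumes, buckets and keys are all namespace entries tracked by OM.
+ozone sh volume create /hive
+ozone sh bucket create /hive/jan
+ozone sh key put /hive/jan/sales.orc sales.orc
+{{< /highlight >}}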
 + +### Storage Container Manager + +[Storage Container Manager]({{< ref "Hdds.md" >}}) (SCM) is the block and cluster manager for Ozone. +SCM along with data nodes offer a service called 'storage containers'. +A storage container is a group unrelated of blocks that are managed together as a single entity. + +SCM offers the following abstractions.

 + +![SCM Abstractions](../../SCMBlockDiagram.png) + +### Blocks +Blocks are similar to blocks in HDFS. They are replicated store of data. Client writes data to blocks. + +### Containers +A collection of blocks replicated and managed together. + +### Pipelines +SCM allows each storage container to choose its method of replication. +For example, a storage container might decide that it needs only one copy of a block +and might choose a stand-alone pipeline. Another storage container might want to have a very high level of reliability and pick a RATIS based pipeline. In other words, SCM allows different kinds of replication strategies to co-exist. The client while writing data, chooses a storage container with required properties. + +### Pools +A group of data nodes is called a pool. For scaling purposes, +we define a pool as a set of machines. This makes management of data nodes easier. + +### Nodes +The data node where data is stored. SCM monitors these nodes via heartbeat. + +### Clients +Ozone ships with a set of clients. Ozone [CLI]({{< ref "CommandShell.md#shell" >}}) is the command line interface like 'hdfs' command.
 [Freon]({{< ref "Freon.md" >}}) is a load generation tool for Ozone.
 + +### REST Handler +Ozone provides an RPC (Remote Procedure Call) as well as a REST (Representational State Transfer) interface. This allows clients to be written in many languages quickly. Ozone strives to maintain an API compatibility between REST and RPC. +For most purposes, a client can make one line change to switch from REST to RPC or vice versa. 
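+
+As a concrete sketch of that switch, the Ozone shell picks the protocol from the
+scheme of the URL, so the same request can go over RPC or REST by changing only
+the URL (the `om-host` address below is a placeholder for your Ozone Manager):
+
+{{< highlight bash >}}
+# Native RPC protocol, selected by the o3 scheme.
+ozone sh bucket info o3://om-host/hive/jan
+
+# The same request over the REST protocol, selected by the http scheme.
+ozone sh bucket info http://om-host/hive/jan
+{{< /highlight >}}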
 + +### Ozone File System +Ozone file system (TODO: Add documentation) is a Hadoop compatible file system. This allows Hadoop services and applications like Hive and Spark to run against +Ozone without any change. + +### Ozone Client +This is similar to DFSClient in HDFS. This is the standard client to talk to Ozone. All other components that we have discussed so far rely on Ozone client. Ozone client supports both RPC and REST protocols. diff --git a/hadoop-ozone/docs/content/Dozone.md b/hadoop-ozone/docs/content/Dozone.md new file mode 100644 index 00000000000..7906cf3fd4e --- /dev/null +++ b/hadoop-ozone/docs/content/Dozone.md @@ -0,0 +1,110 @@ +--- +title: "Dozone & Dev Tools" +date: 2017-08-10 +menu: + main: + parent: Tools +--- + + + +Dozone stands for docker for ozone. Ozone supports docker to make it easy to develop and test ozone. Starting a docker based ozone container is simple. + +In the `compose/ozone` directory there are two files that define the docker and ozone settings. + +Developers can + +{{< highlight bash >}} +cd compose/ozone +{{< /highlight >}} + +and simply run + +{{< highlight bash >}} +docker-compose up -d +{{< /highlight >}} + +to run a ozone cluster on docker. + +This command will launch a Namenode, OM, SCM and a data node. + +To access the OM UI, one can run 'http://localhost:9874'. + +_Please note_: dozone does not map the data node ports to the 9864. Instead, it maps to the ephemeral port range. So many examples in the command shell will not work if you run those commands from the host machine. To find out where the data node port is listening, you can run the `docker ps` command or always ssh into a container before running ozone commands. + +To shutdown a running docker based ozone cluster, please run + +{{< highlight bash >}} +docker-compose down +{{< /highlight >}} + + +Adding more config settings +--------------------------- +The file called `docker-config` contains all ozone specific config settings. This file is processed to create the ozone-site.xml. + +Useful Docker & Ozone Commands +------------------------------ + +If you make any modifications to ozone, the simplest way to test it is to run freon and unit tests. + +Here are the instructions to run corona in a docker based cluster. + +{{< highlight bash >}} +docker-compose exec datanode bash +{{< /highlight >}} + +This will open a bash shell on the data node container. +Now we can execute corona for load generation. + +{{< highlight bash >}} +ozone freon randomkeys --numOfVolumes=10 --numOfBuckets 10 --numOfKeys 10 +{{< /highlight >}} + +Here is a set helpful commands while working with docker for ozone. 
+To check the status of the components: + +{{< highlight bash >}} +docker-compose ps +{{< /highlight >}} + +To get logs from a specific node/service: + +{{< highlight bash >}} +docker-compose logs scm +{{< /highlight >}} + + +As the WebUI ports are forwarded to the external machine, you can check the web UI: + +* For the Storage Container Manager: http://localhost:9876 +* For the Ozone Managerr: http://localhost:9874 +* For the Datanode: check the port with docker ps (as there could be multiple data node ports are mapped to the ephemeral port range) +* For the Namenode: http://localhost:9870 + +You can start multiple data nodes with: + +{{< highlight bash >}} +docker-compose scale datanode=3 +{{< /highlight >}} + +You can test the commands from the [Ozone CLI]({{< ref "CommandShell.md#shell" >}}) after opening a new bash shell in one of the containers: + +{{< highlight bash >}} +docker-compose exec datanode bash +{{< /highlight >}} diff --git a/hadoop-ozone/docs/content/Freon.md b/hadoop-ozone/docs/content/Freon.md new file mode 100644 index 00000000000..6ef0280717e --- /dev/null +++ b/hadoop-ozone/docs/content/Freon.md @@ -0,0 +1,64 @@ +--- +title: Freon +date: "2017-09-02T23:58:17-07:00" +menu: + main: + parent: Tools +--- + + +Overview +-------- + +Freon is a load-generator for Ozone. This tool is used for testing the functionality of ozone. + +### Random keys + +In randomkeys mode, the data written into ozone cluster is randomly generated. +Each key will be of size 10 KB. + +The number of volumes/buckets/keys can be configured. The replication type and +factor (eg. replicate with ratis to 3 nodes) Also can be configured. + +For more information use + +`bin/ozone freon --help` + +### Example + +{{< highlight bash >}} +ozone freon randomkeys --numOfVolumes=10 --numOfBuckets 10 --numOfKeys 10 --replicationType=RATIS --factor=THREE +{{< /highlight >}} + +{{< highlight bash >}} +*************************************************** +Status: Success +Git Base Revision: 48aae081e5afacbb3240657556b26c29e61830c3 +Number of Volumes created: 10 +Number of Buckets created: 100 +Number of Keys added: 1000 +Ratis replication factor: THREE +Ratis replication type: RATIS +Average Time spent in volume creation: 00:00:00,035 +Average Time spent in bucket creation: 00:00:00,319 +Average Time spent in key creation: 00:00:03,659 +Average Time spent in key write: 00:00:10,894 +Total bytes written: 10240000 +Total Execution time: 00:00:16,898 +*********************** +{{< /highlight >}} diff --git a/hadoop-ozone/docs/content/GettingStarted.md b/hadoop-ozone/docs/content/GettingStarted.md deleted file mode 100644 index 117a3071c58..00000000000 --- a/hadoop-ozone/docs/content/GettingStarted.md +++ /dev/null @@ -1,361 +0,0 @@ ---- -title: Getting started -weight: -2 -menu: main ---- - - -# Ozone - Object store for Apache Hadoop - - -## Introduction - -Ozone is a scalable distributed object store for Hadoop. Ozone supports RPC -and REST APIs for working with Volumes, Buckets and Keys. - -Existing Hadoop applications can use Ozone transparently via a Hadoop Compatible -FileSystem shim. - -### Basic terminology -1. **Volumes** - Volumes are a notion similar to accounts. Volumes can be -created or deleted only by administrators. -1. **Buckets** - A volume can contain zero or more buckets. -1. **Keys** - Keys are unique within a given bucket. - -### Services in a minimal Ozone cluster -1. **Ozone Manager (OM)** - stores Ozone Metadata namely Volumes, -Buckets and Key names. -1. 
**Storage Container Manager (SCM)** - handles Storage Container lifecycle. -Containers are the unit of replication in Ozone and not exposed to users. -1. **DataNodes** - These are HDFS DataNodes which understand how to store -Ozone Containers. Ozone has been designed to efficiently share storage space -with HDFS blocks. - -## Getting Started - -Ozone is currently work-in-progress and lives in the Hadoop source tree. -The sub-projects (`hadoop-ozone` and `hadoop-hdds`) are part of -the Hadoop source tree but they are not compiled by default and not -part of official Apache Hadoop releases. - -To use Ozone, you have to build a package by yourself and deploy a cluster. - -### Building Ozone - -To build Ozone, please checkout the Hadoop sources from the -[Apache Hadoop git repo](https://git-wip-us.apache.org/repos/asf?p=hadoop.git). -Then checkout the `trunk` branch and build it with the `hdds` profile enabled. - -` -git checkout trunk -mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true -Pdist -Phdds -Dtar -DskipShade -` - -`skipShade` is just to make compilation faster and not required. - -This builds a tarball in your distribution directory which can be used to deploy your -Ozone cluster. The tarball path is `hadoop-dist/target/ozone-${project.version}.tar.gz`. - -At this point you can either setup a physical cluster or run Ozone via -docker. - -### Running Ozone via Docker - -This is the quickest way to bring up an Ozone cluster for development/testing -or if you just want to get a feel for Ozone. It assumes that you have docker installed -on the machine. - -Go to the directory where the docker compose files exist and tell -`docker-compose` to start Ozone. This will start SCM, OM and a single datanode -in the background. -``` -cd hadoop-dist/target/compose/ozone - -docker-compose up -d -``` - -Now let us run some workload against Ozone. To do that we will run -_freon_, the Ozone load generator after logging into one of the docker -containers for OM, SCM or DataNode. Let's take DataNode for example:. -``` -docker-compose exec datanode bash - -ozone freon -mode offline -validateWrites -numOfVolumes 1 -numOfBuckets 10 -numOfKeys 100 -``` - -You can checkout the OM UI to see the requests information. -``` -http://localhost:9874/ -``` - -If you need more datanodes you can scale up: -``` -docker-compose up --scale datanode=3 -d -``` - -## Running Ozone using a real cluster - -### Configuration - -First initialize Hadoop cluster configuration files like hadoop-env.sh, -core-site.xml, hdfs-site.xml and any other configuration files that are -needed for your cluster. - -#### Update hdfs-site.xml - -The container manager part of Ozone runs inside DataNodes as a pluggable module. -To activate ozone you should define the service plugin implementation class. -**Important**: It should be added to the **hdfs-site.xml** as the plugin should -be activated as part of the normal HDFS Datanode bootstrap. -``` - - dfs.datanode.plugins - org.apache.hadoop.ozone.HddsDatanodeService - -``` - - -#### Create ozone-site.xml - -Ozone relies on its own configuration file called `ozone-site.xml`. -The following are the most important settings. - - 1. _*ozone.enabled*_ This is the most important setting for ozone. - Currently, Ozone is an opt-in subsystem of HDFS. By default, Ozone is - disabled. Setting this flag to `true` enables ozone in the HDFS cluster. - Here is an example, - ``` - - ozone.enabled - True - - ``` - 1. **ozone.metadata.dirs** Administrators can specify where the - metadata must reside. 
Usually you pick your fastest disk (SSD if - you have them on your nodes). OM, SCM and datanode will write the metadata - to these disks. This is a required setting, if this is missing Ozone will - fail to come up. Here is an example, - ``` - - ozone.metadata.dirs - /data/disk1/meta - - ``` - -1. **ozone.scm.names** Ozone is build on top of container framework. Storage - container manager(SCM) is a distributed block service which is used by ozone - and other storage services. - This property allows datanodes to discover where SCM is, so that - datanodes can send heartbeat to SCM. SCM is designed to be highly available - and datanodes assume there are multiple instances of SCM which form a highly - available ring. The HA feature of SCM is a work in progress. So we - configure ozone.scm.names to be a single machine. Here is an example, - ``` - - ozone.scm.names - scm.hadoop.apache.org - - ``` - -1. **ozone.scm.datanode.id** Each datanode that speaks to SCM generates an ID -just like HDFS. This is a mandatory setting. Please note: -This path will be created by datanodes if it doesn't exist already. Here is an - example, - ``` - - ozone.scm.datanode.id - /data/disk1/scm/meta/node/datanode.id - - ``` - -1. **ozone.scm.block.client.address** Storage Container Manager(SCM) offers a - set of services that can be used to build a distributed storage system. One - of the services offered is the block services. OM and HDFS would use this - service. This property describes where OM can discover SCM's block service - endpoint. There is corresponding ports etc, but assuming that we are using - default ports, the server address is the only required field. Here is an - example, - ``` - - ozone.scm.block.client.address - scm.hadoop.apache.org - - ``` - -1. **ozone.om.address** OM server address. This is used by OzoneClient and -Ozone File System. - ``` - - ozone.om.address - om.hadoop.apache.org - - ``` - -#### Ozone Settings Summary - -| Setting | Value | Comment | -|--------------------------------|------------------------------|------------------------------------------------------------------| -| ozone.enabled | True | This enables SCM and containers in HDFS cluster. | -| ozone.metadata.dirs | file path | The metadata will be stored here. | -| ozone.scm.names | SCM server name | Hostname:port or or IP:port address of SCM. | -| ozone.scm.block.client.address | SCM server name and port | Used by services like OM | -| ozone.scm.client.address | SCM server name and port | Used by client side | -| ozone.scm.datanode.address | SCM server name and port | Used by datanode to talk to SCM | -| ozone.om.address | OM server name | Used by Ozone handler and Ozone file system. | - - -#### Sample ozone-site.xml - -``` - - - - - ozone.enabled - True - - - - ozone.metadata.dirs - /data/disk1/ozone/meta - - - - ozone.scm.names - 127.0.0.1 - - - - ozone.scm.client.address - 127.0.0.1:9860 - - - - ozone.scm.block.client.address - 127.0.0.1:9863 - - - - ozone.scm.datanode.address - 127.0.0.1:9861 - - - - ozone.om.address - 127.0.0.1:9874 - - -``` - - - -### Starting Ozone - -Ozone is designed to run concurrently with HDFS. The simplest way to [start -HDFS](../hadoop-common/ClusterSetup.html) is to run `start-dfs.sh` from the -`$HADOOP/sbin/start-dfs.sh`. Once HDFS -is running, please verify it is fully functional by running some commands like - - - *./hdfs dfs -mkdir /usr* - - *./hdfs dfs -ls /* - - Once you are sure that HDFS is running, start Ozone. To start ozone, you - need to start SCM and OM. 
- -The first time you bring up Ozone, SCM must be initialized. -``` -ozone scm -init -``` - -Start SCM. -``` -ozone --daemon start scm -``` - -Once SCM gets started, OM must be initialized. -``` -ozone om -createObjectStore -``` - -Start OM. -``` -ozone --daemon start om -``` - -If you would like to start HDFS and Ozone together, you can do that by running - a single command. -``` -$HADOOP/sbin/start-ozone.sh -``` - -This command will start HDFS and then start the ozone components. - -Once you have ozone running you can use these ozone [shell](./OzoneCommandShell.html) -commands to start creating a volume, bucket and keys. - -## Diagnosing issues - -Ozone tries not to pollute the existing HDFS streams of configuration and -logging. So ozone logs are by default configured to be written to a file -called `ozone.log`. This is controlled by the settings in `log4j.properties` -file in the hadoop configuration directory. - -Here is the log4j properties that are added by ozone. - - -``` - # - # Add a logger for ozone that is separate from the Datanode. - # - #log4j.debug=true - log4j.logger.org.apache.hadoop.ozone=DEBUG,OZONE,FILE - - # Do not log into datanode logs. Remove this line to have single log. - log4j.additivity.org.apache.hadoop.ozone=false - - # For development purposes, log both to console and log file. - log4j.appender.OZONE=org.apache.log4j.ConsoleAppender - log4j.appender.OZONE.Threshold=info - log4j.appender.OZONE.layout=org.apache.log4j.PatternLayout - log4j.appender.OZONE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ - %X{component} %X{function} %X{resource} %X{user} %X{request} - %m%n - - # Real ozone logger that writes to ozone.log - log4j.appender.FILE=org.apache.log4j.DailyRollingFileAppender - log4j.appender.FILE.File=${hadoop.log.dir}/ozone.log - log4j.appender.FILE.Threshold=debug - log4j.appender.FILE.layout=org.apache.log4j.PatternLayout - log4j.appender.FILE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ - (%F:%L) %X{function} %X{resource} %X{user} %X{request} - \ - %m%n -``` - -If you would like to have a single datanode log instead of ozone stuff -getting written to ozone.log, please remove this line or set this to true. -``` -log4j.additivity.org.apache.hadoop.ozone=false -``` - -On the SCM/OM side, you will be able to see -1. `hadoop-hdfs-om-hostname.log` -1. `hadoop-hdfs-scm-hostname.log` - -## Reporting Bugs -Please file any issues you see under [Apache HDDS Project Jira](https://issues.apache.org/jira/projects/HDDS/issues/). - -## References - - [Object store in HDFS: HDFS-7240](https://issues.apache.org/jira/browse/HDFS-7240) - - [Ozone File System: HDFS-13074](https://issues.apache.org/jira/browse/HDFS-13074) - - [Building HDFS on top of new storage layer (HDDS): HDFS-10419](https://issues.apache.org/jira/browse/HDFS-10419) diff --git a/hadoop-ozone/docs/content/Hdds.md b/hadoop-ozone/docs/content/Hdds.md new file mode 100644 index 00000000000..9978c26e846 --- /dev/null +++ b/hadoop-ozone/docs/content/Hdds.md @@ -0,0 +1,65 @@ +--- +title: "Hadoop Distributed Data Store" +date: "2017-09-14" +menu: + main: + parent: Architecture +weight: 10 +--- + + +SCM Overview +------------ + +Storage Container Manager or SCM is a very important component of ozone. SCM +offers block and container-based services to Ozone Manager. A container is a +collection of unrelated blocks under ozone. SCM and data nodes work together +to maintain the replication levels needed by the cluster. + +It is easier to look at a putKey operation to understand the role that SCM plays. 
+ +To put a key, a client makes a call to KSM with the following arguments. + +-- putKey(keyName, data, pipeline type, replication count) + +1. keyName - refers to the file name. +2. data - The data that the client wants to write. +3. pipeline type - Allows the client to select the pipeline type. A pipeline + refers to the replication strategy used for replicating a block. Ozone + currently supports Stand Alone and Ratis as two different pipeline types. +4. replication count - This specifies how many copies of the block replica should be maintained. + +In most cases, the client does not specify the pipeline type and replication + count. The default pipeline type and replication count are used. + + +Ozone Manager when it receives the putKey call, makes a call to SCM asking +for a pipeline instance with the specified property. So if the client asked +for RATIS replication strategy and a replication count of three, then OM +requests SCM to return a set of data nodes that meet this capability. + +If SCM can find this a pipeline ( that is a set of data nodes) that can meet +the requirement from the client, then those nodes are returned to OM. OM will +persist this info and return a tuple consisting of {BlockID, ContainerName, and Pipeline}. + +If SCM is not able to find a pipeline, then SCM creates a logical pipeline and then returns it. + + +SCM manages blocks, containers, and pipelines. To return healthy pipelines, +SCM also needs to understand the node health. So SCM listens to heartbeats +from data nodes and acts as the node manager too. diff --git a/hadoop-ozone/docs/content/JavaApi.md b/hadoop-ozone/docs/content/JavaApi.md new file mode 100644 index 00000000000..1d32bed1ecf --- /dev/null +++ b/hadoop-ozone/docs/content/JavaApi.md @@ -0,0 +1,172 @@ +--- +title: "Java API" +date: "2017-09-14" +menu: + main: + parent: "Client" +--- + + +Introduction +------------- + +Ozone ships with it own client library, that supports both RPC(Remote +Procedure call) and REST(Representational State Transfer). This library is +the primary user interface to ozone. + +It is trivial to switch from RPC to REST or vice versa, by setting the +property _ozone.client.protocol_ in the configuration or by calling the +appropriate factory method. + +## Creating an Ozone client +The Ozone client factory creates the ozone client. It allows the user to +specify the protocol of communication. For example, to get an REST client, we +can use + +{{< highlight java >}} +OzoneClient ozClient = OzoneClientFactory.getRestClient(); +{{< /highlight >}} + +And to get a a RPC client we can call + +{{< highlight java >}} +OzoneClient ozClient = OzoneClientFactory.getRpcClient(); +{{< /highlight >}} + +If the user want to create a client based on the configuration, then they can +call + +{{< highlight java >}} +OzoneClient ozClient = OzoneClientFactory.getClient(); +{{< /highlight >}} + +and an appropriate client based on configuration will be returned. + +## Writing data using Ozone Client + +The hierarchy of data inside ozone is a volume, bucket and a key. A volume +is a collection of buckets. A bucket is a collection of keys. To write data +to the ozone, you need a volume, bucket and a key. + +### Creating a Volume + +Once we have a client, we need to get a reference to the ObjectStore. This +is done via + +{{< highlight java >}} +ObjectStore objectStore = ozClient.getObjectStore(); +{{< /highlight >}} + +An object store represents an active cluster against which the client is working. 
+ +{{< highlight java >}} +// Let us create a volume to store our game assets. +// This uses default arguments for creating that volume. +objectStore.createVolume(“assets”); + +// Let us verify that the volume got created. +OzoneVolume assets = objectStore.getVolume(“assets”); +{{< /highlight >}} + + +It is possible to pass an array of arguments to the createVolume by creating volume arguments. + +### Creating a Bucket + +Once you have a volume, you can create buckets inside the volume. + +{{< highlight bash >}} +// Let us create a bucket called videos. +assets.createBucket(“videos”); +Ozonebucket video = assets.getBucket(“videos”); +{{< /highlight >}} + +At this point we have a usable volume and a bucket. Our volume is called assets and bucket is called videos. + +Now we can create a Key. + +### Reading and Writing a Key + +With a bucket object the users can now read and write keys. The following code reads a video called intro.mp4 from the local disk and stores in the video bucket that we just created. + +{{< highlight bash >}} +// read data from the file, this is a user provided function. +byte [] vidoeData = readFile(“into.mp4”); + +// Create an output stream and write data. +OzoneOutputStream videoStream = video.createKey(“intro.mp4”, 1048576); +videoStream.write(videoData); + +// Close the stream when it is done. + videoStream.close(); + + +// We can use the same bucket to read the file that we just wrote, by creating an input Stream. +// Let us allocate a byte array to hold the video first. +byte[] data = new byte[(int)1048576]; +OzoneInputStream introStream = video.readKey(“intro.mp4”); +// read intro.mp4 into the data buffer +introStream.read(data); +introStream.close(); +{{< /highlight >}} + + +Here is a complete example of the code that we just wrote. Please note the close functions being called in this program. + +{{< highlight java >}} +// Let us create a client +OzoneClient ozClient = OzoneClientFactory.getClient(); + +// Get a reference to the ObjectStore using the client +ObjectStore objectStore = ozClient.getObjectStore(); + +// Let us create a volume to store our game assets. +// This default arguments for creating that volume. +objectStore.createVolume(“assets”); + +// Let us verify that the volume got created. +OzoneVolume assets = objectStore.getVolume(“assets”); + +// Let us create a bucket called videos. +assets.createBucket(“videos”); +Ozonebucket video = assets.getBucket(“videos”); + +// read data from the file, this is assumed to be a user provided function. +byte [] vidoeData = readFile(“into.mp4”); + +// Create an output stream and write data. +OzoneOutputStream videoStream = video.createKey(“intro.mp4”, 1048576); +videoStream.write(videoData); + +// Close the stream when it is done. + videoStream.close(); + + +// We can use the same bucket to read the file that we just wrote, by creating an input Stream. +// Let us allocate a byte array to hold the video first. + +byte[] data = new byte[(int)1048576]; +OzoneInputStream introStream = video.readKey(“into.mp4”); +introStream.read(data); + +// Close the stream when it is done. +introStream.close(); + +// Close the client. +ozClient.close(); +{{< /highlight >}} diff --git a/hadoop-ozone/docs/content/KeyCommands.md b/hadoop-ozone/docs/content/KeyCommands.md new file mode 100644 index 00000000000..0139a288c9f --- /dev/null +++ b/hadoop-ozone/docs/content/KeyCommands.md @@ -0,0 +1,127 @@ +--- +title: Key Commands +menu: + main: + parent: Client + weight: 3 +--- + + +Ozone shell supports the following key commands. 
+ + * [get](#get) + * [put](#put) + * [delete](#delete) + * [info](#info) + * [list](#list) + + +### Get + +The key get command downloads a key from Ozone cluster to local file system. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key in **/volume/bucket/key** format. +| FileName | Local file to download the key to. + + +{{< highlight bash >}} +ozone sh key get /hive/jan/sales.orc sales.orc +{{< /highlight >}} +Downloads the file sales.orc from the _/hive/jan_ bucket and writes to the +local file sales.orc. + +### Put + +Uploads a file from the local file system to the specified bucket. + +***Params:*** + + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key in **/volume/bucket/key** format. +| FileName | Local file to upload. +| -r, --replication | Optional, Number of copies, ONE or THREE are the options. Picks up the default from cluster configuration. + +{{< highlight bash >}} +ozone sh key put /hive/jan/corrected-sales.orc sales.orc +{{< /highlight >}} +The above command will put the sales.orc as a new key into _/hive/jan/corrected-sales.orc_. + +### Delete + +The delete key command removes the key from the bucket. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key. + +{{< highlight bash >}} +ozone sh key delete /hive/jan/corrected-sales.orc +{{< /highlight >}} + +The above command deletes the key _/hive/jan/corrected-sales.orc_. + + +### Info + +The key info commands returns the information about the key. +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key. + +{{< highlight bash >}} +ozone sh key info /hive/jan/sales.orc +{{< /highlight >}} + +The above command will print out the information about _/hive/jan/sales.orc_ +key. + +### List + +The key list commands allows user to list all keys in a bucket. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| -l, --length | Maximum number of results to return. Default: 1000 +| -p, --prefix | Optional, Only buckets that match this prefix will be returned. +| -s, --start | The listing will start from key after the start key. +| Uri | The name of the _volume_. + +{{< highlight bash >}} +ozone sh key list /hive/jan +{{< /highlight >}} + +This command will list all key in the bucket _/hive/jan_. + + + + + +You can try out these commands from the docker instance of the [Alpha +Cluster](runningviadocker.html). diff --git a/hadoop-ozone/docs/content/Metrics.md b/hadoop-ozone/docs/content/Metrics.md deleted file mode 100644 index 64a481fa8ce..00000000000 --- a/hadoop-ozone/docs/content/Metrics.md +++ /dev/null @@ -1,170 +0,0 @@ ---- -title: Metrics -menu: main ---- - - - - -HDFS Ozone Metrics -=============== - - - -Overview --------- - -The container metrics that is used in HDFS Ozone. - -### Storage Container Metrics - -The metrics for various storage container operations in HDFS Ozone. - -Storage container is an optional service that can be enabled by setting -'ozone.enabled' to true. -These metrics are only available when ozone is enabled. - -Storage Container Metrics maintains a set of generic metrics for all -container RPC calls that can be made to a datandoe/container. 
- -Along with the total number of RPC calls containers maintain a set of metrics -for each RPC call. Following is the set of counters maintained for each RPC -operation. - -*Total number of operation* - We maintain an array which counts how -many times a specific operation has been performed. -Eg.`NumCreateContainer` tells us how many times create container has been -invoked on this datanode. - -*Total number of pending operation* - This is an array which counts how -many times a specific operation is waitting to be processed from the client -point of view. -Eg.`NumPendingCreateContainer` tells us how many create container requests that -waitting to be processed. - -*Average latency of each pending operation in nanoseconds* - The average latency -of the operation from the client point of view. -Eg. `CreateContainerLatencyAvgTime` - This tells us the average latency of -Create Container from the client point of view. - -*Number of bytes involved in a specific command* - This is an array that is -maintained for all operations, but makes sense only for read and write -operations. - -While it is possible to read the bytes in update container, it really makes -no sense, since no data stream involved. Users are advised to use this -metric only when it makes sense. Eg. `BytesReadChunk` -- Tells us how -many bytes have been read from this data using Read Chunk operation. - -*Average Latency of each operation* - The average latency of the operation. -Eg. `LatencyCreateContainerAvgTime` - This tells us the average latency of -Create Container. - -*Quantiles for each of these operations* - The 50/75/90/95/99th percentile -of these operations. Eg. `CreateContainerNanos60s50thPercentileLatency` -- -gives latency of the create container operations at the 50th percentile latency -(1 minute granularity). We report 50th, 75th, 90th, 95th and 99th percentile -for all RPCs. - -So this leads to the containers reporting these counters for each of these -RPC operations. - -| Name | Description | -|:---- |:---- | -| `NumOps` | Total number of container operations | -| `CreateContainer` | Create container operation | -| `ReadContainer` | Read container operation | -| `UpdateContainer` | Update container operations | -| `DeleteContainer` | Delete container operations | -| `ListContainer` | List container operations | -| `PutKey` | Put key operations | -| `GetKey` | Get key operations | -| `DeleteKey` | Delete key operations | -| `ListKey` | List key operations | -| `ReadChunk` | Read chunk operations | -| `DeleteChunk` | Delete chunk operations | -| `WriteChunk` | Write chunk operations| -| `ListChunk` | List chunk operations | -| `CompactChunk` | Compact chunk operations | -| `PutSmallFile` | Put small file operations | -| `GetSmallFile` | Get small file operations | -| `CloseContainer` | Close container operations | - -### Storage Container Manager Metrics - -The metrics for containers that managed by Storage Container Manager. - -Storage Container Manager (SCM) is a master service which keeps track of -replicas of storage containers. It also manages all data nodes and their -states, dealing with container reports and dispatching commands for execution. 
- -Following are the counters for containers: - -| Name | Description | -|:---- |:---- | -| `LastContainerReportSize` | Total size in bytes of all containers in latest container report that SCM received from datanode | -| `LastContainerReportUsed` | Total number of bytes used by all containers in latest container report that SCM received from datanode | -| `LastContainerReportKeyCount` | Total number of keys in all containers in latest container report that SCM received from datanode | -| `LastContainerReportReadBytes` | Total number of bytes have been read from all containers in latest container report that SCM received from datanode | -| `LastContainerReportWriteBytes` | Total number of bytes have been written into all containers in latest container report that SCM received from datanode | -| `LastContainerReportReadCount` | Total number of times containers have been read from in latest container report that SCM received from datanode | -| `LastContainerReportWriteCount` | Total number of times containers have been written to in latest container report that SCM received from datanode | -| `ContainerReportSize` | Total size in bytes of all containers over whole cluster | -| `ContainerReportUsed` | Total number of bytes used by all containers over whole cluster | -| `ContainerReportKeyCount` | Total number of keys in all containers over whole cluster | -| `ContainerReportReadBytes` | Total number of bytes have been read from all containers over whole cluster | -| `ContainerReportWriteBytes` | Total number of bytes have been written into all containers over whole cluster | -| `ContainerReportReadCount` | Total number of times containers have been read from over whole cluster | -| `ContainerReportWriteCount` | Total number of times containers have been written to over whole cluster | - -### Key Space Metrics - -The metrics for various Ozone Manager operations in HDFS Ozone. - -The Ozone Manager (OM) is a service that similar to the Namenode in HDFS. -In the current design of OM, it maintains metadata of all volumes, buckets and keys. -These metrics are only available when ozone is enabled. - -Following is the set of counters maintained for each key space operation. - -*Total number of operation* - We maintain an array which counts how -many times a specific operation has been performed. -Eg.`NumVolumeCreate` tells us how many times create volume has been -invoked in OM. - -*Total number of failed operation* - This type operation is opposite to the above -operation. -Eg.`NumVolumeCreateFails` tells us how many times create volume has been invoked -failed in OM. - -Following are the counters for each of key space operations. 
- -| Name | Description | -|:---- |:---- | -| `VolumeCreate` | Create volume operation | -| `VolumeUpdates` | Update volume property operation | -| `VolumeInfos` | Get volume information operation | -| `VolumeCheckAccesses` | Check volume access operation | -| `VolumeDeletes` | Delete volume operation | -| `VolumeLists` | List volume operation | -| `BucketCreates` | Create bucket operation | -| `BucketInfos` | Get bucket information operation | -| `BucketUpdates` | Update bucket property operation | -| `BucketDeletes` | Delete bucket operation | -| `BucketLists` | List bucket operation | -| `KeyAllocate` | Allocate key operation | -| `KeyLookup` | Look up key operation | -| `KeyDeletes` | Delete key operation | -| `KeyLists` | List key operation | diff --git a/hadoop-ozone/docs/content/OzoneFS.md b/hadoop-ozone/docs/content/OzoneFS.md new file mode 100644 index 00000000000..d0621bee6dd --- /dev/null +++ b/hadoop-ozone/docs/content/OzoneFS.md @@ -0,0 +1,80 @@ +--- +title: Ozone File System +date: 2017-09-14 +menu: main +menu: + main: + parent: Client +--- + + +There are many Hadoop compatible files systems under Hadoop. Hadoop compatible file systems ensures that storage backends like Ozone can easily be integrated into Hadoop eco-system. + +## Setting up the Ozone file system + +To create an ozone file system, we have to choose a bucket where the file system would live. This bucket will be used as the backend store for OzoneFileSystem. All the files and directories will be stored as keys in this bucket. + +Please run the following commands to create a volume and bucket, if you don't have them already. + +{{< highlight bash >}} +ozone sh volume create /volume +ozone sh bucket create /volume/bucket +{{< /highlight >}} + +Once this is created, please make sure that bucket exists via the listVolume or listBucket commands. + +Please add the following entry to the core-site.xml. + +{{< highlight xml >}} + + fs.o3.impl + org.apache.hadoop.fs.ozone.OzoneFileSystem + + + fs.default.name + o3://localhost:9864/volume/bucket + +{{< /highlight >}} + +This will make this bucket to be the default file system for HDFS dfs commands and register the o3 file system type.. + +You also need to add the ozone-filesystem.jar file to the classpath: + +{{< highlight bash >}} +export HADOOP_CLASSPATH=/opt/ozone/share/hadoop/ozonefs/hadoop-ozone-filesystem.jar:$HADOOP_CLASSPATH +{{< /highlight >}} + + + + +Once the default Filesystem has been setup, users can run commands like ls, put, mkdir, etc. +For example, + +{{< highlight bash >}} +hdfs dfs -ls / +{{< /highlight >}} + +or + +{{< highlight bash >}} +hdfs dfs -mkdir /users +{{< /highlight >}} + + +Or put command etc. In other words, all programs like Hive, Spark, and Distcp will work against this file system. +Please note that any keys created/deleted in the bucket using methods apart from OzoneFileSystem will show up as diectories and files in the Ozone File System. diff --git a/hadoop-ozone/docs/content/OzoneManager.md b/hadoop-ozone/docs/content/OzoneManager.md new file mode 100644 index 00000000000..560f827a58d --- /dev/null +++ b/hadoop-ozone/docs/content/OzoneManager.md @@ -0,0 +1,77 @@ +--- +title: "Ozone Manager" +date: "2017-09-14" +menu: + main: + parent: Architecture +weight: 11 +--- + + +OM Overview +------------- + +Ozone Manager or OM is the namespace manager for Ozone. The clients (RPC clients, Rest proxy, Ozone file system, etc.) communicate with OM to create and delete various ozone objects. 
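+As a quick illustration of that interaction, the sketch below drives the same
+kind of volume, bucket and key operations through the Ozone Java client that is
+referenced on the [Java API]({{< ref "JavaApi.md" >}}) page. It is only a
+sketch: the client classes are assumed to match that page, and the volume,
+bucket and key names are placeholders.
+
+{{< highlight java >}}
+import java.nio.charset.StandardCharsets;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.client.ObjectStore;
+import org.apache.hadoop.ozone.client.OzoneBucket;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.client.OzoneVolume;
+import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
+
+public class OmClientSketch {
+  public static void main(String[] args) throws Exception {
+    // Reads ozone-site.xml from the classpath to locate OM and SCM.
+    OzoneClient client = OzoneClientFactory.getClient(new OzoneConfiguration());
+    ObjectStore store = client.getObjectStore();
+
+    // Pure namespace operations, served by OM.
+    store.createVolume("assets");
+    OzoneVolume assets = store.getVolume("assets");
+    assets.createBucket("videos");
+    OzoneBucket videos = assets.getBucket("videos");
+
+    // Writing a key: OM hands out blocks obtained from SCM,
+    // while the data itself is streamed to the datanodes.
+    byte[] data = "hello ozone".getBytes(StandardCharsets.UTF_8);
+    try (OzoneOutputStream out = videos.createKey("intro.txt", data.length)) {
+      out.write(data);
+    }
+    client.close();
+  }
+}
+{{< /highlight >}}
+
+Only the metadata bookkeeping above goes through OM; the bytes of the key
+travel between the client and the datanodes.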
+ +Each ozone volume is the root of a namespace under OM. This is very different from HDFS which provides a single rooted file system. + +Ozone's namespace is a collection of volumes or is a forest instead of a +single rooted tree as in HDFS. This property makes it easy to deploy multiple + OMs for scaling, this feature is under development. + +OM Metadata +----------------- + +Conceptually, OM maintains a list of volumes, buckets, and keys. For each user, it maintains a list of volumes. For each volume, the list of buckets and for each bucket the list of keys. + +Right now, OM is a single instance service. Ozone already relies on Apache Ratis (A Replicated State Machine based on Raft protocol). OM will be extended to replicate all its metadata via Ratis. With that, OM will be highly available. + +OM UI +------------ + +OM supports a simple UI for the time being. The default port of OM is 9874. To access the OM UI, the user can connect to http://OM:port or for a concrete example, +``` +http://omserver:9874/ +``` +OM UI primarily tries to measure load and latency of OM. The first section of OM UI relates to the number of operations seen by the cluster broken down by the object, operation and whether the operation was successful. + +The latter part of the UI is focused on latency and number of operations that OM is performing. + +One of the hardest problems in HDFS world is discovering the numerous settings offered to tune HDFS. Ozone solves that problem by tagging the configs. To discover settings, click on "Common Tools"->Config. This will take you to the ozone config UI. + +Config UI +------------ + +The ozone config UI is a matrix with row representing the tags, and columns representing All, OM and SCM. + +Suppose a user wanted to discover the required settings for ozone. Then the user can tick the checkbox that says "Required." +This will filter out all "Required" settings along with the description of what each setting does. + +The user can combine different checkboxes and UI will combine the results. That is, If you have more than one row selected, then all keys for those chosen tags are displayed together. + +We are hopeful that this leads to a more straightforward way of discovering settings that manage ozone. + + +OM and SCM +------------------- +[Storage container manager]({{< ref "Hdds.md" >}}) or (SCM) is the block manager + for ozone. When a client requests OM for a set of data nodes to write data, OM talk to SCM and gets a block. + +A block returned by SCM contains a pipeline, which is a set of nodes that we participate in that block replication. + +So OM is dependent on SCM for reading and writing of Keys. However, OM is independent of SCM while doing metadata operations like ozone volume or bucket operations. diff --git a/hadoop-ozone/docs/content/RealCluster.md b/hadoop-ozone/docs/content/RealCluster.md new file mode 100644 index 00000000000..9d86c8458a3 --- /dev/null +++ b/hadoop-ozone/docs/content/RealCluster.md @@ -0,0 +1,74 @@ +--- +title: Starting an Ozone Cluster +weight: 1 +menu: + main: + parent: Starting + weight: 3 +--- + + +Before we boot up the Ozone cluster, we need to initialize both SCM and Ozone Manager. + +{{< highlight bash >}} +ozone scm -init +{{< /highlight >}} +This allows SCM to create the cluster Identity and initialize its state. +The ```init``` command is similar to Namenode format. Init command is executed only once, that allows SCM to create all the required on-disk structures to work correctly. 
+{{< highlight bash >}} +ozone --daemon start scm +{{< /highlight >}} + +Once we know SCM is up and running, we can create an Object Store for our use. This is done by running the following command. + +{{< highlight bash >}} +ozone om -createObjectStore +{{< /highlight >}} + + +Once Ozone manager has created the Object Store, we are ready to run the name +services. + +{{< highlight bash >}} +ozone --daemon start om +{{< /highlight >}} + +At this point Ozone's name services, the Ozone manager, and the block service SCM is both running. +**Please note**: If SCM is not running +```createObjectStore``` command will fail. SCM start will fail if on-disk data structures are missing. So please make sure you have done both ```init``` and ```createObjectStore``` commands. + +Now we need to start the data nodes. Please run the following command on each datanode. +{{< highlight bash >}} +ozone --daemon start datanode +{{< /highlight >}} + +At this point SCM, Ozone Manager and data nodes are up and running. + +***Congratulations!, You have set up a functional ozone cluster.*** + +------- +If you want to make your life simpler, you can just run +{{< highlight bash >}} +ozone scm -init +ozone om -createObjectStore +start-ozone.sh +{{< /highlight >}} +This assumes that you have set up the slaves file correctly and ssh +configuration that allows ssh-ing to all data nodes. This is the same as the +HDFS configuration, so please refer to HDFS documentation on how to set this +up. diff --git a/hadoop-ozone/docs/content/Rest.md b/hadoop-ozone/docs/content/Rest.md index 2e935d6d453..a25d3ab0ae6 100644 --- a/hadoop-ozone/docs/content/Rest.md +++ b/hadoop-ozone/docs/content/Rest.md @@ -1,33 +1,29 @@ --- -title: Ozone REST API -menu: main +title: REST API +menu: + main: + parent: Client --- -Ozone REST API's. -=================== - - - -Overview --------- - The Ozone REST API's allows user to access ozone via REST protocol. -Authentication and Authorization --------------------- +## Authentication and Authorization For time being, The default authentication mode of REST API is insecure access mode, which is *Simple* mode. Under this mode, ozone server trusts the user @@ -47,8 +43,7 @@ authorized to obtain administrator privilege by using HTTP header for example set following header *Authorization: OZONE root* in the HTTP request, then ozone will authorize the client with administrator privilege. -Common REST Headers --------------------- +## Common REST Headers The following HTTP headers must be set for each REST call. @@ -58,8 +53,7 @@ The following HTTP headers must be set for each REST call. | Date | Standard HTTP header that represents dates. The format is - day of the week, month, day, year and time (military time format) in GMT. Any other time zone will be rejected by ozone server. Eg. *Date : Mon, Apr 4, 2016 06:22:00 GMT*. This field is required. | | x-ozone-version | A required HTTP header to indicate which version of API this call will be communicating to. E.g *x-ozone-version: v1*. Currently ozone only publishes v1 version API. | -Common Reply Headers --------------------- +## Common Reply Headers The common reply headers are part of all Ozone server replies. @@ -69,8 +63,7 @@ The common reply headers are part of all Ozone server replies. | x-ozone-request-id | This is a UUID string that represents an unique request ID. This ID is used to track the request through the ozone system and is useful for debugging purposes. 
| | x-ozone-server-name | Fully qualified domain name of the sever which handled the request. | -Volume APIs --------------------- +## Volume APIs ### Create a Volume @@ -222,8 +215,7 @@ this request gets all volumes owned by *bilbo* and each volume's name contains p ] } -Bucket APIs --------------------- +## Bucket APIs ### Create Bucket @@ -389,8 +381,7 @@ this request lists all the buckets under volume *volume-of-bilbo*, and the resul ] } -Key APIs ------------------- +## Key APIs ### Put Key diff --git a/hadoop-ozone/docs/content/RunningViaDocker.md b/hadoop-ozone/docs/content/RunningViaDocker.md new file mode 100644 index 00000000000..0b8fece5fb1 --- /dev/null +++ b/hadoop-ozone/docs/content/RunningViaDocker.md @@ -0,0 +1,73 @@ +--- +title: Alpha Cluster +weight: 1 +menu: + main: + parent: Starting + weight: 1 +--- + + + +***This is an alpha release of Ozone. Please don't use this release in +production.*** Please check the road map page for features under +development. + +The easiest way to run ozone is to download the release tarball and launch +ozone via Docker. Docker will create a small ozone cluster on your machine, +including the data nodes and ozone services. + +## Running Ozone via Docker + + +**This assumes that you have Docker installed on the machine.** + +* Download the Ozone tarball and untar it. + +* Go to the directory where the docker compose files exist and tell +`docker-compose` to start Ozone in the background. This will start a small +ozone instance on your machine. + +{{< highlight bash >}} +cd ozone-0.2.1-SNAPSHOT/compose/ozone/ + +docker-compose up -d +{{< /highlight >}} + + +To verify that ozone is working as expected, let us log into a data node and +run _freon_, the load generator for Ozone. The ```exec datanode bash``` command +will open a bash shell on the datanode. The ozone freon command is executed +within the datanode container. You can quit freon via CTRL-C any time. The +```rk``` profile instructs freon to generate random keys. + +{{< highlight bash >}} +docker-compose exec datanode bash +ozone freon rk +{{< /highlight >}} + +You can check out the **OzoneManager UI** at http://localhost:9874/ to see the +activity generated by freon. +While you are there, please don't forget to check out the ozone configuration explorer. + +***Congratulations, You have just run your first ozone cluster.*** + +To shutdown the cluster, please run +{{< highlight bash >}} +docker-compose down +{{< /highlight >}} \ No newline at end of file diff --git a/hadoop-ozone/docs/content/RunningWithHDFS.md b/hadoop-ozone/docs/content/RunningWithHDFS.md new file mode 100644 index 00000000000..2fd2bd6ace2 --- /dev/null +++ b/hadoop-ozone/docs/content/RunningWithHDFS.md @@ -0,0 +1,77 @@ +--- +title: Running concurrently with HDFS +weight: 1 +menu: + main: + parent: Starting + weight: 4 +--- + + +Ozone is designed to work with HDFS. So it is easy to deploy ozone in an +existing HDFS cluster. + +Ozone does *not* support security today. It is a work in progress and tracked + in +[HDDS-4](https://issues.apache.org/jira/browse/HDDS-4). If you enable ozone +in a secure HDFS cluster, for your own protection Ozone will refuse to work. + +In other words, till Ozone security work is done, Ozone will not work in any +secure clusters. + +The container manager part of Ozone runs inside DataNodes as a pluggable module. +To activate ozone you should define the service plugin implementation class. + +

+ +{{< highlight xml >}} + + dfs.datanode.plugins + org.apache.hadoop.ozone.HddsDatanodeService + +{{< /highlight >}} + +You also need to add the ozone-datanode-plugin jar file to the classpath: + +{{< highlight bash >}} +export HADOOP_CLASSPATH=/opt/ozone/share/hadoop/ozoneplugin/hadoop-ozone-datanode-plugin.jar +{{< /highlight >}} + + + +To start ozone with HDFS you should start the the following components: + + 1. HDFS Namenode (from Hadoop distribution) + 2. HDFS Datanode (from the Hadoop distribution with the plugin on the + classpath from the Ozone distribution) + 3. Ozone Manager (from the Ozone distribution) + 4. Storage Container manager (from the Ozone distribution) + +Please check the log of the datanode whether the HDDS/Ozone plugin is started or +not. Log of datanode should contain something like this: + +``` +2018-09-17 16:19:24 INFO HddsDatanodeService:158 - Started plug-in org.apache.hadoop.ozone.web.OzoneHddsDatanodeService@6f94fb9d +``` + + \ No newline at end of file diff --git a/hadoop-ozone/docs/content/SCMCLI.md b/hadoop-ozone/docs/content/SCMCLI.md new file mode 100644 index 00000000000..bd6086c796e --- /dev/null +++ b/hadoop-ozone/docs/content/SCMCLI.md @@ -0,0 +1,29 @@ +--- +title: "SCMCLI" +date: 2017-08-10 +menu: + main: + parent: Tools +--- + + +SCM is the block service for Ozone. It is also the workhorse for ozone. But user process never talks to SCM. However, being able to read the state of SCM is useful. + +SCMCLI allows the developer to access SCM directly. Please note: Improper usage of this tool can destroy your cluster. Unless you know exactly what you are doing, Please do *not* use this tool. In other words, this is a developer only tool. We might even remove this command in future to prevent improper use. + +[^1]: This assumes that you have a working docker installation on the development machine. diff --git a/hadoop-ozone/docs/content/Settings.md b/hadoop-ozone/docs/content/Settings.md new file mode 100644 index 00000000000..b2d30e53f1a --- /dev/null +++ b/hadoop-ozone/docs/content/Settings.md @@ -0,0 +1,142 @@ +--- +title: Configuration +weight: 1 +menu: + main: + parent: Starting + weight: 2 +--- + + + + + +If you are feeling adventurous, you can setup ozone in a real cluster. +Setting up a real cluster requires us to understand the components of Ozone. +Ozone is designed to work concurrently with HDFS. However, Ozone is also +capable of running independently. The components of ozone are the same in both approaches. + +## Ozone Components + +1. Ozone Manager - Is the server that is in charge of the namespace of Ozone. Ozone Manager is responsible for all volume, bucket and key operations. +2. Storage Container Manager - Acts as the block manager. Ozone Manager +requests blocks from SCM, to which clients can write data. +3. Datanodes - Ozone data node code runs inside the HDFS datanode or in the independent deployment case runs an ozone datanode daemon. + + + + +## Setting up an Ozone only cluster + +* Please untar the ozone-0.2.1-SNAPSHOT to the directory where you are going +to run Ozone from. We need Ozone jars on all machines in the cluster. So you +need to do this on all machines in the cluster. + +* Ozone relies on a configuration file called ```ozone-site.xml```. To +generate a template that you can replace with proper values, please run the +following command. This will generate a template called ```ozone-site.xml``` at +the specified path (directory). 
+ +{{< highlight bash >}} +ozone genconf -output +{{< /highlight >}} + +Let us look at the settings inside the generated file (ozone-site.xml) and +how they control ozone. Once the right values are defined, this file +needs to be copied to ```ozone directory/etc/Hadoop```. + + +* **ozone.enabled** This is the most critical setting for ozone. +Ozone is a work in progress and users have to enable this service explicitly. +By default, Ozone is disabled. Setting this flag to `true` enables ozone in the +HDFS or Ozone cluster. + +Here is an example, + +{{< highlight xml >}} + + ozone.enabled + True + +{{< /highlight >}} + +* **ozone.metadata.dirs** Allows Administrators to specify where the + metadata must reside. Usually you pick your fastest disk (SSD if + you have them on your nodes). OzoneManager, SCM and datanode will write the + metadata to this path. This is a required setting, if this is missing Ozone + will fail to come up. + + Here is an example, + +{{< highlight xml >}} + + ozone.metadata.dirs + /data/disk1/meta + +{{< /highlight >}} + +* **ozone.scm.names** Storage container manager(SCM) is a distributed block + service which is used by ozone. This property allows data nodes to discover + SCM's address. Data nodes send heartbeat to SCM. + Until HA feature is complete, we configure ozone.scm.names to be a + single machine. + + Here is an example, + + {{< highlight xml >}} + + ozone.scm.names + scm.hadoop.apache.org + + {{< /highlight >}} + + * **ozone.scm.datanode.id** Data nodes generate a Unique ID called Datanode + ID. This identity is written to the file specified by this path. *Data nodes + will create this path if it doesn't exist already.* + +Here is an example, +{{< highlight xml >}} + + ozone.scm.datanode.id + /data/disk1/meta/node/datanode.id + +{{< /highlight >}} + +* **ozone.om.address** OM server address. This is used by OzoneClient and +Ozone File System. + +Here is an example, +{{< highlight xml >}} + + ozone.om.address + ozonemanager.hadoop.apache.org + +{{< /highlight >}} + + +### Ozone Settings Summary + +| Setting | Value | Comment | +|--------------------------------|------------------------------|------------------------------------------------------------------| +| ozone.enabled | true | This enables SCM and containers in HDFS cluster. | +| ozone.metadata.dirs | file path | The metadata will be stored here. | +| ozone.scm.names | SCM server name | Hostname:port or IP:port address of SCM. | +| ozone.scm.block.client.address | SCM server name and port | Used by services like OM | +| ozone.scm.client.address | SCM server name and port | Used by client-side | +| ozone.scm.datanode.address | SCM server name and port | Used by datanode to talk to SCM | +| ozone.om.address | OM server name | Used by Ozone handler and Ozone file system. | diff --git a/hadoop-ozone/docs/content/VolumeCommands.md b/hadoop-ozone/docs/content/VolumeCommands.md new file mode 100644 index 00000000000..6f024ef87f4 --- /dev/null +++ b/hadoop-ozone/docs/content/VolumeCommands.md @@ -0,0 +1,116 @@ +--- +title: Volume Commands +menu: + main: + parent: Client + weight: 2 +--- + + +Volume commands generally need administrator privileges. The ozone shell supports the following volume commands. + + * [create](#create) + * [delete](#delete) + * [info](#info) + * [list](#list) + * [update](#update) + +### Create + +The volume create command allows an administrator to create a volume and +assign it to a user. 
+ +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| -q, --quota | Optional, This argument that specifies the maximum size this volume can use in the Ozone cluster. | +| -u, --user | Required, The name of the user who owns this volume. This user can create, buckets and keys on this volume. | +| Uri | The name of the volume. | + +{{< highlight bash >}} +ozone sh volume create --quota=1TB --user=bilbo /hive +{{< /highlight >}} + +The above command will create a volume called _hive_ on the ozone cluster. This +volume has a quota of 1TB, and the owner is _bilbo_. + +### Delete + +The volume delete commands allows an administrator to delete a volume. If the +volume is not empty then this command will fail. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the volume. + +{{< highlight bash >}} +ozone sh volume delete /hive +{{< /highlight >}} + +The above command will delete the volume hive, if the volume has no buckets +inside it. + +### Info + +The volume info commands returns the information about the volume including +quota and owner information. +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the volume. + +{{< highlight bash >}} +ozone sh volume info /hive +{{< /highlight >}} + +The above command will print out the information about hive volume. + +### List + +The volume list command will list the volumes owned by a user. + +{{< highlight bash >}} +ozone sh volume list --user hadoop +{{< /highlight >}} + +The above command will print out all the volumes owned by the user hadoop. + +### Update + +The volume update command allows changing of owner and quota on a given volume. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| -q, --quota | Optional, This argument that specifies the maximum size this volume can use in the Ozone cluster. | +| -u, --user | Optional, The name of the user who owns this volume. This user can create, buckets and keys on this volume. | +| Uri | The name of the volume. | + +{{< highlight bash >}} +ozone sh volume update --quota=10TB /hive +{{< /highlight >}} + +The above command updates the volume quota to 10TB. + +You can try out these commands from the docker instance of the [Alpha +Cluster](runningviadocker.html). diff --git a/hadoop-ozone/docs/content/_index.md b/hadoop-ozone/docs/content/_index.md index 383b2e0ef0a..e297b182fd4 100644 --- a/hadoop-ozone/docs/content/_index.md +++ b/hadoop-ozone/docs/content/_index.md @@ -4,99 +4,36 @@ menu: main weight: -10 --- -
Ozone is an Object store for Apache Hadoop. It aims to scale to billions of -keys. 
The following is a high-level overview of the core components of Ozone.

 +# Apache Hadoop Ozone -![Ozone Architecture Overview](./OzoneOverview.png) 

 +Ozone is a scalable, distributed object store for Hadoop. Applications like +Apache Spark, Hive and YARN, can run against Ozone without any +modifications. Ozone comes with a [Java client library]({{< ref "JavaApi.md" +>}}) and a [command line interface] ({{< ref "CommandShell.md#shell" >}}) which makes it easy to use Ozone. This client library supports both RPC and REST protocols. -The main elements of Ozone are
: +Ozone consists of volumes, buckets, and Keys. -## Clients +* Volumes are similar to user accounts. Only administrators can create or delete volumes. +* Buckets are similar to directories. A bucket can contain any number of keys, but buckets cannot contain other buckets. +* Keys are similar to files. A bucket can contain any number of keys. -Ozone ships with a set of ready-made clients. They are 
Ozone CLI and Freon.
 - * [Ozone CLI](./OzoneCommandShell.html) is the command line interface like 'hdfs' command.
 - * Freon is a load generation tool for Ozone.
 +}}"> -## REST Handler - -Ozone provides both an RPC (Remote Procedure Call) as well as a REST -(Representational State Transfer) style interface. This allows clients to be -written in many languages quickly. Ozone strives to maintain a similar -interface between REST and RPC. The Rest handler offers the REST protocol -services of Ozone. - -For most purposes, a client can make one line change to switch from REST to -RPC or vice versa. 
 - -## Ozone File System - -Ozone file system (TODO: Add documentation) is a Hadoop compatible file system. -This is the important user-visible component of ozone. -This allows Hadoop services and applications like Hive/Spark to run against -Ozone without any change. - -## Ozone Client - -This is like DFSClient in HDFS. This acts as the standard client to talk to -Ozone. All other components that we have discussed so far rely on Ozone client -(TODO: Add Ozone client documentation).
 - -## Ozone Manager - -Ozone Manager (OM) takes care of the Ozone's namespace. -All ozone entities like volumes, buckets and keys are managed by OM -(TODO: Add OM documentation). In short, OM is the metadata manager for Ozone. -OM talks to blockManager(SCM) to get blocks and passes it on to the Ozone -client. Ozone client writes data to these blocks. -OM will eventually be replicated via Apache Ratis for High Availability.
 - -## Storage Container Manager -Storage Container Manager (SCM) is the block and cluster manager for Ozone. -SCM along with data nodes offer a service called 'containers'. -A container is a group unrelated of blocks that are managed together -as a single entity. - -SCM offers the following abstractions.

 - -![SCM Abstractions](../SCMBlockDiagram.png) - -### Blocks - -Blocks are like blocks in HDFS. They are replicated store of data. - -### Containers - -A collection of blocks replicated and managed together. - -### Pipelines - -SCM allows each container to choose its method of replication. -For example, a container might decide that it needs only one copy of a block -and might choose a stand-alone pipeline. Another container might want to have -a very high level of reliability and pick a RATIS based pipeline. In other -words, SCM allows different kinds of replication strategies to co-exist. - -### Pools - -A group of data nodes is called a pool. For scaling purposes, -we define a pool as a set of machines. This makes management of datanodes -easier. - -### Nodes - -The data node where data is stored. diff --git a/hadoop-ozone/docs/pom.xml b/hadoop-ozone/docs/pom.xml index e0f9a87614b..64d0ec86e05 100644 --- a/hadoop-ozone/docs/pom.xml +++ b/hadoop-ozone/docs/pom.xml @@ -20,14 +20,19 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-ozone - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-ozone-docs - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Ozone Documentation Apache Hadoop Ozone Documentation jar + + ozone + true + + @@ -47,10 +52,23 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> dev-support/bin/generate-site.sh - - -b - /docs - + + + + org.apache.rat + apache-rat-plugin + + + themes/ozonedoc/static/js/bootstrap.min.js + themes/ozonedoc/static/js/jquery.min.js + themes/ozonedoc/static/css/bootstrap-theme.min.css + themes/ozonedoc/static/css/bootstrap.min.css.map + themes/ozonedoc/static/css/bootstrap.min.css + themes/ozonedoc/static/css/bootstrap-theme.min.css.map + themes/ozonedoc/static/fonts/glyphicons-halflings-regular.svg + themes/ozonedoc/layouts/index.html + themes/ozonedoc/theme.toml + diff --git a/hadoop-ozone/docs/static/NOTES.md b/hadoop-ozone/docs/static/NOTES.md new file mode 100644 index 00000000000..7b7ca1290b3 --- /dev/null +++ b/hadoop-ozone/docs/static/NOTES.md @@ -0,0 +1,20 @@ + + +The source of Ozone logo is available here: + +https://git-wip-us.apache.org/repos/asf?p=hadoop-ozonesite.git;a=tree;f=static;h=9830788c1fa36c933272cdf87342bb71974c8567;hb=refs/heads/asf-site diff --git a/hadoop-ozone/docs/static/OzoneOverview.svg b/hadoop-ozone/docs/static/OzoneOverview.svg index 0120a5cc367..9d4660db1c8 100644 --- a/hadoop-ozone/docs/static/OzoneOverview.svg +++ b/hadoop-ozone/docs/static/OzoneOverview.svg @@ -1,4 +1,17 @@ + Desktop HD diff --git a/hadoop-ozone/docs/static/ozone-logo.png b/hadoop-ozone/docs/static/ozone-logo.png new file mode 100644 index 00000000000..cdc8e4e7383 Binary files /dev/null and b/hadoop-ozone/docs/static/ozone-logo.png differ diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html index 0fdd1ba4a71..ca2e1c4a65e 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html @@ -1,15 +1,18 @@ {{ partial "header.html" . }} @@ -21,7 +24,10 @@
{{ partial "sidebar.html" . }}
+

{{ .Title }}

+
{{ .Content }} +
diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/index.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/index.html index c2c8cd0f1e7..17f0246abf4 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/index.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/index.html @@ -1,3 +1,19 @@ + {{ partial "header.html" . }} diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/footer.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/footer.html index 4aa5b593f51..5aaeed9e1ed 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/footer.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/footer.html @@ -1,15 +1,18 @@ diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html index c1f47a95fa4..35ba4c8042f 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html @@ -1,15 +1,18 @@ @@ -23,9 +26,9 @@ Documentation for Apache Hadoop Ozone - + - + diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/navbar.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/navbar.html index b73a7691ecd..3cd86096578 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/navbar.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/navbar.html @@ -1,15 +1,18 @@

+ *
+ *   WEIGHT   LOCK
+ *   0        User Lock
+ *   1        Volume Lock
+ *   2        Bucket Lock
+ * + * One cannot obtain a lower weight lock while holding a lock with higher + * weight. The other way around is possible.
+ *
+ *

+ * For example: + *
+ * -> acquireVolumeLock (will work)
+ * +-> acquireBucketLock (will work)
+ * +--> acquireUserLock (will throw Exception)
+ *

+ *
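+ * A well-behaved caller therefore acquires locks in increasing weight order
+ * and releases them in the reverse order. A sketch (here "lock" is just a
+ * placeholder name for the OzoneManagerLock instance in use):
+ *
+ *   lock.acquireUserLock(owner);
+ *   lock.acquireVolumeLock(volume);
+ *   lock.acquireBucketLock(volume, bucket);
+ *   // ... update volume/bucket metadata ...
+ *   lock.releaseBucketLock(volume, bucket);
+ *   lock.releaseVolumeLock(volume);
+ *   lock.releaseUserLock(owner);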
+ *
+ * To acquire a user lock you should not hold any Volume/Bucket lock. Similarly
+ * to acquire a Volume lock you should not hold any Bucket lock.
+ *
+ */
+public final class OzoneManagerLock {
+
+  private static final String VOLUME_LOCK = "volumeLock";
+  private static final String BUCKET_LOCK = "bucketLock";
+
+
+  private final LockManager<String> manager;
+
+  // To maintain locks held by current thread.
+  private final ThreadLocal<Map<String, AtomicInteger>> myLocks =
+      ThreadLocal.withInitial(() -> ImmutableMap.of(
+          VOLUME_LOCK, new AtomicInteger(0),
+          BUCKET_LOCK, new AtomicInteger(0)));
+
+  /**
+   * Creates new OzoneManagerLock instance.
+   * @param conf Configuration object
+   */
+  public OzoneManagerLock(Configuration conf) {
+    manager = new LockManager<>(conf);
+  }
+
+  /**
+   * Acquires user lock on the given resource.
+   *
+   *

If the lock is not available then the current thread becomes + * disabled for thread scheduling purposes and lies dormant until the + * lock has been acquired. + * + * @param user User on which the lock has to be acquired + */ + public void acquireUserLock(String user) { + // Calling thread should not hold any volume or bucket lock. + if (hasAnyVolumeLock() || hasAnyBucketLock()) { + throw new RuntimeException( + "Thread '" + Thread.currentThread().getName() + + "' cannot acquire user lock" + + " while holding volume/bucket lock(s)."); + } + manager.lock(OM_USER_PREFIX + user); + } + + /** + * Releases the user lock on given resource. + */ + public void releaseUserLock(String user) { + manager.unlock(OM_USER_PREFIX + user); + } + + /** + * Acquires volume lock on the given resource. + * + *

If the lock is not available then the current thread becomes + * disabled for thread scheduling purposes and lies dormant until the + * lock has been acquired. + * + * @param volume Volume on which the lock has to be acquired + */ + public void acquireVolumeLock(String volume) { + // Calling thread should not hold any bucket lock. + if (hasAnyBucketLock()) { + throw new RuntimeException( + "Thread '" + Thread.currentThread().getName() + + "' cannot acquire volume lock while holding bucket lock(s)."); + } + manager.lock(OM_KEY_PREFIX + volume); + myLocks.get().get(VOLUME_LOCK).incrementAndGet(); + } + + /** + * Releases the volume lock on given resource. + */ + public void releaseVolumeLock(String volume) { + manager.unlock(OM_KEY_PREFIX + volume); + myLocks.get().get(VOLUME_LOCK).decrementAndGet(); + } + + /** + * Acquires bucket lock on the given resource. + * + *

If the lock is not available then the current thread becomes + * disabled for thread scheduling purposes and lies dormant until the + * lock has been acquired. + * + * @param bucket Bucket on which the lock has to be acquired + */ + public void acquireBucketLock(String volume, String bucket) { + manager.lock(OM_KEY_PREFIX + volume + OM_KEY_PREFIX + bucket); + myLocks.get().get(BUCKET_LOCK).incrementAndGet(); + } + + + /** + * Releases the bucket lock on given resource. + */ + public void releaseBucketLock(String volume, String bucket) { + manager.unlock(OM_KEY_PREFIX + volume + OM_KEY_PREFIX + bucket); + myLocks.get().get(BUCKET_LOCK).decrementAndGet(); + } + + /** + * Returns true if the current thread holds any volume lock. + * @return true if current thread holds volume lock, else false + */ + private boolean hasAnyVolumeLock() { + return myLocks.get().get(VOLUME_LOCK).get() != 0; + } + + /** + * Returns true if the current thread holds any bucket lock. + * @return true if current thread holds bucket lock, else false + */ + private boolean hasAnyBucketLock() { + return myLocks.get().get(BUCKET_LOCK).get() != 0; + } + +} diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManagerImpl.java index e50145debdb..c232bf1be75 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManagerImpl.java @@ -28,7 +28,9 @@ .OzoneManagerProtocolProtos.VolumeInfo; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.util.Time; -import org.apache.hadoop.utils.BatchOperation; +import org.apache.hadoop.utils.RocksDBStore; +import org.rocksdb.RocksDBException; +import org.rocksdb.WriteBatch; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -67,10 +69,10 @@ public VolumeManagerImpl(OMMetadataManager metadataManager, // Helpers to add and delete volume from user list private void addVolumeToOwnerList(String volume, String owner, - BatchOperation batchOperation) throws IOException { + WriteBatch batchOperation) throws RocksDBException, IOException { // Get the volume list byte[] dbUserKey = metadataManager.getUserKey(owner); - byte[] volumeList = metadataManager.get(dbUserKey); + byte[] volumeList = metadataManager.getUserTable().get(dbUserKey); List prevVolList = new LinkedList<>(); if (volumeList != null) { VolumeList vlist = VolumeList.parseFrom(volumeList); @@ -87,15 +89,15 @@ private void addVolumeToOwnerList(String volume, String owner, prevVolList.add(volume); VolumeList newVolList = VolumeList.newBuilder() .addAllVolumeNames(prevVolList).build(); - batchOperation.put(dbUserKey, newVolList.toByteArray()); + batchOperation.put(metadataManager.getUserTable().getHandle(), + dbUserKey, newVolList.toByteArray()); } private void delVolumeFromOwnerList(String volume, String owner, - BatchOperation batchOperation) - throws IOException { + WriteBatch batch) throws RocksDBException, IOException { // Get the volume list byte[] dbUserKey = metadataManager.getUserKey(owner); - byte[] volumeList = metadataManager.get(dbUserKey); + byte[] volumeList = metadataManager.getUserTable().get(dbUserKey); List prevVolList = new LinkedList<>(); if (volumeList != null) { VolumeList vlist = VolumeList.parseFrom(volumeList); @@ -108,11 +110,12 @@ private void delVolumeFromOwnerList(String volume, String owner, // Remove the volume from the 
list prevVolList.remove(volume); if (prevVolList.size() == 0) { - batchOperation.delete(dbUserKey); + batch.delete(metadataManager.getUserTable().getHandle(), dbUserKey); } else { VolumeList newVolList = VolumeList.newBuilder() .addAllVolumeNames(prevVolList).build(); - batchOperation.put(dbUserKey, newVolList.toByteArray()); + batch.put(metadataManager.getUserTable().getHandle(), + dbUserKey, newVolList.toByteArray()); } } @@ -123,10 +126,11 @@ private void delVolumeFromOwnerList(String volume, String owner, @Override public void createVolume(OmVolumeArgs args) throws IOException { Preconditions.checkNotNull(args); - metadataManager.writeLock().lock(); + metadataManager.getLock().acquireUserLock(args.getOwnerName()); + metadataManager.getLock().acquireVolumeLock(args.getVolume()); try { byte[] dbVolumeKey = metadataManager.getVolumeKey(args.getVolume()); - byte[] volumeInfo = metadataManager.get(dbVolumeKey); + byte[] volumeInfo = metadataManager.getVolumeTable().get(dbVolumeKey); // Check of the volume already exists if (volumeInfo != null) { @@ -134,39 +138,48 @@ public void createVolume(OmVolumeArgs args) throws IOException { throw new OMException(ResultCodes.FAILED_VOLUME_ALREADY_EXISTS); } - BatchOperation batch = new BatchOperation(); - // Write the vol info - List metadataList = new LinkedList<>(); - for (Map.Entry entry : args.getKeyValueMap().entrySet()) { - metadataList.add(HddsProtos.KeyValue.newBuilder() - .setKey(entry.getKey()).setValue(entry.getValue()).build()); + try(WriteBatch batch = new WriteBatch()) { + // Write the vol info + List metadataList = new LinkedList<>(); + for (Map.Entry entry : + args.getKeyValueMap().entrySet()) { + metadataList.add(HddsProtos.KeyValue.newBuilder() + .setKey(entry.getKey()).setValue(entry.getValue()).build()); + } + List aclList = args.getAclMap().ozoneAclGetProtobuf(); + + VolumeInfo newVolumeInfo = VolumeInfo.newBuilder() + .setAdminName(args.getAdminName()) + .setOwnerName(args.getOwnerName()) + .setVolume(args.getVolume()) + .setQuotaInBytes(args.getQuotaInBytes()) + .addAllMetadata(metadataList) + .addAllVolumeAcls(aclList) + .setCreationTime(Time.now()) + .build(); + batch.put(metadataManager.getVolumeTable().getHandle(), + dbVolumeKey, newVolumeInfo.toByteArray()); + + // Add volume to user list + addVolumeToOwnerList(args.getVolume(), args.getOwnerName(), batch); + metadataManager.getStore().write(batch); } - List aclList = args.getAclMap().ozoneAclGetProtobuf(); - - VolumeInfo newVolumeInfo = VolumeInfo.newBuilder() - .setAdminName(args.getAdminName()) - .setOwnerName(args.getOwnerName()) - .setVolume(args.getVolume()) - .setQuotaInBytes(args.getQuotaInBytes()) - .addAllMetadata(metadataList) - .addAllVolumeAcls(aclList) - .setCreationTime(Time.now()) - .build(); - batch.put(dbVolumeKey, newVolumeInfo.toByteArray()); - - // Add volume to user list - addVolumeToOwnerList(args.getVolume(), args.getOwnerName(), batch); - metadataManager.writeBatch(batch); LOG.debug("created volume:{} user:{}", args.getVolume(), args.getOwnerName()); - } catch (IOException ex) { + } catch (RocksDBException | IOException ex) { if (!(ex instanceof OMException)) { LOG.error("Volume creation failed for user:{} volume:{}", args.getOwnerName(), args.getVolume(), ex); } - throw ex; + if(ex instanceof RocksDBException) { + throw RocksDBStore.toIOException("Volume creation failed.", + (RocksDBException) ex); + } else { + throw (IOException) ex; + } } finally { - metadataManager.writeLock().unlock(); + 
metadataManager.getLock().releaseVolumeLock(args.getVolume()); + metadataManager.getLock().releaseUserLock(args.getOwnerName()); } } @@ -181,10 +194,11 @@ public void createVolume(OmVolumeArgs args) throws IOException { public void setOwner(String volume, String owner) throws IOException { Preconditions.checkNotNull(volume); Preconditions.checkNotNull(owner); - metadataManager.writeLock().lock(); + metadataManager.getLock().acquireUserLock(owner); + metadataManager.getLock().acquireVolumeLock(volume); try { byte[] dbVolumeKey = metadataManager.getVolumeKey(volume); - byte[] volInfo = metadataManager.get(dbVolumeKey); + byte[] volInfo = metadataManager.getVolumeTable().get(dbVolumeKey); if (volInfo == null) { LOG.debug("Changing volume ownership failed for user:{} volume:{}", owner, volume); @@ -195,30 +209,37 @@ public void setOwner(String volume, String owner) throws IOException { OmVolumeArgs volumeArgs = OmVolumeArgs.getFromProtobuf(volumeInfo); Preconditions.checkState(volume.equals(volumeInfo.getVolume())); - BatchOperation batch = new BatchOperation(); - delVolumeFromOwnerList(volume, volumeArgs.getOwnerName(), batch); - addVolumeToOwnerList(volume, owner, batch); - - OmVolumeArgs newVolumeArgs = - OmVolumeArgs.newBuilder().setVolume(volumeArgs.getVolume()) - .setAdminName(volumeArgs.getAdminName()) - .setOwnerName(owner) - .setQuotaInBytes(volumeArgs.getQuotaInBytes()) - .setCreationTime(volumeArgs.getCreationTime()) - .build(); - - VolumeInfo newVolumeInfo = newVolumeArgs.getProtobuf(); - batch.put(dbVolumeKey, newVolumeInfo.toByteArray()); - - metadataManager.writeBatch(batch); - } catch (IOException ex) { + try(WriteBatch batch = new WriteBatch()) { + delVolumeFromOwnerList(volume, volumeArgs.getOwnerName(), batch); + addVolumeToOwnerList(volume, owner, batch); + + OmVolumeArgs newVolumeArgs = + OmVolumeArgs.newBuilder().setVolume(volumeArgs.getVolume()) + .setAdminName(volumeArgs.getAdminName()) + .setOwnerName(owner) + .setQuotaInBytes(volumeArgs.getQuotaInBytes()) + .setCreationTime(volumeArgs.getCreationTime()) + .build(); + + VolumeInfo newVolumeInfo = newVolumeArgs.getProtobuf(); + batch.put(metadataManager.getVolumeTable().getHandle(), + dbVolumeKey, newVolumeInfo.toByteArray()); + metadataManager.getStore().write(batch); + } + } catch (RocksDBException | IOException ex) { if (!(ex instanceof OMException)) { LOG.error("Changing volume ownership failed for user:{} volume:{}", owner, volume, ex); } - throw ex; + if(ex instanceof RocksDBException) { + throw RocksDBStore.toIOException("Volume creation failed.", + (RocksDBException) ex); + } else { + throw (IOException) ex; + } } finally { - metadataManager.writeLock().unlock(); + metadataManager.getLock().releaseVolumeLock(volume); + metadataManager.getLock().releaseUserLock(owner); } } @@ -231,10 +252,10 @@ public void setOwner(String volume, String owner) throws IOException { */ public void setQuota(String volume, long quota) throws IOException { Preconditions.checkNotNull(volume); - metadataManager.writeLock().lock(); + metadataManager.getLock().acquireVolumeLock(volume); try { byte[] dbVolumeKey = metadataManager.getVolumeKey(volume); - byte[] volInfo = metadataManager.get(dbVolumeKey); + byte[] volInfo = metadataManager.getVolumeTable().get(dbVolumeKey); if (volInfo == null) { LOG.debug("volume:{} does not exist", volume); throw new OMException(ResultCodes.FAILED_VOLUME_NOT_FOUND); @@ -253,7 +274,8 @@ public void setQuota(String volume, long quota) throws IOException { 
.setCreationTime(volumeArgs.getCreationTime()).build(); VolumeInfo newVolumeInfo = newVolumeArgs.getProtobuf(); - metadataManager.put(dbVolumeKey, newVolumeInfo.toByteArray()); + metadataManager.getVolumeTable().put(dbVolumeKey, + newVolumeInfo.toByteArray()); } catch (IOException ex) { if (!(ex instanceof OMException)) { LOG.error("Changing volume quota failed for volume:{} quota:{}", volume, @@ -261,7 +283,7 @@ public void setQuota(String volume, long quota) throws IOException { } throw ex; } finally { - metadataManager.writeLock().unlock(); + metadataManager.getLock().releaseVolumeLock(volume); } } @@ -273,10 +295,10 @@ public void setQuota(String volume, long quota) throws IOException { */ public OmVolumeArgs getVolumeInfo(String volume) throws IOException { Preconditions.checkNotNull(volume); - metadataManager.readLock().lock(); + metadataManager.getLock().acquireVolumeLock(volume); try { byte[] dbVolumeKey = metadataManager.getVolumeKey(volume); - byte[] volInfo = metadataManager.get(dbVolumeKey); + byte[] volInfo = metadataManager.getVolumeTable().get(dbVolumeKey); if (volInfo == null) { LOG.debug("volume:{} does not exist", volume); throw new OMException(ResultCodes.FAILED_VOLUME_NOT_FOUND); @@ -292,7 +314,7 @@ public OmVolumeArgs getVolumeInfo(String volume) throws IOException { } throw ex; } finally { - metadataManager.readLock().unlock(); + metadataManager.getLock().releaseVolumeLock(volume); } } @@ -305,11 +327,19 @@ public OmVolumeArgs getVolumeInfo(String volume) throws IOException { @Override public void deleteVolume(String volume) throws IOException { Preconditions.checkNotNull(volume); - metadataManager.writeLock().lock(); + String owner; + metadataManager.getLock().acquireVolumeLock(volume); + try { + owner = getVolumeInfo(volume).getOwnerName(); + } finally { + metadataManager.getLock().releaseVolumeLock(volume); + } + metadataManager.getLock().acquireUserLock(owner); + metadataManager.getLock().acquireVolumeLock(volume); try { - BatchOperation batch = new BatchOperation(); + byte[] dbVolumeKey = metadataManager.getVolumeKey(volume); - byte[] volInfo = metadataManager.get(dbVolumeKey); + byte[] volInfo = metadataManager.getVolumeTable().get(dbVolumeKey); if (volInfo == null) { LOG.debug("volume:{} does not exist", volume); throw new OMException(ResultCodes.FAILED_VOLUME_NOT_FOUND); @@ -324,16 +354,25 @@ public void deleteVolume(String volume) throws IOException { Preconditions.checkState(volume.equals(volumeInfo.getVolume())); // delete the volume from the owner list // as well as delete the volume entry - delVolumeFromOwnerList(volume, volumeInfo.getOwnerName(), batch); - batch.delete(dbVolumeKey); - metadataManager.writeBatch(batch); - } catch (IOException ex) { + try(WriteBatch batch = new WriteBatch()) { + delVolumeFromOwnerList(volume, volumeInfo.getOwnerName(), batch); + batch.delete(metadataManager.getVolumeTable().getHandle(), + dbVolumeKey); + metadataManager.getStore().write(batch); + } + } catch (RocksDBException| IOException ex) { if (!(ex instanceof OMException)) { LOG.error("Delete volume failed for volume:{}", volume, ex); } - throw ex; + if(ex instanceof RocksDBException) { + throw RocksDBStore.toIOException("Volume creation failed.", + (RocksDBException) ex); + } else { + throw (IOException) ex; + } } finally { - metadataManager.writeLock().unlock(); + metadataManager.getLock().releaseVolumeLock(volume); + metadataManager.getLock().releaseUserLock(owner); } } @@ -349,10 +388,10 @@ public boolean checkVolumeAccess(String volume, OzoneAclInfo userAcl) 
throws IOException { Preconditions.checkNotNull(volume); Preconditions.checkNotNull(userAcl); - metadataManager.readLock().lock(); + metadataManager.getLock().acquireVolumeLock(volume); try { byte[] dbVolumeKey = metadataManager.getVolumeKey(volume); - byte[] volInfo = metadataManager.get(dbVolumeKey); + byte[] volInfo = metadataManager.getVolumeTable().get(dbVolumeKey); if (volInfo == null) { LOG.debug("volume:{} does not exist", volume); throw new OMException(ResultCodes.FAILED_VOLUME_NOT_FOUND); @@ -369,7 +408,7 @@ public boolean checkVolumeAccess(String volume, OzoneAclInfo userAcl) } throw ex; } finally { - metadataManager.readLock().unlock(); + metadataManager.getLock().releaseVolumeLock(volume); } } @@ -378,13 +417,13 @@ public boolean checkVolumeAccess(String volume, OzoneAclInfo userAcl) */ @Override public List listVolumes(String userName, - String prefix, String startKey, int maxKeys) throws IOException { - metadataManager.readLock().lock(); + String prefix, String startKey, int maxKeys) throws IOException { + metadataManager.getLock().acquireUserLock(userName); try { return metadataManager.listVolumes( userName, prefix, startKey, maxKeys); } finally { - metadataManager.readLock().unlock(); + metadataManager.getLock().releaseUserLock(userName); } } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java index 55cef97ed54..393ac91c1da 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java @@ -113,6 +113,7 @@ public OMException(Throwable cause, OMException.ResultCodes result) { FAILED_METADATA_ERROR, FAILED_INTERNAL_ERROR, OM_NOT_INITIALIZED, - SCM_VERSION_MISMATCH_ERROR + SCM_VERSION_MISMATCH_ERROR, + SCM_IN_CHILL_MODE } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java index 40a88b698a6..06d782b8203 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java @@ -519,12 +519,14 @@ public CommitKeyResponse commitKey(RpcController controller, .setVolumeName(keyArgs.getVolumeName()) .setBucketName(keyArgs.getBucketName()) .setKeyName(keyArgs.getKeyName()) - .setDataSize(keyArgs.getDataSize()) + .setLocationInfoList(keyArgs.getKeyLocationsList().stream() + .map(OmKeyLocationInfo::getFromProtobuf) + .collect(Collectors.toList())) .setType(type) .setFactor(factor) + .setDataSize(keyArgs.getDataSize()) .build(); - int id = request.getClientID(); - impl.commitKey(omKeyArgs, id); + impl.commitKey(omKeyArgs, request.getClientID()); resp.setStatus(Status.OK); } catch (IOException e) { resp.setStatus(exceptionToResponseStatus(e)); @@ -544,8 +546,8 @@ public AllocateBlockResponse allocateBlock(RpcController controller, .setBucketName(keyArgs.getBucketName()) .setKeyName(keyArgs.getKeyName()) .build(); - int id = request.getClientID(); - OmKeyLocationInfo newLocation = impl.allocateBlock(omKeyArgs, id); + OmKeyLocationInfo newLocation = impl.allocateBlock(omKeyArgs, + 
request.getClientID()); resp.setKeyLocation(newLocation.getProtobuf()); resp.setStatus(Status.OK); } catch (IOException e) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Handler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Handler.java index a66e227d5cc..a9550c2e26f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Handler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Handler.java @@ -18,42 +18,46 @@ package org.apache.hadoop.ozone.web.ozShell; -import org.apache.commons.cli.CommandLine; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.concurrent.Callable; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.cli.GenericParentCommand; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.client.OzoneClient; -import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.OzoneClientException; +import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.rest.OzoneException; -import org.apache.http.client.utils.URIBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_HTTP_SCHEME; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_SCHEME; +import org.apache.http.client.utils.URIBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine.Command; +import picocli.CommandLine.ParentCommand; /** * Common interface for command handling. */ -public abstract class Handler { +@Command(mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public abstract class Handler implements Callable { protected static final Logger LOG = LoggerFactory.getLogger(Handler.class); + protected OzoneClient client; - /** - * Executes the Client command. - * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException - */ - protected abstract void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException; + @ParentCommand + private GenericParentCommand parent; + + @Override + public Void call() throws Exception { + throw new UnsupportedOperationException(); + } /** * verifies user provided URI. 
@@ -148,4 +152,9 @@ private static URI stringToUri(String pathString) throws IOException { throw new IllegalArgumentException(e); } } + + public boolean isVerbose() { + return parent.isVerbose(); + } + } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Shell.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Shell.java index 2aec0fc0355..0f3969f32c5 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Shell.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/Shell.java @@ -18,38 +18,15 @@ package org.apache.hadoop.ozone.web.ozShell; -import org.apache.commons.cli.BasicParser; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.ozone.client.rest.OzoneException; -import org.apache.hadoop.ozone.web.ozShell.bucket.UpdateBucketHandler; -import org.apache.hadoop.ozone.web.ozShell.keys.DeleteKeyHandler; -import org.apache.hadoop.ozone.web.ozShell.keys.GetKeyHandler; -import org.apache.hadoop.ozone.web.ozShell.keys.InfoKeyHandler; -import org.apache.hadoop.ozone.web.ozShell.keys.ListKeyHandler; -import org.apache.hadoop.ozone.web.ozShell.keys.PutKeyHandler; -import org.apache.hadoop.ozone.web.ozShell.volume.CreateVolumeHandler; -import org.apache.hadoop.ozone.web.ozShell.volume.DeleteVolumeHandler; -import org.apache.hadoop.ozone.web.ozShell.volume.InfoVolumeHandler; -import org.apache.hadoop.ozone.web.ozShell.volume.ListVolumeHandler; -import org.apache.hadoop.ozone.web.ozShell.volume.UpdateVolumeHandler; -import org.apache.hadoop.ozone.web.ozShell.bucket.CreateBucketHandler; -import org.apache.hadoop.ozone.web.ozShell.bucket.DeleteBucketHandler; -import org.apache.hadoop.ozone.web.ozShell.bucket.InfoBucketHandler; -import org.apache.hadoop.ozone.web.ozShell.bucket.ListBucketHandler; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.web.ozShell.bucket.BucketCommands; +import org.apache.hadoop.ozone.web.ozShell.keys.KeyCommands; +import org.apache.hadoop.ozone.web.ozShell.volume.VolumeCommands; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.URISyntaxException; +import picocli.CommandLine.Command; /** * Ozone user interface commands. @@ -57,46 +34,38 @@ * This class uses dispatch method to make calls * to appropriate handlers that execute the ozone functions. 
*/ -public class Shell extends Configured implements Tool { +@Command(name = "ozone sh", + description = "Shell for Ozone object store", + subcommands = { + VolumeCommands.class, + BucketCommands.class, + KeyCommands.class + }, + versionProvider = HddsVersionProvider.class, + mixinStandardHelpOptions = true) +public class Shell extends GenericCli { + + private static final Logger LOG = LoggerFactory.getLogger(Shell.class); - // General options - public static final int DEFAULT_OZONE_PORT = 50070; - public static final String VERBOSE = "v"; + public static final String OZONE_URI_DESCRIPTION = "Ozone URI could start " + + "with o3:// or http(s):// or without prefix. REST protocol will " + + "be used for http(s), RPC otherwise. URI may contain the host and port " + + "of the SCM server. Both are optional. " + + "If they are not specified it will be identified from " + + "the config files."; - // volume related command line arguments - public static final String RUNAS = "root"; - public static final String USER = "user"; - public static final String OWNER = "owner"; - public static final String QUOTA = "quota"; - public static final String CREATE_VOLUME = "createVolume"; - public static final String UPDATE_VOLUME = "updateVolume"; - public static final String DELETE_VOLUME = "deleteVolume"; - public static final String LIST_VOLUME = "listVolume"; - public static final String INFO_VOLUME = "infoVolume"; + public static final String OZONE_VOLUME_URI_DESCRIPTION = + "URI of the volume.\n" + OZONE_URI_DESCRIPTION; - // bucket related command line arguments - public static final String CREATE_BUCKET = "createBucket"; - public static final String UPDATE_BUCKET = "updateBucket"; - public static final String DELETE_BUCKET = "deleteBucket"; - public static final String LIST_BUCKET = "listBucket"; - public static final String INFO_BUCKET = "infoBucket"; - public static final String ADD_ACLS = "addAcl"; - public static final String REMOVE_ACLS = "removeAcl"; - // TODO : Support versioning and StorageType for buckets + public static final String OZONE_BUCKET_URI_DESCRIPTION = + "URI of the volume/bucket.\n" + OZONE_URI_DESCRIPTION; - //Object related command line arguments - public static final String PUT_KEY = "putKey"; - public static final String GET_KEY = "getKey"; - public static final String INFO_KEY = "infoKey"; - public static final String DELETE_KEY = "deleteKey"; - public static final String LIST_KEY = "listKey"; - public static final String FILE = "file"; + public static final String OZONE_KEY_URI_DESCRIPTION = + "URI of the volume/bucket/key.\n" + OZONE_URI_DESCRIPTION; - // Listing related command line arguments - public static final String LIST_LENGTH = "length"; - public static final String START = "start"; - public static final String PREFIX = "prefix"; + // General options + public static final int DEFAULT_OZONE_PORT = 50070; /** * Main for the ozShell Command handling. @@ -105,311 +74,7 @@ * @throws Exception */ public static void main(String[] argv) throws Exception { - Shell shell = new Shell(); - Configuration conf = new OzoneConfiguration(); - conf.setQuietMode(false); - shell.setConf(conf); - int res = 0; - try { - res = ToolRunner.run(shell, argv); - } catch (Exception ex) { - System.err.println("ERROR: " + ex.getMessage()); - System.exit(1); - } - System.exit(res); - } - - /** - * Execute the command with the given arguments. - * - * @param args command specific arguments. - * - * @return exit code. 
- * - * @throws Exception - */ - @Override - public int run(String[] args) throws Exception { - Options opts = getOpts(); - CommandLine cmd = parseArgs(args, opts); - return dispatch(cmd, opts); - } - - /** - * returns the Command Line Options. - * - * @return Options - */ - private Options getOpts() { - Options opts = new Options(); - addVolumeCommands(opts); - addBucketCommands(opts); - addKeyCommands(opts); - addListingCommands(opts); - return opts; - } - - /** - * This function parses all command line arguments - * and returns the appropriate values. - * - * @param argv - Argv from main - * - * @return CommandLine - */ - private CommandLine parseArgs(String[] argv, Options opts) - throws org.apache.commons.cli.ParseException { - try { - BasicParser parser = new BasicParser(); - return parser.parse(opts, argv); - } catch (ParseException ex) { - System.out.printf(ex.getMessage()); - } - - return null; - } - - - /** - * All volume related commands are added in this function for the command - * parser. - * - * @param options - Command Options class. - */ - private void addVolumeCommands(Options options) { - Option verbose = new Option(VERBOSE, false, "verbose information output."); - options.addOption(verbose); - - Option runas = new Option(RUNAS, false, "Run the command as \"hdfs\" user"); - options.addOption(runas); - - Option userName = new Option(USER, true, - "Name of the user in volume management " + - "functions"); - options.addOption(userName); - - Option quota = new Option(QUOTA, true, "Quota for the volume. E.g. 10TB"); - options.addOption(quota); - - - Option createVolume = new Option(CREATE_VOLUME, true, "creates a volume" + - "for the specified user.\n \t For example : hdfs o3 -createVolume " + - " -root -user \n"); - options.addOption(createVolume); - - Option deleteVolume = new Option(DELETE_VOLUME, true, "deletes a volume" + - "if it is empty.\n \t For example : ozone oz -deleteVolume " + - " -root \n"); - options.addOption(deleteVolume); - - Option listVolume = - new Option(LIST_VOLUME, true, "List the volumes of a given user.\n" + - "For example : ozone oz -listVolume " + - "-user -root or ozone oz " + - "-listVolume"); - options.addOption(listVolume); - - Option updateVolume = - new Option(UPDATE_VOLUME, true, "updates an existing volume.\n" + - "\t For example : ozone oz " + - "-updateVolume -quota " + - "100TB\n"); - options.addOption(updateVolume); - - Option infoVolume = new Option(INFO_VOLUME, true, - "returns information about a specific " + - "volume."); - options.addOption(infoVolume); - } - - /** - * All bucket related commands for ozone. - * - * @param opts - Options - */ - private void addBucketCommands(Options opts) { - Option createBucket = new Option(CREATE_BUCKET, true, - "creates a bucket in a given volume." 
+ - "For example: ozone oz -createBucket "); - opts.addOption(createBucket); - - Option infoBucket = - new Option(INFO_BUCKET, true, "returns information about a bucket."); - opts.addOption(infoBucket); - - Option deleteBucket = - new Option(DELETE_BUCKET, true, "deletes an empty bucket."); - opts.addOption(deleteBucket); - - Option listBucket = - new Option(LIST_BUCKET, true, "lists the buckets in a volume."); - opts.addOption(listBucket); - - Option updateBucket = - new Option(UPDATE_BUCKET, true, "allows changing bucket attributes.\n" + - " For example: ozone oz -updateBucket " + - "-addAcl user:frodo:rw"); - opts.addOption(updateBucket); - - Option addAcl = - new Option(ADD_ACLS, true, "allows user to add acls to a bucket."); - opts.addOption(addAcl); - - Option removeAcl = - new Option(REMOVE_ACLS, true, "allows user to remove acls from a " + - "bucket."); - opts.addOption(removeAcl); - } - - /** - * All key commands. - * - * @param opts - options - */ - private void addKeyCommands(Options opts) { - Option putKey = - new Option(PUT_KEY, true, "creates or overwrites an existing key"); - opts.addOption(putKey); - - Option deleteKey = - new Option(DELETE_KEY, true, "deletes an existing key"); - opts.addOption(deleteKey); - - Option infoKey = - new Option(INFO_KEY, true, "returns information about an existing key"); - opts.addOption(infoKey); - - Option listKey = - new Option(LIST_KEY, true, "list all keys in a given bucket"); - opts.addOption(listKey); - - Option getKey = - new Option(GET_KEY, true, "Gets a specific key from ozone server."); - opts.addOption(getKey); - - Option fileArgument = - new Option(FILE, true, "Data file path"); - opts.addOption(fileArgument); - - } - - /** - * Sub commands for list command. - * @param opts - */ - private void addListingCommands(Options opts) { - Option maxKeys = new Option(LIST_LENGTH, true, - "Specify the max length of listing result."); - opts.addOption(maxKeys); - - Option prevKey = new Option(START, true, - "Specify the start key where to start listing from."); - opts.addOption(prevKey); - - Option prefix = new Option(PREFIX, true, - "Specify the prefix to filter the listing result."); - opts.addOption(prefix); - } - - /** - * Dispatches calls to the right command Handler classes. 
- * - * @param cmd - CommandLine - * - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException - */ - private int dispatch(CommandLine cmd, Options opts) - throws IOException, OzoneException, URISyntaxException { - Handler handler = null; - final int eightyColumn = 80; - - try { - - // volume functions - if (cmd.hasOption(Shell.CREATE_VOLUME)) { - handler = new CreateVolumeHandler(); - } - - if (cmd.hasOption(Shell.DELETE_VOLUME)) { - handler = new DeleteVolumeHandler(); - } - - if (cmd.hasOption(Shell.LIST_VOLUME)) { - handler = new ListVolumeHandler(); - } - - if (cmd.hasOption(Shell.UPDATE_VOLUME)) { - handler = new UpdateVolumeHandler(); - } - - if (cmd.hasOption(Shell.INFO_VOLUME)) { - handler = new InfoVolumeHandler(); - } - - // bucket functions - if (cmd.hasOption(Shell.CREATE_BUCKET)) { - handler = new CreateBucketHandler(); - } - - if (cmd.hasOption(Shell.DELETE_BUCKET)) { - handler = new DeleteBucketHandler(); - } - - if (cmd.hasOption(Shell.INFO_BUCKET)) { - handler = new InfoBucketHandler(); - } - - if (cmd.hasOption(Shell.LIST_BUCKET)) { - handler = new ListBucketHandler(); - } - - if(cmd.hasOption(Shell.UPDATE_BUCKET)){ - handler = new UpdateBucketHandler(); - } - - //Key Functions - - if(cmd.hasOption(Shell.PUT_KEY)) { - handler = new PutKeyHandler(); - } - - if(cmd.hasOption(Shell.DELETE_KEY)) { - handler = new DeleteKeyHandler(); - } - - if(cmd.hasOption(Shell.INFO_KEY)) { - handler = new InfoKeyHandler(); - } - - if(cmd.hasOption(Shell.LIST_KEY)) { - handler = new ListKeyHandler(); - } - - if(cmd.hasOption(Shell.GET_KEY)) { - handler = new GetKeyHandler(); - } - - if (handler != null) { - handler.execute(cmd); - return 0; - } else { - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter.printHelp(eightyColumn, "ozone oz -command uri [args]", - "Ozone Commands", - opts, "Please correct your command and try again."); - return 1; - } - } catch (IOException | URISyntaxException ex) { - System.err.printf("Command Failed : %s%n", ex.getMessage()); - } catch (OzoneException ex) { - System.err.printf("Command Failed : %s%n", ex.toJsonString()); - LOG.debug("Command Failed.", ex); - } catch (IllegalArgumentException ex) { - System.err.printf("Illegal argument: %s%n", ex.getMessage()); - } - return 1; + new Shell().run(argv); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/BucketCommands.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/BucketCommands.java new file mode 100644 index 00000000000..870f4d71986 --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/BucketCommands.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.web.ozShell.bucket; + +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.cli.GenericParentCommand; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.cli.MissingSubcommandException; +import org.apache.hadoop.ozone.web.ozShell.Shell; + +import picocli.CommandLine.Command; +import picocli.CommandLine.ParentCommand; + +/** + * Subcommands for the bucket related operations. + */ +@Command(name = "bucket", + description = "Bucket specific operations", + subcommands = { + InfoBucketHandler.class, + ListBucketHandler.class, + CreateBucketHandler.class, + UpdateBucketHandler.class, + DeleteBucketHandler.class + }, + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class BucketCommands implements GenericParentCommand, Callable { + + @ParentCommand + private Shell shell; + + @Override + public Void call() throws Exception { + throw new MissingSubcommandException( + this.shell.getCmd().getSubcommands().get("bucket").getUsageMessage()); + } + + @Override + public boolean isVerbose() { + return shell.isVerbose(); + } +} diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/CreateBucketHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/CreateBucketHandler.java index 0788f9e20a3..bd8db6007b0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/CreateBucketHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/CreateBucketHandler.java @@ -17,59 +17,48 @@ */ package org.apache.hadoop.ozone.web.ozShell.bucket; -import org.apache.commons.cli.CommandLine; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; /** * create bucket handler. */ +@Command(name = "create", + description = "creates a bucket in a given volume") public class CreateBucketHandler extends Handler { - private String volumeName; - private String bucketName; + @Parameters(arity = "1..1", description = Shell.OZONE_BUCKET_URI_DESCRIPTION) + private String uri; /** * Executes create bucket. 
- * - * @param cmd - CommandLine - * - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.CREATE_BUCKET)) { - throw new OzoneClientException( - "Incorrect call : createBucket is missing"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.CREATE_BUCKET); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 2) { throw new OzoneClientException( "volume and bucket name required in createBucket"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); } @@ -77,10 +66,11 @@ protected void execute(CommandLine cmd) OzoneVolume vol = client.getObjectStore().getVolume(volumeName); vol.createBucket(bucketName); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { OzoneBucket bucket = vol.getBucket(bucketName); System.out.printf(JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(OzoneClientUtils.asBucketInfo(bucket)))); } + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/DeleteBucketHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/DeleteBucketHandler.java index 5fc443e632a..79a0c8ef26c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/DeleteBucketHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/DeleteBucketHandler.java @@ -18,60 +18,51 @@ package org.apache.hadoop.ozone.web.ozShell.bucket; -import org.apache.commons.cli.CommandLine; -import org.apache.hadoop.ozone.client.OzoneVolume; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; +import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; /** * Delete bucket Handler. */ +@Command(name = "delete", + description = "deletes an empty bucket") public class DeleteBucketHandler extends Handler { - private String volumeName; - private String bucketName; + + @Parameters(arity = "1..1", description = Shell.OZONE_BUCKET_URI_DESCRIPTION) + private String uri; /** * Executes the Client Calls. 
- * - * @param cmd - CommandLine - * - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.DELETE_BUCKET)) { - throw new OzoneClientException( - "Incorrect call : deleteBucket is missing"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.DELETE_BUCKET); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 2) { throw new OzoneClientException( "volume and bucket name required in delete Bucket"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); } OzoneVolume vol = client.getObjectStore().getVolume(volumeName); vol.deleteBucket(bucketName); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/InfoBucketHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/InfoBucketHandler.java index b3ca4e5565c..4122b729857 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/InfoBucketHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/InfoBucketHandler.java @@ -17,49 +17,38 @@ */ package org.apache.hadoop.ozone.web.ozShell.bucket; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; -import org.apache.commons.cli.CommandLine; import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; /** * Executes Info bucket. */ +@Command(name = "info", + description = "returns information about a bucket") public class InfoBucketHandler extends Handler { - private String volumeName; - private String bucketName; + + @Parameters(arity = "1..1", description = Shell.OZONE_BUCKET_URI_DESCRIPTION) + private String uri; /** * Executes the Client Calls. 
- * - * @param cmd - CommandLine - * - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.INFO_BUCKET)) { - throw new OzoneClientException( - "Incorrect call : infoBucket is missing"); - } - - String ozoneURIString = cmd.getOptionValue(Shell.INFO_BUCKET); - URI ozoneURI = verifyURI(ozoneURIString); + public Void call() throws Exception { + String volumeName, bucketName; + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 2) { @@ -70,7 +59,7 @@ protected void execute(CommandLine cmd) volumeName = path.getName(0).toString(); bucketName = path.getName(1).toString(); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); } @@ -80,6 +69,7 @@ protected void execute(CommandLine cmd) System.out.printf("%s%n", JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(OzoneClientUtils.asBucketInfo(bucket)))); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/ListBucketHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/ListBucketHandler.java index 655022ad1ea..1d97bf5d34d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/ListBucketHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/ListBucketHandler.java @@ -18,95 +18,92 @@ package org.apache.hadoop.ozone.web.ozShell.bucket; -import org.apache.commons.cli.CommandLine; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.rest.response.BucketInfo; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; -import org.apache.hadoop.ozone.web.utils.OzoneUtils; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; +import picocli.CommandLine.Command; +import picocli.CommandLine.Help.Visibility; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; /** * Executes List Bucket. 
*/ +@Command(name = "list", + aliases = "ls", + description = "lists the buckets in a volume.") public class ListBucketHandler extends Handler { - private String volumeName; + @Parameters(arity = "1..1", description = Shell.OZONE_VOLUME_URI_DESCRIPTION) + private String uri; + + @Option(names = {"--length", "-l"}, + description = "Limit of the max results", + defaultValue = "100", + showDefaultValue = Visibility.ALWAYS) + private int maxBuckets; + + @Option(names = {"--start", "-s"}, + description = "The first bucket to start the listing") + private String startBucket; + + @Option(names = {"--prefix", "-p"}, + description = "Prefix to filter the buckets") + private String prefix; /** * Executes the Client Calls. - * - * @param cmd - CommandLine - * - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.LIST_BUCKET)) { - throw new OzoneClientException( - "Incorrect call : listBucket is missing"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.LIST_BUCKET); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 1) { throw new OzoneClientException("volume is required in listBucket"); } - volumeName = path.getName(0).toString(); - - if (cmd.hasOption(Shell.VERBOSE)) { - System.out.printf("Volume Name : %s%n", volumeName); + if (maxBuckets < 1) { + throw new IllegalArgumentException( + "the length should be a positive number"); } - int maxBuckets = Integer.MAX_VALUE; - if (cmd.hasOption(Shell.LIST_LENGTH)) { - String length = cmd.getOptionValue(Shell.LIST_LENGTH); - OzoneUtils.verifyMaxKeyLength(length); - maxBuckets = Integer.parseInt(length); - } + String volumeName = path.getName(0).toString(); - String startBucket = null; - if (cmd.hasOption(Shell.START)) { - startBucket = cmd.getOptionValue(Shell.START); + if (isVerbose()) { + System.out.printf("Volume Name : %s%n", volumeName); } - String prefix = null; - if (cmd.hasOption(Shell.PREFIX)) { - prefix = cmd.getOptionValue(Shell.PREFIX); - } OzoneVolume vol = client.getObjectStore().getVolume(volumeName); Iterator bucketIterator = vol.listBuckets(prefix, startBucket); List bucketList = new ArrayList<>(); while (maxBuckets > 0 && bucketIterator.hasNext()) { - BucketInfo bucketInfo = OzoneClientUtils.asBucketInfo(bucketIterator.next()); + BucketInfo bucketInfo = + OzoneClientUtils.asBucketInfo(bucketIterator.next()); bucketList.add(bucketInfo); maxBuckets -= 1; } - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Found : %d buckets for volume : %s ", bucketList.size(), volumeName); } System.out.println(JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(bucketList))); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/UpdateBucketHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/UpdateBucketHandler.java index aff0e19c5aa..3562dc04549 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/UpdateBucketHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/bucket/UpdateBucketHandler.java @@ -17,43 +17,50 @@ */ package org.apache.hadoop.ozone.web.ozShell.bucket; -import org.apache.commons.cli.CommandLine; 
+import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + import org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; /** * Allows users to add and remove acls and from a bucket. */ +@Command(name = "update", + description = "allows changing bucket attributes") public class UpdateBucketHandler extends Handler { - private String volumeName; - private String bucketName; + + @Parameters(arity = "1..1", description = Shell.OZONE_BUCKET_URI_DESCRIPTION) + private String uri; + + @Option(names = {"--addAcl"}, + description = "Comma separated list of acl rules to add (eg. " + + "user:bilbo:rw)") + private String addAcl; + + @Option(names = {"--removeAcl"}, + description = "Comma separated list of acl rules to remove (eg. " + + "user:bilbo:rw)") + private String removeAcl; @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.UPDATE_BUCKET)) { - throw new OzoneClientException( - "Incorrect call : updateBucket is missing"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.UPDATE_BUCKET); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 2) { @@ -61,28 +68,26 @@ protected void execute(CommandLine cmd) "volume and bucket name required in update bucket"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); } OzoneVolume vol = client.getObjectStore().getVolume(volumeName); OzoneBucket bucket = vol.getBucket(bucketName); - if (cmd.hasOption(Shell.ADD_ACLS)) { - String aclString = cmd.getOptionValue(Shell.ADD_ACLS); - String[] aclArray = aclString.split(","); + if (addAcl != null) { + String[] aclArray = addAcl.split(","); List aclList = Arrays.stream(aclArray).map(acl -> OzoneAcl.parseAcl(acl)) .collect(Collectors.toList()); bucket.addAcls(aclList); } - if (cmd.hasOption(Shell.REMOVE_ACLS)) { - String aclString = cmd.getOptionValue(Shell.REMOVE_ACLS); - String[] aclArray = aclString.split(","); + if (removeAcl != null) { + String[] aclArray = removeAcl.split(","); List aclList = Arrays.stream(aclArray).map(acl -> OzoneAcl.parseAcl(acl)) .collect(Collectors.toList()); @@ -91,5 +96,6 @@ protected void execute(CommandLine cmd) System.out.printf("%s%n", 
JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(OzoneClientUtils.asBucketInfo(bucket)))); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/DeleteKeyHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/DeleteKeyHandler.java index fccabe7188a..dff6e6719a1 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/DeleteKeyHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/DeleteKeyHandler.java @@ -18,58 +18,47 @@ package org.apache.hadoop.ozone.web.ozShell.keys; -import org.apache.commons.cli.CommandLine; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.hadoop.ozone.client.OzoneBucket; -import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; +import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; /** * Executes Delete Key. */ +@Command(name = "delete", + description = "deletes an existing key") public class DeleteKeyHandler extends Handler { - private String volumeName; - private String bucketName; - private String keyName; + + @Parameters(arity = "1..1", description = Shell.OZONE_KEY_URI_DESCRIPTION) + private String uri; /** * Executes the Client Calls. - * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.DELETE_KEY)) { - throw new OzoneClientException( - "Incorrect call : deleteKey is missing"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.DELETE_KEY); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 3) { throw new OzoneClientException( "volume/bucket/key name required in deleteKey"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); - keyName = path.getName(2).toString(); - + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); + String keyName = path.getName(2).toString(); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); System.out.printf("Key Name : %s%n", keyName); @@ -78,5 +67,6 @@ protected void execute(CommandLine cmd) OzoneVolume vol = client.getObjectStore().getVolume(volumeName); OzoneBucket bucket = vol.getBucket(bucketName); bucket.deleteKey(keyName); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/GetKeyHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/GetKeyHandler.java index 34620b4934a..583d22b6e67 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/GetKeyHandler.java +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/GetKeyHandler.java @@ -18,91 +18,77 @@ package org.apache.hadoop.ozone.web.ozShell.keys; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.codec.digest.DigestUtils; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.io.OzoneInputStream; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; - -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_CHUNK_SIZE_DEFAULT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_CHUNK_SIZE_KEY; +import org.apache.commons.codec.digest.DigestUtils; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; /** * Gets an existing key. */ +@Command(name = "get", + description = "Gets a specific key from ozone server") public class GetKeyHandler extends Handler { - private String volumeName; - private String bucketName; - private String keyName; + @Parameters(index = "0", arity = "1..1", description = + Shell.OZONE_KEY_URI_DESCRIPTION) + private String uri; + + @Parameters(index = "1", arity = "1..1", + description = "File path to download the key to") + private String fileName; /** * Executes the Client Calls. 
- * - * @param cmd - CommandLine - * - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.GET_KEY)) { - throw new OzoneClientException("Incorrect call : getKey is missing"); - } - - if (!cmd.hasOption(Shell.FILE)) { - throw new OzoneClientException( - "get key needs a file path to download to"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.GET_KEY); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 3) { throw new OzoneClientException( "volume/bucket/key name required in putKey"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); - keyName = path.getName(2).toString(); + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); + String keyName = path.getName(2).toString(); - - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); System.out.printf("Key Name : %s%n", keyName); } - - String fileName = cmd.getOptionValue(Shell.FILE); Path dataFilePath = Paths.get(fileName); File dataFile = new File(fileName); + if (dataFile.exists() && dataFile.isDirectory()) { + dataFile = new File(fileName, keyName); + } if (dataFile.exists()) { - throw new OzoneClientException(fileName + - "exists. Download will overwrite an " + - "existing file. Aborting."); + throw new OzoneClientException( + fileName + "exists. Download will overwrite an " + + "existing file. Aborting."); } OzoneVolume vol = client.getObjectStore().getVolume(volumeName); @@ -117,12 +103,12 @@ protected void execute(CommandLine cmd) throw new OzoneClientException( "Can not access the file \"" + fileName + "\""); } - if(cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { FileInputStream stream = new FileInputStream(dataFile); String hash = DigestUtils.md5Hex(stream); System.out.printf("Downloaded file hash : %s%n", hash); stream.close(); } - + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/InfoKeyHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/InfoKeyHandler.java index 3fcdda99369..6ae9b6f9e32 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/InfoKeyHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/InfoKeyHandler.java @@ -18,60 +18,55 @@ package org.apache.hadoop.ozone.web.ozShell.keys; -import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; import java.nio.file.Path; import java.nio.file.Paths; -import org.apache.commons.cli.CommandLine; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientUtils; -import org.apache.hadoop.ozone.client.OzoneBucket; -import org.apache.hadoop.ozone.client.OzoneKey; +import org.apache.hadoop.ozone.client.OzoneKeyDetails; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import 
org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; + /** * Executes Info Object. */ +@Command(name = "info", + description = "returns information about an existing key") public class InfoKeyHandler extends Handler { - private String volumeName; - private String bucketName; - private String keyName; + @Parameters(arity = "1..1", description = Shell.OZONE_KEY_URI_DESCRIPTION) + private String uri; /** * Executes the Client Calls. - * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.INFO_KEY)) { - throw new OzoneClientException("Incorrect call : infoKey is missing"); - } - - String ozoneURIString = cmd.getOptionValue(Shell.INFO_KEY); - URI ozoneURI = verifyURI(ozoneURIString); + public Void call() throws Exception { + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 3) { throw new OzoneClientException( "volume/bucket/key name required in infoKey"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); - keyName = path.getName(2).toString(); + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); + + String searchString = volumeName + OzoneConsts.OZONE_URI_DELIMITER + + bucketName + OzoneConsts.OZONE_URI_DELIMITER; + String keyName = + uri.substring(uri.indexOf(searchString) + + searchString.length()); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); System.out.printf("Key Name : %s%n", keyName); @@ -79,9 +74,10 @@ protected void execute(CommandLine cmd) OzoneVolume vol = client.getObjectStore().getVolume(volumeName); OzoneBucket bucket = vol.getBucket(bucketName); - OzoneKey key = bucket.getKey(keyName); + OzoneKeyDetails key = bucket.getKey(keyName); System.out.printf("%s%n", JsonUtils.toJsonStringWithDefaultPrettyPrinter( - JsonUtils.toJsonString(OzoneClientUtils.asKeyInfo(key)))); + JsonUtils.toJsonString(OzoneClientUtils.asKeyInfoDetails(key)))); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/KeyCommands.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/KeyCommands.java new file mode 100644 index 00000000000..d0346861e73 --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/KeyCommands.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.web.ozShell.keys; + +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.cli.GenericParentCommand; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.cli.MissingSubcommandException; +import org.apache.hadoop.ozone.web.ozShell.Shell; + +import picocli.CommandLine.Command; +import picocli.CommandLine.ParentCommand; + +/** + * Subcommand to group key related operations. + */ +@Command(name = "key", + description = "Key specific operations", + subcommands = { + InfoKeyHandler.class, + ListKeyHandler.class, + GetKeyHandler.class, + PutKeyHandler.class, + DeleteKeyHandler.class + }, + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class KeyCommands implements GenericParentCommand, Callable { + + @ParentCommand + private Shell shell; + + @Override + public Void call() throws Exception { + throw new MissingSubcommandException( + this.shell.getCmd().getSubcommands().get("key").getUsageMessage()); + } + + @Override + public boolean isVerbose() { + return shell.isVerbose(); + } +} diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/ListKeyHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/ListKeyHandler.java index 6e266fd3bee..0a710ac945a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/ListKeyHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/ListKeyHandler.java @@ -18,78 +18,73 @@ package org.apache.hadoop.ozone.web.ozShell.keys; -import org.apache.commons.cli.CommandLine; -import org.apache.hadoop.ozone.client.*; -import org.apache.hadoop.ozone.client.rest.response.KeyInfo; -import org.apache.hadoop.ozone.client.rest.OzoneException; -import org.apache.hadoop.ozone.web.ozShell.Handler; -import org.apache.hadoop.ozone.web.ozShell.Shell; -import org.apache.hadoop.ozone.web.utils.JsonUtils; -import org.apache.hadoop.ozone.web.utils.OzoneUtils; - -import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientException; +import org.apache.hadoop.ozone.client.OzoneClientUtils; +import org.apache.hadoop.ozone.client.OzoneKey; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.rest.response.KeyInfo; +import org.apache.hadoop.ozone.web.ozShell.Handler; +import org.apache.hadoop.ozone.web.ozShell.Shell; +import org.apache.hadoop.ozone.web.utils.JsonUtils; + +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; + /** * Executes List Keys. */ +@Command(name = "list", + aliases = "ls", + description = "list all keys in a given bucket") public class ListKeyHandler extends Handler { - private String volumeName; - private String bucketName; - /** - * Executes the Client Calls. 
- * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException - */ - @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { + @Parameters(arity = "1..1", description = Shell.OZONE_BUCKET_URI_DESCRIPTION) + private String uri; - if (!cmd.hasOption(Shell.LIST_KEY)) { - throw new OzoneClientException( - "Incorrect call : listKey is missing"); - } + @Option(names = {"--length", "-l"}, + description = "Limit of the max results", + defaultValue = "100") + private int maxKeys; - int maxKeys = Integer.MAX_VALUE; - if (cmd.hasOption(Shell.LIST_LENGTH)) { - String length = cmd.getOptionValue(Shell.LIST_LENGTH); - OzoneUtils.verifyMaxKeyLength(length); - maxKeys = Integer.parseInt(length); - } + @Option(names = {"--start", "-s"}, + description = "The first key to start the listing") + private String startKey; - String startKey = null; - if (cmd.hasOption(Shell.START)) { - startKey = cmd.getOptionValue(Shell.START); - } + @Option(names = {"--prefix", "-p"}, + description = "Prefix to filter the key") + private String prefix; - String prefix = null; - if (cmd.hasOption(Shell.PREFIX)) { - prefix = cmd.getOptionValue(Shell.PREFIX); - } + /** + * Executes the Client Calls. + */ + @Override + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.LIST_KEY); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 2) { throw new OzoneClientException( "volume/bucket is required in listKey"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); + if (maxKeys < 1) { + throw new IllegalArgumentException( + "the length should be a positive number"); + } + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("bucket Name : %s%n", bucketName); } @@ -105,12 +100,13 @@ protected void execute(CommandLine cmd) maxKeys -= 1; } - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Found : %d keys for bucket %s in volume : %s ", keyInfos.size(), bucketName, volumeName); } System.out.println(JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(keyInfos))); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/PutKeyHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/PutKeyHandler.java index ed8cc8822dc..bbd32352200 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/PutKeyHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/keys/PutKeyHandler.java @@ -18,84 +18,79 @@ package org.apache.hadoop.ozone.web.ozShell.keys; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.codec.digest.DigestUtils; +import java.io.File; +import java.io.FileInputStream; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.client.ReplicationFactor; +import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ozone.client.OzoneBucket; +import 
org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.hdds.client.ReplicationFactor; -import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; - +import org.apache.commons.codec.digest.DigestUtils; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION_TYPE; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_REPLICATION_TYPE_DEFAULT; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; /** * Puts a file into an ozone bucket. */ +@Command(name = "put", + description = "creates or overwrites an existing key") public class PutKeyHandler extends Handler { - private String volumeName; - private String bucketName; - private String keyName; + @Parameters(index = "0", arity = "1..1", description = + Shell.OZONE_KEY_URI_DESCRIPTION) + private String uri; + + @Parameters(index = "1", arity = "1..1", description = "File to upload") + private String fileName; + + @Option(names = {"-r", "--replication"}, + description = "Replication factor of the new key. (use ONE or THREE) " + + "Default is specified in the cluster-wide config.") + private ReplicationFactor replicationFactor; /** * Executes the Client Calls. 
- * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.PUT_KEY)) { - throw new OzoneClientException("Incorrect call : putKey is missing"); - } - - if (!cmd.hasOption(Shell.FILE)) { - throw new OzoneClientException("put key needs a file to put"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.PUT_KEY); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); Path path = Paths.get(ozoneURI.getPath()); if (path.getNameCount() < 3) { throw new OzoneClientException( "volume/bucket/key name required in putKey"); } - volumeName = path.getName(0).toString(); - bucketName = path.getName(1).toString(); - keyName = path.getName(2).toString(); - + String volumeName = path.getName(0).toString(); + String bucketName = path.getName(1).toString(); + String keyName = path.getName(2).toString(); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume Name : %s%n", volumeName); System.out.printf("Bucket Name : %s%n", bucketName); System.out.printf("Key Name : %s%n", keyName); } - String fileName = cmd.getOptionValue(Shell.FILE); File dataFile = new File(fileName); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { FileInputStream stream = new FileInputStream(dataFile); String hash = DigestUtils.md5Hex(stream); System.out.printf("File Hash : %s%n", hash); @@ -103,11 +98,13 @@ protected void execute(CommandLine cmd) } Configuration conf = new OzoneConfiguration(); - ReplicationFactor replicationFactor = ReplicationFactor.valueOf( - conf.getInt(OZONE_REPLICATION, ReplicationFactor.THREE.getValue())); - ReplicationType replicationType = ReplicationType.valueOf( - conf.get(OZONE_REPLICATION_TYPE, ReplicationType.RATIS.toString())); + if (replicationFactor == null) { + replicationFactor = ReplicationFactor.valueOf( + conf.getInt(OZONE_REPLICATION, OZONE_REPLICATION_DEFAULT)); + } + ReplicationType replicationType = ReplicationType.valueOf( + conf.get(OZONE_REPLICATION_TYPE, OZONE_REPLICATION_TYPE_DEFAULT)); OzoneVolume vol = client.getObjectStore().getVolume(volumeName); OzoneBucket bucket = vol.getBucket(bucketName); OzoneOutputStream outputStream = bucket @@ -118,6 +115,7 @@ protected void execute(CommandLine cmd) conf.getInt(OZONE_SCM_CHUNK_SIZE_KEY, OZONE_SCM_CHUNK_SIZE_DEFAULT)); outputStream.close(); fileInputStream.close(); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/CreateVolumeHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/CreateVolumeHandler.java index 74fdbb0a5dd..26976398d11 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/CreateVolumeHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/CreateVolumeHandler.java @@ -18,77 +18,72 @@ package org.apache.hadoop.ozone.web.ozShell.volume; -import org.apache.commons.cli.CommandLine; +import java.net.URI; + +import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.VolumeArgs; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; 
import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; /** * Executes the create volume call for the shell. */ +@Command(name = "create", + description = "Creates a volume for the specified user") public class CreateVolumeHandler extends Handler { - private String rootName; + @Parameters(arity = "1..1", description = Shell.OZONE_VOLUME_URI_DESCRIPTION) + private String uri; + + @Option(names = {"--user", "-u"}, + description = "Owner of of the volume", required = + true) private String userName; - private String volumeName; + + @Option(names = {"--quota", "-q"}, + description = + "Quota of the newly created volume (eg. 1G)") private String quota; + @Option(names = {"--root"}, + description = "Development flag to execute the " + + "command as the admin (hdfs) user.") + private boolean root; + /** * Executes the Create Volume. - * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.CREATE_VOLUME)) { - throw new OzoneClientException( - "Incorrect call : createVolume is missing"); - } + public Void call() throws Exception { + + URI ozoneURI = verifyURI(uri); - String ozoneURIString = cmd.getOptionValue(Shell.CREATE_VOLUME); - URI ozoneURI = verifyURI(ozoneURIString); - if (ozoneURI.getPath().isEmpty()) { + // we need to skip the slash in the URI path + // getPath returns /volumeName needs to remove the initial slash. + String volumeName = ozoneURI.getPath().replaceAll("^/+", ""); + if (volumeName.isEmpty()) { throw new OzoneClientException( "Volume name is required to create a volume"); } - // we need to skip the slash in the URI path - // getPath returns /volumeName needs to remove the first slash. 
- volumeName = ozoneURI.getPath().substring(1); - - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume name : %s%n", volumeName); } - if (cmd.hasOption(Shell.RUNAS)) { + + String rootName; + if (root) { rootName = "hdfs"; } else { rootName = System.getProperty("user.name"); } - if (!cmd.hasOption(Shell.USER)) { - throw new OzoneClientException( - "User name is needed in createVolume call."); - } - - if (cmd.hasOption(Shell.QUOTA)) { - quota = cmd.getOptionValue(Shell.QUOTA); - } - - userName = cmd.getOptionValue(Shell.USER); - VolumeArgs.Builder volumeArgsBuilder = VolumeArgs.newBuilder() .setAdmin(rootName) .setOwner(userName); @@ -97,11 +92,13 @@ protected void execute(CommandLine cmd) } client.getObjectStore().createVolume(volumeName, volumeArgsBuilder.build()); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { OzoneVolume vol = client.getObjectStore().getVolume(volumeName); System.out.printf("%s%n", JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(OzoneClientUtils.asVolumeInfo(vol)))); } + return null; } + } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/DeleteVolumeHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/DeleteVolumeHandler.java index d6facf63a22..d1e96fcf74f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/DeleteVolumeHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/DeleteVolumeHandler.java @@ -18,54 +18,46 @@ package org.apache.hadoop.ozone.web.ozShell.volume; -import org.apache.commons.cli.CommandLine; +import java.net.URI; + import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; /** * Executes deleteVolume call for the shell. */ +@Command(name = "delete", + description = "deletes a volume if it is empty") public class DeleteVolumeHandler extends Handler { - private String volumeName; + @Parameters(arity = "1..1", description = Shell.OZONE_VOLUME_URI_DESCRIPTION) + private String uri; /** * Executes the delete volume call. 
- * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - - if (!cmd.hasOption(Shell.DELETE_VOLUME)) { - throw new OzoneClientException( - "Incorrect call : deleteVolume call is missing"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.DELETE_VOLUME); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); if (ozoneURI.getPath().isEmpty()) { throw new OzoneClientException( "Volume name is required to delete a volume"); } // we need to skip the slash in the URI path - volumeName = ozoneURI.getPath().substring(1); + String volumeName = ozoneURI.getPath().substring(1); - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Volume name : %s%n", volumeName); } client.getObjectStore().deleteVolume(volumeName); + System.out.printf("Volume %s is deleted%n", volumeName); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/InfoVolumeHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/InfoVolumeHandler.java index b5be2c65e38..60a8c6d9cf9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/InfoVolumeHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/InfoVolumeHandler.java @@ -18,56 +18,47 @@ package org.apache.hadoop.ozone.web.ozShell.volume; -import org.apache.commons.cli.CommandLine; +import java.net.URI; + +import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; +import picocli.CommandLine.Command; +import picocli.CommandLine.Parameters; /** * Executes volume Info calls. */ +@Command(name = "info", + description = "returns information about a specific volume") public class InfoVolumeHandler extends Handler{ - private String volumeName; + @Parameters(arity = "1..1", description = Shell.OZONE_VOLUME_URI_DESCRIPTION) + private String uri; /** * Executes volume Info. 
- * - * @param cmd - CommandLine - * - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { + public Void call() throws Exception { - if (!cmd.hasOption(Shell.INFO_VOLUME)) { - throw new OzoneClientException( - "Incorrect call : infoVolume is missing"); - } - - String ozoneURIString = cmd.getOptionValue(Shell.INFO_VOLUME); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); if (ozoneURI.getPath().isEmpty()) { throw new OzoneClientException( "Volume name is required to get info of a volume"); } // we need to skip the slash in the URI path - volumeName = ozoneURI.getPath().substring(1); + String volumeName = ozoneURI.getPath().substring(1); OzoneVolume vol = client.getObjectStore().getVolume(volumeName); System.out.printf("%s%n", JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(OzoneClientUtils.asVolumeInfo(vol)))); + return null; } + } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/ListVolumeHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/ListVolumeHandler.java index 3749df437ee..a54393c3dab 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/ListVolumeHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/ListVolumeHandler.java @@ -18,19 +18,20 @@ package org.apache.hadoop.ozone.web.ozShell.volume; -import org.apache.commons.cli.CommandLine; +import com.google.common.base.Strings; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; + import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.rest.response.VolumeInfo; import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; import org.apache.hadoop.ozone.web.utils.JsonUtils; -import org.apache.hadoop.ozone.web.utils.OzoneUtils; -import java.io.IOException; -import java.net.URISyntaxException; +import java.net.URI; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -38,51 +39,54 @@ /** * Executes List Volume call. */ +@Command(name = "list", + aliases = "ls", + description = "List the volumes of a given user") public class ListVolumeHandler extends Handler { + + @Parameters(arity = "1..1", + description = Shell.OZONE_VOLUME_URI_DESCRIPTION, + defaultValue = "/") + private String uri; + + @Option(names = {"--length", "-l"}, + description = "Limit of the max results", + defaultValue = "100") + private int maxVolumes; + + @Option(names = {"--start", "-s"}, + description = "The first volume to start the listing") + private String startVolume; + + @Option(names = {"--prefix", "-p"}, + description = "Prefix to filter the volumes") + private String prefix; + + @Option(names = {"--user", "-u"}, + description = "Owner of the volumes to list.") private String userName; /** * Executes the Client Calls. 
- * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { + public Void call() throws Exception { - if (!cmd.hasOption(Shell.LIST_VOLUME)) { + URI ozoneURI = verifyURI(uri); + if (!Strings.isNullOrEmpty(ozoneURI.getPath()) && !ozoneURI.getPath() + .equals("/")) { throw new OzoneClientException( - "Incorrect call : listVolume is missing"); + "Invalid URI: " + ozoneURI + " . Specified path not used." + ozoneURI + .getPath()); } - int maxVolumes = Integer.MAX_VALUE; - if (cmd.hasOption(Shell.LIST_LENGTH)) { - String length = cmd.getOptionValue(Shell.LIST_LENGTH); - OzoneUtils.verifyMaxKeyLength(length); - - maxVolumes = Integer.parseInt(length); - } - - String startVolume = null; - if (cmd.hasOption(Shell.START)) { - startVolume = cmd.getOptionValue(Shell.START); - } - - String prefix = null; - if (cmd.hasOption(Shell.PREFIX)) { - prefix = cmd.getOptionValue(Shell.PREFIX); + if (userName == null) { + userName = System.getProperty("user.name"); } - String ozoneURIString = cmd.getOptionValue(Shell.LIST_VOLUME); - verifyURI(ozoneURIString); - - if (cmd.hasOption(Shell.USER)) { - userName = cmd.getOptionValue(Shell.USER); - } else { - userName = System.getProperty("user.name"); + if (maxVolumes < 1) { + throw new IllegalArgumentException( + "the length should be a positive number"); } Iterator volumeIterator; @@ -101,12 +105,13 @@ protected void execute(CommandLine cmd) maxVolumes -= 1; } - if (cmd.hasOption(Shell.VERBOSE)) { + if (isVerbose()) { System.out.printf("Found : %d volumes for user : %s ", volumeInfos.size(), userName); } System.out.println(JsonUtils.toJsonStringWithDefaultPrettyPrinter( JsonUtils.toJsonString(volumeInfos))); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/UpdateVolumeHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/UpdateVolumeHandler.java index 1e3fbb5a7b3..0336fc2bbf8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/UpdateVolumeHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/UpdateVolumeHandler.java @@ -18,59 +18,53 @@ package org.apache.hadoop.ozone.web.ozShell.volume; -import org.apache.commons.cli.CommandLine; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; + import org.apache.hadoop.hdds.client.OzoneQuota; import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.OzoneClientUtils; import org.apache.hadoop.ozone.client.OzoneClientException; -import org.apache.hadoop.ozone.client.rest.OzoneException; import org.apache.hadoop.ozone.web.ozShell.Handler; import org.apache.hadoop.ozone.web.ozShell.Shell; +import org.apache.hadoop.ozone.web.utils.JsonUtils; -import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; /** * Executes update volume calls. 
*/ +@Command(name = "update", + description = "Updates parameter of the volumes") public class UpdateVolumeHandler extends Handler { + + @Parameters(arity = "1..1", description = Shell.OZONE_VOLUME_URI_DESCRIPTION) + private String uri; + + @Option(names = {"--user"}, + description = "Owner of the volume to set") private String ownerName; - private String volumeName; + + @Option(names = {"--quota"}, + description = "Quota of the volume to set" + + "(eg. 1G)") private String quota; /** - * Executes update volume calls. - * - * @param cmd - CommandLine - * @throws IOException - * @throws OzoneException - * @throws URISyntaxException + * Executes the Client Calls. */ @Override - protected void execute(CommandLine cmd) - throws IOException, OzoneException, URISyntaxException { - if (!cmd.hasOption(Shell.UPDATE_VOLUME)) { - throw new OzoneClientException( - "Incorrect call : updateVolume is missing"); - } + public Void call() throws Exception { - String ozoneURIString = cmd.getOptionValue(Shell.UPDATE_VOLUME); - URI ozoneURI = verifyURI(ozoneURIString); + URI ozoneURI = verifyURI(uri); if (ozoneURI.getPath().isEmpty()) { throw new OzoneClientException( "Volume name is required to update a volume"); } // we need to skip the slash in the URI path - volumeName = ozoneURI.getPath().substring(1); - - if (cmd.hasOption(Shell.QUOTA)) { - quota = cmd.getOptionValue(Shell.QUOTA); - } - - if (cmd.hasOption(Shell.USER)) { - ownerName = cmd.getOptionValue(Shell.USER); - } + String volumeName = ozoneURI.getPath().substring(1); OzoneVolume volume = client.getObjectStore().getVolume(volumeName); if (quota != null && !quota.isEmpty()) { @@ -80,5 +74,9 @@ protected void execute(CommandLine cmd) if (ownerName != null && !ownerName.isEmpty()) { volume.setOwner(ownerName); } + + System.out.printf("%s%n", JsonUtils.toJsonStringWithDefaultPrettyPrinter( + JsonUtils.toJsonString(OzoneClientUtils.asVolumeInfo(volume)))); + return null; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/VolumeCommands.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/VolumeCommands.java new file mode 100644 index 00000000000..e3f5d2d38eb --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/web/ozShell/volume/VolumeCommands.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.web.ozShell.volume; + +import java.util.concurrent.Callable; + +import org.apache.hadoop.hdds.cli.GenericParentCommand; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.cli.MissingSubcommandException; +import org.apache.hadoop.ozone.web.ozShell.Shell; + +import picocli.CommandLine.Command; +import picocli.CommandLine.ParentCommand; + +/** + * Subcommand to group volume related operations. + */ +@Command(name = "volume", + aliases = "vol", + description = "Volume specific operations", + subcommands = { + InfoVolumeHandler.class, + ListVolumeHandler.class, + CreateVolumeHandler.class, + UpdateVolumeHandler.class, + DeleteVolumeHandler.class + }, + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class VolumeCommands implements GenericParentCommand, Callable { + + @ParentCommand + private Shell shell; + + @Override + public Void call() throws Exception { + throw new MissingSubcommandException( + this.shell.getCmd().getSubcommands().get("volume").getUsageMessage()); + } + + @Override + public boolean isVerbose() { + return shell.isVerbose(); + } +} diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestIngClient.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestIngClient.java new file mode 100644 index 00000000000..2da60ded675 --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ScmBlockLocationTestIngClient.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.ozone.om; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.scm.ScmInfo; +import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; +import org.apache.hadoop.hdds.scm.container.common.helpers.DeleteBlockResult; +import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; +import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; +import org.apache.hadoop.ozone.common.BlockGroup; +import org.apache.hadoop.ozone.common.DeleteBlockGroupResult; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +import static org.apache.hadoop.hdds.protocol.proto + .ScmBlockLocationProtocolProtos.DeleteScmBlockResult.Result; +import static org.apache.hadoop.hdds.protocol.proto + .ScmBlockLocationProtocolProtos.DeleteScmBlockResult.Result.success; +import static org.apache.hadoop.hdds.protocol.proto + .ScmBlockLocationProtocolProtos.DeleteScmBlockResult.Result.unknownFailure; + +/** + * This is a testing client that allows us to intercept calls from OzoneManager + * to SCM. + *
+ * TODO: OzoneManager#getScmBlockClient -- so that we can load this class up via + * config setting into OzoneManager. Right now, we just pass this to + * KeyDeletingService only. + *
+ * TODO: Move this class to a generic test utils so we can use this class in + * other Ozone Manager tests. + */ +public class ScmBlockLocationTestIngClient implements ScmBlockLocationProtocol { + private static final Logger LOG = + LoggerFactory.getLogger(ScmBlockLocationTestIngClient.class); + private final String clusterID; + private final String scmId; + + // 0 means no calls will fail, +1 means all calls will fail, +2 means every + // second call will fail, +3 means every third and so on. + private final int failCallsFrequency; + private int currentCall = 0; + + /** + * If ClusterID or SCMID is blank a per instance ID is generated. + * + * @param clusterID - String or blank. + * @param scmId - String or Blank. + * @param failCallsFrequency - Set to 0 for no failures, 1 for always to fail, + * a positive number for that frequency of failure. + */ + public ScmBlockLocationTestIngClient(String clusterID, String scmId, + int failCallsFrequency) { + this.clusterID = StringUtils.isNotBlank(clusterID) ? clusterID : + UUID.randomUUID().toString(); + this.scmId = StringUtils.isNotBlank(scmId) ? scmId : + UUID.randomUUID().toString(); + this.failCallsFrequency = Math.abs(failCallsFrequency); + switch (this.failCallsFrequency) { + case 0: + LOG.debug("Set to no failure mode, all delete block calls will " + + "succeed."); + break; + case 1: + LOG.debug("Set to all failure mode. All delete block calls to SCM" + + " will fail."); + break; + default: + LOG.debug("Set to Mix mode, every {} -th call will fail", + this.failCallsFrequency); + } + + } + + /** + * Returns Fake blocks to the KeyManager so we get blocks in the Database. + * @param size - size of the block. + * @param type Replication Type + * @param factor - Replication factor + * @param owner - String owner. 
+ * @return + * @throws IOException + */ + @Override + public AllocatedBlock allocateBlock(long size, + HddsProtos.ReplicationType type, HddsProtos.ReplicationFactor factor, + String owner) throws IOException { + DatanodeDetails datanodeDetails = TestUtils.randomDatanodeDetails(); + Pipeline pipeline = createPipeline(datanodeDetails); + long containerID = Time.monotonicNow(); + long localID = Time.monotonicNow(); + AllocatedBlock.Builder abb = + new AllocatedBlock.Builder() + .setBlockID(new BlockID(containerID, localID)) + .setPipeline(pipeline) + .setShouldCreateContainer(false); + return abb.build(); + } + + private Pipeline createPipeline(DatanodeDetails datanode) { + final Pipeline pipeline = + new Pipeline(datanode.getUuidString(), HddsProtos.LifeCycleState.OPEN, + HddsProtos.ReplicationType.STAND_ALONE, + HddsProtos.ReplicationFactor.ONE, + PipelineID.randomId()); + pipeline.addMember(datanode); + return pipeline; + } + + @Override + public List deleteKeyBlocks( + List keyBlocksInfoList) throws IOException { + List results = new ArrayList<>(); + List blockResultList = new ArrayList<>(); + Result result; + for (BlockGroup keyBlocks : keyBlocksInfoList) { + for (BlockID blockKey : keyBlocks.getBlockIDList()) { + currentCall++; + switch (this.failCallsFrequency) { + case 0: + result = success; + break; + case 1: + result = unknownFailure; + break; + default: + if (currentCall % this.failCallsFrequency == 0) { + result = unknownFailure; + } else { + result = success; + } + } + blockResultList.add(new DeleteBlockResult(blockKey, result)); + } + results.add(new DeleteBlockGroupResult(keyBlocks.getGroupID(), + blockResultList)); + } + return results; + } + + @Override + public ScmInfo getScmInfo() throws IOException { + ScmInfo.Builder builder = + new ScmInfo.Builder() + .setClusterId(clusterID) + .setScmId(scmId); + return builder.build(); + } +} diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestBucketManagerImpl.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestBucketManagerImpl.java index 1ecac7fdacb..9684a1f2224 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestBucketManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestBucketManagerImpl.java @@ -17,33 +17,26 @@ package org.apache.hadoop.ozone.om; import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.server.ServerUtils; import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.ozone.om.helpers.OmBucketArgs; -import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; -import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.OzoneAcl; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes; -import org.apache.hadoop.ozone.OzoneAcl; +import org.apache.hadoop.ozone.om.helpers.OmBucketArgs; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; -import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; import org.mockito.runners.MockitoJUnitRunner; -import org.mockito.stubbing.Answer; +import java.io.File; import java.io.IOException; -import java.util.Map; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; 
import java.util.LinkedList; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -import static org.mockito.Mockito.any; +import java.util.List; /** * Tests BucketManagerImpl, mocks OMMetadataManager for testing. @@ -53,86 +46,35 @@ @Rule public ExpectedException thrown = ExpectedException.none(); - private OMMetadataManager getMetadataManagerMock(String... volumesToCreate) - throws IOException { - OMMetadataManager metadataManager = Mockito.mock(OMMetadataManager.class); - Map metadataDB = new HashMap<>(); - ReadWriteLock lock = new ReentrantReadWriteLock(); - - Mockito.when(metadataManager.writeLock()).thenReturn(lock.writeLock()); - Mockito.when(metadataManager.readLock()).thenReturn(lock.readLock()); - Mockito.when(metadataManager.getVolumeKey(any(String.class))).thenAnswer( - (InvocationOnMock invocation) -> - DFSUtil.string2Bytes( - OzoneConsts.OM_VOLUME_PREFIX + invocation.getArguments()[0])); - Mockito.when(metadataManager - .getBucketKey(any(String.class), any(String.class))).thenAnswer( - (InvocationOnMock invocation) -> - DFSUtil.string2Bytes( - OzoneConsts.OM_VOLUME_PREFIX - + invocation.getArguments()[0] - + OzoneConsts.OM_BUCKET_PREFIX - + invocation.getArguments()[1])); - - Mockito.doAnswer( - new Answer() { - @Override - public Boolean answer(InvocationOnMock invocation) - throws Throwable { - String keyRootName = OzoneConsts.OM_KEY_PREFIX - + invocation.getArguments()[0] - + OzoneConsts.OM_KEY_PREFIX - + invocation.getArguments()[1] - + OzoneConsts.OM_KEY_PREFIX; - Iterator keyIterator = metadataDB.keySet().iterator(); - while(keyIterator.hasNext()) { - if(keyIterator.next().startsWith(keyRootName)) { - return false; - } - } - return true; - } - }).when(metadataManager).isBucketEmpty(any(String.class), - any(String.class)); - - Mockito.doAnswer( - new Answer() { - @Override - public Void answer(InvocationOnMock invocation) throws Throwable { - metadataDB.put(DFSUtil.bytes2String( - (byte[])invocation.getArguments()[0]), - (byte[])invocation.getArguments()[1]); - return null; - } - }).when(metadataManager).put(any(byte[].class), any(byte[].class)); - - Mockito.when(metadataManager.get(any(byte[].class))).thenAnswer( - (InvocationOnMock invocation) -> - metadataDB.get(DFSUtil.bytes2String( - (byte[])invocation.getArguments()[0])) - ); - Mockito.doAnswer( - new Answer() { - @Override - public Void answer(InvocationOnMock invocation) throws Throwable { - metadataDB.remove(DFSUtil.bytes2String( - (byte[])invocation.getArguments()[0])); - return null; - } - }).when(metadataManager).delete(any(byte[].class)); + @Rule + public TemporaryFolder folder = new TemporaryFolder(); - for(String volumeName : volumesToCreate) { - byte[] dummyVolumeInfo = DFSUtil.string2Bytes(volumeName); - metadataDB.put(OzoneConsts.OM_VOLUME_PREFIX + volumeName, - dummyVolumeInfo); + private OzoneConfiguration createNewTestPath() throws IOException { + OzoneConfiguration conf = new OzoneConfiguration(); + File newFolder = folder.newFolder(); + if (!newFolder.exists()) { + Assert.assertTrue(newFolder.mkdirs()); } - return metadataManager; + ServerUtils.setOzoneMetaDirPath(conf, newFolder.toString()); + return conf; + } + + private OmMetadataManagerImpl createSampleVol() throws IOException { + OzoneConfiguration conf = createNewTestPath(); + OmMetadataManagerImpl metaMgr = new OmMetadataManagerImpl(conf); + byte[] volumeKey = metaMgr.getVolumeKey("sampleVol"); + // This is a simple hack for testing, we just test if the volume via a + // null check, 
do not parse the value part. So just write some dummy value. + metaMgr.getVolumeTable().put(volumeKey, volumeKey); + return metaMgr; } @Test - public void testCreateBucketWithoutVolume() throws IOException { + public void testCreateBucketWithoutVolume() throws Exception { thrown.expectMessage("Volume doesn't exist"); - OMMetadataManager metaMgr = getMetadataManagerMock(); + OzoneConfiguration conf = createNewTestPath(); + OmMetadataManagerImpl metaMgr = + new OmMetadataManagerImpl(conf); try { BucketManager bucketManager = new BucketManagerImpl(metaMgr); OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() @@ -140,29 +82,35 @@ public void testCreateBucketWithoutVolume() throws IOException { .setBucketName("bucketOne") .build(); bucketManager.createBucket(bucketInfo); - } catch(OMException omEx) { + } catch (OMException omEx) { Assert.assertEquals(ResultCodes.FAILED_VOLUME_NOT_FOUND, omEx.getResult()); throw omEx; + } finally { + metaMgr.getStore().close(); } } @Test - public void testCreateBucket() throws IOException { - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + public void testCreateBucket() throws Exception { + OmMetadataManagerImpl metaMgr = createSampleVol(); + BucketManager bucketManager = new BucketManagerImpl(metaMgr); OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() .setVolumeName("sampleVol") .setBucketName("bucketOne") .build(); bucketManager.createBucket(bucketInfo); - Assert.assertNotNull(bucketManager.getBucketInfo("sampleVol", "bucketOne")); + Assert.assertNotNull(bucketManager.getBucketInfo("sampleVol", + "bucketOne")); + metaMgr.getStore().close(); } @Test - public void testCreateAlreadyExistingBucket() throws IOException { + public void testCreateAlreadyExistingBucket() throws Exception { thrown.expectMessage("Bucket already exist"); - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + OmMetadataManagerImpl metaMgr = createSampleVol(); + try { BucketManager bucketManager = new BucketManagerImpl(metaMgr); OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() @@ -171,30 +119,37 @@ public void testCreateAlreadyExistingBucket() throws IOException { .build(); bucketManager.createBucket(bucketInfo); bucketManager.createBucket(bucketInfo); - } catch(OMException omEx) { + } catch (OMException omEx) { Assert.assertEquals(ResultCodes.FAILED_BUCKET_ALREADY_EXISTS, omEx.getResult()); throw omEx; + } finally { + metaMgr.getStore().close(); } } @Test - public void testGetBucketInfoForInvalidBucket() throws IOException { + public void testGetBucketInfoForInvalidBucket() throws Exception { thrown.expectMessage("Bucket not found"); + OmMetadataManagerImpl metaMgr = createSampleVol(); try { - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + + BucketManager bucketManager = new BucketManagerImpl(metaMgr); bucketManager.getBucketInfo("sampleVol", "bucketOne"); - } catch(OMException omEx) { + } catch (OMException omEx) { Assert.assertEquals(ResultCodes.FAILED_BUCKET_NOT_FOUND, omEx.getResult()); throw omEx; + } finally { + metaMgr.getStore().close(); } } @Test - public void testGetBucketInfo() throws IOException { - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + public void testGetBucketInfo() throws Exception { + OmMetadataManagerImpl metaMgr = createSampleVol(); + BucketManager bucketManager = new BucketManagerImpl(metaMgr); OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() .setVolumeName("sampleVol") @@ -210,11 +165,13 @@ public void testGetBucketInfo() throws IOException { Assert.assertEquals(StorageType.DISK, 
result.getStorageType()); Assert.assertEquals(false, result.getIsVersionEnabled()); + metaMgr.getStore().close(); } @Test - public void testSetBucketPropertyAddACL() throws IOException { - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + public void testSetBucketPropertyAddACL() throws Exception { + OmMetadataManagerImpl metaMgr = createSampleVol(); + List acls = new LinkedList<>(); OzoneAcl ozoneAcl = new OzoneAcl(OzoneAcl.OzoneACLType.USER, "root", OzoneAcl.OzoneACLRights.READ); @@ -247,11 +204,13 @@ public void testSetBucketPropertyAddACL() throws IOException { "sampleVol", "bucketOne"); Assert.assertEquals(2, updatedResult.getAcls().size()); Assert.assertTrue(updatedResult.getAcls().contains(newAcl)); + metaMgr.getStore().close(); } @Test - public void testSetBucketPropertyRemoveACL() throws IOException { - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + public void testSetBucketPropertyRemoveACL() throws Exception { + OmMetadataManagerImpl metaMgr = createSampleVol(); + List acls = new LinkedList<>(); OzoneAcl aclOne = new OzoneAcl(OzoneAcl.OzoneACLType.USER, "root", OzoneAcl.OzoneACLRights.READ); @@ -283,11 +242,13 @@ public void testSetBucketPropertyRemoveACL() throws IOException { "sampleVol", "bucketOne"); Assert.assertEquals(1, updatedResult.getAcls().size()); Assert.assertFalse(updatedResult.getAcls().contains(aclTwo)); + metaMgr.getStore().close(); } @Test - public void testSetBucketPropertyChangeStorageType() throws IOException { - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + public void testSetBucketPropertyChangeStorageType() throws Exception { + OmMetadataManagerImpl metaMgr = createSampleVol(); + BucketManager bucketManager = new BucketManagerImpl(metaMgr); OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() .setVolumeName("sampleVol") @@ -309,11 +270,13 @@ public void testSetBucketPropertyChangeStorageType() throws IOException { "sampleVol", "bucketOne"); Assert.assertEquals(StorageType.SSD, updatedResult.getStorageType()); + metaMgr.getStore().close(); } @Test - public void testSetBucketPropertyChangeVersioning() throws IOException { - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + public void testSetBucketPropertyChangeVersioning() throws Exception { + OmMetadataManagerImpl metaMgr = createSampleVol(); + BucketManager bucketManager = new BucketManagerImpl(metaMgr); OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() .setVolumeName("sampleVol") @@ -333,21 +296,22 @@ public void testSetBucketPropertyChangeVersioning() throws IOException { OmBucketInfo updatedResult = bucketManager.getBucketInfo( "sampleVol", "bucketOne"); Assert.assertTrue(updatedResult.getIsVersionEnabled()); + metaMgr.getStore().close(); } @Test - public void testDeleteBucket() throws IOException { + public void testDeleteBucket() throws Exception { thrown.expectMessage("Bucket not found"); - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + OmMetadataManagerImpl metaMgr = createSampleVol(); BucketManager bucketManager = new BucketManagerImpl(metaMgr); - for(int i = 0; i < 5; i++) { + for (int i = 0; i < 5; i++) { OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() .setVolumeName("sampleVol") .setBucketName("bucket_" + i) .build(); bucketManager.createBucket(bucketInfo); } - for(int i = 0; i < 5; i++) { + for (int i = 0; i < 5; i++) { Assert.assertEquals("bucket_" + i, bucketManager.getBucketInfo( "sampleVol", "bucket_" + i).getBucketName()); @@ -356,22 +320,23 @@ public void testDeleteBucket() throws 
IOException { bucketManager.deleteBucket("sampleVol", "bucket_1"); Assert.assertNotNull(bucketManager.getBucketInfo( "sampleVol", "bucket_2")); - } catch(IOException ex) { + } catch (IOException ex) { Assert.fail(ex.getMessage()); } try { bucketManager.getBucketInfo("sampleVol", "bucket_1"); - } catch(OMException omEx) { + } catch (OMException omEx) { Assert.assertEquals(ResultCodes.FAILED_BUCKET_NOT_FOUND, omEx.getResult()); throw omEx; } + metaMgr.getStore().close(); } @Test - public void testDeleteNonEmptyBucket() throws IOException { + public void testDeleteNonEmptyBucket() throws Exception { thrown.expectMessage("Bucket is not empty"); - OMMetadataManager metaMgr = getMetadataManagerMock("sampleVol"); + OmMetadataManagerImpl metaMgr = createSampleVol(); BucketManager bucketManager = new BucketManagerImpl(metaMgr); OmBucketInfo bucketInfo = OmBucketInfo.newBuilder() .setVolumeName("sampleVol") @@ -379,16 +344,19 @@ public void testDeleteNonEmptyBucket() throws IOException { .build(); bucketManager.createBucket(bucketInfo); //Create keys in bucket - metaMgr.put(DFSUtil.string2Bytes("/sampleVol/bucketOne/key_one"), + metaMgr.getKeyTable().put(DFSUtil.string2Bytes("/sampleVol/bucketOne" + + "/key_one"), DFSUtil.string2Bytes("value_one")); - metaMgr.put(DFSUtil.string2Bytes("/sampleVol/bucketOne/key_two"), + metaMgr.getKeyTable().put(DFSUtil.string2Bytes("/sampleVol/bucketOne" + + "/key_two"), DFSUtil.string2Bytes("value_two")); try { bucketManager.deleteBucket("sampleVol", "bucketOne"); - } catch(OMException omEx) { + } catch (OMException omEx) { Assert.assertEquals(ResultCodes.FAILED_BUCKET_NOT_EMPTY, omEx.getResult()); throw omEx; } + metaMgr.getStore().close(); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java index 7ce916a9cc3..7ff9d636534 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestChunkStreams.java @@ -121,6 +121,7 @@ public void testReadGroupInputStream() throws Exception { int tempOffset = offset; ChunkInputStream in = new ChunkInputStream(null, null, null, new ArrayList<>(), null) { + private long pos = 0; private ByteArrayInputStream in = new ByteArrayInputStream(buf, tempOffset, 100); @@ -131,7 +132,7 @@ public void seek(long pos) throws IOException { @Override public long getPos() throws IOException { - throw new UnsupportedOperationException(); + return pos; } @Override @@ -147,7 +148,9 @@ public int read() throws IOException { @Override public int read(byte[] b, int off, int len) throws IOException { - return in.read(b, off, len); + int readLen = in.read(b, off, len); + pos += readLen; + return readLen; } }; inputStreams.add(in); @@ -175,6 +178,7 @@ public void testErrorReadGroupInputStream() throws Exception { int tempOffset = offset; ChunkInputStream in = new ChunkInputStream(null, null, null, new ArrayList<>(), null) { + private long pos = 0; private ByteArrayInputStream in = new ByteArrayInputStream(buf, tempOffset, 100); @@ -185,7 +189,7 @@ public void seek(long pos) throws IOException { @Override public long getPos() throws IOException { - throw new UnsupportedOperationException(); + return pos; } @Override @@ -201,7 +205,9 @@ public int read() throws IOException { @Override public int read(byte[] b, int off, int len) throws IOException { - return in.read(b, off, len); + int readLen = 
in.read(b, off, len); + pos += readLen; + return readLen; } }; inputStreams.add(in); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestKeyDeletingService.java new file mode 100644 index 00000000000..60c6fc39d7f --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestKeyDeletingService.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.ozone.om; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.server.ServerUtils; +import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OpenKeySession; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.utils.db.DBConfigFromFile; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import static org.apache.hadoop.hdds.HddsConfigKeys + .HDDS_CONTAINER_REPORT_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConfigKeys + .OZONE_BLOCK_DELETING_SERVICE_INTERVAL; + +/** + * Test Key Deleting Service. + *
+ * This test does the following things. + *
+ * 1. Creates a bunch of keys. 2. Then executes delete key directly using + * Metadata Manager. 3. Waits for a while for the KeyDeleting Service to pick up + * and call into SCM. 4. Confirms that calls have been successful. + */ +public class TestKeyDeletingService { + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private OzoneConfiguration createConfAndInitValues() throws IOException { + OzoneConfiguration conf = new OzoneConfiguration(); + File newFolder = folder.newFolder(); + if (!newFolder.exists()) { + Assert.assertTrue(newFolder.mkdirs()); + } + System.setProperty(DBConfigFromFile.CONFIG_DIR, "/"); + ServerUtils.setOzoneMetaDirPath(conf, newFolder.toString()); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, + TimeUnit.MILLISECONDS); + conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, + TimeUnit.MILLISECONDS); + conf.setQuietMode(false); + + return conf; + } + + /** + * In this test, we create a bunch of keys and delete them. Then we start the + * KeyDeletingService and pass a SCMClient which does not fail. We make sure + * that all the keys that we deleted is picked up and deleted by + * OzoneManager. + * + * @throws IOException - on Failure. + */ + + @Test(timeout = 30000) + public void checkIfDeleteServiceisDeletingKeys() + throws IOException, TimeoutException, InterruptedException { + OzoneConfiguration conf = createConfAndInitValues(); + OmMetadataManagerImpl metaMgr = new OmMetadataManagerImpl(conf); + KeyManager keyManager = + new KeyManagerImpl( + new ScmBlockLocationTestIngClient(null, null, 0), + metaMgr, conf, UUID.randomUUID().toString()); + final int keyCount = 100; + createAndDeleteKeys(keyManager, keyCount, 1); + KeyDeletingService keyDeletingService = + (KeyDeletingService) keyManager.getDeletingService(); + keyManager.start(); + Assert.assertEquals( + keyManager.getPendingDeletionKeys(Integer.MAX_VALUE).size(), keyCount); + GenericTestUtils.waitFor( + () -> keyDeletingService.getDeletedKeyCount().get() >= keyCount, + 1000, 10000); + Assert.assertTrue(keyDeletingService.getRunCount().get() > 1); + Assert.assertEquals( + keyManager.getPendingDeletionKeys(Integer.MAX_VALUE).size(), 0); + } + + @Test(timeout = 30000) + public void checkIfDeleteServiceWithFailingSCM() + throws IOException, TimeoutException, InterruptedException { + OzoneConfiguration conf = createConfAndInitValues(); + OmMetadataManagerImpl metaMgr = new OmMetadataManagerImpl(conf); + //failCallsFrequency = 1 , means all calls fail. + KeyManager keyManager = + new KeyManagerImpl( + new ScmBlockLocationTestIngClient(null, null, 1), + metaMgr, conf, UUID.randomUUID().toString()); + final int keyCount = 100; + createAndDeleteKeys(keyManager, keyCount, 1); + KeyDeletingService keyDeletingService = + (KeyDeletingService) keyManager.getDeletingService(); + keyManager.start(); + Assert.assertEquals( + keyManager.getPendingDeletionKeys(Integer.MAX_VALUE).size(), keyCount); + // Make sure that we have run the background thread 5 times more + GenericTestUtils.waitFor( + () -> keyDeletingService.getRunCount().get() >= 5, + 100, 1000); + // Since SCM calls are failing, deletedKeyCount should be zero. 
+ Assert.assertEquals(keyDeletingService.getDeletedKeyCount().get(), 0); + Assert.assertEquals( + keyManager.getPendingDeletionKeys(Integer.MAX_VALUE).size(), keyCount); + } + + @Test(timeout = 30000) + public void checkDeletionForEmptyKey() + throws IOException, TimeoutException, InterruptedException { + OzoneConfiguration conf = createConfAndInitValues(); + OmMetadataManagerImpl metaMgr = new OmMetadataManagerImpl(conf); + //failCallsFrequency = 1 , means all calls fail. + KeyManager keyManager = + new KeyManagerImpl( + new ScmBlockLocationTestIngClient(null, null, 1), + metaMgr, conf, UUID.randomUUID().toString()); + final int keyCount = 100; + createAndDeleteKeys(keyManager, keyCount, 0); + KeyDeletingService keyDeletingService = + (KeyDeletingService) keyManager.getDeletingService(); + keyManager.start(); + + // Since empty keys are directly deleted from db there should be no + // pending deletion keys. Also deletedKeyCount should be zero. + Assert.assertEquals( + keyManager.getPendingDeletionKeys(Integer.MAX_VALUE).size(), 0); + // Make sure that we have run the background thread 2 times or more + GenericTestUtils.waitFor( + () -> keyDeletingService.getRunCount().get() >= 2, + 100, 1000); + Assert.assertEquals(keyDeletingService.getDeletedKeyCount().get(), 0); + } + + private void createAndDeleteKeys(KeyManager keyManager, int keyCount, + int numBlocks) throws IOException { + for (int x = 0; x < keyCount; x++) { + String volumeName = String.format("volume%s", + RandomStringUtils.randomAlphanumeric(5)); + String bucketName = String.format("bucket%s", + RandomStringUtils.randomAlphanumeric(5)); + String keyName = String.format("key%s", + RandomStringUtils.randomAlphanumeric(5)); + byte[] volumeBytes = + keyManager.getMetadataManager().getVolumeKey(volumeName); + byte[] bucketBytes = + keyManager.getMetadataManager().getBucketKey(volumeName, bucketName); + // cheat here, just create a volume and bucket entry so that we can + // create the keys, we put the same data for key and value since the + // system does not decode the object + keyManager.getMetadataManager().getVolumeTable().put(volumeBytes, + volumeBytes); + + keyManager.getMetadataManager().getBucketTable().put(bucketBytes, + bucketBytes); + + OmKeyArgs arg = + new OmKeyArgs.Builder() + .setVolumeName(volumeName) + .setBucketName(bucketName) + .setKeyName(keyName) + .setLocationInfoList(new ArrayList<>()) + .build(); + //Open, Commit and Delete the Keys in the Key Manager. + OpenKeySession session = keyManager.openKey(arg); + for (int i = 0; i < numBlocks; i++) { + arg.addLocationInfo(keyManager.allocateBlock(arg, session.getId())); + } + keyManager.commitKey(arg, session.getId()); + keyManager.deleteKey(arg); + } + } +} \ No newline at end of file diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java new file mode 100644 index 00000000000..d72215579d1 --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestKeyManagerImpl.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  
You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om; + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; +import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.ozone.om.exceptions.OMException; +import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.KeyInfo; +import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.utils.db.RDBStore; +import org.apache.hadoop.utils.db.Table; +import org.apache.hadoop.utils.db.TableConfig; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.DBOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.Statistics; +import org.rocksdb.StatsLevel; + +/** + * Test class for @{@link KeyManagerImpl}. 
+ * */ +public class TestKeyManagerImpl { + + private static KeyManagerImpl keyManager; + private static ScmBlockLocationProtocol scmBlockLocationProtocol; + private static OzoneConfiguration conf; + private static OMMetadataManager metadataManager; + private static long blockSize = 1000; + private static final String KEY_NAME = "key1"; + private static final String BUCKET_NAME = "bucket1"; + private static final String VOLUME_NAME = "vol1"; + private static RDBStore rdbStore = null; + private static Table rdbTable = null; + private static DBOptions options = null; + private KeyInfo keyData; + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + @Before + public void setUp() throws Exception { + conf = new OzoneConfiguration(); + scmBlockLocationProtocol = Mockito.mock(ScmBlockLocationProtocol.class); + metadataManager = Mockito.mock(OMMetadataManager.class); + keyManager = new KeyManagerImpl(scmBlockLocationProtocol, metadataManager, + conf, "om1"); + setupMocks(); + } + + private void setupMocks() throws Exception { + Mockito.when(scmBlockLocationProtocol + .allocateBlock(Mockito.anyLong(), Mockito.any(ReplicationType.class), + Mockito.any(ReplicationFactor.class), Mockito.anyString())) + .thenThrow( + new SCMException("ChillModePrecheck failed for allocateBlock", + ResultCodes.CHILL_MODE_EXCEPTION)); + setupRocksDb(); + Mockito.when(metadataManager.getVolumeTable()).thenReturn(rdbTable); + Mockito.when(metadataManager.getBucketTable()).thenReturn(rdbTable); + Mockito.when(metadataManager.getOpenKeyTable()).thenReturn(rdbTable); + Mockito.when(metadataManager.getLock()) + .thenReturn(new OzoneManagerLock(conf)); + Mockito.when(metadataManager.getVolumeKey(VOLUME_NAME)) + .thenReturn(VOLUME_NAME.getBytes()); + Mockito.when(metadataManager.getBucketKey(VOLUME_NAME, BUCKET_NAME)) + .thenReturn(BUCKET_NAME.getBytes()); + Mockito.when(metadataManager.getOpenKeyBytes(VOLUME_NAME, BUCKET_NAME, + KEY_NAME, 1)).thenReturn(KEY_NAME.getBytes()); + } + + private void setupRocksDb() throws Exception { + options = new DBOptions(); + options.setCreateIfMissing(true); + options.setCreateMissingColumnFamilies(true); + + Statistics statistics = new Statistics(); + statistics.setStatsLevel(StatsLevel.ALL); + options = options.setStatistics(statistics); + + Set configSet = new HashSet<>(); + for (String name : Arrays + .asList(DFSUtil.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY), + "testTable")) { + TableConfig newConfig = new TableConfig(name, new ColumnFamilyOptions()); + configSet.add(newConfig); + } + keyData = KeyInfo.newBuilder() + .setKeyName(KEY_NAME) + .setBucketName(BUCKET_NAME) + .setVolumeName(VOLUME_NAME) + .setDataSize(blockSize) + .setType(ReplicationType.STAND_ALONE) + .setFactor(ReplicationFactor.ONE) + .setCreationTime(Time.now()) + .setModificationTime(Time.now()) + .build(); + + rdbStore = new RDBStore(folder.newFolder(), options, configSet); + rdbTable = rdbStore.getTable("testTable"); + rdbTable.put(VOLUME_NAME.getBytes(), + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8)); + rdbTable.put(BUCKET_NAME.getBytes(), + RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8)); + rdbTable.put(KEY_NAME.getBytes(), keyData.toByteArray()); + } + + @Test + public void allocateBlockFailureInChillMode() throws Exception { + OmKeyArgs keyArgs = new OmKeyArgs.Builder().setKeyName(KEY_NAME) + .setBucketName(BUCKET_NAME) + .setFactor(ReplicationFactor.ONE) + .setType(ReplicationType.STAND_ALONE) + .setVolumeName(VOLUME_NAME).build(); + 
LambdaTestUtils.intercept(OMException.class, + "ChillModePrecheck failed for allocateBlock", () -> { + keyManager.allocateBlock(keyArgs, 1); + }); + } + + @Test + public void openKeyFailureInChillMode() throws Exception { + OmKeyArgs keyArgs = new OmKeyArgs.Builder().setKeyName(KEY_NAME) + .setBucketName(BUCKET_NAME) + .setFactor(ReplicationFactor.ONE) + .setDataSize(1000) + .setType(ReplicationType.STAND_ALONE) + .setVolumeName(VOLUME_NAME).build(); + LambdaTestUtils.intercept(OMException.class, + "ChillModePrecheck failed for allocateBlock", () -> { + keyManager.openKey(keyArgs); + }); + } +} \ No newline at end of file diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerLock.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerLock.java new file mode 100644 index 00000000000..d8d3096b0fe --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerLock.java @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.om; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.junit.Assert; +import org.junit.Test; + +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Contains test-cases to verify OzoneManagerLock. + */ +public class TestOzoneManagerLock { + + @Test(timeout = 1000) + public void testDifferentUserLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireUserLock("userOne"); + lock.acquireUserLock("userTwo"); + lock.releaseUserLock("userOne"); + lock.releaseUserLock("userTwo"); + Assert.assertTrue(true); + } + + @Test + public void testSameUserLock() throws Exception { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireUserLock("userOne"); + AtomicBoolean gotLock = new AtomicBoolean(false); + new Thread(() -> { + lock.acquireUserLock("userOne"); + gotLock.set(true); + lock.releaseUserLock("userOne"); + }).start(); + // Let's give some time for the new thread to run + Thread.sleep(100); + // Since the new thread is trying to get lock on same user, it will wait. + Assert.assertFalse(gotLock.get()); + lock.releaseUserLock("userOne"); + // Since we have released the lock, the new thread should have the lock + // now + // Let's give some time for the new thread to run + Thread.sleep(100); + Assert.assertTrue(gotLock.get()); + } + + @Test(timeout = 1000) + public void testDifferentVolumeLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireVolumeLock("volOne"); + lock.acquireVolumeLock("volTwo"); + lock.releaseVolumeLock("volOne"); + lock.releaseVolumeLock("volTwo"); + Assert.assertTrue(true); + } + + @Test + public void testSameVolumeLock() throws Exception { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireVolumeLock("volOne"); + AtomicBoolean gotLock = new AtomicBoolean(false); + new Thread(() -> { + lock.acquireVolumeLock("volOne"); + gotLock.set(true); + lock.releaseVolumeLock("volOne"); + }).start(); + // Let's give some time for the new thread to run + Thread.sleep(100); + // Since the new thread is trying to get lock on same user, it will wait. 
+ Assert.assertFalse(gotLock.get()); + lock.releaseVolumeLock("volOne"); + // Since we have released the lock, the new thread should have the lock + // now + // Let's give some time for the new thread to run + Thread.sleep(100); + Assert.assertTrue(gotLock.get()); + } + + @Test(timeout = 1000) + public void testDifferentBucketLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireBucketLock("volOne", "bucketOne"); + lock.acquireBucketLock("volOne", "bucketTwo"); + lock.releaseBucketLock("volOne", "bucketTwo"); + lock.releaseBucketLock("volOne", "bucketOne"); + Assert.assertTrue(true); + } + + @Test + public void testSameBucketLock() throws Exception { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireBucketLock("volOne", "bucketOne"); + AtomicBoolean gotLock = new AtomicBoolean(false); + new Thread(() -> { + lock.acquireBucketLock("volOne", "bucketOne"); + gotLock.set(true); + lock.releaseBucketLock("volOne", "bucketOne"); + }).start(); + // Let's give some time for the new thread to run + Thread.sleep(100); + // Since the new thread is trying to get lock on same user, it will wait. + Assert.assertFalse(gotLock.get()); + lock.releaseBucketLock("volOne", "bucketOne"); + // Since we have released the lock, the new thread should have the lock + // now + // Let's give some time for the new thread to run + Thread.sleep(100); + Assert.assertTrue(gotLock.get()); + } + + @Test(timeout = 1000) + public void testVolumeLockAfterUserLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireUserLock("userOne"); + lock.acquireVolumeLock("volOne"); + lock.releaseVolumeLock("volOne"); + lock.releaseUserLock("userOne"); + Assert.assertTrue(true); + } + + @Test(timeout = 1000) + public void testBucketLockAfterVolumeLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireVolumeLock("volOne"); + lock.acquireBucketLock("volOne", "bucketOne"); + lock.releaseBucketLock("volOne", "bucketOne"); + lock.releaseVolumeLock("volOne"); + Assert.assertTrue(true); + } + + @Test(timeout = 1000) + public void testBucketLockAfterVolumeLockAfterUserLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireUserLock("userOne"); + lock.acquireVolumeLock("volOne"); + lock.acquireBucketLock("volOne", "bucketOne"); + lock.releaseBucketLock("volOne", "bucketOne"); + lock.releaseVolumeLock("volOne"); + lock.releaseUserLock("userOne"); + Assert.assertTrue(true); + } + + @Test + public void testUserLockAfterVolumeLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireVolumeLock("volOne"); + try { + lock.acquireUserLock("userOne"); + Assert.fail(); + } catch (RuntimeException ex) { + String msg = + "cannot acquire user lock while holding volume/bucket lock(s)."; + Assert.assertTrue(ex.getMessage().contains(msg)); + } + lock.releaseVolumeLock("volOne"); + Assert.assertTrue(true); + } + + @Test + public void testVolumeLockAfterBucketLock() { + OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration()); + lock.acquireBucketLock("volOne", "bucketOne"); + try { + lock.acquireVolumeLock("volOne"); + Assert.fail(); + } catch (RuntimeException ex) { + String msg = + "cannot acquire volume lock while holding bucket lock(s)."; + Assert.assertTrue(ex.getMessage().contains(msg)); + } + lock.releaseBucketLock("volOne", "bucketOne"); + Assert.assertTrue(true); + } + + +} \ No newline at end of file 
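
The new TestOzoneManagerLock above pins down a strict lock hierarchy in the Ozone Manager: user lock, then volume lock, then bucket lock, with a coarser lock rejected once a finer one is held. The short sketch below is not part of the patch; the class name is illustrative and only the acquire/release calls exercised by the test above are assumed to exist. It summarizes the usage pattern the tests enforce.

// Minimal sketch (assumption: API limited to the calls shown in the test above)
// of the OzoneManagerLock ordering contract verified by TestOzoneManagerLock.
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.ozone.om.OzoneManagerLock;

public class OzoneManagerLockOrderingSketch {
  public static void main(String[] args) {
    OzoneManagerLock lock = new OzoneManagerLock(new OzoneConfiguration());

    // Correct order: user -> volume -> bucket, released in reverse.
    lock.acquireUserLock("userOne");
    lock.acquireVolumeLock("volOne");
    lock.acquireBucketLock("volOne", "bucketOne");
    lock.releaseBucketLock("volOne", "bucketOne");
    lock.releaseVolumeLock("volOne");
    lock.releaseUserLock("userOne");

    // Taking a coarser lock while holding a finer one fails fast
    // instead of risking deadlock.
    lock.acquireBucketLock("volOne", "bucketOne");
    try {
      lock.acquireVolumeLock("volOne");
    } catch (RuntimeException ex) {
      // expected: "cannot acquire volume lock while holding bucket lock(s)."
    } finally {
      lock.releaseBucketLock("volOne", "bucketOne");
    }
  }
}
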
diff --git a/hadoop-ozone/ozonefs/pom.xml b/hadoop-ozone/ozonefs/pom.xml index c3de4d1b32f..c45aacd3c78 100644 --- a/hadoop-ozone/ozonefs/pom.xml +++ b/hadoop-ozone/ozonefs/pom.xml @@ -19,15 +19,17 @@ org.apache.hadoop hadoop-ozone - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-ozone-filesystem Apache Hadoop Ozone FileSystem jar - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT UTF-8 true + ozone + true @@ -50,20 +52,20 @@ - com.google.guava:guava:jar - org.slf4j:slf4j-api:jar + com.google.guava:guava + org.slf4j:slf4j-api com.google.protobuf:protobuf-java - com.nimbusds:nimbus-jose-jwt:jar + com.nimbusds:nimbus-jose-jwt com.github.stephenc.jcip:jcip-annotations - com.google.code.findbugs:jsr305:jar + com.google.code.findbugs:jsr305 org.apache.hadoop:hadoop-ozone-client org.apache.hadoop:hadoop-hdds-client org.apache.hadoop:hadoop-hdds-common org.fusesource.leveldbjni:leveldbjni-all org.apache.ratis:ratis-server - org.apache.ratis:ratis-proto-shaded:jar + org.apache.ratis:ratis-proto-shaded com.google.auto.value:auto-value-annotations - com.squareup:javapoet:jar + com.squareup:javapoet org.jctools:jctools-core org.apache.ratis:ratis-common org.apache.ratis:ratis-client diff --git a/hadoop-ozone/ozonefs/src/main/java/org/apache/hadoop/fs/ozone/OzoneFileSystem.java b/hadoop-ozone/ozonefs/src/main/java/org/apache/hadoop/fs/ozone/OzoneFileSystem.java index 6906a9dc478..b876dc28b16 100644 --- a/hadoop-ozone/ozonefs/src/main/java/org/apache/hadoop/fs/ozone/OzoneFileSystem.java +++ b/hadoop-ozone/ozonefs/src/main/java/org/apache/hadoop/fs/ozone/OzoneFileSystem.java @@ -42,7 +42,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException; -import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; @@ -60,7 +59,6 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.ozone.client.io.OzoneInputStream; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import static org.apache.hadoop.fs.ozone.Constants.OZONE_DEFAULT_USER; diff --git a/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFSInputStream.java b/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFSInputStream.java index ad21f28ec44..d3bc857632d 100644 --- a/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFSInputStream.java +++ b/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/TestOzoneFSInputStream.java @@ -57,8 +57,7 @@ /** * Create a MiniDFSCluster for testing. *

- * Ozone is made active by setting OZONE_ENABLED = true and - * OZONE_HANDLER_TYPE_KEY = "distributed" + * Ozone is made active by setting OZONE_ENABLED = true * * @throws IOException */ @@ -90,9 +89,6 @@ public static void init() throws Exception { // Fetch the host and port for File System init DatanodeDetails datanodeDetails = cluster.getHddsDatanodes().get(0) .getDatanodeDetails(); - int port = datanodeDetails - .getPort(DatanodeDetails.Port.Name.REST).getValue(); - String host = datanodeDetails.getHostName(); // Set the fs.defaultFS and start the filesystem String uri = String.format("%s://%s.%s/", diff --git a/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/contract/ITestOzoneContractGetFileStatus.java b/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/contract/ITestOzoneContractGetFileStatus.java index 98bbb141ad5..fe442f76ca0 100644 --- a/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/contract/ITestOzoneContractGetFileStatus.java +++ b/hadoop-ozone/ozonefs/src/test/java/org/apache/hadoop/fs/ozone/contract/ITestOzoneContractGetFileStatus.java @@ -50,7 +50,7 @@ protected AbstractFSContract createContract(Configuration conf) { @Override public void teardown() throws Exception { - getLog().info("FS details {}", getFileSystem()); + getLogger().info("FS details {}", getFileSystem()); super.teardown(); } diff --git a/hadoop-ozone/ozonefs/src/test/resources/log4j.properties b/hadoop-ozone/ozonefs/src/test/resources/log4j.properties index 3bf1619d227..a7b5aa93e9c 100644 --- a/hadoop-ozone/ozonefs/src/test/resources/log4j.properties +++ b/hadoop-ozone/ozonefs/src/test/resources/log4j.properties @@ -17,6 +17,7 @@ log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.logger.org.apache.hadoop.security.ShellBasedUnixGroupsMapping=ERROR log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR # for debugging low level Ozone operations, uncomment this line diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index b655088c57b..c73be60379b 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -24,11 +24,16 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ../hadoop-project-dist hadoop-ozone - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Ozone Project Apache Hadoop Ozone pom + + 0.3.0-SNAPSHOT + Arches + ${ozone.version} + common client @@ -44,47 +49,38 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-common - provided org.apache.hadoop hadoop-hdfs - provided org.apache.hadoop hadoop-hdfs-client - provided org.apache.hadoop hadoop-hdds-common - provided org.apache.hadoop hadoop-hdds-server-framework - provided org.apache.hadoop hadoop-hdds-server-scm - provided org.apache.hadoop hadoop-hdds-container-service - provided org.apache.hadoop hadoop-hdds-client - provided org.apache.hadoop hadoop-hdds-tools - provided org.apache.hadoop @@ -124,48 +120,37 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> apache-rat-plugin + **/*.json + **/hs_err*.log **/target/** - acceptance-test/hadoop-ozone-acceptance-test.iml .gitattributes .idea/** + dev-support/*tests dev-support/checkstyle* dev-support/jdiff/** - dev-support/*tests - src/test/empty-file - src/test/all-tests - src/test/resources/*.tgz - src/test/resources/data* - **/*.json - src/test/resources/empty-file + src/contrib/** src/main/webapps/datanode/robots.txt src/main/webapps/hdfs/robots.txt 
src/main/webapps/journal/robots.txt - src/main/webapps/secondary/robots.txt src/main/webapps/router/robots.txt - src/contrib/** + src/main/webapps/secondary/robots.txt src/site/resources/images/* + src/test/all-tests + src/test/empty-file + src/test/resources/*.tgz + src/test/resources/data* + src/test/resources/empty-file + webapps/static/angular-1.6.4.min.js + webapps/static/angular-nvd3-1.0.9.min.js + webapps/static/angular-route-1.6.4.min.js webapps/static/bootstrap-3.3.7/** + webapps/static/d3-3.5.17.min.js webapps/static/jquery-3.3.1.min.js webapps/static/jquery.dataTables.min.js webapps/static/nvd3-1.8.5.min.css.map - webapps/static/nvd3-1.8.5.min.js - webapps/static/angular-route-1.6.4.min.js - webapps/static/nvd3-1.8.5.min.css - webapps/static/angular-nvd3-1.0.9.min.js webapps/static/nvd3-1.8.5.min.js.map - webapps/static/angular-1.6.4.min.js - webapps/static/d3-3.5.17.min.js - static/OzoneOverview.svg - themes/ozonedoc/static/js/jquery.min.js - themes/ozonedoc/static/js/bootstrap.min.js - themes/ozonedoc/static/css/bootstrap.min.css - themes/ozonedoc/static/css/bootstrap.min.css.map - themes/ozonedoc/static/css/bootstrap-theme.min.css - themes/ozonedoc/static/css/bootstrap-theme.min.css.map - themes/ozonedoc/static/fonts/glyphicons-halflings-regular.svg - themes/ozonedoc/layouts/index.html - themes/ozonedoc/theme.toml + webapps/static/nvd3-1.8.5.min.js @@ -178,4 +163,53 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + + + parallel-tests + + + + org.apache.hadoop + hadoop-maven-plugins + + + parallel-tests-createdir + + parallel-tests-createdir + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + ${testsThreadCount} + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + + ${testsThreadCount} + ${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + + + + + + ${test.build.data} + + + + + + fork-${surefire.forkNumber} + + + + + + + diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index a78565ae7f1..eeec595ee41 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -20,16 +20,16 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-ozone - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT hadoop-ozone-tools - 0.2.1-SNAPSHOT + 0.3.0-SNAPSHOT Apache Hadoop Ozone Tools Apache Hadoop Ozone Tools jar - hdds + ozone true diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/Freon.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/Freon.java index ab52b86c23a..f9e8c9b97ce 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/Freon.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/Freon.java @@ -14,1136 +14,26 @@ * License for the specific language governing permissions and limitations under * the License. 
*/ - package org.apache.hadoop.ozone.freon; -import com.codahale.metrics.Histogram; -import com.codahale.metrics.Snapshot; -import com.codahale.metrics.UniformReservoir; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.PropertyAccessor; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectWriter; -import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.lang3.ArrayUtils; -import org.apache.commons.lang3.RandomStringUtils; -import org.apache.commons.lang3.time.DurationFormatUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.hdds.client.OzoneQuota; -import org.apache.hadoop.hdds.client.ReplicationFactor; -import org.apache.hadoop.hdds.client.ReplicationType; -import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.client.*; -import org.apache.hadoop.ozone.client.io.OzoneInputStream; -import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.util.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Supplier; +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; -import static java.lang.Math.min; +import picocli.CommandLine.Command; /** - * Freon - A tool to populate ozone with data for testing.
- * This is not a map-reduce program and this is not for benchmarking
- * Ozone write throughput.
- * It supports both online and offline modes. Default mode is offline,
- * -mode can be used to change the mode.
- *
- * In online mode, active internet connection is required,
- * common crawl data from AWS will be used.
- * Default source is:
- * https://commoncrawl.s3.amazonaws.com/crawl-data/
- * CC-MAIN-2017-17/warc.paths.gz
- * (it contains the path to actual data segment)
- * User can override this using -source.
- * The following values are derived from URL of Common Crawl data
- *
- *   • Domain will be used as Volume
- *   • URL will be used as Bucket
- *   • FileName will be used as Key
- *
- * In offline mode, the data will be random bytes and
- * size of data will be 10 KB.
- *
- *   • Default number of Volumes 10, -numOfVolumes
- *     can be used to override
- *   • Default number of Buckets per Volume 1000, -numOfBuckets
- *     can be used to override
- *   • Default number of Keys per Bucket 500000, -numOfKeys
- *     can be used to override
+ * Ozone data generator and performance test tool. */ -public final class Freon extends Configured implements Tool { - - enum FreonOps { - VOLUME_CREATE, - BUCKET_CREATE, - KEY_CREATE, - KEY_WRITE - } - - private static final String HELP = "help"; - private static final String MODE = "mode"; - private static final String SOURCE = "source"; - private static final String VALIDATE_WRITE = "validateWrites"; - private static final String JSON_WRITE_DIRECTORY = "jsonDir"; - private static final String NUM_OF_THREADS = "numOfThreads"; - private static final String NUM_OF_VOLUMES = "numOfVolumes"; - private static final String NUM_OF_BUCKETS = "numOfBuckets"; - private static final String NUM_OF_KEYS = "numOfKeys"; - private static final String KEY_SIZE = "keySize"; - private static final String RATIS = "ratis"; - - private static final String MODE_DEFAULT = "offline"; - private static final String SOURCE_DEFAULT = - "https://commoncrawl.s3.amazonaws.com/" + - "crawl-data/CC-MAIN-2017-17/warc.paths.gz"; - private static final String NUM_OF_THREADS_DEFAULT = "10"; - private static final String NUM_OF_VOLUMES_DEFAULT = "10"; - private static final String NUM_OF_BUCKETS_DEFAULT = "1000"; - private static final String NUM_OF_KEYS_DEFAULT = "500000"; - private static final String DURATION_FORMAT = "HH:mm:ss,SSS"; - - private static final int KEY_SIZE_DEFAULT = 10240; - private static final int QUANTILES = 10; - - private static final Logger LOG = - LoggerFactory.getLogger(Freon.class); - - private boolean printUsage = false; - private boolean completed = false; - private boolean exception = false; - - private String mode; - private String source; - private String numOfThreads; - private String numOfVolumes; - private String numOfBuckets; - private String numOfKeys; - private String jsonDir; - private boolean useRatis; - private ReplicationType type; - private ReplicationFactor factor; - - private int threadPoolSize; - private int keySize; - private byte[] keyValue = null; - - private boolean validateWrites; - - private OzoneClient ozoneClient; - private ObjectStore objectStore; - private ExecutorService processor; - - private long startTime; - private long jobStartTime; - - private AtomicLong volumeCreationTime; - private AtomicLong bucketCreationTime; - private AtomicLong keyCreationTime; - private AtomicLong keyWriteTime; - - private AtomicLong totalBytesWritten; - - private AtomicInteger numberOfVolumesCreated; - private AtomicInteger numberOfBucketsCreated; - private AtomicLong numberOfKeysAdded; - - private Long totalWritesValidated; - private Long writeValidationSuccessCount; - private Long writeValidationFailureCount; - - private BlockingQueue validationQueue; - private ArrayList histograms = new ArrayList<>(); - - @VisibleForTesting - Freon(Configuration conf) throws IOException { - startTime = System.nanoTime(); - jobStartTime = System.currentTimeMillis(); - volumeCreationTime = new AtomicLong(); - bucketCreationTime = new AtomicLong(); - keyCreationTime = new AtomicLong(); - keyWriteTime = new AtomicLong(); - totalBytesWritten = new AtomicLong(); - numberOfVolumesCreated = new AtomicInteger(); - numberOfBucketsCreated = new AtomicInteger(); - numberOfKeysAdded = new AtomicLong(); - ozoneClient = OzoneClientFactory.getClient(conf); - objectStore = ozoneClient.getObjectStore(); - for (FreonOps ops : FreonOps.values()) { - histograms.add(ops.ordinal(), new Histogram(new UniformReservoir())); - } - } - - /** - * @param args arguments - */ - public static void main(String[] args) throws Exception 
{ - Configuration conf = new OzoneConfiguration(); - int res = ToolRunner.run(conf, new Freon(conf), args); - System.exit(res); - } - - @Override - public int run(String[] args) throws Exception { - GenericOptionsParser parser = new GenericOptionsParser(getConf(), - getOptions(), args); - parseOptions(parser.getCommandLine()); - if (printUsage) { - usage(); - return 0; - } - - keyValue = - DFSUtil.string2Bytes(RandomStringUtils.randomAscii(keySize - 36)); - - LOG.info("Number of Threads: " + numOfThreads); - threadPoolSize = - min(Integer.parseInt(numOfVolumes), Integer.parseInt(numOfThreads)); - processor = Executors.newFixedThreadPool(threadPoolSize); - addShutdownHook(); - if (mode.equals("online")) { - LOG.info("Mode: online"); - throw new UnsupportedOperationException("Not yet implemented."); - } else { - LOG.info("Mode: offline"); - LOG.info("Number of Volumes: {}.", numOfVolumes); - LOG.info("Number of Buckets per Volume: {}.", numOfBuckets); - LOG.info("Number of Keys per Bucket: {}.", numOfKeys); - LOG.info("Key size: {} bytes", keySize); - for (int i = 0; i < Integer.parseInt(numOfVolumes); i++) { - String volume = "vol-" + i + "-" + - RandomStringUtils.randomNumeric(5); - processor.submit(new OfflineProcessor(volume)); - } - } - Thread validator = null; - if (validateWrites) { - totalWritesValidated = 0L; - writeValidationSuccessCount = 0L; - writeValidationFailureCount = 0L; - - validationQueue = - new ArrayBlockingQueue<>(Integer.parseInt(numOfThreads)); - validator = new Thread(new Validator()); - validator.start(); - LOG.info("Data validation is enabled."); - } - Thread progressbar = getProgressBarThread(); - LOG.info("Starting progress bar Thread."); - progressbar.start(); - processor.shutdown(); - processor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); - completed = true; - progressbar.join(); - if (validateWrites) { - validator.join(); - } - ozoneClient.close(); - return 0; - } - - private Options getOptions() { - Options options = new Options(); - - OptionBuilder.withDescription("prints usage."); - Option optHelp = OptionBuilder.create(HELP); - - OptionBuilder.withArgName("online | offline"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("specifies the mode of " + - "Freon run."); - Option optMode = OptionBuilder.create(MODE); - - OptionBuilder.withArgName("source url"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("specifies the URL of s3 " + - "commoncrawl warc file to be used when the mode is online."); - Option optSource = OptionBuilder.create(SOURCE); - - OptionBuilder.withDescription("do random validation of " + - "data written into ozone, only subset of data is validated."); - Option optValidateWrite = OptionBuilder.create(VALIDATE_WRITE); - - - OptionBuilder.withDescription("directory where json is created"); - OptionBuilder.hasArg(); - Option optJsonDir = OptionBuilder.create(JSON_WRITE_DIRECTORY); - - OptionBuilder.withArgName("value"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("number of threads to be launched " + - "for the run"); - Option optNumOfThreads = OptionBuilder.create(NUM_OF_THREADS); - - OptionBuilder.withArgName("value"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("specifies number of Volumes to be " + - "created in offline mode"); - Option optNumOfVolumes = OptionBuilder.create(NUM_OF_VOLUMES); - - OptionBuilder.withArgName("value"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("specifies number of Buckets to be " + - "created per Volume in offline mode"); - Option 
optNumOfBuckets = OptionBuilder.create(NUM_OF_BUCKETS); - - OptionBuilder.withArgName("value"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("specifies number of Keys to be " + - "created per Bucket in offline mode"); - Option optNumOfKeys = OptionBuilder.create(NUM_OF_KEYS); - - OptionBuilder.withArgName("value"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("specifies the size of Key in bytes to be " + - "created in offline mode"); - Option optKeySize = OptionBuilder.create(KEY_SIZE); - - OptionBuilder.withArgName(RATIS); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("Use Ratis as the default replication " + - "strategy"); - Option optRatis = OptionBuilder.create(RATIS); - - options.addOption(optHelp); - options.addOption(optMode); - options.addOption(optSource); - options.addOption(optValidateWrite); - options.addOption(optJsonDir); - options.addOption(optNumOfThreads); - options.addOption(optNumOfVolumes); - options.addOption(optNumOfBuckets); - options.addOption(optNumOfKeys); - options.addOption(optKeySize); - options.addOption(optRatis); - return options; - } - - private void parseOptions(CommandLine cmdLine) { - printUsage = cmdLine.hasOption(HELP); - - mode = cmdLine.getOptionValue(MODE, MODE_DEFAULT); - - source = cmdLine.getOptionValue(SOURCE, SOURCE_DEFAULT); - - numOfThreads = - cmdLine.getOptionValue(NUM_OF_THREADS, NUM_OF_THREADS_DEFAULT); - - validateWrites = cmdLine.hasOption(VALIDATE_WRITE); - - jsonDir = cmdLine.getOptionValue(JSON_WRITE_DIRECTORY); - - numOfVolumes = - cmdLine.getOptionValue(NUM_OF_VOLUMES, NUM_OF_VOLUMES_DEFAULT); - - numOfBuckets = - cmdLine.getOptionValue(NUM_OF_BUCKETS, NUM_OF_BUCKETS_DEFAULT); - - numOfKeys = cmdLine.getOptionValue(NUM_OF_KEYS, NUM_OF_KEYS_DEFAULT); - - keySize = cmdLine.hasOption(KEY_SIZE) ? 
- Integer.parseInt(cmdLine.getOptionValue(KEY_SIZE)) : KEY_SIZE_DEFAULT; - if (keySize < 1024) { - throw new IllegalArgumentException( - "keySize can not be less than 1024 bytes"); - } - - useRatis = cmdLine.hasOption(RATIS); - - type = ReplicationType.STAND_ALONE; - factor = ReplicationFactor.ONE; - - if (useRatis) { - type = ReplicationType.RATIS; - int replicationFactor = Integer.parseInt(cmdLine.getOptionValue(RATIS)); - switch (replicationFactor) { - case 1: - factor = ReplicationFactor.ONE; - break; - case 3: - factor = ReplicationFactor.THREE; - break; - default: - throw new IllegalArgumentException("Illegal replication factor:" - + replicationFactor); - } - } - } - - private void usage() { - System.out.println("Options supported are:"); - System.out.println("-numOfThreads " - + "number of threads to be launched for the run."); - System.out.println("-validateWrites " - + "do random validation of data written into ozone, " + - "only subset of data is validated."); - System.out.println("-jsonDir " - + "directory where json is created."); - System.out.println("-mode [online | offline] " - + "specifies the mode in which Freon should run."); - System.out.println("-source " - + "specifies the URL of s3 commoncrawl warc file to " + - "be used when the mode is online."); - System.out.println("-numOfVolumes " - + "specifies number of Volumes to be created in offline mode"); - System.out.println("-numOfBuckets " - + "specifies number of Buckets to be created per Volume " + - "in offline mode"); - System.out.println("-numOfKeys " - + "specifies number of Keys to be created per Bucket " + - "in offline mode"); - System.out.println("-keySize " - + "specifies the size of Key in bytes to be created in offline mode"); - System.out.println("-help " - + "prints usage."); - System.out.println(); - } - - /** - * Adds ShutdownHook to print statistics. - */ - private void addShutdownHook() { - Runtime.getRuntime().addShutdownHook( - new Thread(() -> printStats(System.out))); - } - - private Thread getProgressBarThread() { - Supplier currentValue; - long maxValue; - - if (mode.equals("online")) { - throw new UnsupportedOperationException("Not yet implemented."); - } else { - currentValue = () -> numberOfKeysAdded.get(); - maxValue = Long.parseLong(numOfVolumes) * - Long.parseLong(numOfBuckets) * - Long.parseLong(numOfKeys); - } - Thread progressBarThread = new Thread( - new ProgressBar(System.out, currentValue, maxValue)); - progressBarThread.setName("ProgressBar"); - return progressBarThread; - } - - /** - * Prints stats of {@link Freon} run to the PrintStream. 
- * - * @param out PrintStream - */ - private void printStats(PrintStream out) { - long endTime = System.nanoTime() - startTime; - String execTime = DurationFormatUtils - .formatDuration(TimeUnit.NANOSECONDS.toMillis(endTime), - DURATION_FORMAT); - - long volumeTime = TimeUnit.NANOSECONDS.toMillis(volumeCreationTime.get()) - / threadPoolSize; - String prettyAverageVolumeTime = - DurationFormatUtils.formatDuration(volumeTime, DURATION_FORMAT); - - long bucketTime = TimeUnit.NANOSECONDS.toMillis(bucketCreationTime.get()) - / threadPoolSize; - String prettyAverageBucketTime = - DurationFormatUtils.formatDuration(bucketTime, DURATION_FORMAT); - - long averageKeyCreationTime = - TimeUnit.NANOSECONDS.toMillis(keyCreationTime.get()) - / threadPoolSize; - String prettyAverageKeyCreationTime = DurationFormatUtils - .formatDuration(averageKeyCreationTime, DURATION_FORMAT); - - long averageKeyWriteTime = - TimeUnit.NANOSECONDS.toMillis(keyWriteTime.get()) / threadPoolSize; - String prettyAverageKeyWriteTime = DurationFormatUtils - .formatDuration(averageKeyWriteTime, DURATION_FORMAT); - - out.println(); - out.println("***************************************************"); - out.println("Status: " + (exception ? "Failed" : "Success")); - out.println("Git Base Revision: " + VersionInfo.getRevision()); - out.println("Number of Volumes created: " + numberOfVolumesCreated); - out.println("Number of Buckets created: " + numberOfBucketsCreated); - out.println("Number of Keys added: " + numberOfKeysAdded); - out.println("Ratis replication factor: " + factor.name()); - out.println("Ratis replication type: " + type.name()); - out.println( - "Average Time spent in volume creation: " + prettyAverageVolumeTime); - out.println( - "Average Time spent in bucket creation: " + prettyAverageBucketTime); - out.println( - "Average Time spent in key creation: " + prettyAverageKeyCreationTime); - out.println( - "Average Time spent in key write: " + prettyAverageKeyWriteTime); - out.println("Total bytes written: " + totalBytesWritten); - if (validateWrites) { - out.println("Total number of writes validated: " + - totalWritesValidated); - out.println("Writes validated: " + - (100.0 * totalWritesValidated / numberOfKeysAdded.get()) - + " %"); - out.println("Successful validation: " + - writeValidationSuccessCount); - out.println("Unsuccessful validation: " + - writeValidationFailureCount); - } - out.println("Total Execution time: " + execTime); - out.println("***************************************************"); - - if (jsonDir != null) { - - String[][] quantileTime = - new String[FreonOps.values().length][QUANTILES + 1]; - String[] deviations = new String[FreonOps.values().length]; - String[] means = new String[FreonOps.values().length]; - for (FreonOps ops : FreonOps.values()) { - Snapshot snapshot = histograms.get(ops.ordinal()).getSnapshot(); - for (int i = 0; i <= QUANTILES; i++) { - quantileTime[ops.ordinal()][i] = DurationFormatUtils.formatDuration( - TimeUnit.NANOSECONDS - .toMillis((long) snapshot.getValue((1.0 / QUANTILES) * i)), - DURATION_FORMAT); - } - deviations[ops.ordinal()] = DurationFormatUtils.formatDuration( - TimeUnit.NANOSECONDS.toMillis((long) snapshot.getStdDev()), - DURATION_FORMAT); - means[ops.ordinal()] = DurationFormatUtils.formatDuration( - TimeUnit.NANOSECONDS.toMillis((long) snapshot.getMean()), - DURATION_FORMAT); - } - - FreonJobInfo jobInfo = new FreonJobInfo().setExecTime(execTime) - .setGitBaseRevision(VersionInfo.getRevision()) - 
.setMeanVolumeCreateTime(means[FreonOps.VOLUME_CREATE.ordinal()]) - .setDeviationVolumeCreateTime( - deviations[FreonOps.VOLUME_CREATE.ordinal()]) - .setTenQuantileVolumeCreateTime( - quantileTime[FreonOps.VOLUME_CREATE.ordinal()]) - .setMeanBucketCreateTime(means[FreonOps.BUCKET_CREATE.ordinal()]) - .setDeviationBucketCreateTime( - deviations[FreonOps.BUCKET_CREATE.ordinal()]) - .setTenQuantileBucketCreateTime( - quantileTime[FreonOps.BUCKET_CREATE.ordinal()]) - .setMeanKeyCreateTime(means[FreonOps.KEY_CREATE.ordinal()]) - .setDeviationKeyCreateTime(deviations[FreonOps.KEY_CREATE.ordinal()]) - .setTenQuantileKeyCreateTime( - quantileTime[FreonOps.KEY_CREATE.ordinal()]) - .setMeanKeyWriteTime(means[FreonOps.KEY_WRITE.ordinal()]) - .setDeviationKeyWriteTime(deviations[FreonOps.KEY_WRITE.ordinal()]) - .setTenQuantileKeyWriteTime( - quantileTime[FreonOps.KEY_WRITE.ordinal()]); - String jsonName = - new SimpleDateFormat("yyyyMMddHHmmss").format(Time.now()) + ".json"; - String jsonPath = jsonDir + "/" + jsonName; - FileOutputStream os = null; - try { - os = new FileOutputStream(jsonPath); - ObjectMapper mapper = new ObjectMapper(); - mapper.setVisibility(PropertyAccessor.FIELD, - JsonAutoDetect.Visibility.ANY); - ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); - writer.writeValue(os, jobInfo); - } catch (FileNotFoundException e) { - out.println("Json File could not be created for the path: " + jsonPath); - out.println(e); - } catch (IOException e) { - out.println("Json object could not be created"); - out.println(e); - } finally { - try { - if (os != null) { - os.close(); - } - } catch (IOException e) { - LOG.warn("Could not close the output stream for json", e); - } - } - } - } - - /** - * Returns the number of volumes created. - * @return volume count. - */ - @VisibleForTesting - int getNumberOfVolumesCreated() { - return numberOfVolumesCreated.get(); - } - - /** - * Returns the number of buckets created. - * @return bucket count. - */ - @VisibleForTesting - int getNumberOfBucketsCreated() { - return numberOfBucketsCreated.get(); - } - - /** - * Returns the number of keys added. - * @return keys count. - */ - @VisibleForTesting - long getNumberOfKeysAdded() { - return numberOfKeysAdded.get(); - } - - /** - * Returns true if random validation of write is enabled. - * @return validateWrites - */ - @VisibleForTesting - boolean getValidateWrites() { - return validateWrites; - } - - /** - * Returns the number of keys validated. - * @return validated key count. - */ - @VisibleForTesting - long getTotalKeysValidated() { - return totalWritesValidated; - } - - /** - * Returns the number of successful validation. - * @return successful validation count. - */ - @VisibleForTesting - long getSuccessfulValidationCount() { - return writeValidationSuccessCount; - } - - /** - * Returns the number of unsuccessful validation. - * @return unsuccessful validation count. - */ - @VisibleForTesting - long getUnsuccessfulValidationCount() { - return writeValidationFailureCount; - } - - /** - * Returns the length of the common key value initialized. - * @return key value length initialized. - */ - @VisibleForTesting - long getKeyValueLength(){ - return keyValue.length; - } - - /** - * Wrapper to hold ozone key-value pair. - */ - private static class KeyValue { - - /** - * Bucket name associated with the key-value. - */ - private OzoneBucket bucket; - /** - * Key name associated with the key-value. - */ - private String key; - /** - * Value associated with the key-value. 
- */ - private byte[] value; - - /** - * Constructs a new ozone key-value pair. - * - * @param key key part - * @param value value part - */ - KeyValue(OzoneBucket bucket, String key, byte[] value) { - this.bucket = bucket; - this.key = key; - this.value = value; - } - } - - private class OfflineProcessor implements Runnable { - - private int totalBuckets; - private int totalKeys; - private String volumeName; - - OfflineProcessor(String volumeName) { - this.totalBuckets = Integer.parseInt(numOfBuckets); - this.totalKeys = Integer.parseInt(numOfKeys); - this.volumeName = volumeName; - } - - @Override - public void run() { - LOG.trace("Creating volume: {}", volumeName); - long start = System.nanoTime(); - OzoneVolume volume; - try { - objectStore.createVolume(volumeName); - long volumeCreationDuration = System.nanoTime() - start; - volumeCreationTime.getAndAdd(volumeCreationDuration); - histograms.get(FreonOps.VOLUME_CREATE.ordinal()) - .update(volumeCreationDuration); - numberOfVolumesCreated.getAndIncrement(); - volume = objectStore.getVolume(volumeName); - } catch (IOException e) { - exception = true; - LOG.error("Could not create volume", e); - return; - } - - Long threadKeyWriteTime = 0L; - for (int j = 0; j < totalBuckets; j++) { - String bucketName = "bucket-" + j + "-" + - RandomStringUtils.randomNumeric(5); - try { - LOG.trace("Creating bucket: {} in volume: {}", - bucketName, volume.getName()); - start = System.nanoTime(); - volume.createBucket(bucketName); - long bucketCreationDuration = System.nanoTime() - start; - histograms.get(FreonOps.BUCKET_CREATE.ordinal()) - .update(bucketCreationDuration); - bucketCreationTime.getAndAdd(bucketCreationDuration); - numberOfBucketsCreated.getAndIncrement(); - OzoneBucket bucket = volume.getBucket(bucketName); - for (int k = 0; k < totalKeys; k++) { - String key = "key-" + k + "-" + - RandomStringUtils.randomNumeric(5); - byte[] randomValue = - DFSUtil.string2Bytes(UUID.randomUUID().toString()); - try { - LOG.trace("Adding key: {} in bucket: {} of volume: {}", - key, bucket, volume); - long keyCreateStart = System.nanoTime(); - OzoneOutputStream os = - bucket.createKey(key, keySize, type, factor); - long keyCreationDuration = System.nanoTime() - keyCreateStart; - histograms.get(FreonOps.KEY_CREATE.ordinal()) - .update(keyCreationDuration); - keyCreationTime.getAndAdd(keyCreationDuration); - long keyWriteStart = System.nanoTime(); - os.write(keyValue); - os.write(randomValue); - os.close(); - long keyWriteDuration = System.nanoTime() - keyWriteStart; - threadKeyWriteTime += keyWriteDuration; - histograms.get(FreonOps.KEY_WRITE.ordinal()) - .update(keyWriteDuration); - totalBytesWritten.getAndAdd(keySize); - numberOfKeysAdded.getAndIncrement(); - if (validateWrites) { - byte[] value = ArrayUtils.addAll(keyValue, randomValue); - boolean validate = validationQueue.offer( - new KeyValue(bucket, key, value)); - if (validate) { - LOG.trace("Key {}, is queued for validation.", key); - } - } - } catch (Exception e) { - exception = true; - LOG.error("Exception while adding key: {} in bucket: {}" + - " of volume: {}.", key, bucket, volume, e); - } - } - } catch (Exception e) { - exception = true; - LOG.error("Exception while creating bucket: {}" + - " in volume: {}.", bucketName, volume, e); - } - } - - keyWriteTime.getAndAdd(threadKeyWriteTime); - } - - } - - private final class FreonJobInfo { - - private String status; - private String gitBaseRevision; - private String jobStartTime; - private String numOfVolumes; - private String numOfBuckets; - 
private String numOfKeys; - private String numOfThreads; - private String mode; - private String dataWritten; - private String execTime; - private String replicationFactor; - private String replicationType; - - private int keySize; - - private String totalThroughputPerSecond; - - private String meanVolumeCreateTime; - private String deviationVolumeCreateTime; - private String[] tenQuantileVolumeCreateTime; - - private String meanBucketCreateTime; - private String deviationBucketCreateTime; - private String[] tenQuantileBucketCreateTime; - - private String meanKeyCreateTime; - private String deviationKeyCreateTime; - private String[] tenQuantileKeyCreateTime; - - private String meanKeyWriteTime; - private String deviationKeyWriteTime; - private String[] tenQuantileKeyWriteTime; - - private FreonJobInfo() { - this.status = exception ? "Failed" : "Success"; - this.numOfVolumes = Freon.this.numOfVolumes; - this.numOfBuckets = Freon.this.numOfBuckets; - this.numOfKeys = Freon.this.numOfKeys; - this.numOfThreads = Freon.this.numOfThreads; - this.keySize = Freon.this.keySize; - this.mode = Freon.this.mode; - this.jobStartTime = Time.formatTime(Freon.this.jobStartTime); - this.replicationFactor = Freon.this.factor.name(); - this.replicationType = Freon.this.type.name(); - - long totalBytes = - Long.parseLong(numOfVolumes) * Long.parseLong(numOfBuckets) * Long - .parseLong(numOfKeys) * keySize; - this.dataWritten = getInStorageUnits((double) totalBytes); - this.totalThroughputPerSecond = getInStorageUnits( - (totalBytes * 1.0) / TimeUnit.NANOSECONDS - .toSeconds(Freon.this.keyWriteTime.get() / threadPoolSize)); - } - - private String getInStorageUnits(Double value) { - double size; - OzoneQuota.Units unit; - if ((long) (value / OzoneConsts.TB) != 0) { - size = value / OzoneConsts.TB; - unit = OzoneQuota.Units.TB; - } else if ((long) (value / OzoneConsts.GB) != 0) { - size = value / OzoneConsts.GB; - unit = OzoneQuota.Units.GB; - } else if ((long) (value / OzoneConsts.MB) != 0) { - size = value / OzoneConsts.MB; - unit = OzoneQuota.Units.MB; - } else if ((long) (value / OzoneConsts.KB) != 0) { - size = value / OzoneConsts.KB; - unit = OzoneQuota.Units.KB; - } else { - size = value; - unit = OzoneQuota.Units.BYTES; - } - return size + " " + unit; - } - - public FreonJobInfo setGitBaseRevision(String gitBaseRevisionVal) { - gitBaseRevision = gitBaseRevisionVal; - return this; - } - - public FreonJobInfo setExecTime(String execTimeVal) { - execTime = execTimeVal; - return this; - } - - public FreonJobInfo setMeanKeyWriteTime(String deviationKeyWriteTimeVal) { - this.meanKeyWriteTime = deviationKeyWriteTimeVal; - return this; - } - - public FreonJobInfo setDeviationKeyWriteTime( - String deviationKeyWriteTimeVal) { - this.deviationKeyWriteTime = deviationKeyWriteTimeVal; - return this; - } - - public FreonJobInfo setTenQuantileKeyWriteTime( - String[] tenQuantileKeyWriteTimeVal) { - this.tenQuantileKeyWriteTime = tenQuantileKeyWriteTimeVal; - return this; - } - - public FreonJobInfo setMeanKeyCreateTime(String deviationKeyWriteTimeVal) { - this.meanKeyCreateTime = deviationKeyWriteTimeVal; - return this; - } - - public FreonJobInfo setDeviationKeyCreateTime( - String deviationKeyCreateTimeVal) { - this.deviationKeyCreateTime = deviationKeyCreateTimeVal; - return this; - } - - public FreonJobInfo setTenQuantileKeyCreateTime( - String[] tenQuantileKeyCreateTimeVal) { - this.tenQuantileKeyCreateTime = tenQuantileKeyCreateTimeVal; - return this; - } - - public FreonJobInfo setMeanBucketCreateTime( - String 
deviationKeyWriteTimeVal) { - this.meanBucketCreateTime = deviationKeyWriteTimeVal; - return this; - } +@Command( + name = "ozone freon", + description = "Load generator and tester tool for ozone", + subcommands = RandomKeyGenerator.class, + versionProvider = HddsVersionProvider.class, + mixinStandardHelpOptions = true) +public class Freon extends GenericCli { - public FreonJobInfo setDeviationBucketCreateTime( - String deviationBucketCreateTimeVal) { - this.deviationBucketCreateTime = deviationBucketCreateTimeVal; - return this; - } - - public FreonJobInfo setTenQuantileBucketCreateTime( - String[] tenQuantileBucketCreateTimeVal) { - this.tenQuantileBucketCreateTime = tenQuantileBucketCreateTimeVal; - return this; - } - - public FreonJobInfo setMeanVolumeCreateTime( - String deviationKeyWriteTimeVal) { - this.meanVolumeCreateTime = deviationKeyWriteTimeVal; - return this; - } - - public FreonJobInfo setDeviationVolumeCreateTime( - String deviationVolumeCreateTimeVal) { - this.deviationVolumeCreateTime = deviationVolumeCreateTimeVal; - return this; - } - - public FreonJobInfo setTenQuantileVolumeCreateTime( - String[] tenQuantileVolumeCreateTimeVal) { - this.tenQuantileVolumeCreateTime = tenQuantileVolumeCreateTimeVal; - return this; - } - - public String getJobStartTime() { - return jobStartTime; - } - - public String getNumOfVolumes() { - return numOfVolumes; - } - - public String getNumOfBuckets() { - return numOfBuckets; - } - - public String getNumOfKeys() { - return numOfKeys; - } - - public String getNumOfThreads() { - return numOfThreads; - } - - public String getMode() { - return mode; - } - - public String getExecTime() { - return execTime; - } - - public String getReplicationFactor() { - return replicationFactor; - } - - public String getReplicationType() { - return replicationType; - } - - public String getStatus() { - return status; - } - - public int getKeySize() { - return keySize; - } - - public String getGitBaseRevision() { - return gitBaseRevision; - } - - public String getDataWritten() { - return dataWritten; - } - - public String getTotalThroughputPerSecond() { - return totalThroughputPerSecond; - } - - public String getMeanVolumeCreateTime() { - return meanVolumeCreateTime; - } - - public String getDeviationVolumeCreateTime() { - return deviationVolumeCreateTime; - } - - public String[] getTenQuantileVolumeCreateTime() { - return tenQuantileVolumeCreateTime; - } - - public String getMeanBucketCreateTime() { - return meanBucketCreateTime; - } - - public String getDeviationBucketCreateTime() { - return deviationBucketCreateTime; - } - - public String[] getTenQuantileBucketCreateTime() { - return tenQuantileBucketCreateTime; - } - - public String getMeanKeyCreateTime() { - return meanKeyCreateTime; - } - - public String getDeviationKeyCreateTime() { - return deviationKeyCreateTime; - } - - public String[] getTenQuantileKeyCreateTime() { - return tenQuantileKeyCreateTime; - } - - public String getMeanKeyWriteTime() { - return meanKeyWriteTime; - } - - public String getDeviationKeyWriteTime() { - return deviationKeyWriteTime; - } - - public String[] getTenQuantileKeyWriteTime() { - return tenQuantileKeyWriteTime; - } - } - - private class ProgressBar implements Runnable { - - private static final long REFRESH_INTERVAL = 1000L; - - private PrintStream stream; - private Supplier currentValue; - private long maxValue; - - ProgressBar(PrintStream stream, Supplier currentValue, - long maxValue) { - this.stream = stream; - this.currentValue = currentValue; - this.maxValue = 
maxValue; - } - - @Override - public void run() { - try { - stream.println(); - long value; - while ((value = currentValue.get()) < maxValue) { - print(value); - if (completed) { - break; - } - Thread.sleep(REFRESH_INTERVAL); - } - if (exception) { - stream.println(); - stream.println("Incomplete termination, " + - "check log for exception."); - } else { - print(maxValue); - } - stream.println(); - } catch (InterruptedException e) { - } - } - - /** - * Given current value prints the progress bar. - * - * @param value - */ - private void print(long value) { - stream.print('\r'); - double percent = 100.0 * value / maxValue; - StringBuilder sb = new StringBuilder(); - sb.append(" " + String.format("%.2f", percent) + "% |"); - - for (int i = 0; i <= percent; i++) { - sb.append('█'); - } - for (int j = 0; j < 100 - percent; j++) { - sb.append(' '); - } - sb.append("| "); - sb.append(value + "/" + maxValue); - long timeInSec = TimeUnit.SECONDS.convert( - System.nanoTime() - startTime, TimeUnit.NANOSECONDS); - String timeToPrint = String.format("%d:%02d:%02d", timeInSec / 3600, - (timeInSec % 3600) / 60, timeInSec % 60); - sb.append(" Time: " + timeToPrint); - stream.print(sb); - } + public static void main(String[] args) { + new Freon().run(args); } - /** - * Validates the write done in ozone cluster. - */ - private class Validator implements Runnable { - - @Override - public void run() { - while (!completed) { - try { - KeyValue kv = validationQueue.poll(5, TimeUnit.SECONDS); - if (kv != null) { - - OzoneInputStream is = kv.bucket.readKey(kv.key); - byte[] value = new byte[kv.value.length]; - int length = is.read(value); - totalWritesValidated++; - if (length == kv.value.length && Arrays.equals(value, kv.value)) { - writeValidationSuccessCount++; - } else { - writeValidationFailureCount++; - LOG.warn("Data validation error for key {}/{}/{}", - kv.bucket.getVolumeName(), kv.bucket, kv.key); - LOG.warn("Expected: {}, Actual: {}", - DFSUtil.bytes2String(kv.value), - DFSUtil.bytes2String(value)); - } - } - } catch (IOException | InterruptedException ex) { - LOG.error("Exception while validating write: " + ex.getMessage()); - } - } - } - } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/RandomKeyGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/RandomKeyGenerator.java new file mode 100644 index 00000000000..d73e37e062b --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/RandomKeyGenerator.java @@ -0,0 +1,1039 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.freon; + +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.UUID; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.client.OzoneQuota; +import org.apache.hadoop.hdds.client.ReplicationFactor; +import org.apache.hadoop.hdds.client.ReplicationType; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.io.OzoneInputStream; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.VersionInfo; + +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Snapshot; +import com.codahale.metrics.UniformReservoir; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.PropertyAccessor; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.google.common.annotations.VisibleForTesting; +import static java.lang.Math.min; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import picocli.CommandLine.ParentCommand; + +/** + * Data generator tool to generate as much keys as possible. 
+ */ +@Command(name = "randomkeys", + aliases = "rk", + description = "Generate volumes/buckets and put generated keys.", + versionProvider = HddsVersionProvider.class, + mixinStandardHelpOptions = true, + showDefaultValues = true) +public final class RandomKeyGenerator implements Callable { + + @ParentCommand + private Freon freon; + + enum FreonOps { + VOLUME_CREATE, + BUCKET_CREATE, + KEY_CREATE, + KEY_WRITE + } + + private static final String RATIS = "ratis"; + + private static final String DURATION_FORMAT = "HH:mm:ss,SSS"; + + private static final int QUANTILES = 10; + + private static final Logger LOG = + LoggerFactory.getLogger(RandomKeyGenerator.class); + + private boolean completed = false; + private boolean exception = false; + + @Option(names = "--numOfThreads", + description = "number of threads to be launched for the run", + defaultValue = "10") + private int numOfThreads = 10; + + @Option(names = "--numOfVolumes", + description = "specifies number of Volumes to be created in offline mode", + defaultValue = "10") + private int numOfVolumes = 10; + + @Option(names = "--numOfBuckets", + description = "specifies number of Buckets to be created per Volume", + defaultValue = "1000") + private int numOfBuckets = 1000; + + @Option( + names = "--numOfKeys", + description = "specifies number of Keys to be created per Bucket", + defaultValue = "500000" + ) + private int numOfKeys = 500000; + + @Option( + names = "--keySize", + description = "Specifies the size of Key in bytes to be created", + defaultValue = "10240" + ) + private int keySize = 10240; + + @Option( + names = "--json", + description = "directory where json is created." + ) + private String jsonDir; + + @Option( + names = "--replicationType", + description = "Replication type (STAND_ALONE, RATIS)", + defaultValue = "STAND_ALONE" + ) + private ReplicationType type = ReplicationType.STAND_ALONE; + + @Option( + names = "--factor", + description = "Replication factor (ONE, THREE)", + defaultValue = "ONE" + ) + private ReplicationFactor factor = ReplicationFactor.ONE; + + private int threadPoolSize; + private byte[] keyValue = null; + + private boolean validateWrites; + + private OzoneClient ozoneClient; + private ObjectStore objectStore; + private ExecutorService processor; + + private long startTime; + private long jobStartTime; + + private AtomicLong volumeCreationTime; + private AtomicLong bucketCreationTime; + private AtomicLong keyCreationTime; + private AtomicLong keyWriteTime; + + private AtomicLong totalBytesWritten; + + private AtomicInteger numberOfVolumesCreated; + private AtomicInteger numberOfBucketsCreated; + private AtomicLong numberOfKeysAdded; + + private Long totalWritesValidated; + private Long writeValidationSuccessCount; + private Long writeValidationFailureCount; + + private BlockingQueue validationQueue; + private ArrayList histograms = new ArrayList<>(); + + private OzoneConfiguration ozoneConfiguration; + + RandomKeyGenerator() { + } + + @VisibleForTesting + RandomKeyGenerator(OzoneConfiguration ozoneConfiguration) { + this.ozoneConfiguration = ozoneConfiguration; + } + + public void init(OzoneConfiguration configuration) throws IOException { + startTime = System.nanoTime(); + jobStartTime = System.currentTimeMillis(); + volumeCreationTime = new AtomicLong(); + bucketCreationTime = new AtomicLong(); + keyCreationTime = new AtomicLong(); + keyWriteTime = new AtomicLong(); + totalBytesWritten = new AtomicLong(); + numberOfVolumesCreated = new AtomicInteger(); + numberOfBucketsCreated = new 
AtomicInteger(); + numberOfKeysAdded = new AtomicLong(); + ozoneClient = OzoneClientFactory.getClient(configuration); + objectStore = ozoneClient.getObjectStore(); + for (FreonOps ops : FreonOps.values()) { + histograms.add(ops.ordinal(), new Histogram(new UniformReservoir())); + } + } + + @Override + public Void call() throws Exception { + if (ozoneConfiguration != null) { + init(ozoneConfiguration); + } else { + init(freon.createOzoneConfiguration()); + } + + keyValue = + DFSUtil.string2Bytes(RandomStringUtils.randomAscii(keySize - 36)); + + LOG.info("Number of Threads: " + numOfThreads); + threadPoolSize = + min(numOfVolumes, numOfThreads); + processor = Executors.newFixedThreadPool(threadPoolSize); + addShutdownHook(); + + LOG.info("Number of Volumes: {}.", numOfVolumes); + LOG.info("Number of Buckets per Volume: {}.", numOfBuckets); + LOG.info("Number of Keys per Bucket: {}.", numOfKeys); + LOG.info("Key size: {} bytes", keySize); + for (int i = 0; i < numOfVolumes; i++) { + String volume = "vol-" + i + "-" + + RandomStringUtils.randomNumeric(5); + processor.submit(new OfflineProcessor(volume)); + } + + Thread validator = null; + if (validateWrites) { + totalWritesValidated = 0L; + writeValidationSuccessCount = 0L; + writeValidationFailureCount = 0L; + + validationQueue = + new ArrayBlockingQueue<>(numOfThreads); + validator = new Thread(new Validator()); + validator.start(); + LOG.info("Data validation is enabled."); + } + Thread progressbar = getProgressBarThread(); + LOG.info("Starting progress bar Thread."); + progressbar.start(); + processor.shutdown(); + processor.awaitTermination(Integer.MAX_VALUE, TimeUnit.MILLISECONDS); + completed = true; + progressbar.join(); + if (validateWrites) { + validator.join(); + } + ozoneClient.close(); + return null; + } + + private void parseOptions(CommandLine cmdLine) { + if (keySize < 1024) { + throw new IllegalArgumentException( + "keySize can not be less than 1024 bytes"); + } + + } + + /** + * Adds ShutdownHook to print statistics. + */ + private void addShutdownHook() { + Runtime.getRuntime().addShutdownHook( + new Thread(() -> printStats(System.out))); + } + + private Thread getProgressBarThread() { + Supplier currentValue; + long maxValue; + + currentValue = () -> numberOfKeysAdded.get(); + maxValue = numOfVolumes * + numOfBuckets * + numOfKeys; + + Thread progressBarThread = new Thread( + new ProgressBar(System.out, currentValue, maxValue)); + progressBarThread.setName("ProgressBar"); + return progressBarThread; + } + + /** + * Prints stats of {@link Freon} run to the PrintStream. 
+ * + * @param out PrintStream + */ + private void printStats(PrintStream out) { + long endTime = System.nanoTime() - startTime; + String execTime = DurationFormatUtils + .formatDuration(TimeUnit.NANOSECONDS.toMillis(endTime), + DURATION_FORMAT); + + long volumeTime = TimeUnit.NANOSECONDS.toMillis(volumeCreationTime.get()) + / threadPoolSize; + String prettyAverageVolumeTime = + DurationFormatUtils.formatDuration(volumeTime, DURATION_FORMAT); + + long bucketTime = TimeUnit.NANOSECONDS.toMillis(bucketCreationTime.get()) + / threadPoolSize; + String prettyAverageBucketTime = + DurationFormatUtils.formatDuration(bucketTime, DURATION_FORMAT); + + long averageKeyCreationTime = + TimeUnit.NANOSECONDS.toMillis(keyCreationTime.get()) + / threadPoolSize; + String prettyAverageKeyCreationTime = DurationFormatUtils + .formatDuration(averageKeyCreationTime, DURATION_FORMAT); + + long averageKeyWriteTime = + TimeUnit.NANOSECONDS.toMillis(keyWriteTime.get()) / threadPoolSize; + String prettyAverageKeyWriteTime = DurationFormatUtils + .formatDuration(averageKeyWriteTime, DURATION_FORMAT); + + out.println(); + out.println("***************************************************"); + out.println("Status: " + (exception ? "Failed" : "Success")); + out.println("Git Base Revision: " + VersionInfo.getRevision()); + out.println("Number of Volumes created: " + numberOfVolumesCreated); + out.println("Number of Buckets created: " + numberOfBucketsCreated); + out.println("Number of Keys added: " + numberOfKeysAdded); + out.println("Ratis replication factor: " + factor.name()); + out.println("Ratis replication type: " + type.name()); + out.println( + "Average Time spent in volume creation: " + prettyAverageVolumeTime); + out.println( + "Average Time spent in bucket creation: " + prettyAverageBucketTime); + out.println( + "Average Time spent in key creation: " + prettyAverageKeyCreationTime); + out.println( + "Average Time spent in key write: " + prettyAverageKeyWriteTime); + out.println("Total bytes written: " + totalBytesWritten); + if (validateWrites) { + out.println("Total number of writes validated: " + + totalWritesValidated); + out.println("Writes validated: " + + (100.0 * totalWritesValidated / numberOfKeysAdded.get()) + + " %"); + out.println("Successful validation: " + + writeValidationSuccessCount); + out.println("Unsuccessful validation: " + + writeValidationFailureCount); + } + out.println("Total Execution time: " + execTime); + out.println("***************************************************"); + + if (jsonDir != null) { + + String[][] quantileTime = + new String[FreonOps.values().length][QUANTILES + 1]; + String[] deviations = new String[FreonOps.values().length]; + String[] means = new String[FreonOps.values().length]; + for (FreonOps ops : FreonOps.values()) { + Snapshot snapshot = histograms.get(ops.ordinal()).getSnapshot(); + for (int i = 0; i <= QUANTILES; i++) { + quantileTime[ops.ordinal()][i] = DurationFormatUtils.formatDuration( + TimeUnit.NANOSECONDS + .toMillis((long) snapshot.getValue((1.0 / QUANTILES) * i)), + DURATION_FORMAT); + } + deviations[ops.ordinal()] = DurationFormatUtils.formatDuration( + TimeUnit.NANOSECONDS.toMillis((long) snapshot.getStdDev()), + DURATION_FORMAT); + means[ops.ordinal()] = DurationFormatUtils.formatDuration( + TimeUnit.NANOSECONDS.toMillis((long) snapshot.getMean()), + DURATION_FORMAT); + } + + FreonJobInfo jobInfo = new FreonJobInfo().setExecTime(execTime) + .setGitBaseRevision(VersionInfo.getRevision()) + 
.setMeanVolumeCreateTime(means[FreonOps.VOLUME_CREATE.ordinal()]) + .setDeviationVolumeCreateTime( + deviations[FreonOps.VOLUME_CREATE.ordinal()]) + .setTenQuantileVolumeCreateTime( + quantileTime[FreonOps.VOLUME_CREATE.ordinal()]) + .setMeanBucketCreateTime(means[FreonOps.BUCKET_CREATE.ordinal()]) + .setDeviationBucketCreateTime( + deviations[FreonOps.BUCKET_CREATE.ordinal()]) + .setTenQuantileBucketCreateTime( + quantileTime[FreonOps.BUCKET_CREATE.ordinal()]) + .setMeanKeyCreateTime(means[FreonOps.KEY_CREATE.ordinal()]) + .setDeviationKeyCreateTime(deviations[FreonOps.KEY_CREATE.ordinal()]) + .setTenQuantileKeyCreateTime( + quantileTime[FreonOps.KEY_CREATE.ordinal()]) + .setMeanKeyWriteTime(means[FreonOps.KEY_WRITE.ordinal()]) + .setDeviationKeyWriteTime(deviations[FreonOps.KEY_WRITE.ordinal()]) + .setTenQuantileKeyWriteTime( + quantileTime[FreonOps.KEY_WRITE.ordinal()]); + String jsonName = + new SimpleDateFormat("yyyyMMddHHmmss").format(Time.now()) + ".json"; + String jsonPath = jsonDir + "/" + jsonName; + FileOutputStream os = null; + try { + os = new FileOutputStream(jsonPath); + ObjectMapper mapper = new ObjectMapper(); + mapper.setVisibility(PropertyAccessor.FIELD, + JsonAutoDetect.Visibility.ANY); + ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); + writer.writeValue(os, jobInfo); + } catch (FileNotFoundException e) { + out.println("Json File could not be created for the path: " + jsonPath); + out.println(e); + } catch (IOException e) { + out.println("Json object could not be created"); + out.println(e); + } finally { + try { + if (os != null) { + os.close(); + } + } catch (IOException e) { + LOG.warn("Could not close the output stream for json", e); + } + } + } + } + + /** + * Returns the number of volumes created. + * + * @return volume count. + */ + @VisibleForTesting + int getNumberOfVolumesCreated() { + return numberOfVolumesCreated.get(); + } + + /** + * Returns the number of buckets created. + * + * @return bucket count. + */ + @VisibleForTesting + int getNumberOfBucketsCreated() { + return numberOfBucketsCreated.get(); + } + + /** + * Returns the number of keys added. + * + * @return keys count. + */ + @VisibleForTesting + long getNumberOfKeysAdded() { + return numberOfKeysAdded.get(); + } + + /** + * Returns true if random validation of write is enabled. + * + * @return validateWrites + */ + @VisibleForTesting + boolean getValidateWrites() { + return validateWrites; + } + + /** + * Returns the number of keys validated. + * + * @return validated key count. + */ + @VisibleForTesting + long getTotalKeysValidated() { + return totalWritesValidated; + } + + /** + * Returns the number of successful validation. + * + * @return successful validation count. + */ + @VisibleForTesting + long getSuccessfulValidationCount() { + return writeValidationSuccessCount; + } + + /** + * Returns the number of unsuccessful validation. + * + * @return unsuccessful validation count. + */ + @VisibleForTesting + long getUnsuccessfulValidationCount() { + return writeValidationFailureCount; + } + + /** + * Returns the length of the common key value initialized. + * + * @return key value length initialized. + */ + @VisibleForTesting + long getKeyValueLength() { + return keyValue.length; + } + + /** + * Wrapper to hold ozone key-value pair. + */ + private static class KeyValue { + + /** + * Bucket name associated with the key-value. + */ + private OzoneBucket bucket; + /** + * Key name associated with the key-value. + */ + private String key; + /** + * Value associated with the key-value. 
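
The report written by printStats() above serializes the FreonJobInfo fields straight to JSON (field visibility is forced to ANY), so the Java field names double as JSON keys. A small sketch of reading a report back with Jackson for offline analysis; the file name is only an example of the yyyyMMddHHmmss pattern used when writing:

    // Sketch: loading a previously written freon JSON report.
    ObjectMapper mapper = new ObjectMapper();
    JsonNode report = mapper.readTree(new File(jsonDir, "20180901120000.json"));
    System.out.println(report.get("meanKeyWriteTime").asText());
    System.out.println(report.get("totalThroughputPerSecond").asText());
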
+ */ + private byte[] value; + + /** + * Constructs a new ozone key-value pair. + * + * @param key key part + * @param value value part + */ + KeyValue(OzoneBucket bucket, String key, byte[] value) { + this.bucket = bucket; + this.key = key; + this.value = value; + } + } + + private class OfflineProcessor implements Runnable { + + private int totalBuckets; + private int totalKeys; + private String volumeName; + + OfflineProcessor(String volumeName) { + this.totalBuckets = numOfBuckets; + this.totalKeys = numOfKeys; + this.volumeName = volumeName; + } + + @Override + public void run() { + LOG.trace("Creating volume: {}", volumeName); + long start = System.nanoTime(); + OzoneVolume volume; + try { + objectStore.createVolume(volumeName); + long volumeCreationDuration = System.nanoTime() - start; + volumeCreationTime.getAndAdd(volumeCreationDuration); + histograms.get(FreonOps.VOLUME_CREATE.ordinal()) + .update(volumeCreationDuration); + numberOfVolumesCreated.getAndIncrement(); + volume = objectStore.getVolume(volumeName); + } catch (IOException e) { + exception = true; + LOG.error("Could not create volume", e); + return; + } + + Long threadKeyWriteTime = 0L; + for (int j = 0; j < totalBuckets; j++) { + String bucketName = "bucket-" + j + "-" + + RandomStringUtils.randomNumeric(5); + try { + LOG.trace("Creating bucket: {} in volume: {}", + bucketName, volume.getName()); + start = System.nanoTime(); + volume.createBucket(bucketName); + long bucketCreationDuration = System.nanoTime() - start; + histograms.get(FreonOps.BUCKET_CREATE.ordinal()) + .update(bucketCreationDuration); + bucketCreationTime.getAndAdd(bucketCreationDuration); + numberOfBucketsCreated.getAndIncrement(); + OzoneBucket bucket = volume.getBucket(bucketName); + for (int k = 0; k < totalKeys; k++) { + String key = "key-" + k + "-" + + RandomStringUtils.randomNumeric(5); + byte[] randomValue = + DFSUtil.string2Bytes(UUID.randomUUID().toString()); + try { + LOG.trace("Adding key: {} in bucket: {} of volume: {}", + key, bucket, volume); + long keyCreateStart = System.nanoTime(); + OzoneOutputStream os = + bucket.createKey(key, keySize, type, factor); + long keyCreationDuration = System.nanoTime() - keyCreateStart; + histograms.get(FreonOps.KEY_CREATE.ordinal()) + .update(keyCreationDuration); + keyCreationTime.getAndAdd(keyCreationDuration); + long keyWriteStart = System.nanoTime(); + os.write(keyValue); + os.write(randomValue); + os.close(); + long keyWriteDuration = System.nanoTime() - keyWriteStart; + threadKeyWriteTime += keyWriteDuration; + histograms.get(FreonOps.KEY_WRITE.ordinal()) + .update(keyWriteDuration); + totalBytesWritten.getAndAdd(keySize); + numberOfKeysAdded.getAndIncrement(); + if (validateWrites) { + byte[] value = ArrayUtils.addAll(keyValue, randomValue); + boolean validate = validationQueue.offer( + new KeyValue(bucket, key, value)); + if (validate) { + LOG.trace("Key {}, is queued for validation.", key); + } + } + } catch (Exception e) { + exception = true; + LOG.error("Exception while adding key: {} in bucket: {}" + + " of volume: {}.", key, bucket, volume, e); + } + } + } catch (Exception e) { + exception = true; + LOG.error("Exception while creating bucket: {}" + + " in volume: {}.", bucketName, volume, e); + } + } + + keyWriteTime.getAndAdd(threadKeyWriteTime); + } + + } + + private final class FreonJobInfo { + + private String status; + private String gitBaseRevision; + private String jobStartTime; + private int numOfVolumes; + private int numOfBuckets; + private int numOfKeys; + private int 
numOfThreads; + private String dataWritten; + private String execTime; + private String replicationFactor; + private String replicationType; + + private int keySize; + + private String totalThroughputPerSecond; + + private String meanVolumeCreateTime; + private String deviationVolumeCreateTime; + private String[] tenQuantileVolumeCreateTime; + + private String meanBucketCreateTime; + private String deviationBucketCreateTime; + private String[] tenQuantileBucketCreateTime; + + private String meanKeyCreateTime; + private String deviationKeyCreateTime; + private String[] tenQuantileKeyCreateTime; + + private String meanKeyWriteTime; + private String deviationKeyWriteTime; + private String[] tenQuantileKeyWriteTime; + + private FreonJobInfo() { + this.status = exception ? "Failed" : "Success"; + this.numOfVolumes = RandomKeyGenerator.this.numOfVolumes; + this.numOfBuckets = RandomKeyGenerator.this.numOfBuckets; + this.numOfKeys = RandomKeyGenerator.this.numOfKeys; + this.numOfThreads = RandomKeyGenerator.this.numOfThreads; + this.keySize = RandomKeyGenerator.this.keySize; + this.jobStartTime = Time.formatTime(RandomKeyGenerator.this.jobStartTime); + this.replicationFactor = RandomKeyGenerator.this.factor.name(); + this.replicationType = RandomKeyGenerator.this.type.name(); + + long totalBytes = + (long) numOfVolumes * numOfBuckets * numOfKeys * keySize; + this.dataWritten = getInStorageUnits((double) totalBytes); + this.totalThroughputPerSecond = getInStorageUnits( + (totalBytes * 1.0) / TimeUnit.NANOSECONDS + .toSeconds( + RandomKeyGenerator.this.keyWriteTime.get() / threadPoolSize)); + } + + private String getInStorageUnits(Double value) { + double size; + OzoneQuota.Units unit; + if ((long) (value / OzoneConsts.TB) != 0) { + size = value / OzoneConsts.TB; + unit = OzoneQuota.Units.TB; + } else if ((long) (value / OzoneConsts.GB) != 0) { + size = value / OzoneConsts.GB; + unit = OzoneQuota.Units.GB; + } else if ((long) (value / OzoneConsts.MB) != 0) { + size = value / OzoneConsts.MB; + unit = OzoneQuota.Units.MB; + } else if ((long) (value / OzoneConsts.KB) != 0) { + size = value / OzoneConsts.KB; + unit = OzoneQuota.Units.KB; + } else { + size = value; + unit = OzoneQuota.Units.BYTES; + } + return size + " " + unit; + } + + public FreonJobInfo setGitBaseRevision(String gitBaseRevisionVal) { + gitBaseRevision = gitBaseRevisionVal; + return this; + } + + public FreonJobInfo setExecTime(String execTimeVal) { + execTime = execTimeVal; + return this; + } + + public FreonJobInfo setMeanKeyWriteTime(String deviationKeyWriteTimeVal) { + this.meanKeyWriteTime = deviationKeyWriteTimeVal; + return this; + } + + public FreonJobInfo setDeviationKeyWriteTime( + String deviationKeyWriteTimeVal) { + this.deviationKeyWriteTime = deviationKeyWriteTimeVal; + return this; + } + + public FreonJobInfo setTenQuantileKeyWriteTime( + String[] tenQuantileKeyWriteTimeVal) { + this.tenQuantileKeyWriteTime = tenQuantileKeyWriteTimeVal; + return this; + } + + public FreonJobInfo setMeanKeyCreateTime(String deviationKeyWriteTimeVal) { + this.meanKeyCreateTime = deviationKeyWriteTimeVal; + return this; + } + + public FreonJobInfo setDeviationKeyCreateTime( + String deviationKeyCreateTimeVal) { + this.deviationKeyCreateTime = deviationKeyCreateTimeVal; + return this; + } + + public FreonJobInfo setTenQuantileKeyCreateTime( + String[] tenQuantileKeyCreateTimeVal) { + this.tenQuantileKeyCreateTime = tenQuantileKeyCreateTimeVal; + return this; + } + + public FreonJobInfo setMeanBucketCreateTime( + String 
deviationKeyWriteTimeVal) { + this.meanBucketCreateTime = deviationKeyWriteTimeVal; + return this; + } + + public FreonJobInfo setDeviationBucketCreateTime( + String deviationBucketCreateTimeVal) { + this.deviationBucketCreateTime = deviationBucketCreateTimeVal; + return this; + } + + public FreonJobInfo setTenQuantileBucketCreateTime( + String[] tenQuantileBucketCreateTimeVal) { + this.tenQuantileBucketCreateTime = tenQuantileBucketCreateTimeVal; + return this; + } + + public FreonJobInfo setMeanVolumeCreateTime( + String deviationKeyWriteTimeVal) { + this.meanVolumeCreateTime = deviationKeyWriteTimeVal; + return this; + } + + public FreonJobInfo setDeviationVolumeCreateTime( + String deviationVolumeCreateTimeVal) { + this.deviationVolumeCreateTime = deviationVolumeCreateTimeVal; + return this; + } + + public FreonJobInfo setTenQuantileVolumeCreateTime( + String[] tenQuantileVolumeCreateTimeVal) { + this.tenQuantileVolumeCreateTime = tenQuantileVolumeCreateTimeVal; + return this; + } + + public String getJobStartTime() { + return jobStartTime; + } + + public int getNumOfVolumes() { + return numOfVolumes; + } + + public int getNumOfBuckets() { + return numOfBuckets; + } + + public int getNumOfKeys() { + return numOfKeys; + } + + public int getNumOfThreads() { + return numOfThreads; + } + + public String getExecTime() { + return execTime; + } + + public String getReplicationFactor() { + return replicationFactor; + } + + public String getReplicationType() { + return replicationType; + } + + public String getStatus() { + return status; + } + + public int getKeySize() { + return keySize; + } + + public String getGitBaseRevision() { + return gitBaseRevision; + } + + public String getDataWritten() { + return dataWritten; + } + + public String getTotalThroughputPerSecond() { + return totalThroughputPerSecond; + } + + public String getMeanVolumeCreateTime() { + return meanVolumeCreateTime; + } + + public String getDeviationVolumeCreateTime() { + return deviationVolumeCreateTime; + } + + public String[] getTenQuantileVolumeCreateTime() { + return tenQuantileVolumeCreateTime; + } + + public String getMeanBucketCreateTime() { + return meanBucketCreateTime; + } + + public String getDeviationBucketCreateTime() { + return deviationBucketCreateTime; + } + + public String[] getTenQuantileBucketCreateTime() { + return tenQuantileBucketCreateTime; + } + + public String getMeanKeyCreateTime() { + return meanKeyCreateTime; + } + + public String getDeviationKeyCreateTime() { + return deviationKeyCreateTime; + } + + public String[] getTenQuantileKeyCreateTime() { + return tenQuantileKeyCreateTime; + } + + public String getMeanKeyWriteTime() { + return meanKeyWriteTime; + } + + public String getDeviationKeyWriteTime() { + return deviationKeyWriteTime; + } + + public String[] getTenQuantileKeyWriteTime() { + return tenQuantileKeyWriteTime; + } + } + + private class ProgressBar implements Runnable { + + private static final long REFRESH_INTERVAL = 1000L; + + private PrintStream stream; + private Supplier currentValue; + private long maxValue; + + ProgressBar(PrintStream stream, Supplier currentValue, + long maxValue) { + this.stream = stream; + this.currentValue = currentValue; + this.maxValue = maxValue; + } + + @Override + public void run() { + try { + stream.println(); + long value; + while ((value = currentValue.get()) < maxValue) { + print(value); + if (completed) { + break; + } + Thread.sleep(REFRESH_INTERVAL); + } + if (exception) { + stream.println(); + stream.println("Incomplete termination, " + + "check 
log for exception."); + } else { + print(maxValue); + } + stream.println(); + } catch (InterruptedException e) { + } + } + + /** + * Given current value prints the progress bar. + * + * @param value + */ + private void print(long value) { + stream.print('\r'); + double percent = 100.0 * value / maxValue; + StringBuilder sb = new StringBuilder(); + sb.append(" " + String.format("%.2f", percent) + "% |"); + + for (int i = 0; i <= percent; i++) { + sb.append('█'); + } + for (int j = 0; j < 100 - percent; j++) { + sb.append(' '); + } + sb.append("| "); + sb.append(value + "/" + maxValue); + long timeInSec = TimeUnit.SECONDS.convert( + System.nanoTime() - startTime, TimeUnit.NANOSECONDS); + String timeToPrint = String.format("%d:%02d:%02d", timeInSec / 3600, + (timeInSec % 3600) / 60, timeInSec % 60); + sb.append(" Time: " + timeToPrint); + stream.print(sb); + } + } + + /** + * Validates the write done in ozone cluster. + */ + private class Validator implements Runnable { + + @Override + public void run() { + while (!completed) { + try { + KeyValue kv = validationQueue.poll(5, TimeUnit.SECONDS); + if (kv != null) { + + OzoneInputStream is = kv.bucket.readKey(kv.key); + byte[] value = new byte[kv.value.length]; + int length = is.read(value); + totalWritesValidated++; + if (length == kv.value.length && Arrays.equals(value, kv.value)) { + writeValidationSuccessCount++; + } else { + writeValidationFailureCount++; + LOG.warn("Data validation error for key {}/{}/{}", + kv.bucket.getVolumeName(), kv.bucket, kv.key); + LOG.warn("Expected checksum: {}, Actual checksum: {}", + DigestUtils.md5Hex(kv.value), + DigestUtils.md5Hex(value)); + } + } + } catch (IOException | InterruptedException ex) { + LOG.error("Exception while validating write: " + ex.getMessage()); + } + } + } + } + + @VisibleForTesting + public void setNumOfVolumes(int numOfVolumes) { + this.numOfVolumes = numOfVolumes; + } + + @VisibleForTesting + public void setNumOfBuckets(int numOfBuckets) { + this.numOfBuckets = numOfBuckets; + } + + @VisibleForTesting + public void setNumOfKeys(int numOfKeys) { + this.numOfKeys = numOfKeys; + } + + @VisibleForTesting + public void setNumOfThreads(int numOfThreads) { + this.numOfThreads = numOfThreads; + } + + @VisibleForTesting + public void setKeySize(int keySize) { + this.keySize = keySize; + } + + @VisibleForTesting + public void setType(ReplicationType type) { + this.type = type; + } + + @VisibleForTesting + public void setFactor(ReplicationFactor factor) { + this.factor = factor; + } + + @VisibleForTesting + public void setValidateWrites(boolean validateWrites) { + this.validateWrites = validateWrites; + } +} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genconf/GenerateOzoneRequiredConfigurations.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genconf/GenerateOzoneRequiredConfigurations.java index 6296c9dc57f..688b7358496 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genconf/GenerateOzoneRequiredConfigurations.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genconf/GenerateOzoneRequiredConfigurations.java @@ -25,6 +25,7 @@ import javax.xml.bind.Marshaller; import java.io.File; import java.net.URL; +import java.nio.file.Files; import java.nio.file.InvalidPathException; import java.nio.file.Paths; import java.util.ArrayList; @@ -94,18 +95,17 @@ public static void main(String[] args) { } /** - * Check if the path is valid. + * Check if the path is valid directory. 
* * @param path - * @return true, if path is valid, else return false + * @return true, if path is valid directory, else return false */ public static boolean isValidPath(String path) { try { - Paths.get(path); + return Files.isDirectory(Paths.get(path)); } catch (InvalidPathException | NullPointerException ex) { return false; } - return true; } /** @@ -129,12 +129,12 @@ public static boolean canWrite(String path) { public static int generateConfigurations(String path) throws JAXBException { if (!isValidPath(path)) { - System.out.println("Invalid path or insufficient permission"); + System.out.println("Invalid directory path."); return FAILURE; } if (!canWrite(path)) { - System.out.println("Invalid path or insufficient permission"); + System.out.println("Insufficient permission."); return FAILURE; } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkContainerStateMap.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkContainerStateMap.java index c344bbe1f6e..e680dd2a111 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkContainerStateMap.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkContainerStateMap.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.hdds.scm.container.common.helpers.PipelineID; import org.apache.hadoop.hdds.scm.container.states.ContainerStateMap; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.util.Time; @@ -45,14 +46,20 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED; +/** + * Benchmarks ContainerStateMap class. 
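
This benchmark is normally launched through Genesis (see the OptionsBuilder changes further down in this patch); a sketch of running it on its own through the plain JMH API, reusing the same warmup and measurement settings Genesis configures:

    // Sketch: running only BenchMarkContainerStateMap with JMH.
    Options opt = new OptionsBuilder()
        .include(BenchMarkContainerStateMap.class.getSimpleName())
        .warmupIterations(5)
        .measurementIterations(20)
        .forks(1)
        .build();
    new Runner(opt).run();
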
+ */ @State(Scope.Thread) public class BenchMarkContainerStateMap { private ContainerStateMap stateMap; private AtomicInteger containerID; + private AtomicInteger runCount; + private static int errorFrequency = 100; @Setup(Level.Trial) public void initialize() throws IOException { stateMap = new ContainerStateMap(); + runCount = new AtomicInteger(0); Pipeline pipeline = createSingleNodePipeline(UUID.randomUUID().toString()); Preconditions.checkNotNull(pipeline, "Pipeline cannot be null."); int currentCount = 1; @@ -60,7 +67,7 @@ public void initialize() throws IOException { try { ContainerInfo containerInfo = new ContainerInfo.Builder() .setState(CLOSED) - .setPipelineName(pipeline.getPipelineName()) + .setPipelineID(pipeline.getId()) .setReplicationType(pipeline.getType()) .setReplicationFactor(pipeline.getFactor()) // This is bytes allocated for blocks inside container, not the @@ -79,11 +86,11 @@ public void initialize() throws IOException { e.printStackTrace(); } } - for (int y = currentCount; y < 2000; y++) { + for (int y = currentCount; y < 50000; y++) { try { ContainerInfo containerInfo = new ContainerInfo.Builder() .setState(OPEN) - .setPipelineName(pipeline.getPipelineName()) + .setPipelineID(pipeline.getId()) .setReplicationType(pipeline.getType()) .setReplicationFactor(pipeline.getFactor()) // This is bytes allocated for blocks inside container, not the @@ -105,7 +112,7 @@ public void initialize() throws IOException { try { ContainerInfo containerInfo = new ContainerInfo.Builder() .setState(OPEN) - .setPipelineName(pipeline.getPipelineName()) + .setPipelineID(pipeline.getId()) .setReplicationType(pipeline.getType()) .setReplicationFactor(pipeline.getFactor()) // This is bytes allocated for blocks inside container, not the @@ -154,10 +161,10 @@ public static Pipeline createPipeline(String containerName, final Iterator i = ids.iterator(); Preconditions.checkArgument(i.hasNext()); final DatanodeDetails leader = i.next(); - String pipelineName = "TEST-" + UUID.randomUUID().toString().substring(5); final Pipeline pipeline = new Pipeline(leader.getUuidString(), OPEN, - ReplicationType.STAND_ALONE, ReplicationFactor.ONE, pipelineName); + ReplicationType.STAND_ALONE, ReplicationFactor.ONE, + PipelineID.randomId()); pipeline.addMember(leader); for (; i.hasNext();) { pipeline.addMember(i.next()); @@ -168,11 +175,17 @@ public static Pipeline createPipeline(String containerName, @Benchmark public void createContainerBenchMark(BenchMarkContainerStateMap state, Blackhole bh) throws IOException { + ContainerInfo containerInfo = getContainerInfo(state); + state.stateMap.addContainer(containerInfo); + } + + private ContainerInfo getContainerInfo(BenchMarkContainerStateMap state) + throws IOException { Pipeline pipeline = createSingleNodePipeline(UUID.randomUUID().toString()); int cid = state.containerID.incrementAndGet(); - ContainerInfo containerInfo = new ContainerInfo.Builder() + return new ContainerInfo.Builder() .setState(CLOSED) - .setPipelineName(pipeline.getPipelineName()) + .setPipelineID(pipeline.getId()) .setReplicationType(pipeline.getType()) .setReplicationFactor(pipeline.getFactor()) // This is bytes allocated for blocks inside container, not the @@ -185,14 +198,16 @@ public void createContainerBenchMark(BenchMarkContainerStateMap state, .setContainerID(cid) .setDeleteTransactionId(0) .build(); - state.stateMap.addContainer(containerInfo); } @Benchmark public void getMatchingContainerBenchMark(BenchMarkContainerStateMap state, - Blackhole bh) { + Blackhole bh) throws IOException { 
+ if(runCount.incrementAndGet() % errorFrequency == 0) { + state.stateMap.addContainer(getContainerInfo(state)); + } bh.consume(state.stateMap - .getMatchingContainerIDs(OPEN, "BILBO", ReplicationFactor.ONE, + .getMatchingContainerIDs(OPEN, "OZONE", ReplicationFactor.ONE, ReplicationType.STAND_ALONE)); } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkDatanodeDispatcher.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkDatanodeDispatcher.java index 3d4426f82b5..5ac7e0accf8 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkDatanodeDispatcher.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/BenchMarkDatanodeDispatcher.java @@ -17,7 +17,12 @@ */ package org.apache.hadoop.ozone.genesis; -import org.apache.hadoop.hdds.scm.container.common.helpers.Pipeline; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; +import org.apache.hadoop.ozone.container.common.statemachine + .DatanodeStateMachine.DatanodeStates; +import org.apache.hadoop.ozone.container.common.statemachine.StateContext; +import org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.ratis.shaded.com.google.protobuf.ByteString; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FileUtils; @@ -25,13 +30,7 @@ import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.hdfs.server.datanode.StorageLocation; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.ozone.container.common.impl.ChunkManagerImpl; -import org.apache.hadoop.ozone.container.common.impl.ContainerManagerImpl; -import org.apache.hadoop.ozone.container.common.impl.Dispatcher; -import org.apache.hadoop.ozone.container.common.impl.KeyManagerImpl; -import org.apache.hadoop.ozone.container.common.interfaces.ContainerManager; import org.apache.hadoop.util.Time; import org.openjdk.jmh.annotations.Benchmark; @@ -44,19 +43,14 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Random; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; -import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_ROOT_PREFIX; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ContainerCommandRequestProto; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .CreateContainerRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .ReadChunkRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos @@ -65,63 +59,51 @@ .PutKeyRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos .GetKeyRequestProto; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos - .ContainerData; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor; +/** + * Benchmarks DatanodeDispatcher class. 
+ */ @State(Scope.Benchmark) public class BenchMarkDatanodeDispatcher { private String baseDir; private String datanodeUuid; - private Dispatcher dispatcher; - private Pipeline pipeline; + private HddsDispatcher dispatcher; private ByteString data; private Random random; private AtomicInteger containerCount; private AtomicInteger keyCount; private AtomicInteger chunkCount; - final int initContainers = 100; - final int initKeys = 50; - final int initChunks = 100; + private static final int INIT_CONTAINERS = 100; + private static final int INIT_KEYS = 50; + private static final int INIT_CHUNKS = 100; - List containers; - List keys; - List chunks; + private List containers; + private List keys; + private List chunks; @Setup(Level.Trial) public void initialize() throws IOException { datanodeUuid = UUID.randomUUID().toString(); - pipeline = new Pipeline("127.0.0.1", - LifeCycleState.OPEN, ReplicationType.STAND_ALONE, - ReplicationFactor.ONE, "SA-" + UUID.randomUUID()); // 1 MB of data data = ByteString.copyFromUtf8(RandomStringUtils.randomAscii(1048576)); random = new Random(); Configuration conf = new OzoneConfiguration(); - ContainerManager manager = new ContainerManagerImpl(); baseDir = System.getProperty("java.io.tmpdir") + File.separator + datanodeUuid; // data directory conf.set("dfs.datanode.data.dir", baseDir + File.separator + "data"); - // metadata directory - StorageLocation metadataDir = StorageLocation.parse( - baseDir+ File.separator + CONTAINER_ROOT_PREFIX); - List locations = Arrays.asList(metadataDir); - - manager - .init(conf, locations, GenesisUtil.createDatanodeDetails(datanodeUuid)); - manager.setChunkManager(new ChunkManagerImpl(manager)); - manager.setKeyManager(new KeyManagerImpl(manager, conf)); + ContainerSet containerSet = new ContainerSet(); + VolumeSet volumeSet = new VolumeSet(datanodeUuid, conf); - dispatcher = new Dispatcher(manager, conf); + dispatcher = new HddsDispatcher(conf, containerSet, volumeSet, + new StateContext(conf, DatanodeStates.RUNNING, null)); dispatcher.init(); containerCount = new AtomicInteger(); @@ -133,7 +115,7 @@ public void initialize() throws IOException { chunks = new ArrayList<>(); // Create containers - for (int x = 0; x < initContainers; x++) { + for (int x = 0; x < INIT_CONTAINERS; x++) { long containerID = Time.getUtcTime() + x; ContainerCommandRequestProto req = getCreateContainerCommand(containerID); dispatcher.dispatch(req); @@ -141,21 +123,21 @@ public void initialize() throws IOException { containerCount.getAndIncrement(); } - for (int x = 0; x < initKeys; x++) { + for (int x = 0; x < INIT_KEYS; x++) { keys.add(Time.getUtcTime()+x); } - for (int x = 0; x < initChunks; x++) { + for (int x = 0; x < INIT_CHUNKS; x++) { chunks.add("chunk-" + x); } // Add chunk and keys to the containers - for (int x = 0; x < initKeys; x++) { + for (int x = 0; x < INIT_KEYS; x++) { String chunkName = chunks.get(x); chunkCount.getAndIncrement(); long key = keys.get(x); keyCount.getAndIncrement(); - for (int y = 0; y < initContainers; y++) { + for (int y = 0; y < INIT_CONTAINERS; y++) { long containerID = containers.get(y); BlockID blockID = new BlockID(containerID, key); dispatcher @@ -171,17 +153,14 @@ public void cleanup() throws IOException { FileUtils.deleteDirectory(new File(baseDir)); } - private ContainerCommandRequestProto getCreateContainerCommand(long containerID) { - CreateContainerRequestProto.Builder createRequest = - CreateContainerRequestProto.newBuilder(); - createRequest.setContainerData( - ContainerData.newBuilder().setContainerID( 
- containerID).build()); - + private ContainerCommandRequestProto getCreateContainerCommand( + long containerID) { ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto.newBuilder(); request.setCmdType(ContainerProtos.Type.CreateContainer); - request.setCreateContainer(createRequest); + request.setContainerID(containerID); + request.setCreateContainer( + ContainerProtos.CreateContainerRequestProto.getDefaultInstance()); request.setDatanodeUuid(datanodeUuid); request.setTraceID(containerID + "-trace"); return request.build(); @@ -198,6 +177,7 @@ private ContainerCommandRequestProto getWriteChunkCommand( ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto .newBuilder(); request.setCmdType(ContainerProtos.Type.WriteChunk) + .setContainerID(blockID.getContainerID()) .setTraceID(getBlockTraceID(blockID)) .setDatanodeUuid(datanodeUuid) .setWriteChunk(writeChunkRequest); @@ -210,9 +190,11 @@ private ContainerCommandRequestProto getReadChunkCommand( .newBuilder() .setBlockID(blockID.getDatanodeBlockIDProtobuf()) .setChunkData(getChunkInfo(blockID, chunkName)); + ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto .newBuilder(); request.setCmdType(ContainerProtos.Type.ReadChunk) + .setContainerID(blockID.getContainerID()) .setTraceID(getBlockTraceID(blockID)) .setDatanodeUuid(datanodeUuid) .setReadChunk(readChunkRequest); @@ -236,22 +218,24 @@ private ContainerCommandRequestProto getPutKeyCommand( PutKeyRequestProto.Builder putKeyRequest = PutKeyRequestProto .newBuilder() .setKeyData(getKeyData(blockID, chunkKey)); + ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto .newBuilder(); request.setCmdType(ContainerProtos.Type.PutKey) + .setContainerID(blockID.getContainerID()) .setTraceID(getBlockTraceID(blockID)) .setDatanodeUuid(datanodeUuid) .setPutKey(putKeyRequest); return request.build(); } - private ContainerCommandRequestProto getGetKeyCommand( - BlockID blockID, String chunkKey) { + private ContainerCommandRequestProto getGetKeyCommand(BlockID blockID) { GetKeyRequestProto.Builder readKeyRequest = GetKeyRequestProto.newBuilder() - .setKeyData(getKeyData(blockID, chunkKey)); + .setBlockID(blockID.getDatanodeBlockIDProtobuf()); ContainerCommandRequestProto.Builder request = ContainerCommandRequestProto .newBuilder() .setCmdType(ContainerProtos.Type.GetKey) + .setContainerID(blockID.getContainerID()) .setTraceID(getBlockTraceID(blockID)) .setDatanodeUuid(datanodeUuid) .setGetKey(readKeyRequest); @@ -300,12 +284,11 @@ public void putKey(BenchMarkDatanodeDispatcher bmdd) { @Benchmark public void getKey(BenchMarkDatanodeDispatcher bmdd) { BlockID blockID = getRandomBlockID(); - String chunkKey = getNewChunkToWrite(); - bmdd.dispatcher.dispatch(getGetKeyCommand(blockID, chunkKey)); + bmdd.dispatcher.dispatch(getGetKeyCommand(blockID)); } // Chunks writes from benchmark only reaches certain containers - // Use initChunks instead of updated counters to guarantee + // Use INIT_CHUNKS instead of updated counters to guarantee // key/chunks are readable. 
private BlockID getRandomBlockID() { @@ -313,15 +296,15 @@ private BlockID getRandomBlockID() { } private long getRandomContainerID() { - return containers.get(random.nextInt(initContainers)); + return containers.get(random.nextInt(INIT_CONTAINERS)); } private long getRandomKeyID() { - return keys.get(random.nextInt(initKeys)); + return keys.get(random.nextInt(INIT_KEYS)); } private String getRandomChunkToRead() { - return chunks.get(random.nextInt(initChunks)); + return chunks.get(random.nextInt(INIT_CHUNKS)); } private String getNewChunkToWrite() { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/Genesis.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/Genesis.java index 0dc3db75939..3a8efe8e990 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/Genesis.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/genesis/Genesis.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.genesis; +import org.openjdk.jmh.profile.StackProfiler; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; @@ -39,15 +40,15 @@ private Genesis() { public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() .include(BenchMarkContainerStateMap.class.getSimpleName()) - .include(BenchMarkMetadataStoreReads.class.getSimpleName()) - .include(BenchMarkMetadataStoreWrites.class.getSimpleName()) - .include(BenchMarkDatanodeDispatcher.class.getSimpleName()) +// .include(BenchMarkMetadataStoreReads.class.getSimpleName()) +// .include(BenchMarkMetadataStoreWrites.class.getSimpleName()) +// .include(BenchMarkDatanodeDispatcher.class.getSimpleName()) // Commenting this test out, till we support either a command line or a config // file based ability to run tests. 
// .include(BenchMarkRocksDbStore.class.getSimpleName()) .warmupIterations(5) .measurementIterations(20) - .addProfiler(GenesisMemoryProfiler.class) + .addProfiler(StackProfiler.class) .shouldDoGC(true) .forks(1) .build(); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/scm/cli/SQLCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/scm/cli/SQLCLI.java index 3884eddfc01..522fea9dfd4 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/scm/cli/SQLCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/scm/cli/SQLCLI.java @@ -19,7 +19,6 @@ import com.google.common.base.Preconditions; import com.google.common.primitives.Longs; -import com.google.protobuf.ByteString; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; @@ -31,7 +30,6 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneAclInfo; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.BucketInfo; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.KeyInfo; @@ -56,15 +54,11 @@ import java.sql.DriverManager; import java.sql.SQLException; import java.sql.Statement; -import java.util.HashSet; -import java.util.Set; import static org.apache.hadoop.ozone.OzoneConsts.CONTAINER_DB_SUFFIX; import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; import static org.apache.hadoop.ozone.OzoneConsts.OM_USER_PREFIX; -import static org.apache.hadoop.ozone.OzoneConsts.OM_BUCKET_PREFIX; -import static org.apache.hadoop.ozone.OzoneConsts.OM_VOLUME_PREFIX; -import static org.apache.hadoop.ozone.OzoneConsts.OPEN_CONTAINERS_DB; /** * This is the CLI that can be use to convert an ozone metadata DB into @@ -275,9 +269,6 @@ public int run(String[] args) throws Exception { if (dbName.toString().endsWith(CONTAINER_DB_SUFFIX)) { LOG.info("Converting container DB"); convertContainerDB(dbPath, outPath); - } else if (dbName.toString().equals(OPEN_CONTAINERS_DB)) { - LOG.info("Converting open container DB"); - convertOpenContainerDB(dbPath, outPath); } else if (dbName.toString().equals(OM_DB_NAME)) { LOG.info("Converting om DB"); convertOMDB(dbPath, outPath); @@ -416,12 +407,15 @@ private void insertOMDB(Connection conn, KeyType type, String keyName, } } + // TODO: This has to be fixed. + // we don't have prefix anymore. now each key is written into different + // table. The logic has to be changed. private KeyType getKeyType(String key) { if (key.startsWith(OM_USER_PREFIX)) { return KeyType.USER; - } else if (key.startsWith(OM_VOLUME_PREFIX)) { - return key.replaceFirst(OM_VOLUME_PREFIX, "") - .contains(OM_BUCKET_PREFIX) ? KeyType.BUCKET : KeyType.VOLUME; + } else if (key.startsWith(OM_KEY_PREFIX)) { + return key.replaceFirst(OM_KEY_PREFIX, "") + .contains(OM_KEY_PREFIX) ? 
KeyType.BUCKET : KeyType.VOLUME; }else { return KeyType.KEY; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/genconf/TestGenerateOzoneRequiredConfigurations.java b/hadoop-ozone/tools/src/test/java/org/apache/hadoop/ozone/genconf/TestGenerateOzoneRequiredConfigurations.java similarity index 78% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/genconf/TestGenerateOzoneRequiredConfigurations.java rename to hadoop-ozone/tools/src/test/java/org/apache/hadoop/ozone/genconf/TestGenerateOzoneRequiredConfigurations.java index c2f5eb7f766..8c75ebb1cb7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/genconf/TestGenerateOzoneRequiredConfigurations.java +++ b/hadoop-ozone/tools/src/test/java/org/apache/hadoop/ozone/genconf/TestGenerateOzoneRequiredConfigurations.java @@ -95,8 +95,8 @@ public void testGenerateConfigurationsThroughMainMethod() throws Exception { try (PrintStream ps = new PrintStream(outContent)) { System.setOut(ps); GenerateOzoneRequiredConfigurations.main(args); - Assert.assertThat(outContent.toString(), - CoreMatchers.containsString("ozone-site.xml has been generated at")); + Assert.assertThat(outContent.toString(), CoreMatchers.containsString( + "ozone-site.xml has been generated at")); System.setOut(oldStream); } } @@ -107,7 +107,9 @@ public void testGenerateConfigurationsThroughMainMethod() throws Exception { */ @Test public void generateConfigurationsFailure() throws Exception { - String[] args = new String[]{"-output", "/"}; + File tempPath = getRandomTempDir(); + tempPath.setReadOnly(); + String[] args = new String[]{"-output", tempPath.getAbsolutePath()}; GenerateOzoneRequiredConfigurations.main(args); Assert.assertEquals("Path is valid", @@ -118,10 +120,32 @@ public void generateConfigurationsFailure() throws Exception { Assert.assertEquals("Config file not generated", 1, GenerateOzoneRequiredConfigurations.generateConfigurations(args[1])); + tempPath.setWritable(true); + } + + /** + * Test to avoid generating ozone-site.xml when invalid permission. + * @throws Exception + */ + @Test + public void generateConfigurationsFailureForInvalidPath() throws Exception { + File tempPath = getRandomTempDir(); + tempPath.setReadOnly(); + String[] args = new String[]{"-output", + tempPath.getAbsolutePath() + "/ozone-site.xml"}; + GenerateOzoneRequiredConfigurations.main(args); + + Assert.assertEquals("Path is invalid", false, + GenerateOzoneRequiredConfigurations.isValidPath(args[1])); + + Assert.assertEquals("Config file not generated", 1, + GenerateOzoneRequiredConfigurations.generateConfigurations(args[1])); + tempPath.setWritable(true); } private File getRandomTempDir() throws IOException { - File tempDir = new File(outputBaseDir, RandomStringUtils.randomAlphanumeric(5)); + File tempDir = new File(outputBaseDir, + RandomStringUtils.randomAlphanumeric(5)); FileUtils.forceMkdir(tempDir); return tempDir; } diff --git a/hadoop-ozone/tools/src/test/java/org/apache/hadoop/ozone/genconf/package-info.java b/hadoop-ozone/tools/src/test/java/org/apache/hadoop/ozone/genconf/package-info.java new file mode 100644 index 00000000000..8f58a82a8de --- /dev/null +++ b/hadoop-ozone/tools/src/test/java/org/apache/hadoop/ozone/genconf/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.genconf; +/** + * Tests for ozone genconf tool + */ diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 5e21b4a5971..fd67a71b2f2 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -115,15 +115,6 @@ org.apache.hadoop* - org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsStandardDoclet - - - org.apache.hadoop - hadoop-annotations - ${hadoop.version} - - - true false @@ -145,7 +136,7 @@ false - 3.0.3 + 3.1.1 -unstable @@ -417,5 +408,30 @@ + + doclet + + (,10) + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsStandardDoclet + + + org.apache.hadoop + hadoop-annotations + ${hadoop.version} + + + true + + + + + diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 8e28afec4e9..275ae6e1d8b 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -71,6 +71,10 @@ 1.9.13 2.9.5 + + 4.5.2 + 4.4.4 + 1.7.25 @@ -97,7 +101,7 @@ 1.0.0-M33 - 0.1.1-alpha-d7d7061-SNAPSHOT + 0.3.0-50588bd-SNAPSHOT 1.0-alpha-1 3.3.1 2.4.12 @@ -142,7 +146,7 @@ 1.0-beta-1 1.0-alpha-8 900 - 1.11.271 + 1.11.375 2.3.4 1.5 + **/ITestDynamoDBMetadataStoreScale.java @@ -216,6 +218,8 @@ **/ITestS3AFileContextStatistics.java **/ITestS3AHuge*.java **/ITestS3AEncryptionSSEC*.java + + **/ITestDynamoDBMetadataStoreScale.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index 10201f00d3d..f9052fa97b9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -18,25 +18,29 @@ package org.apache.hadoop.fs.s3a; +import java.io.Closeable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + import com.amazonaws.AmazonClientException; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AnonymousAWSCredentials; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.io.IOUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.Closeable; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.stream.Collectors; - /** * A list of providers. 
* @@ -62,10 +66,18 @@ public static final String NO_AWS_CREDENTIAL_PROVIDERS = "No AWS Credential Providers"; + static final String + CREDENTIALS_REQUESTED_WHEN_CLOSED + = "Credentials requested after provider list was closed"; + private final List providers = new ArrayList<>(1); private boolean reuseLastProvider = true; private AWSCredentialsProvider lastProvider; + private final AtomicInteger refCount = new AtomicInteger(1); + + private final AtomicBoolean closed = new AtomicBoolean(false); + /** * Empty instance. This is not ready to be used. */ @@ -94,6 +106,9 @@ public void add(AWSCredentialsProvider p) { */ @Override public void refresh() { + if (isClosed()) { + return; + } for (AWSCredentialsProvider provider : providers) { provider.refresh(); } @@ -106,6 +121,11 @@ public void refresh() { */ @Override public AWSCredentials getCredentials() { + if (isClosed()) { + LOG.warn(CREDENTIALS_REQUESTED_WHEN_CLOSED); + throw new NoAuthWithAWSException( + CREDENTIALS_REQUESTED_WHEN_CLOSED); + } checkNotEmpty(); if (reuseLastProvider && lastProvider != null) { return lastProvider.getCredentials(); @@ -136,8 +156,7 @@ public AWSCredentials getCredentials() { if (lastException != null) { message += ": " + lastException; } - throw new AmazonClientException(message, lastException); - + throw new NoAuthWithAWSException(message, lastException); } /** @@ -156,7 +175,7 @@ public AWSCredentials getCredentials() { */ public void checkNotEmpty() { if (providers.isEmpty()) { - throw new AmazonClientException(NO_AWS_CREDENTIAL_PROVIDERS); + throw new NoAuthWithAWSException(NO_AWS_CREDENTIAL_PROVIDERS); } } @@ -178,8 +197,38 @@ public String listProviderNames() { */ @Override public String toString() { - return "AWSCredentialProviderList: " + - StringUtils.join(providers, " "); + return "AWSCredentialProviderList[" + + "refcount= " + refCount.get() + ": [" + + StringUtils.join(providers, ", ") + ']'; + } + + /** + * Get a reference to this object with an updated reference count. + * + * @return a reference to this + */ + public synchronized AWSCredentialProviderList share() { + Preconditions.checkState(!closed.get(), "Provider list is closed"); + refCount.incrementAndGet(); + return this; + } + + /** + * Get the current reference count. + * @return the current ref count + */ + @VisibleForTesting + public int getRefCount() { + return refCount.get(); + } + + /** + * Get the closed flag. + * @return true iff the list is closed. + */ + @VisibleForTesting + public boolean isClosed() { + return closed.get(); } /** @@ -190,9 +239,29 @@ public String toString() { */ @Override public void close() { - for(AWSCredentialsProvider p: providers) { + synchronized (this) { + if (closed.get()) { + // already closed: no-op + return; + } + int remainder = refCount.decrementAndGet(); + if (remainder != 0) { + // still actively used, or somehow things are + // now negative + LOG.debug("Not closing {}", this); + return; + } + // at this point, the closing is going to happen + LOG.debug("Closing {}", this); + closed.set(true); + } + + // do this outside the synchronized block. 
+ for (AWSCredentialsProvider p : providers) { if (p instanceof Closeable) { - IOUtils.closeStream((Closeable)p); + IOUtils.closeStream((Closeable) p); + } else if (p instanceof AutoCloseable) { + S3AUtils.closeAutocloseables(LOG, (AutoCloseable)p); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/BasicAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/BasicAWSCredentialsProvider.java deleted file mode 100644 index 01bcc6a05e5..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/BasicAWSCredentialsProvider.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.AWSCredentials; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -/** - * BasicAWSCredentialsProvider supports static configuration of access key ID - * and secret access key for use with the AWS SDK. - * - */ -@InterfaceAudience.Private -@InterfaceStability.Stable -public class BasicAWSCredentialsProvider implements AWSCredentialsProvider { - public static final String NAME - = "org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider"; - private final String accessKey; - private final String secretKey; - - - public BasicAWSCredentialsProvider(String accessKey, String secretKey) { - this.accessKey = accessKey; - this.secretKey = secretKey; - } - - public AWSCredentials getCredentials() { - if (!StringUtils.isEmpty(accessKey) && !StringUtils.isEmpty(secretKey)) { - return new BasicAWSCredentials(accessKey, secretKey); - } - throw new CredentialInitializationException( - "Access key or secret key is null"); - } - - public void refresh() {} - - @Override - public String toString() { - return getClass().getSimpleName(); - } - -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index c52193698fb..3fc25daaec3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -84,10 +84,27 @@ private Constants() { public static final String ASSUMED_ROLE_SESSION_DURATION = "fs.s3a.assumed.role.session.duration"; - /** Simple Token Service Endpoint. If unset, uses the default endpoint. */ + /** Security Token Service Endpoint. If unset, uses the default endpoint. 
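
Stepping back to the AWSCredentialProviderList changes above, the reference counting introduced there implies the following lifecycle. A sketch only, where someProvider stands for any configured AWSCredentialsProvider:

    // Sketch: sharing one credential provider list between owners.
    AWSCredentialProviderList credentials = new AWSCredentialProviderList();
    credentials.add(someProvider);
    AWSCredentialProviderList shared = credentials.share();  // refCount 1 -> 2
    shared.close();                      // refCount 2 -> 1, providers stay open
    credentials.getCredentials();        // still valid
    credentials.close();                 // refCount 1 -> 0, providers are closed
    // any further getCredentials() call now throws NoAuthWithAWSException
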
*/ public static final String ASSUMED_ROLE_STS_ENDPOINT = "fs.s3a.assumed.role.sts.endpoint"; + /** + * Region for the STS endpoint; only relevant if the endpoint + * is set. + */ + public static final String ASSUMED_ROLE_STS_ENDPOINT_REGION = + "fs.s3a.assumed.role.sts.endpoint.region"; + + /** + * Default value for the STS endpoint region; needed for + * v4 signing. + */ + public static final String ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT = + "us-west-1"; + + /** + * Default duration of an assumed role. + */ public static final String ASSUMED_ROLE_SESSION_DURATION_DEFAULT = "30m"; /** list of providers to authenticate for the assumed role. */ @@ -288,13 +305,6 @@ private Constants() { public static final String SERVER_SIDE_ENCRYPTION_KEY = "fs.s3a.server-side-encryption.key"; - /** - * The original key name. Never used in ASF releases, - * but did get into downstream products. - */ - static final String OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY - = "fs.s3a.server-side-encryption-key"; - //override signature algorithm used for signing requests public static final String SIGNING_ALGORITHM = "fs.s3a.signing-algorithm"; @@ -401,6 +411,16 @@ private Constants() { public static final String S3GUARD_DDB_TABLE_NAME_KEY = "fs.s3a.s3guard.ddb.table"; + /** + * A prefix for adding tags to the DDB Table upon creation. + * + * For example: + * fs.s3a.s3guard.ddb.table.tag.mytag + */ + @InterfaceStability.Unstable + public static final String S3GUARD_DDB_TABLE_TAG = + "fs.s3a.s3guard.ddb.table.tag."; + /** * Test table name to use during DynamoDB integration test. * @@ -438,12 +458,20 @@ private Constants() { @InterfaceStability.Unstable public static final String S3GUARD_DDB_MAX_RETRIES = "fs.s3a.s3guard.ddb.max.retries"; + /** - * Max retries on batched DynamoDB operations before giving up and + * Max retries on batched/throttled DynamoDB operations before giving up and * throwing an IOException. Default is {@value}. See core-default.xml for * more detail. */ - public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT = 9; + public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT = + DEFAULT_MAX_ERROR_RETRIES; + + @InterfaceStability.Unstable + public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL = + "fs.s3a.s3guard.ddb.throttle.retry.interval"; + public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL_DEFAULT = + "100ms"; /** * Period of time (in milliseconds) to sleep between batches of writes. @@ -470,6 +498,24 @@ private Constants() { public static final String S3GUARD_METASTORE_LOCAL = "org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore"; + /** + * Maximum number of records in LocalMetadataStore. + */ + @InterfaceStability.Unstable + public static final String S3GUARD_METASTORE_LOCAL_MAX_RECORDS = + "fs.s3a.s3guard.local.max_records"; + public static final int DEFAULT_S3GUARD_METASTORE_LOCAL_MAX_RECORDS = 256; + + /** + * Time to live in milliseconds in LocalMetadataStore. + * If zero, time-based expiration is disabled. + */ + @InterfaceStability.Unstable + public static final String S3GUARD_METASTORE_LOCAL_ENTRY_TTL = + "fs.s3a.s3guard.local.ttl"; + public static final int DEFAULT_S3GUARD_METASTORE_LOCAL_ENTRY_TTL + = 10 * 1000; + /** * Use DynamoDB for the metadata: {@value}. 
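
Taken together, the Constants additions above introduce several new tuning knobs; a sketch of setting them programmatically, with example values only (the tag name "owner" and its value are made up):

    // Sketch: exercising the new S3A options added in this patch.
    Configuration conf = new Configuration();
    // Assumed-role STS endpoint plus the signing region it now requires:
    conf.set("fs.s3a.assumed.role.sts.endpoint", "sts.eu-west-1.amazonaws.com");
    conf.set("fs.s3a.assumed.role.sts.endpoint.region", "eu-west-1");
    // Tag the S3Guard DynamoDB table at creation time:
    conf.set("fs.s3a.s3guard.ddb.table.tag.owner", "data-platform");
    // Throttle retry interval and local metastore sizing/TTL:
    conf.set("fs.s3a.s3guard.ddb.throttle.retry.interval", "100ms");
    conf.setInt("fs.s3a.s3guard.local.max_records", 512);
    conf.setInt("fs.s3a.s3guard.local.ttl", 60000);
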
*/ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index f33b25eca99..ade317fd607 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -18,59 +18,45 @@ package org.apache.hadoop.fs.s3a; +import java.io.IOException; +import java.net.URI; + import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.S3ClientOptions; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.util.VersionInfo; import org.slf4j.Logger; -import java.io.IOException; -import java.net.URI; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; -import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet; -import static org.apache.hadoop.fs.s3a.S3AUtils.intOption; +import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; /** - * The default factory implementation, which calls the AWS SDK to configure - * and create an {@link AmazonS3Client} that communicates with the S3 service. + * The default {@link S3ClientFactory} implementation. + * This which calls the AWS SDK to configure and create an + * {@link AmazonS3Client} that communicates with the S3 service. */ -public class DefaultS3ClientFactory extends Configured implements - S3ClientFactory { +public class DefaultS3ClientFactory extends Configured + implements S3ClientFactory { protected static final Logger LOG = S3AFileSystem.LOG; @Override - public AmazonS3 createS3Client(URI name) throws IOException { + public AmazonS3 createS3Client(URI name, + final String bucket, + final AWSCredentialsProvider credentials) throws IOException { Configuration conf = getConf(); - AWSCredentialsProvider credentials = - createAWSCredentialProviderSet(name, conf); - final ClientConfiguration awsConf = createAwsConf(getConf()); - AmazonS3 s3 = newAmazonS3Client(credentials, awsConf); - return createAmazonS3Client(s3, conf, credentials, awsConf); + final ClientConfiguration awsConf = S3AUtils.createAwsConf(getConf(), bucket); + return configureAmazonS3Client( + newAmazonS3Client(credentials, awsConf), conf); } /** - * Create a new {@link ClientConfiguration}. - * @param conf The Hadoop configuration - * @return new AWS client configuration - */ - public static ClientConfiguration createAwsConf(Configuration conf) { - final ClientConfiguration awsConf = new ClientConfiguration(); - initConnectionSettings(conf, awsConf); - initProxySupport(conf, awsConf); - initUserAgent(conf, awsConf); - return awsConf; - } - - /** - * Wrapper around constructor for {@link AmazonS3} client. Override this to - * provide an extended version of the client + * Wrapper around constructor for {@link AmazonS3} client. 
+ * Override this to provide an extended version of the client * @param credentials credentials to use * @param awsConf AWS configuration * @return new AmazonS3 client @@ -81,120 +67,17 @@ protected AmazonS3 newAmazonS3Client( } /** - * Initializes all AWS SDK settings related to connection management. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration - */ - private static void initConnectionSettings(Configuration conf, - ClientConfiguration awsConf) { - awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS, - DEFAULT_MAXIMUM_CONNECTIONS, 1)); - boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, - DEFAULT_SECURE_CONNECTIONS); - awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP); - awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES, - DEFAULT_MAX_ERROR_RETRIES, 0)); - awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT, - DEFAULT_ESTABLISH_TIMEOUT, 0)); - awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT, - DEFAULT_SOCKET_TIMEOUT, 0)); - int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER, - DEFAULT_SOCKET_SEND_BUFFER, 2048); - int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER, - DEFAULT_SOCKET_RECV_BUFFER, 2048); - awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer); - String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); - if (!signerOverride.isEmpty()) { - LOG.debug("Signer override = {}", signerOverride); - awsConf.setSignerOverride(signerOverride); - } - } - - /** - * Initializes AWS SDK proxy support if configured. - * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration - * @throws IllegalArgumentException if misconfigured - */ - private static void initProxySupport(Configuration conf, - ClientConfiguration awsConf) throws IllegalArgumentException { - String proxyHost = conf.getTrimmed(PROXY_HOST, ""); - int proxyPort = conf.getInt(PROXY_PORT, -1); - if (!proxyHost.isEmpty()) { - awsConf.setProxyHost(proxyHost); - if (proxyPort >= 0) { - awsConf.setProxyPort(proxyPort); - } else { - if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { - LOG.warn("Proxy host set without port. Using HTTPS default 443"); - awsConf.setProxyPort(443); - } else { - LOG.warn("Proxy host set without port. Using HTTP default 80"); - awsConf.setProxyPort(80); - } - } - String proxyUsername = conf.getTrimmed(PROXY_USERNAME); - String proxyPassword = conf.getTrimmed(PROXY_PASSWORD); - if ((proxyUsername == null) != (proxyPassword == null)) { - String msg = "Proxy error: " + PROXY_USERNAME + " or " + - PROXY_PASSWORD + " set without the other."; - LOG.error(msg); - throw new IllegalArgumentException(msg); - } - awsConf.setProxyUsername(proxyUsername); - awsConf.setProxyPassword(proxyPassword); - awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN)); - awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); - if (LOG.isDebugEnabled()) { - LOG.debug("Using proxy server {}:{} as user {} with password {} on " + - "domain {} as workstation {}", awsConf.getProxyHost(), - awsConf.getProxyPort(), - String.valueOf(awsConf.getProxyUsername()), - awsConf.getProxyPassword(), awsConf.getProxyDomain(), - awsConf.getProxyWorkstation()); - } - } else if (proxyPort >= 0) { - String msg = - "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; - LOG.error(msg); - throw new IllegalArgumentException(msg); - } - } - - /** - * Initializes the User-Agent header to send in HTTP requests to the S3 - * back-end. 
We always include the Hadoop version number. The user also - * may set an optional custom prefix to put in front of the Hadoop version - * number. The AWS SDK interally appends its own information, which seems - * to include the AWS SDK version, OS and JVM version. + * Configure S3 client from the Hadoop configuration. * - * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration - */ - private static void initUserAgent(Configuration conf, - ClientConfiguration awsConf) { - String userAgent = "Hadoop " + VersionInfo.getVersion(); - String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); - if (!userAgentPrefix.isEmpty()) { - userAgent = userAgentPrefix + ", " + userAgent; - } - LOG.debug("Using User-Agent: {}", userAgent); - awsConf.setUserAgentPrefix(userAgent); - } - - /** - * Creates an {@link AmazonS3Client} from the established configuration. + * This includes: endpoint, Path Access and possibly other + * options. * * @param conf Hadoop configuration - * @param credentials AWS credentials - * @param awsConf AWS SDK configuration * @return S3 client * @throws IllegalArgumentException if misconfigured */ - private static AmazonS3 createAmazonS3Client(AmazonS3 s3, Configuration conf, - AWSCredentialsProvider credentials, ClientConfiguration awsConf) + private static AmazonS3 configureAmazonS3Client(AmazonS3 s3, + Configuration conf) throws IllegalArgumentException { String endPoint = conf.getTrimmed(ENDPOINT, ""); if (!endPoint.isEmpty()) { @@ -206,21 +89,29 @@ private static AmazonS3 createAmazonS3Client(AmazonS3 s3, Configuration conf, throw new IllegalArgumentException(msg, e); } } - enablePathStyleAccessIfRequired(s3, conf); - return s3; + return applyS3ClientOptions(s3, conf); } /** - * Enables path-style access to S3 buckets if configured. By default, the + * Perform any tuning of the {@code S3ClientOptions} settings based on + * the Hadoop configuration. + * This is different from the general AWS configuration creation as + * it is unique to S3 connections. + * + * The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access + * to S3 buckets if configured. By default, the * behavior is to use virtual hosted-style access with URIs of the form - * http://bucketname.s3.amazonaws.com. Enabling path-style access and a + * {@code http://bucketname.s3.amazonaws.com} + * Enabling path-style access and a * region-specific endpoint switches the behavior to use URIs of the form - * http://s3-eu-west-1.amazonaws.com/bucketname. - * + * {@code http://s3-eu-west-1.amazonaws.com/bucketname}. + * It is common to use this when connecting to private S3 servers, as it + * avoids the need to play with DNS entries. 
* @param s3 S3 client * @param conf Hadoop configuration + * @return the S3 client */ - private static void enablePathStyleAccessIfRequired(AmazonS3 s3, + private static AmazonS3 applyS3ClientOptions(AmazonS3 s3, Configuration conf) { final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false); if (pathStyleAccess) { @@ -229,5 +120,6 @@ private static void enablePathStyleAccessIfRequired(AmazonS3 s3, .setPathStyleAccess(true) .build()); } + return s3; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java index 99ed87da8c1..2cd1aae5baf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java @@ -114,6 +114,16 @@ public S3ObjectSummary summary() { /** Map of key to delay -> time it was created. */ private Map delayedPutKeys = new HashMap<>(); + /** + * Instantiate. + * This subclasses a deprecated constructor of the parent + * {@code AmazonS3Client} class; we can't use the builder API because, + * that only creates the consistent client. + * @param credentials credentials to auth. + * @param clientConfiguration connection settings + * @param conf hadoop configuration. + */ + @SuppressWarnings("deprecation") public InconsistentAmazonS3Client(AWSCredentialsProvider credentials, ClientConfiguration clientConfiguration, Configuration conf) { super(credentials, clientConfiguration); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java index 17d268bdcf6..932c472f5be 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java @@ -21,16 +21,27 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.s3.AmazonS3; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; /** * S3 Client factory used for testing with eventual consistency fault injection. + * This client is for testing only; it is in the production + * {@code hadoop-aws} module to enable integration tests to use this + * just by editing the Hadoop configuration used to bring up the client. */ @InterfaceAudience.Private @InterfaceStability.Unstable public class InconsistentS3ClientFactory extends DefaultS3ClientFactory { + /** + * Create the inconsistent client. + * Logs a warning that this is being done. + * @param credentials credentials to use + * @param awsConf AWS configuration + * @return an inconsistent client. 
+ */ @Override protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials, ClientConfiguration awsConf) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index a007ba156ad..45912a0ac3d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -130,8 +130,9 @@ public static void once(String action, String path, VoidOperation operation) } /** - * Execute an operation and ignore all raised IOExceptions; log at INFO. - * @param log log to log at info. + * Execute an operation and ignore all raised IOExceptions; log at INFO; + * full stack only at DEBUG. + * @param log log to use. * @param action action to include in log * @param path optional path to include in log * @param operation operation to execute @@ -145,13 +146,17 @@ public static void once(String action, String path, VoidOperation operation) try { once(action, path, operation); } catch (IOException e) { - log.info("{}: {}", toDescription(action, path), e.toString(), e); + String description = toDescription(action, path); + String error = e.toString(); + log.info("{}: {}", description, error); + log.debug("{}", description, e); } } /** - * Execute an operation and ignore all raised IOExceptions; log at INFO. - * @param log log to log at info. + * Execute an operation and ignore all raised IOExceptions; log at INFO; + * full stack only at DEBUG. + * @param log log to use. * @param action action to include in log * @param path optional path to include in log * @param operation operation to execute diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 737d7da95c2..e817f0d55de 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -77,8 +77,9 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ListeningExecutorService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -124,9 +125,6 @@ import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.isNotEmpty; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - /** * The core S3A Filesystem implementation. * @@ -205,17 +203,20 @@ private boolean useListV1; private MagicCommitIntegration committerIntegration; + private AWSCredentialProviderList credentials; + /** Add any deprecated keys. */ @SuppressWarnings("deprecation") private static void addDeprecatedKeys() { - Configuration.addDeprecations( - new Configuration.DeprecationDelta[]{ - // never shipped in an ASF release, but did get into the wild. - new Configuration.DeprecationDelta( - OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, - SERVER_SIDE_ENCRYPTION_KEY) - }); - Configuration.reloadExistingConfigurations(); + // this is retained as a placeholder for when new deprecated keys + // need to be added. 
+ Configuration.DeprecationDelta[] deltas = { + }; + + if (deltas.length > 0) { + Configuration.addDeprecations(deltas); + Configuration.reloadExistingConfigurations(); + } } static { @@ -252,8 +253,10 @@ public void initialize(URI name, Configuration originalConf) Class s3ClientFactoryClass = conf.getClass( S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL, S3ClientFactory.class); + + credentials = createAWSCredentialProviderSet(name, conf); s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) - .createS3Client(name); + .createS3Client(name, bucket, credentials); invoker = new Invoker(new S3ARetryPolicy(getConf()), onRetry); s3guardInvoker = new Invoker(new S3GuardExistsRetryPolicy(getConf()), onRetry); @@ -1128,6 +1131,7 @@ protected void entryPoint(Statistic operation) throws IOException { /** * Increment a statistic by 1. + * This increments both the instrumentation and storage statistics. * @param statistic The operation to increment */ protected void incrementStatistic(Statistic statistic) { @@ -1136,6 +1140,7 @@ protected void incrementStatistic(Statistic statistic) { /** * Increment a statistic by a specific value. + * This increments both the instrumentation and storage statistics. * @param statistic The operation to increment * @param count the count to increment */ @@ -1172,8 +1177,7 @@ public void operationRetried(Exception ex) { Statistic stat = isThrottleException(ex) ? STORE_IO_THROTTLED : IGNORED_ERRORS; - instrumentation.incrementCounter(stat, 1); - storageStatistics.incrementCounter(stat, 1); + incrementStatistic(stat); } /** @@ -1194,6 +1198,11 @@ public void operationRetried( /** * Callback from {@link Invoker} when an operation against a metastore * is retried. + * Always increments the {@link Statistic#S3GUARD_METADATASTORE_RETRY} + * statistic/counter; + * if it is a throttling exception will update the associated + * throttled metrics/statistics. + * * @param ex exception * @param retries number of retries * @param idempotent is the method idempotent @@ -1202,6 +1211,11 @@ public void metastoreOperationRetried(Exception ex, int retries, boolean idempotent) { operationRetried(ex); + incrementStatistic(S3GUARD_METADATASTORE_RETRY); + if (isThrottleException(ex)) { + incrementStatistic(S3GUARD_METADATASTORE_THROTTLED); + instrumentation.addValueToQuantiles(S3GUARD_METADATASTORE_THROTTLE_RATE, 1); + } } /** @@ -2470,12 +2484,11 @@ public void close() throws IOException { transfers.shutdownNow(true); transfers = null; } - if (metadataStore != null) { - metadataStore.close(); - metadataStore = null; - } - IOUtils.closeQuietly(instrumentation); + S3AUtils.closeAll(LOG, metadataStore, instrumentation); + metadataStore = null; instrumentation = null; + closeAutocloseables(LOG, credentials); + credentials = null; } } @@ -2885,6 +2898,7 @@ public String toString() { } sb.append(", boundedExecutor=").append(boundedThreadPool); sb.append(", unboundedExecutor=").append(unboundedThreadPool); + sb.append(", credentials=").append(credentials); sb.append(", statistics {") .append(statistics) .append("}"); @@ -3319,4 +3333,17 @@ public boolean hasCapability(String capability) { return false; } } + + /** + * Get a shared copy of the AWS credentials, with its reference + * counter updated. + * Caller is required to call {@code close()} on this after + * they have finished using it. + * @param purpose what is this for? This is initially for logging + * @return a reference to shared credentials. 
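A hedged sketch of how a caller might use the reference-counted credential sharing described in the javadoc above; it assumes an initialized S3AFileSystem and that close() releases only this caller's reference.

    import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
    import org.apache.hadoop.fs.s3a.S3AFileSystem;

    public final class SharedCredentialsSketch {
      public static void useSharedCredentials(S3AFileSystem fs) throws Exception {
        // "diagnostics" is an arbitrary purpose string, used only for logging.
        AWSCredentialProviderList shared = fs.shareCredentials("diagnostics");
        try {
          // Hand 'shared' to another AWS client or tool here.
        } finally {
          shared.close();  // release this caller's reference
        }
      }
    }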
+ */ + public AWSCredentialProviderList shareCredentials(final String purpose) { + LOG.debug("Sharing credentials for: {}", purpose); + return credentials.share(); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index 440739d9d15..68f98e4abea 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -36,6 +36,7 @@ import java.io.EOFException; import java.io.IOException; +import java.net.SocketTimeoutException; import static org.apache.commons.lang3.StringUtils.isNotEmpty; @@ -155,11 +156,11 @@ private void setInputPolicy(S3AInputPolicy inputPolicy) { * @throws IOException on any failure to open the object */ @Retries.OnceTranslated - private synchronized void reopen(String reason, long targetPos, long length) - throws IOException { + private synchronized void reopen(String reason, long targetPos, long length, + boolean forceAbort) throws IOException { if (wrappedStream != null) { - closeStream("reopen(" + reason + ")", contentRangeFinish, false); + closeStream("reopen(" + reason + ")", contentRangeFinish, forceAbort); } contentRangeFinish = calculateRequestLimit(inputPolicy, targetPos, @@ -324,7 +325,7 @@ private void lazySeek(long targetPos, long len) throws IOException { //re-open at specific location if needed if (wrappedStream == null) { - reopen("read from new offset", targetPos, len); + reopen("read from new offset", targetPos, len, false); } }); } @@ -367,8 +368,11 @@ public synchronized int read() throws IOException { b = wrappedStream.read(); } catch (EOFException e) { return -1; + } catch (SocketTimeoutException e) { + onReadFailure(e, 1, true); + b = wrappedStream.read(); } catch (IOException e) { - onReadFailure(e, 1); + onReadFailure(e, 1, false); b = wrappedStream.read(); } return b; @@ -393,12 +397,13 @@ public synchronized int read() throws IOException { * @throws IOException any exception thrown on the re-open attempt. */ @Retries.OnceTranslated - private void onReadFailure(IOException ioe, int length) throws IOException { + private void onReadFailure(IOException ioe, int length, boolean forceAbort) + throws IOException { LOG.info("Got exception while trying to read from stream {}" + " trying to recover: " + ioe, uri); streamStatistics.readException(); - reopen("failure recovery", pos, length); + reopen("failure recovery", pos, length, forceAbort); } /** @@ -446,8 +451,11 @@ public synchronized int read(byte[] buf, int off, int len) } catch (EOFException e) { // the base implementation swallows EOFs. return -1; + } catch (SocketTimeoutException e) { + onReadFailure(e, len, true); + bytes = wrappedStream.read(buf, off, len); } catch (IOException e) { - onReadFailure(e, len); + onReadFailure(e, len, false); bytes= wrappedStream.read(buf, off, len); } return bytes; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java index 26ecefd0592..84f9c9f1392 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java @@ -1032,15 +1032,14 @@ public void storeClosed() { * Throttled request. 
*/ public void throttled() { - incrementCounter(S3GUARD_METADATASTORE_THROTTLED, 1); - addValueToQuantiles(S3GUARD_METADATASTORE_THROTTLE_RATE, 1); + // counters are incremented by owner. } /** * S3Guard is retrying after a (retryable) failure. */ public void retrying() { - incrementCounter(S3GUARD_METADATASTORE_RETRY, 1); + // counters are incremented by owner. } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AMultipartUploader.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AMultipartUploader.java index 34c88d43f6a..4a6cb8c0938 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AMultipartUploader.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AMultipartUploader.java @@ -17,15 +17,26 @@ */ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; import com.amazonaws.services.s3.model.PartETag; import com.amazonaws.services.s3.model.UploadPartRequest; import com.amazonaws.services.s3.model.UploadPartResult; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; + +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BBPartHandle; @@ -37,13 +48,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathHandle; import org.apache.hadoop.fs.UploadHandle; -import org.apache.hadoop.hdfs.DFSUtilClient; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.util.List; -import java.util.stream.Collectors; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; /** * MultipartUploader for S3AFileSystem. This uses the S3 multipart @@ -53,6 +59,10 @@ private final S3AFileSystem s3a; + /** Header for Parts: {@value}. 
*/ + + public static final String HEADER = "S3A-part01"; + public S3AMultipartUploader(FileSystem fs, Configuration conf) { if (!(fs instanceof S3AFileSystem)) { throw new IllegalArgumentException( @@ -63,75 +73,73 @@ public S3AMultipartUploader(FileSystem fs, Configuration conf) { @Override public UploadHandle initialize(Path filePath) throws IOException { + final WriteOperationHelper writeHelper = s3a.getWriteOperationHelper(); String key = s3a.pathToKey(filePath); - InitiateMultipartUploadRequest request = - new InitiateMultipartUploadRequest(s3a.getBucket(), key); - LOG.debug("initialize request: {}", request); - InitiateMultipartUploadResult result = s3a.initiateMultipartUpload(request); - String uploadId = result.getUploadId(); + String uploadId = writeHelper.initiateMultiPartUpload(key); return BBUploadHandle.from(ByteBuffer.wrap( uploadId.getBytes(Charsets.UTF_8))); } @Override public PartHandle putPart(Path filePath, InputStream inputStream, - int partNumber, UploadHandle uploadId, long lengthInBytes) { - String key = s3a.pathToKey(filePath); - UploadPartRequest request = new UploadPartRequest(); + int partNumber, UploadHandle uploadId, long lengthInBytes) + throws IOException { byte[] uploadIdBytes = uploadId.toByteArray(); - request.setUploadId(new String(uploadIdBytes, 0, uploadIdBytes.length, - Charsets.UTF_8)); - request.setInputStream(inputStream); - request.setPartSize(lengthInBytes); - request.setPartNumber(partNumber); - request.setBucketName(s3a.getBucket()); - request.setKey(key); - LOG.debug("putPart request: {}", request); - UploadPartResult result = s3a.uploadPart(request); + checkUploadId(uploadIdBytes); + String key = s3a.pathToKey(filePath); + final WriteOperationHelper writeHelper = s3a.getWriteOperationHelper(); + String uploadIdString = new String(uploadIdBytes, 0, uploadIdBytes.length, + Charsets.UTF_8); + UploadPartRequest request = writeHelper.newUploadPartRequest(key, + uploadIdString, partNumber, (int) lengthInBytes, inputStream, null, 0L); + UploadPartResult result = writeHelper.uploadPart(request); String eTag = result.getETag(); - return BBPartHandle.from(ByteBuffer.wrap(eTag.getBytes(Charsets.UTF_8))); + return BBPartHandle.from( + ByteBuffer.wrap( + buildPartHandlePayload(eTag, lengthInBytes))); } @Override public PathHandle complete(Path filePath, - List> handles, UploadHandle uploadId) { - String key = s3a.pathToKey(filePath); - CompleteMultipartUploadRequest request = - new CompleteMultipartUploadRequest(); - request.setBucketName(s3a.getBucket()); - request.setKey(key); + List> handles, UploadHandle uploadId) + throws IOException { byte[] uploadIdBytes = uploadId.toByteArray(); - request.setUploadId(new String(uploadIdBytes, 0, uploadIdBytes.length, - Charsets.UTF_8)); - List eTags = handles - .stream() - .map(handle -> { - byte[] partEtagBytes = handle.getRight().toByteArray(); - return new PartETag(handle.getLeft(), - new String(partEtagBytes, 0, partEtagBytes.length, - Charsets.UTF_8)); - }) - .collect(Collectors.toList()); - request.setPartETags(eTags); - LOG.debug("Complete request: {}", request); - CompleteMultipartUploadResult completeMultipartUploadResult = - s3a.getAmazonS3Client().completeMultipartUpload(request); - - byte[] eTag = DFSUtilClient.string2Bytes( - completeMultipartUploadResult.getETag()); + checkUploadId(uploadIdBytes); + if (handles.isEmpty()) { + throw new IOException("Empty upload"); + } + + final WriteOperationHelper writeHelper = s3a.getWriteOperationHelper(); + String key = s3a.pathToKey(filePath); + + String 
uploadIdStr = new String(uploadIdBytes, 0, uploadIdBytes.length, + Charsets.UTF_8); + ArrayList eTags = new ArrayList<>(); + eTags.ensureCapacity(handles.size()); + long totalLength = 0; + for (Pair handle : handles) { + byte[] payload = handle.getRight().toByteArray(); + Pair result = parsePartHandlePayload(payload); + totalLength += result.getLeft(); + eTags.add(new PartETag(handle.getLeft(), result.getRight())); + } + AtomicInteger errorCount = new AtomicInteger(0); + CompleteMultipartUploadResult result = writeHelper.completeMPUwithRetries( + key, uploadIdStr, eTags, totalLength, errorCount); + + byte[] eTag = result.getETag().getBytes(Charsets.UTF_8); return (PathHandle) () -> ByteBuffer.wrap(eTag); } @Override - public void abort(Path filePath, UploadHandle uploadId) { + public void abort(Path filePath, UploadHandle uploadId) throws IOException { + final byte[] uploadIdBytes = uploadId.toByteArray(); + checkUploadId(uploadIdBytes); + final WriteOperationHelper writeHelper = s3a.getWriteOperationHelper(); String key = s3a.pathToKey(filePath); - byte[] uploadIdBytes = uploadId.toByteArray(); String uploadIdString = new String(uploadIdBytes, 0, uploadIdBytes.length, Charsets.UTF_8); - AbortMultipartUploadRequest request = new AbortMultipartUploadRequest(s3a - .getBucket(), key, uploadIdString); - LOG.debug("Abort request: {}", request); - s3a.getAmazonS3Client().abortMultipartUpload(request); + writeHelper.abortMultipartCommit(key, uploadIdString); } /** @@ -141,10 +149,59 @@ public void abort(Path filePath, UploadHandle uploadId) { @Override protected MultipartUploader createMultipartUploader(FileSystem fs, Configuration conf) { - if (fs.getScheme().equals("s3a")) { + if (FS_S3A.equals(fs.getScheme())) { return new S3AMultipartUploader(fs, conf); } return null; } } + + /** + * Build the payload for marshalling. + * @param eTag upload etag + * @param len length + * @return a byte array to marshall. + * @throws IOException error writing the payload + */ + @VisibleForTesting + static byte[] buildPartHandlePayload(String eTag, long len) + throws IOException { + Preconditions.checkArgument(StringUtils.isNotEmpty(eTag), + "Empty etag"); + Preconditions.checkArgument(len > 0, + "Invalid length"); + + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + try(DataOutputStream output = new DataOutputStream(bytes)) { + output.writeUTF(HEADER); + output.writeLong(len); + output.writeUTF(eTag); + } + return bytes.toByteArray(); + } + + /** + * Parse the payload marshalled as a part handle. 
+ * @param data handle data + * @return the length and etag + * @throws IOException error reading the payload + */ + static Pair parsePartHandlePayload(byte[] data) + throws IOException { + + try(DataInputStream input = + new DataInputStream(new ByteArrayInputStream(data))) { + final String header = input.readUTF(); + if (!HEADER.equals(header)) { + throw new IOException("Wrong header string: \"" + header + "\""); + } + final long len = input.readLong(); + final String etag = input.readUTF(); + if (len <= 0) { + throw new IOException("Negative length"); + } + return Pair.of(len, etag); + } + } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index 2b361fd455b..1e475e1570c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.net.ConnectTimeoutException; @@ -123,12 +124,7 @@ public S3ARetryPolicy(Configuration conf) { // and a separate policy for throttle requests, which are considered // repeatable, even for non-idempotent calls, as the service // rejected the call entirely - throttlePolicy = exponentialBackoffRetry( - conf.getInt(RETRY_THROTTLE_LIMIT, RETRY_THROTTLE_LIMIT_DEFAULT), - conf.getTimeDuration(RETRY_THROTTLE_INTERVAL, - RETRY_THROTTLE_INTERVAL_DEFAULT, - TimeUnit.MILLISECONDS), - TimeUnit.MILLISECONDS); + throttlePolicy = createThrottleRetryPolicy(conf); // client connectivity: fixed retries without care for idempotency connectivityFailure = fixedRetries; @@ -138,6 +134,22 @@ public S3ARetryPolicy(Configuration conf) { retryPolicy = retryByException(retryIdempotentCalls, policyMap); } + /** + * Create the throttling policy. + * This will be called from the S3ARetryPolicy constructor, so + * subclasses must assume they are not initialized. + * @param conf configuration to use. + * @return the retry policy for throttling events. + */ + protected RetryPolicy createThrottleRetryPolicy(final Configuration conf) { + return exponentialBackoffRetry( + conf.getInt(RETRY_THROTTLE_LIMIT, RETRY_THROTTLE_LIMIT_DEFAULT), + conf.getTimeDuration(RETRY_THROTTLE_INTERVAL, + RETRY_THROTTLE_INTERVAL_DEFAULT, + TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS); + } + /** * Subclasses can override this like a constructor to change behavior: call * superclass method, then modify it as needed, and return it. @@ -154,8 +166,9 @@ public S3ARetryPolicy(Configuration conf) { policyMap.put(InterruptedException.class, fail); // note this does not pick up subclasses (like socket timeout) policyMap.put(InterruptedIOException.class, fail); - // interesting question: should this be retried ever? 
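A small round-trip sketch for the two payload helpers above; both are package-private, so this would sit in a test class in org.apache.hadoop.fs.s3a, and the etag and length are arbitrary values.

    package org.apache.hadoop.fs.s3a;

    import java.io.IOException;
    import org.apache.commons.lang3.tuple.Pair;

    public class PartHandlePayloadSketch {
      static void roundTrip() throws IOException {
        byte[] payload =
            S3AMultipartUploader.buildPartHandlePayload("etag-0123", 1024L);
        Pair<Long, String> decoded =
            S3AMultipartUploader.parsePartHandlePayload(payload);
        // decoded.getLeft() == 1024L, decoded.getRight().equals("etag-0123")
      }
    }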
+ // Access denial and auth exceptions are not retried policyMap.put(AccessDeniedException.class, fail); + policyMap.put(NoAuthWithAWSException.class, fail); policyMap.put(FileNotFoundException.class, fail); policyMap.put(InvalidRequestException.class, fail); @@ -204,6 +217,7 @@ public RetryAction shouldRetry(Exception exception, int retries, int failovers, boolean idempotent) throws Exception { + Preconditions.checkArgument(exception != null, "Null exception"); Exception ex = exception; if (exception instanceof AmazonClientException) { // uprate the amazon client exception for the purpose of exception diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index a5f7d754495..9318a5a4dce 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -21,10 +21,13 @@ import com.amazonaws.AbortedException; import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.Protocol; import com.amazonaws.SdkBaseException; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; import com.amazonaws.auth.InstanceProfileCredentialsProvider; +import com.amazonaws.retry.RetryUtils; import com.amazonaws.services.dynamodbv2.model.AmazonDynamoDBException; import com.amazonaws.services.dynamodbv2.model.LimitExceededException; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException; @@ -44,15 +47,18 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.ProviderUtils; +import org.apache.hadoop.util.VersionInfo; import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; +import java.io.Closeable; import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; @@ -174,11 +180,17 @@ public static IOException translateException(@Nullable String operation, // call considered an sign of connectivity failure return (EOFException)new EOFException(message).initCause(exception); } + if (exception instanceof NoAuthWithAWSException) { + // the exception raised by AWSCredentialProvider list if the + // credentials were not accepted. + return (AccessDeniedException)new AccessDeniedException(path, null, + exception.toString()).initCause(exception); + } return new AWSClientIOException(message, exception); } else { if (exception instanceof AmazonDynamoDBException) { // special handling for dynamo DB exceptions - return translateDynamoDBException(message, + return translateDynamoDBException(path, message, (AmazonDynamoDBException)exception); } IOException ioe; @@ -347,8 +359,10 @@ private static InterruptedIOException translateInterruptedException( /** * Is the exception an instance of a throttling exception. That * is an AmazonServiceException with a 503 response, any - * exception from DynamoDB for limits exceeded, or an - * {@link AWSServiceThrottledException}. 
+ * exception from DynamoDB for limits exceeded, an + * {@link AWSServiceThrottledException}, + * or anything which the AWS SDK's RetryUtils considers to be + * a throttling exception. * @param ex exception to examine * @return true if it is considered a throttling exception */ @@ -357,7 +371,9 @@ public static boolean isThrottleException(Exception ex) { || ex instanceof ProvisionedThroughputExceededException || ex instanceof LimitExceededException || (ex instanceof AmazonServiceException - && 503 == ((AmazonServiceException)ex).getStatusCode()); + && 503 == ((AmazonServiceException)ex).getStatusCode()) + || (ex instanceof SdkBaseException + && RetryUtils.isThrottlingException((SdkBaseException) ex)); } /** @@ -373,20 +389,45 @@ public static boolean signifiesConnectionBroken(SdkBaseException ex) { /** * Translate a DynamoDB exception into an IOException. + * + * @param path path in the DDB * @param message preformatted message for the exception - * @param ex exception + * @param ddbException exception * @return an exception to throw. */ - public static IOException translateDynamoDBException(String message, - AmazonDynamoDBException ex) { - if (isThrottleException(ex)) { - return new AWSServiceThrottledException(message, ex); + public static IOException translateDynamoDBException(final String path, + final String message, + final AmazonDynamoDBException ddbException) { + if (isThrottleException(ddbException)) { + return new AWSServiceThrottledException(message, ddbException); } - if (ex instanceof ResourceNotFoundException) { + if (ddbException instanceof ResourceNotFoundException) { return (FileNotFoundException) new FileNotFoundException(message) - .initCause(ex); + .initCause(ddbException); + } + final int statusCode = ddbException.getStatusCode(); + final String errorCode = ddbException.getErrorCode(); + IOException result = null; + // 400 gets used a lot by DDB + if (statusCode == 400) { + switch (errorCode) { + case "AccessDeniedException": + result = (IOException) new AccessDeniedException( + path, + null, + ddbException.toString()) + .initCause(ddbException); + break; + + default: + result = new AWSBadRequestException(message, ddbException); + } + + } + if (result == null) { + result = new AWSServiceIOException(message, ddbException); } - return new AWSServiceIOException(message, ex); + return result; } /** @@ -556,9 +597,7 @@ public static AWSCredentialProviderList createAWSCredentialProviderSet( Class[] awsClasses = loadAWSProviderClasses(conf, AWS_CREDENTIALS_PROVIDER); if (awsClasses.length == 0) { - S3xLoginHelper.Login creds = getAWSAccessKeys(binding, conf); - credentials.add(new BasicAWSCredentialsProvider( - creds.getUser(), creds.getPassword())); + credentials.add(new SimpleAWSCredentialsProvider(binding, conf)); credentials.add(new EnvironmentVariableCredentialsProvider()); credentials.add(InstanceProfileCredentialsProvider.getInstance()); } else { @@ -689,7 +728,6 @@ public static AWSCredentialsProvider createAWSCredentialProvider( /** * Return the access key and secret for S3 API use. - * Credentials may exist in configuration, within credential providers * or indicated in the UserInfo of the name URI param. * @param name the URI for which we need the access keys; may be null * @param conf the Configuration object to interrogate for keys. 
@@ -698,25 +736,19 @@ public static AWSCredentialsProvider createAWSCredentialProvider( */ public static S3xLoginHelper.Login getAWSAccessKeys(URI name, Configuration conf) throws IOException { - S3xLoginHelper.Login login = - S3xLoginHelper.extractLoginDetailsWithWarnings(name); + S3xLoginHelper.rejectSecretsInURIs(name); Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders( conf, S3AFileSystem.class); String bucket = name != null ? name.getHost() : ""; - // build the secrets. as getPassword() uses the last arg as - // the return value if non-null, the ordering of - // login -> bucket -> base is critical + // get the secrets from the configuration - // get the bucket values - String accessKey = lookupPassword(bucket, c, ACCESS_KEY, - login.getUser()); + // get the access key + String accessKey = lookupPassword(bucket, c, ACCESS_KEY); - // finally the base - String secretKey = lookupPassword(bucket, c, SECRET_KEY, - login.getPassword()); + // and the secret + String secretKey = lookupPassword(bucket, c, SECRET_KEY); - // and override with any per bucket values return new S3xLoginHelper.Login(accessKey, secretKey); } @@ -732,12 +764,54 @@ public static AWSCredentialsProvider createAWSCredentialProvider( * @throws IOException on any IO problem * @throws IllegalArgumentException bad arguments */ + @Deprecated public static String lookupPassword( String bucket, Configuration conf, String baseKey, String overrideVal) throws IOException { + return lookupPassword(bucket, conf, baseKey, overrideVal, ""); + } + + /** + * Get a password from a configuration, including JCEKS files, handling both + * the absolute key and bucket override. + * @param bucket bucket or "" if none known + * @param conf configuration + * @param baseKey base key to look up, e.g "fs.s3a.secret.key" + * @return a password or "". + * @throws IOException on any IO problem + * @throws IllegalArgumentException bad arguments + */ + public static String lookupPassword( + String bucket, + Configuration conf, + String baseKey) + throws IOException { + return lookupPassword(bucket, conf, baseKey, null, ""); + } + + /** + * Get a password from a configuration, including JCEKS files, handling both + * the absolute key and bucket override. + * @param bucket bucket or "" if none known + * @param conf configuration + * @param baseKey base key to look up, e.g "fs.s3a.secret.key" + * @param overrideVal override value: if non empty this is used instead of + * querying the configuration. + * @param defVal value to return if there is no password + * @return a password or the value of defVal. + * @throws IOException on any IO problem + * @throws IllegalArgumentException bad arguments + */ + public static String lookupPassword( + String bucket, + Configuration conf, + String baseKey, + String overrideVal, + String defVal) + throws IOException { String initialVal; Preconditions.checkArgument(baseKey.startsWith(FS_S3A_PREFIX), "%s does not start with $%s", baseKey, FS_S3A_PREFIX); @@ -757,7 +831,7 @@ public static String lookupPassword( // no bucket, make the initial value the override value initialVal = overrideVal; } - return getPassword(conf, baseKey, initialVal); + return getPassword(conf, baseKey, initialVal, defVal); } /** @@ -1059,6 +1133,134 @@ public static void deleteWithWarning(FileSystem fs, } } + /** + * Create a new AWS {@code ClientConfiguration}. + * All clients to AWS services MUST use this for consistent setup + * of connectivity, UA, proxy settings. 
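An illustrative use of the simplified lookup overload above; the per-bucket override key named in the comment follows the usual fs.s3a.bucket.<name>.* convention, which is assumed here rather than spelled out in this patch.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.s3a.S3AUtils;

    import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY;

    public final class LookupPasswordSketch {
      static String secretFor(String bucket, Configuration conf) throws IOException {
        // Checks the per-bucket key (e.g. fs.s3a.bucket.mybucket.secret.key)
        // before the base fs.s3a.secret.key, consulting any configured JCEKS
        // credential providers; returns "" if no secret is found.
        return S3AUtils.lookupPassword(bucket, conf, SECRET_KEY);
      }
    }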
+ * @param conf The Hadoop configuration + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @return new AWS client configuration + */ + public static ClientConfiguration createAwsConf(Configuration conf, + String bucket) + throws IOException { + final ClientConfiguration awsConf = new ClientConfiguration(); + initConnectionSettings(conf, awsConf); + initProxySupport(conf, bucket, awsConf); + initUserAgent(conf, awsConf); + return awsConf; + } + + /** + * Initializes all AWS SDK settings related to connection management. + * + * @param conf Hadoop configuration + * @param awsConf AWS SDK configuration + */ + public static void initConnectionSettings(Configuration conf, + ClientConfiguration awsConf) { + awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS, + DEFAULT_MAXIMUM_CONNECTIONS, 1)); + boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, + DEFAULT_SECURE_CONNECTIONS); + awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP); + awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES, + DEFAULT_MAX_ERROR_RETRIES, 0)); + awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT, + DEFAULT_ESTABLISH_TIMEOUT, 0)); + awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT, + DEFAULT_SOCKET_TIMEOUT, 0)); + int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER, + DEFAULT_SOCKET_SEND_BUFFER, 2048); + int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER, + DEFAULT_SOCKET_RECV_BUFFER, 2048); + awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer); + String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); + if (!signerOverride.isEmpty()) { + LOG.debug("Signer override = {}", signerOverride); + awsConf.setSignerOverride(signerOverride); + } + } + + /** + * Initializes AWS SDK proxy support in the AWS client configuration + * if the S3A settings enable it. + * + * @param conf Hadoop configuration + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @param awsConf AWS SDK configuration to update + * @throws IllegalArgumentException if misconfigured + * @throws IOException problem getting username/secret from password source. + */ + public static void initProxySupport(Configuration conf, + String bucket, + ClientConfiguration awsConf) throws IllegalArgumentException, + IOException { + String proxyHost = conf.getTrimmed(PROXY_HOST, ""); + int proxyPort = conf.getInt(PROXY_PORT, -1); + if (!proxyHost.isEmpty()) { + awsConf.setProxyHost(proxyHost); + if (proxyPort >= 0) { + awsConf.setProxyPort(proxyPort); + } else { + if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { + LOG.warn("Proxy host set without port. Using HTTPS default 443"); + awsConf.setProxyPort(443); + } else { + LOG.warn("Proxy host set without port. 
Using HTTP default 80"); + awsConf.setProxyPort(80); + } + } + final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME, + null, null); + final String proxyPassword = lookupPassword(bucket, conf, PROXY_PASSWORD, + null, null); + if ((proxyUsername == null) != (proxyPassword == null)) { + String msg = "Proxy error: " + PROXY_USERNAME + " or " + + PROXY_PASSWORD + " set without the other."; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + awsConf.setProxyUsername(proxyUsername); + awsConf.setProxyPassword(proxyPassword); + awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN)); + awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); + if (LOG.isDebugEnabled()) { + LOG.debug("Using proxy server {}:{} as user {} with password {} on " + + "domain {} as workstation {}", awsConf.getProxyHost(), + awsConf.getProxyPort(), + String.valueOf(awsConf.getProxyUsername()), + awsConf.getProxyPassword(), awsConf.getProxyDomain(), + awsConf.getProxyWorkstation()); + } + } else if (proxyPort >= 0) { + String msg = + "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + } + + /** + * Initializes the User-Agent header to send in HTTP requests to AWS + * services. We always include the Hadoop version number. The user also + * may set an optional custom prefix to put in front of the Hadoop version + * number. The AWS SDK internally appends its own information, which seems + * to include the AWS SDK version, OS and JVM version. + * + * @param conf Hadoop configuration + * @param awsConf AWS SDK configuration to update + */ + private static void initUserAgent(Configuration conf, + ClientConfiguration awsConf) { + String userAgent = "Hadoop " + VersionInfo.getVersion(); + String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); + if (!userAgentPrefix.isEmpty()) { + userAgent = userAgentPrefix + ", " + userAgent; + } + LOG.debug("Using User-Agent: {}", userAgent); + awsConf.setUserAgentPrefix(userAgent); + } /** * An interface for use in lambda-expressions working with @@ -1200,10 +1402,7 @@ static void patchSecurityCredentialProviders(Configuration conf) { static String getServerSideEncryptionKey(String bucket, Configuration conf) { try { - return lookupPassword(bucket, conf, - SERVER_SIDE_ENCRYPTION_KEY, - getPassword(conf, OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, - null, null)); + return lookupPassword(bucket, conf, SERVER_SIDE_ENCRYPTION_KEY); } catch (IOException e) { LOG.error("Cannot retrieve " + SERVER_SIDE_ENCRYPTION_KEY, e); return ""; @@ -1225,7 +1424,7 @@ static S3AEncryptionMethods getEncryptionAlgorithm(String bucket, Configuration conf) throws IOException { S3AEncryptionMethods sse = S3AEncryptionMethods.getMethod( lookupPassword(bucket, conf, - SERVER_SIDE_ENCRYPTION_ALGORITHM, null)); + SERVER_SIDE_ENCRYPTION_ALGORITHM)); String sseKey = getServerSideEncryptionKey(bucket, conf); int sseKeyLen = StringUtils.isBlank(sseKey) ? 0 : sseKey.length(); String diagnostics = passwordDiagnostics(sseKey, "key"); @@ -1289,18 +1488,40 @@ private static String passwordDiagnostics(String pass, String description) { * @param closeables the objects to close */ public static void closeAll(Logger log, - java.io.Closeable... closeables) { - for (java.io.Closeable c : closeables) { + Closeable... 
closeables) { + if (log == null) { + log = LOG; + } + for (Closeable c : closeables) { if (c != null) { try { - if (log != null) { - log.debug("Closing {}", c); - } + log.debug("Closing {}", c); c.close(); } catch (Exception e) { - if (log != null && log.isDebugEnabled()) { - log.debug("Exception in closing {}", c, e); - } + log.debug("Exception in closing {}", c, e); + } + } + } + } + /** + * Close the Closeable objects and ignore any Exception or + * null pointers. + * (This is the SLF4J equivalent of that in {@code IOUtils}). + * @param log the log to log at debug level. Can be null. + * @param closeables the objects to close + */ + public static void closeAutocloseables(Logger log, + AutoCloseable... closeables) { + if (log == null) { + log = LOG; + } + for (AutoCloseable c : closeables) { + if (c != null) { + try { + log.debug("Closing {}", c); + c.close(); + } catch (Exception e) { + log.debug("Exception in closing {}", c, e); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 9abb362ed46..b237e850d2e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.URI; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.s3.AmazonS3; import org.apache.hadoop.classification.InterfaceAudience; @@ -37,9 +38,13 @@ * Creates a new {@link AmazonS3} client. * * @param name raw input S3A file system URI + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @param credentialSet credentials to use * @return S3 client * @throws IOException IO problem */ - AmazonS3 createS3Client(URI name) throws IOException; + AmazonS3 createS3Client(URI name, + final String bucket, + final AWSCredentialsProvider credentialSet) throws IOException; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java index 7f9e57e2e24..b31b72a5213 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java @@ -21,10 +21,12 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; + import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.security.ProviderUtils; import java.io.IOException; @@ -32,10 +34,10 @@ import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; +import static org.apache.hadoop.fs.s3a.S3AUtils.getAWSAccessKeys; /** * Support simple credentials for authenticating with AWS. - * Keys generated in URLs are not supported. * * Please note that users may reference this class name from configuration * property fs.s3a.aws.credentials.provider. 
Therefore, changing the class name @@ -49,26 +51,17 @@ = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"; private String accessKey; private String secretKey; - private IOException lookupIOE; - public SimpleAWSCredentialsProvider(URI uri, Configuration conf) { - try { - String bucket = uri != null ? uri.getHost() : ""; - Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders( - conf, S3AFileSystem.class); - this.accessKey = S3AUtils.lookupPassword(bucket, c, ACCESS_KEY, null); - this.secretKey = S3AUtils.lookupPassword(bucket, c, SECRET_KEY, null); - } catch (IOException e) { - lookupIOE = e; - } + public SimpleAWSCredentialsProvider(URI uri, Configuration conf) + throws IOException { + + S3xLoginHelper.Login login = getAWSAccessKeys(uri, conf); + this.accessKey = login.getUser(); + this.secretKey = login.getPassword(); } + @Override public AWSCredentials getCredentials() { - if (lookupIOE != null) { - // propagate any initialization problem - throw new CredentialInitializationException(lookupIOE.toString(), - lookupIOE); - } if (!StringUtils.isEmpty(accessKey) && !StringUtils.isEmpty(secretKey)) { return new BasicAWSCredentials(accessKey, secretKey); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java index 3b89bde1982..d42f68e9053 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/TemporaryAWSCredentialsProvider.java @@ -50,32 +50,26 @@ private String accessKey; private String secretKey; private String sessionToken; - private IOException lookupIOE; - public TemporaryAWSCredentialsProvider(Configuration conf) { + public TemporaryAWSCredentialsProvider(Configuration conf) + throws IOException { this(null, conf); } - public TemporaryAWSCredentialsProvider(URI uri, Configuration conf) { - try { + public TemporaryAWSCredentialsProvider(URI uri, Configuration conf) + throws IOException { + // determine the bucket String bucket = uri != null ? 
uri.getHost(): ""; Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders( conf, S3AFileSystem.class); - this.accessKey = lookupPassword(bucket, c, ACCESS_KEY, null); - this.secretKey = lookupPassword(bucket, c, SECRET_KEY, null); - this.sessionToken = lookupPassword(bucket, c, SESSION_TOKEN, null); - } catch (IOException e) { - lookupIOE = e; - } + this.accessKey = lookupPassword(bucket, c, ACCESS_KEY); + this.secretKey = lookupPassword(bucket, c, SECRET_KEY); + this.sessionToken = lookupPassword(bucket, c, SESSION_TOKEN); } + @Override public AWSCredentials getCredentials() { - if (lookupIOE != null) { - // propagate any initialization problem - throw new CredentialInitializationException(lookupIOE.toString(), - lookupIOE); - } if (!StringUtils.isEmpty(accessKey) && !StringUtils.isEmpty(secretKey) && !StringUtils.isEmpty(sessionToken)) { return new BasicSessionCredentials(accessKey, secretKey, sessionToken); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index 46ca65c2036..a85a87f7f44 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -219,6 +219,10 @@ private CompleteMultipartUploadResult finalizeMultipartUpload( List partETags, long length, Retried retrying) throws IOException { + if (partETags.isEmpty()) { + throw new IOException( + "No upload parts in multipart upload to " + destKey); + } return invoker.retry("Completing multipart commit", destKey, true, retrying, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index fdaf9bd544c..e5a363952f6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -24,9 +24,11 @@ import java.util.Locale; import java.util.concurrent.TimeUnit; +import com.amazonaws.AmazonClientException; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; +import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; import com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; @@ -37,6 +39,9 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.Invoker; +import org.apache.hadoop.fs.s3a.S3ARetryPolicy; import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider; import org.apache.hadoop.security.UserGroupInformation; @@ -77,17 +82,21 @@ private final String arn; + private final AWSCredentialProviderList credentialsToSTS; + + private final Invoker invoker; + /** * Instantiate. * This calls {@link #getCredentials()} to fail fast on the inner * role credential retrieval. - * @param uri URI of endpoint. + * @param fsUri URI of the filesystem. 
* @param conf configuration * @throws IOException on IO problems and some parameter checking * @throws IllegalArgumentException invalid parameters * @throws AWSSecurityTokenServiceException problems getting credentials */ - public AssumedRoleCredentialProvider(URI uri, Configuration conf) + public AssumedRoleCredentialProvider(URI fsUri, Configuration conf) throws IOException { arn = conf.getTrimmed(ASSUMED_ROLE_ARN, ""); @@ -99,13 +108,14 @@ public AssumedRoleCredentialProvider(URI uri, Configuration conf) Class[] awsClasses = loadAWSProviderClasses(conf, ASSUMED_ROLE_CREDENTIALS_PROVIDER, SimpleAWSCredentialsProvider.class); - AWSCredentialProviderList credentials = new AWSCredentialProviderList(); + credentialsToSTS = new AWSCredentialProviderList(); for (Class aClass : awsClasses) { if (this.getClass().equals(aClass)) { throw new IOException(E_FORBIDDEN_PROVIDER); } - credentials.add(createAWSCredentialProvider(conf, aClass, uri)); + credentialsToSTS.add(createAWSCredentialProvider(conf, aClass, fsUri)); } + LOG.debug("Credentials to obtain role credentials: {}", credentialsToSTS); // then the STS binding sessionName = conf.getTrimmed(ASSUMED_ROLE_SESSION_NAME, @@ -122,14 +132,27 @@ public AssumedRoleCredentialProvider(URI uri, Configuration conf) LOG.debug("Scope down policy {}", policy); builder.withScopeDownPolicy(policy); } - String epr = conf.get(ASSUMED_ROLE_STS_ENDPOINT, ""); - if (StringUtils.isNotEmpty(epr)) { - LOG.debug("STS Endpoint: {}", epr); - builder.withServiceEndpoint(epr); - } - LOG.debug("Credentials to obtain role credentials: {}", credentials); - builder.withLongLivedCredentialsProvider(credentials); + String endpoint = conf.get(ASSUMED_ROLE_STS_ENDPOINT, ""); + String region = conf.get(ASSUMED_ROLE_STS_ENDPOINT_REGION, + ASSUMED_ROLE_STS_ENDPOINT_REGION_DEFAULT); + AWSSecurityTokenServiceClientBuilder stsbuilder = + STSClientFactory.builder( + conf, + fsUri.getHost(), + credentialsToSTS, + endpoint, + region); + // the STS client is not tracked for a shutdown in close(), because it + // (currently) throws an UnsupportedOperationException in shutdown(). + builder.withStsClient(stsbuilder.build()); + + //now build the provider stsProvider = builder.build(); + + // to handle STS throttling by the AWS account, we + // need to retry + invoker = new Invoker(new S3ARetryPolicy(conf), this::operationRetried); + // and force in a fail-fast check just to keep the stack traces less // convoluted getCredentials(); @@ -143,7 +166,17 @@ public AssumedRoleCredentialProvider(URI uri, Configuration conf) @Override public AWSCredentials getCredentials() { try { - return stsProvider.getCredentials(); + return invoker.retryUntranslated("getCredentials", + true, + stsProvider::getCredentials); + } catch (IOException e) { + // this is in the signature of retryUntranslated; + // its hard to see how this could be raised, but for + // completeness, it is wrapped as an Amazon Client Exception + // and rethrown. + throw new AmazonClientException( + "getCredentials failed: " + e, + e); } catch (AWSSecurityTokenServiceException e) { LOG.error("Failed to get credentials for role {}", arn, e); @@ -161,7 +194,7 @@ public void refresh() { */ @Override public void close() { - stsProvider.close(); + S3AUtils.closeAutocloseables(LOG, stsProvider, credentialsToSTS); } @Override @@ -205,4 +238,23 @@ static String sanitize(String session) { return r.toString(); } + /** + * Callback from {@link Invoker} when an operation is retried. 
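// Editor's illustrative sketch, not part of the patch above: roughly how a
// client might configure the assumed-role provider so that its constructor
// (shown above) builds the STS client against an explicit endpoint and
// signing region. The ARN, endpoint and region values are invented examples;
// the ASSUMED_ROLE_* keys are assumed to be statically imported from
// org.apache.hadoop.fs.s3a.Constants, with the usual Hadoop/AWS SDK imports.
static AWSCredentialsProvider exampleAssumedRoleSetup() throws IOException {
  Configuration conf = new Configuration();
  conf.set(ASSUMED_ROLE_ARN, "arn:aws:iam::123456789012:role/example-role");
  // inner credentials used to authenticate with STS
  conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER,
      SimpleAWSCredentialsProvider.class.getName());
  // new in this patch: explicit STS endpoint and signing region
  conf.set(ASSUMED_ROLE_STS_ENDPOINT, "sts.eu-west-1.amazonaws.com");
  conf.set(ASSUMED_ROLE_STS_ENDPOINT_REGION, "eu-west-1");
  // the constructor fails fast if the role cannot be assumed
  return new AssumedRoleCredentialProvider(
      URI.create("s3a://example-bucket/"), conf);
}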
+ * @param text text of the operation + * @param ex exception + * @param retries number of retries + * @param idempotent is the method idempotent + */ + public void operationRetried( + String text, + Exception ex, + int retries, + boolean idempotent) { + if (retries == 0) { + // log on the first retry attempt of the credential access. + // At worst, this means one log entry every intermittent renewal + // time. + LOG.info("Retried {}", text); + } + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java new file mode 100644 index 00000000000..f48e17a6210 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/NoAuthWithAWSException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.auth; + +import com.amazonaws.AmazonClientException; + +/** + * A specific subclass of {@code AmazonClientException} which can + * be used in the retry logic to fail fast when there is any + * authentication problem. + */ +public class NoAuthWithAWSException extends AmazonClientException { + + public NoAuthWithAWSException(final String message, final Throwable t) { + super(message, t); + } + + public NoAuthWithAWSException(final String message) { + super(message); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java index ca2c993a200..d4568b0dc05 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RoleModel.java @@ -205,6 +205,14 @@ public static Policy policy(Statement... statements) { return new Policy(statements); } + /** + * From a set of statements, create a policy. + * @param statements statements + * @return the policy + */ + public static Policy policy(final List statements) { + return new Policy(statements); + } /** * Effect options. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java index 6711eee25a6..34ed2958e49 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/RolePolicies.java @@ -29,6 +29,55 @@ private RolePolicies() { } + /** All KMS operations: {@value}.*/ + public static final String KMS_ALL_OPERATIONS = "kms:*"; + + /** KMS encryption. This is Not used by SSE-KMS: {@value}. 
*/ + public static final String KMS_ENCRYPT = "kms:Encrypt"; + + /** + * Decrypt data encrypted with SSE-KMS: {@value}. + */ + public static final String KMS_DECRYPT = "kms:Decrypt"; + + /** + * Arn for all KMS keys: {@value}. + */ + public static final String KMS_ALL_KEYS = "arn:aws:kms:*"; + + /** + * This is used by S3 to generate a per-object encryption key and + * the encrypted value of this, the latter being what it tags + * the object with for later decryption: {@value}. + */ + public static final String KMS_GENERATE_DATA_KEY = "kms:GenerateDataKey"; + + /** + * Actions needed to read and write SSE-KMS data. + */ + private static final String[] KMS_KEY_RW = + new String[]{KMS_DECRYPT, KMS_GENERATE_DATA_KEY}; + + /** + * Actions needed to read SSE-KMS data. + */ + private static final String[] KMS_KEY_READ = + new String[] {KMS_DECRYPT}; + + /** + * Statement to allow KMS R/W access, so full use of + * SSE-KMS. + */ + public static final Statement STATEMENT_ALLOW_SSE_KMS_RW = + statement(true, KMS_ALL_KEYS, KMS_KEY_RW); + + /** + * Statement to allow read access to KMS keys, so the ability + * to read SSE-KMS data, but not write it. + */ + public static final Statement STATEMENT_ALLOW_SSE_KMS_READ = + statement(true, KMS_ALL_KEYS, KMS_KEY_READ); + /** * All S3 operations: {@value}. */ @@ -52,7 +101,6 @@ private RolePolicies() { public static final String S3_LIST_BUCKET_MULTPART_UPLOADS = "s3:ListBucketMultipartUploads"; - /** * List multipart upload is needed for the S3A Commit protocols. */ @@ -97,6 +145,8 @@ private RolePolicies() { public static final String S3_GET_OBJECT_VERSION = "s3:GetObjectVersion"; + public static final String S3_GET_BUCKET_LOCATION = "s3:GetBucketLocation"; + public static final String S3_GET_OBJECT_VERSION_ACL = "s3:GetObjectVersionAcl"; @@ -128,7 +178,8 @@ private RolePolicies() { public static final String S3_RESTORE_OBJECT = "s3:RestoreObject"; /** - * Actions needed to read data from S3 through S3A. + * Actions needed to read a file in S3 through S3A, excluding + * S3Guard and SSE-KMS. */ public static final String[] S3_PATH_READ_OPERATIONS = new String[]{ @@ -136,18 +187,20 @@ private RolePolicies() { }; /** - * Actions needed to read data from S3 through S3A. + * Base actions needed to read data from S3 through S3A, + * excluding SSE-KMS data and S3Guard-ed buckets. */ public static final String[] S3_ROOT_READ_OPERATIONS = new String[]{ S3_LIST_BUCKET, S3_LIST_BUCKET_MULTPART_UPLOADS, - S3_GET_OBJECT, + S3_ALL_GET, }; /** * Actions needed to write data to an S3A Path. - * This includes the appropriate read operations. + * This includes the appropriate read operations, but + * not SSE-KMS or S3Guard support. */ public static final String[] S3_PATH_RW_OPERATIONS = new String[]{ @@ -163,6 +216,7 @@ private RolePolicies() { * This is purely the extra operations needed for writing atop * of the read operation set. * Deny these and a path is still readable, but not writeable. + * Excludes: SSE-KMS and S3Guard permissions. */ public static final String[] S3_PATH_WRITE_OPERATIONS = new String[]{ @@ -173,6 +227,7 @@ private RolePolicies() { /** * Actions needed for R/W IO from the root of a bucket. + * Excludes: SSE-KMS and S3Guard permissions. */ public static final String[] S3_ROOT_RW_OPERATIONS = new String[]{ @@ -190,26 +245,57 @@ private RolePolicies() { */ public static final String DDB_ALL_OPERATIONS = "dynamodb:*"; - public static final String DDB_ADMIN = "dynamodb:*"; + /** + * Operations needed for DDB/S3Guard Admin.
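// Editor's illustrative sketch, not part of the patch: one way the new
// statements could be combined into a policy for a read-only role that can
// still decrypt SSE-KMS data and use an S3Guard table. "readOnlyPolicy" is an
// invented name; statement() and policy() are the RoleModel helpers already
// used in this file, and STATEMENT_S3GUARD_CLIENT is added further down in
// this patch.
Policy readOnlyPolicy = policy(
    statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS),
    STATEMENT_ALLOW_SSE_KMS_READ,    // decrypt SSE-KMS protected objects
    STATEMENT_S3GUARD_CLIENT);       // non-admin DynamoDB access for S3Guard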
+ * For now: make this {@link #DDB_ALL_OPERATIONS}. + */ + public static final String DDB_ADMIN = DDB_ALL_OPERATIONS; + /** + * Permission for DDB describeTable() operation: {@value}. + * This is used during initialization. + */ + public static final String DDB_DESCRIBE_TABLE = "dynamodb:DescribeTable"; - public static final String DDB_BATCH_WRITE = "dynamodb:BatchWriteItem"; + /** + * Permission to query the DDB table: {@value}. + */ + public static final String DDB_QUERY = "dynamodb:Query"; /** - * All DynamoDB tables: {@value}. + * Permission for DDB operation to get a record: {@value}. */ - public static final String ALL_DDB_TABLES = "arn:aws:dynamodb:::*"; + public static final String DDB_GET_ITEM = "dynamodb:GetItem"; + /** + * Permission for DDB write record operation: {@value}. + */ + public static final String DDB_PUT_ITEM = "dynamodb:PutItem"; + /** + * Permission for DDB update single item operation: {@value}. + */ + public static final String DDB_UPDATE_ITEM = "dynamodb:UpdateItem"; - public static final String WILDCARD = "*"; + /** + * Permission for DDB delete operation: {@value}. + */ + public static final String DDB_DELETE_ITEM = "dynamodb:DeleteItem"; /** - * Allow all S3 Operations. + * Permission for DDB operation: {@value}. */ - public static final Statement STATEMENT_ALL_S3 = statement(true, - S3_ALL_BUCKETS, - S3_ALL_OPERATIONS); + public static final String DDB_BATCH_GET_ITEM = "dynamodb:BatchGetItem"; + + /** + * Batch write permission for DDB: {@value}. + */ + public static final String DDB_BATCH_WRITE_ITEM = "dynamodb:BatchWriteItem"; + + /** + * All DynamoDB tables: {@value}. + */ + public static final String ALL_DDB_TABLES = "arn:aws:dynamodb:*"; /** * Statement to allow all DDB access. @@ -218,11 +304,36 @@ private RolePolicies() { ALL_DDB_TABLES, DDB_ALL_OPERATIONS); /** - * Allow all S3 and S3Guard operations. + * Statement to allow all client operations needed for S3Guard, + * but none of the admin operations. + */ + public static final Statement STATEMENT_S3GUARD_CLIENT = statement(true, + ALL_DDB_TABLES, + DDB_BATCH_GET_ITEM, + DDB_BATCH_WRITE_ITEM, + DDB_DELETE_ITEM, + DDB_DESCRIBE_TABLE, + DDB_GET_ITEM, + DDB_PUT_ITEM, + DDB_QUERY, + DDB_UPDATE_ITEM + ); + + /** + * Allow all S3 Operations. + * This does not cover DDB or S3-KMS + */ + public static final Statement STATEMENT_ALL_S3 = statement(true, + S3_ALL_BUCKETS, + S3_ALL_OPERATIONS); + + /** + * Policy for all S3 and S3Guard operations, and SSE-KMS. */ public static final Policy ALLOW_S3_AND_SGUARD = policy( STATEMENT_ALL_S3, - STATEMENT_ALL_DDB + STATEMENT_ALL_DDB, + STATEMENT_ALLOW_SSE_KMS_RW ); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java new file mode 100644 index 00000000000..10bf88c61f9 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/STSClientFactory.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.auth; + +import java.io.IOException; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AUtils; + +/** + * Factory for creating STS Clients. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class STSClientFactory { + + private static final Logger LOG = + LoggerFactory.getLogger(STSClientFactory.class); + + /** + * Create the builder ready for any final configuration options. + * Picks up connection settings from the Hadoop configuration, including + * proxy secrets. + * @param conf Configuration to act as source of options. + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @param credentials AWS credential chain to use + * @param stsEndpoint optional endpoint "https://sns.us-west-1.amazonaws.com" + * @param stsRegion the region, e.g "us-west-1" + * @return the builder to call {@code build()} + * @throws IOException problem reading proxy secrets + */ + public static AWSSecurityTokenServiceClientBuilder builder( + final Configuration conf, + final String bucket, + final AWSCredentialsProvider credentials, final String stsEndpoint, + final String stsRegion) throws IOException { + Preconditions.checkArgument(credentials != null, "No credentials"); + final AWSSecurityTokenServiceClientBuilder builder + = AWSSecurityTokenServiceClientBuilder.standard(); + final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket); + builder.withClientConfiguration(awsConf); + builder.withCredentials(credentials); + if (StringUtils.isNotEmpty(stsEndpoint)) { + LOG.debug("STS Endpoint ={}", stsEndpoint); + builder.withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(stsEndpoint, stsRegion)); + } + return builder; + } + +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java index 5f1ddfa6fc2..d2501da6aad 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitter.java @@ -292,7 +292,7 @@ public String toString() { final StringBuilder sb = new StringBuilder( "AbstractS3ACommitter{"); sb.append("role=").append(role); - sb.append(", name").append(getName()); + sb.append(", name=").append(getName()); sb.append(", outputPath=").append(getOutputPath()); sb.append(", workPath=").append(workPath); sb.append('}'); @@ -532,8 +532,14 @@ protected void 
abortPendingUploadsInCleanup( new DurationInfo(LOG, "Aborting all pending commits under %s", dest)) { CommitOperations ops = getCommitOperations(); - List pending = ops - .listPendingUploadsUnderPath(dest); + List pending; + try { + pending = ops.listPendingUploadsUnderPath(dest); + } catch (IOException e) { + // raised if the listPendingUploads call failed. + maybeIgnore(suppressExceptions, "aborting pending uploads", e); + return; + } Tasks.foreach(pending) .executeWith(buildThreadPool(getJobContext())) .suppressExceptions(suppressExceptions) @@ -656,7 +662,7 @@ protected void maybeIgnore( } /** - * Execute an operation; maybe suppress any raised IOException. + * Log or rethrow a caught IOException. * @param suppress should raised IOEs be suppressed? * @param action action (for logging when the IOE is suppressed. * @param ex exception @@ -667,7 +673,7 @@ protected void maybeIgnore( String action, IOException ex) throws IOException { if (suppress) { - LOG.info(action, ex); + LOG.debug(action, ex); } else { throw ex; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java index 6b170f9ef43..36d0af187d3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java @@ -77,9 +77,20 @@ public PathOutputCommitter createTaskCommitter(S3AFileSystem fileSystem, AbstractS3ACommitterFactory factory = chooseCommitterFactory(fileSystem, outputPath, context.getConfiguration()); - return factory != null ? - factory.createTaskCommitter(fileSystem, outputPath, context) - : createFileOutputCommitter(outputPath, context); + if (factory != null) { + PathOutputCommitter committer = factory.createTaskCommitter( + fileSystem, outputPath, context); + LOG.info("Using committer {} to output data to {}", + (committer instanceof AbstractS3ACommitter + ? ((AbstractS3ACommitter) committer).getName() + : committer.toString()), + outputPath); + return committer; + } else { + LOG.warn("Using standard FileOutputCommitter to commit work." 
+ + " This is slow and potentially unsafe."); + return createFileOutputCommitter(outputPath, context); + } } /** @@ -104,6 +115,7 @@ private AbstractS3ACommitterFactory chooseCommitterFactory( String name = fsConf.getTrimmed(FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); name = taskConf.getTrimmed(FS_S3A_COMMITTER_NAME, name); + LOG.debug("Committer option is {}", name); switch (name) { case COMMITTER_NAME_FILE: factory = null; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicS3GuardCommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicS3GuardCommitter.java index c3051416fe6..c956a980641 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicS3GuardCommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/magic/MagicS3GuardCommitter.java @@ -285,4 +285,11 @@ public Path getTempTaskAttemptPath(TaskAttemptContext context) { return CommitUtilsWithMR.getTempTaskAttemptPath(context, getOutputPath()); } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "MagicCommitter{"); + sb.append('}'); + return sb.toString(); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java index 3eda24fbe20..23bb06bd923 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java @@ -27,13 +27,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.TaskAttemptContext; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; -import static org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants.*; /** * This commits to a directory. 
@@ -70,10 +68,8 @@ public void setupJob(JobContext context) throws IOException { if (getConflictResolutionMode(context, fs.getConf()) == ConflictResolution.FAIL && fs.exists(outputPath)) { - LOG.debug("Failing commit by task attempt {} to write" - + " to existing output path {}", - context.getJobID(), getOutputPath()); - throw new PathExistsException(outputPath.toString(), E_DEST_EXISTS); + throw failDestinationExists(outputPath, + "Setting job as " + getRole()); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/PartitionedStagingCommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/PartitionedStagingCommitter.java index bfaf4434d1a..b51bcb5f9c3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/PartitionedStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/PartitionedStagingCommitter.java @@ -31,14 +31,12 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.s3a.commit.PathCommitException; import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.TaskAttemptContext; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; -import static org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants.*; /** * Partitioned committer. @@ -100,11 +98,8 @@ protected int commitTaskInternal(TaskAttemptContext context, Path partitionPath = getFinalPath(partition + "/file", context).getParent(); if (fs.exists(partitionPath)) { - LOG.debug("Failing commit by task attempt {} to write" - + " to existing path {} under {}", - context.getTaskAttemptID(), partitionPath, getOutputPath()); - throw new PathExistsException(partitionPath.toString(), - E_DEST_EXISTS); + throw failDestinationExists(partitionPath, + "Committing task " + context.getTaskAttemptID()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/Paths.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/Paths.java index d5d256aefb2..a941572f1e7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/Paths.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/Paths.java @@ -167,13 +167,15 @@ public static Path getLocalTaskAttemptTempDir(final Configuration conf, return FileSystem.getLocal(conf).makeQualified( allocator.getLocalPathForWrite(uuid, conf)); }); - } catch (ExecutionException e) { - throw new RuntimeException(e.getCause()); - } catch (UncheckedExecutionException e) { - if (e.getCause() instanceof RuntimeException) { - throw (RuntimeException) e.getCause(); + } catch (ExecutionException | UncheckedExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; } - throw new RuntimeException(e); + if (cause instanceof IOException) { + throw (IOException) cause; + } + throw new IOException(e); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java index 2182eaa2dd4..6d02e866edb 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java @@ -36,6 +36,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathExistsException; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.commit.AbstractS3ACommitter; import org.apache.hadoop.fs.s3a.commit.CommitConstants; @@ -500,6 +502,10 @@ public void setupJob(JobContext context) throws IOException { listAndFilter(attemptFS, wrappedJobAttemptPath, false, HIDDEN_FILE_FILTER)); + } catch (FileNotFoundException e) { + // this can mean the job was aborted early on, so don't confuse people + // with long stack traces that aren't the underlying problem. + maybeIgnore(suppressExceptions, "Pending upload directory not found", e); } catch (IOException e) { // unable to work with endpoint, if suppressing errors decide our actions maybeIgnore(suppressExceptions, "Listing pending uploads", e); @@ -565,13 +571,13 @@ protected void abortJobInternal(JobContext context, } /** - * Delete the working paths of a job. Does not attempt to clean up - * the work of the wrapped committer. + * Delete the working paths of a job. *

 * <ol>
 *   <li>The job attempt path</li>
- *   <li>$dest/__temporary</li>
+ *   <li>{@code $dest/__temporary}</li>
 *   <li>the local working directory for staged files</li>
 * </ol>
+ * Does not attempt to clean up the work of the wrapped committer. * @param context job context * @throws IOException IO failure */ @@ -835,6 +841,44 @@ public final ConflictResolution getConflictResolutionMode( return conflictResolution; } + /** + * Generate a {@link PathExistsException} because the destination exists. + * Lists some of the child entries first, to help diagnose the problem. + * @param path path which exists + * @param description description (usually task/job ID) + * @return an exception to throw + */ + protected PathExistsException failDestinationExists(final Path path, + final String description) { + + LOG.error("{}: Failing commit by job {} to write" + + " to existing output path {}.", + description, + getJobContext().getJobID(), path); + // List the first 10 descendants, to give some details + // on what is wrong but not overload things if there are many files. + try { + int limit = 10; + RemoteIterator lf + = getDestFS().listFiles(path, true); + LOG.info("Partial Directory listing"); + while (limit > 0 && lf.hasNext()) { + limit--; + LocatedFileStatus status = lf.next(); + LOG.info("{}: {}", + status.getPath(), + status.isDirectory() + ? " dir" + : ("file size " + status.getLen() + " bytes")); + } + } catch (IOException e) { + LOG.info("Discarding exception raised when listing {}: " + e, path); + LOG.debug("stack trace ", e); + } + return new PathExistsException(path.toString(), + description + ": " + InternalCommitterConstants.E_DEST_EXISTS); + } + /** * Get the conflict mode option string. * @param context context with the config diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java new file mode 100644 index 00000000000..a67fc4e22fc --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.s3a.Tristate; + +/** + * {@code DDBPathMetadata} wraps {@link PathMetadata} and adds the + * isAuthoritativeDir flag to provide support for authoritative directory + * listings in {@link DynamoDBMetadataStore}. 
+ */ +public class DDBPathMetadata extends PathMetadata { + + private boolean isAuthoritativeDir; + + public DDBPathMetadata(PathMetadata pmd, boolean isAuthoritativeDir) { + super(pmd.getFileStatus(), pmd.isEmptyDirectory(), pmd.isDeleted()); + this.isAuthoritativeDir = isAuthoritativeDir; + } + + public DDBPathMetadata(PathMetadata pmd) { + super(pmd.getFileStatus(), pmd.isEmptyDirectory(), pmd.isDeleted()); + this.isAuthoritativeDir = false; + } + + public DDBPathMetadata(FileStatus fileStatus) { + super(fileStatus); + this.isAuthoritativeDir = false; + } + + public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir, + boolean isDeleted) { + super(fileStatus, isEmptyDir, isDeleted); + this.isAuthoritativeDir = false; + } + + public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir, + boolean isDeleted, boolean isAuthoritativeDir) { + super(fileStatus, isEmptyDir, isDeleted); + this.isAuthoritativeDir = isAuthoritativeDir; + } + + public boolean isAuthoritativeDir() { + return isAuthoritativeDir; + } + + public void setAuthoritativeDir(boolean authoritativeDir) { + isAuthoritativeDir = authoritativeDir; + } + + @Override + public boolean equals(Object o) { + return super.equals(o); + } + + @Override public int hashCode() { + return super.hashCode(); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java index 91e64cddf60..9e1d2f41b51 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java @@ -34,10 +34,9 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.s3a.DefaultS3ClientFactory; +import org.apache.hadoop.fs.s3a.S3AUtils; import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY; -import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet; /** * Interface to create a DynamoDB client. @@ -58,10 +57,14 @@ * it will indicate an error. * * @param defaultRegion the default region of the AmazonDynamoDB client + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @param credentials credentials to use for authentication. * @return a new DynamoDB client * @throws IOException if any IO error happens */ - AmazonDynamoDB createDynamoDBClient(String defaultRegion) throws IOException; + AmazonDynamoDB createDynamoDBClient(final String defaultRegion, + final String bucket, + final AWSCredentialsProvider credentials) throws IOException; /** * The default implementation for creating an AmazonDynamoDB. 
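// Editor's illustrative sketch, not part of the patch: how the new
// authoritative-directory flag on DDBPathMetadata (the class added above)
// might be populated when a directory's listing is fully known. The
// FileStatus values are invented for the example.
FileStatus dirStatus = new FileStatus(0, true, 1, 0,
    System.currentTimeMillis(), new Path("s3a://example-bucket/data"));
DDBPathMetadata dirEntry = new DDBPathMetadata(
    dirStatus,        // status of the directory being recorded
    Tristate.FALSE,   // not an empty directory
    false,            // not deleted
    true);            // its listing in the table is authoritative
boolean fullyListed = dirEntry.isAuthoritativeDir();  // true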
@@ -69,16 +72,15 @@ class DefaultDynamoDBClientFactory extends Configured implements DynamoDBClientFactory { @Override - public AmazonDynamoDB createDynamoDBClient(String defaultRegion) + public AmazonDynamoDB createDynamoDBClient(String defaultRegion, + final String bucket, + final AWSCredentialsProvider credentials) throws IOException { Preconditions.checkNotNull(getConf(), "Should have been configured before usage"); final Configuration conf = getConf(); - final AWSCredentialsProvider credentials = - createAWSCredentialProviderSet(null, conf); - final ClientConfiguration awsConf = - DefaultS3ClientFactory.createAwsConf(conf); + final ClientConfiguration awsConf = S3AUtils.createAwsConf(conf, bucket); final String region = getRegion(conf, defaultRegion); LOG.debug("Creating DynamoDB client in region {}", region); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 116827dd4f1..7c826c11db7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -22,18 +22,27 @@ import java.io.IOException; import java.io.InterruptedIOException; import java.net.URI; +import java.nio.file.AccessDeniedException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Date; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Set; import java.util.TreeMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import com.amazonaws.AmazonClientException; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.document.BatchWriteItemOutcome; import com.amazonaws.services.dynamodbv2.document.DynamoDB; @@ -54,6 +63,8 @@ import com.amazonaws.services.dynamodbv2.model.ResourceInUseException; import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException; import com.amazonaws.services.dynamodbv2.model.TableDescription; +import com.amazonaws.services.dynamodbv2.model.Tag; +import com.amazonaws.services.dynamodbv2.model.TagResourceRequest; import com.amazonaws.services.dynamodbv2.model.WriteRequest; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -67,21 +78,23 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.AWSServiceThrottledException; import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3AInstrumentation; -import org.apache.hadoop.fs.s3a.S3ARetryPolicy; import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.Tristate; +import org.apache.hadoop.fs.s3a.auth.RolePolicies; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import 
org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; +import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*; import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.*; @@ -187,10 +200,6 @@ public static final String E_INCOMPATIBLE_VERSION = "Database table is from an incompatible S3Guard version."; - /** Initial delay for retries when batched operations get throttled by - * DynamoDB. Value is {@value} msec. */ - public static final long MIN_RETRY_SLEEP_MSEC = 100; - @VisibleForTesting static final String DESCRIPTION = "S3Guard metadata store in DynamoDB"; @@ -203,17 +212,33 @@ @VisibleForTesting static final String TABLE = "table"; + @VisibleForTesting + static final String HINT_DDB_IOPS_TOO_LOW + = " This may be because the write threshold of DynamoDB is set too low."; + + @VisibleForTesting + static final String THROTTLING = "Throttling"; + private static ValueMap deleteTrackingValueMap = new ValueMap().withBoolean(":false", false); + private AmazonDynamoDB amazonDynamoDB; private DynamoDB dynamoDB; + private AWSCredentialProviderList credentials; private String region; private Table table; private String tableName; private Configuration conf; private String username; - private RetryPolicy dataAccessRetryPolicy; + /** + * This policy is mostly for batched writes, not for processing + * exceptions in invoke() calls. + * It also has a role purpose in {@link #getVersionMarkerItem()}; + * look at that method for the details. + */ + private RetryPolicy batchWriteRetryPolicy; + private S3AInstrumentation.S3GuardInstrumentation instrumentation; /** Owner FS: only valid if configured with an owner FS. */ @@ -224,8 +249,15 @@ Invoker.NO_OP ); - /** Data access can have its own policies. */ - private Invoker dataAccess; + /** Invoker for read operations. */ + private Invoker readOp; + + /** Invoker for write operations. */ + private Invoker writeOp; + + private final AtomicLong readThrottleEvents = new AtomicLong(0); + private final AtomicLong writeThrottleEvents = new AtomicLong(0); + private final AtomicLong batchWriteCapacityExceededEvents = new AtomicLong(0); /** * Total limit on the number of throttle events after which @@ -242,50 +274,74 @@ * A utility function to create DynamoDB instance. * @param conf the file system configuration * @param s3Region region of the associated S3 bucket (if any). + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @param credentials credentials. * @return DynamoDB instance. * @throws IOException I/O error. 
*/ - private static DynamoDB createDynamoDB(Configuration conf, String s3Region) + private DynamoDB createDynamoDB( + final Configuration conf, + final String s3Region, + final String bucket, + final AWSCredentialsProvider credentials) throws IOException { - Preconditions.checkNotNull(conf); - final Class cls = conf.getClass( - S3GUARD_DDB_CLIENT_FACTORY_IMPL, - S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT, - DynamoDBClientFactory.class); - LOG.debug("Creating DynamoDB client {} with S3 region {}", cls, s3Region); - final AmazonDynamoDB dynamoDBClient = ReflectionUtils.newInstance(cls, conf) - .createDynamoDBClient(s3Region); - return new DynamoDB(dynamoDBClient); + if (amazonDynamoDB == null) { + Preconditions.checkNotNull(conf); + final Class cls = + conf.getClass(S3GUARD_DDB_CLIENT_FACTORY_IMPL, + S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT, DynamoDBClientFactory.class); + LOG.debug("Creating DynamoDB client {} with S3 region {}", cls, s3Region); + amazonDynamoDB = ReflectionUtils.newInstance(cls, conf) + .createDynamoDBClient(s3Region, bucket, credentials); + } + return new DynamoDB(amazonDynamoDB); } + /** + * {@inheritDoc}. + * The credentials for authenticating with S3 are requested from the + * FS via {@link S3AFileSystem#shareCredentials(String)}; this will + * increment the reference counter of these credentials. + * @param fs {@code S3AFileSystem} associated with the MetadataStore + * @throws IOException on a failure + */ @Override @Retries.OnceRaw public void initialize(FileSystem fs) throws IOException { Preconditions.checkNotNull(fs, "Null filesystem"); Preconditions.checkArgument(fs instanceof S3AFileSystem, "DynamoDBMetadataStore only supports S3A filesystem."); - owner = (S3AFileSystem) fs; - instrumentation = owner.getInstrumentation().getS3GuardInstrumentation(); + bindToOwnerFilesystem((S3AFileSystem) fs); final String bucket = owner.getBucket(); - conf = owner.getConf(); String confRegion = conf.getTrimmed(S3GUARD_DDB_REGION_KEY); if (!StringUtils.isEmpty(confRegion)) { region = confRegion; LOG.debug("Overriding S3 region with configured DynamoDB region: {}", region); } else { - region = owner.getBucketLocation(); + try { + region = owner.getBucketLocation(); + } catch (AccessDeniedException e) { + // access denied here == can't call getBucket. Report meaningfully + URI uri = owner.getUri(); + LOG.error("Failed to get bucket location from S3 bucket {}", + uri); + throw (IOException)new AccessDeniedException( + "S3 client role lacks permission " + + RolePolicies.S3_GET_BUCKET_LOCATION + " for " + uri) + .initCause(e); + } LOG.debug("Inferring DynamoDB region from S3 bucket: {}", region); } - username = owner.getUsername(); - dynamoDB = createDynamoDB(conf, region); + credentials = owner.shareCredentials("s3guard"); + dynamoDB = createDynamoDB(conf, region, bucket, credentials); // use the bucket as the DynamoDB table name if not specified in config tableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY, bucket); initDataAccessRetries(conf); // set up a full retry policy - invoker = new Invoker(new S3ARetryPolicy(conf), + invoker = new Invoker(new S3GuardDataAccessRetryPolicy(conf), this::retryEvent ); @@ -294,6 +350,20 @@ public void initialize(FileSystem fs) throws IOException { instrumentation.initialized(); } + /** + * Declare that this table is owned by the specific S3A FS instance. + * This will bind some fields to the values provided by the owner, + * including wiring up the instrumentation. 
+ * @param fs owner filesystem + */ + @VisibleForTesting + void bindToOwnerFilesystem(final S3AFileSystem fs) { + owner = fs; + conf = owner.getConf(); + instrumentation = owner.getInstrumentation().getS3GuardInstrumentation(); + username = owner.getUsername(); + } + /** * Performs one-time initialization of the metadata store via configuration. * @@ -311,6 +381,9 @@ public void initialize(FileSystem fs) throws IOException { * must declare the table name and region in the * {@link Constants#S3GUARD_DDB_TABLE_NAME_KEY} and * {@link Constants#S3GUARD_DDB_REGION_KEY} respectively. + * It also creates a new credential provider list from the configuration, + * using the base fs.s3a.* options, as there is no bucket to infer per-bucket + * settings from. * * @see #initialize(FileSystem) * @throws IOException if there is an error @@ -327,7 +400,8 @@ public void initialize(Configuration config) throws IOException { region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY); Preconditions.checkArgument(!StringUtils.isEmpty(region), "No DynamoDB region configured"); - dynamoDB = createDynamoDB(conf, region); + credentials = createAWSCredentialProviderSet(null, conf); + dynamoDB = createDynamoDB(conf, region, null, credentials); username = UserGroupInformation.getCurrentUser().getShortUserName(); initDataAccessRetries(conf); @@ -338,16 +412,23 @@ public void initialize(Configuration config) throws IOException { /** * Set retry policy. This is driven by the value of * {@link Constants#S3GUARD_DDB_MAX_RETRIES} with an exponential backoff - * between each attempt of {@link #MIN_RETRY_SLEEP_MSEC} milliseconds. + * between each attempt of {@link Constants#S3GUARD_DDB_THROTTLE_RETRY_INTERVAL} + * milliseconds. * @param config configuration for data access */ private void initDataAccessRetries(Configuration config) { - int maxRetries = config.getInt(S3GUARD_DDB_MAX_RETRIES, - S3GUARD_DDB_MAX_RETRIES_DEFAULT); - dataAccessRetryPolicy = RetryPolicies - .exponentialBackoffRetry(maxRetries, MIN_RETRY_SLEEP_MSEC, + batchWriteRetryPolicy = RetryPolicies + .exponentialBackoffRetry( + config.getInt(S3GUARD_DDB_MAX_RETRIES, + S3GUARD_DDB_MAX_RETRIES_DEFAULT), + conf.getTimeDuration(S3GUARD_DDB_THROTTLE_RETRY_INTERVAL, + S3GUARD_DDB_THROTTLE_RETRY_INTERVAL_DEFAULT, + TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS); - dataAccess = new Invoker(dataAccessRetryPolicy, this::retryEvent); + final RetryPolicy throttledRetryRetryPolicy + = new S3GuardDataAccessRetryPolicy(config); + readOp = new Invoker(throttledRetryRetryPolicy, this::readRetryEvent); + writeOp = new Invoker(throttledRetryRetryPolicy, this::writeRetryEvent); } @Override @@ -387,12 +468,18 @@ private void innerDelete(final Path path, boolean tombstone) boolean idempotent = S3AFileSystem.DELETE_CONSIDERED_IDEMPOTENT; if (tombstone) { Item item = PathMetadataDynamoDBTranslation.pathMetadataToItem( - PathMetadata.tombstone(path)); - invoker.retry("Put tombstone", path.toString(), idempotent, + new DDBPathMetadata(PathMetadata.tombstone(path))); + writeOp.retry( + "Put tombstone", + path.toString(), + idempotent, () -> table.putItem(item)); } else { PrimaryKey key = pathToKey(path); - invoker.retry("Delete key", path.toString(), idempotent, + writeOp.retry( + "Delete key", + path.toString(), + idempotent, () -> table.deleteItem(key)); } } @@ -416,28 +503,38 @@ public void deleteSubtree(Path path) throws IOException { } } - @Retries.OnceRaw - private Item getConsistentItem(PrimaryKey key) { + /** + * Get a consistent view of an item. 
+ * @param path path to look up in the database + * @param path entry + * @return the result + * @throws IOException failure + */ + @Retries.RetryTranslated + private Item getConsistentItem(final Path path) throws IOException { + PrimaryKey key = pathToKey(path); final GetItemSpec spec = new GetItemSpec() .withPrimaryKey(key) .withConsistentRead(true); // strictly consistent read - return table.getItem(spec); + return readOp.retry("get", + path.toString(), + true, + () -> table.getItem(spec)); } @Override - @Retries.OnceTranslated - public PathMetadata get(Path path) throws IOException { + @Retries.RetryTranslated + public DDBPathMetadata get(Path path) throws IOException { return get(path, false); } @Override - @Retries.OnceTranslated - public PathMetadata get(Path path, boolean wantEmptyDirectoryFlag) + @Retries.RetryTranslated + public DDBPathMetadata get(Path path, boolean wantEmptyDirectoryFlag) throws IOException { checkPath(path); LOG.debug("Get from table {} in region {}: {}", tableName, region, path); - return Invoker.once("get", path.toString(), - () -> innerGet(path, wantEmptyDirectoryFlag)); + return innerGet(path, wantEmptyDirectoryFlag); } /** @@ -447,17 +544,17 @@ public PathMetadata get(Path path, boolean wantEmptyDirectoryFlag) * MetadataStore that it should try to compute the empty directory flag. * @return metadata for {@code path}, {@code null} if not found * @throws IOException IO problem - * @throws AmazonClientException dynamo DB level problem */ - @Retries.OnceRaw - private PathMetadata innerGet(Path path, boolean wantEmptyDirectoryFlag) + @Retries.RetryTranslated + private DDBPathMetadata innerGet(Path path, boolean wantEmptyDirectoryFlag) throws IOException { - final PathMetadata meta; + final DDBPathMetadata meta; if (path.isRoot()) { // Root does not persist in the table - meta = new PathMetadata(makeDirStatus(username, path)); + meta = + new DDBPathMetadata(makeDirStatus(username, path)); } else { - final Item item = getConsistentItem(pathToKey(path)); + final Item item = getConsistentItem(path); meta = itemToPathMetadata(item, username); LOG.debug("Get from table {} in region {} returning for {}: {}", tableName, region, path, meta); @@ -472,8 +569,10 @@ private PathMetadata innerGet(Path path, boolean wantEmptyDirectoryFlag) .withConsistentRead(true) .withFilterExpression(IS_DELETED + " = :false") .withValueMap(deleteTrackingValueMap); - final ItemCollection items = table.query(spec); - boolean hasChildren = items.iterator().hasNext(); + boolean hasChildren = readOp.retry("get/hasChildren", + path.toString(), + true, + () -> table.query(spec).iterator().hasNext()); // When this class has support for authoritative // (fully-cached) directory listings, we may also be able to answer // TRUE here. 
Until then, we don't know if we have full listing or @@ -500,13 +599,16 @@ private FileStatus makeDirStatus(String owner, Path path) { } @Override - @Retries.OnceTranslated + @Retries.RetryTranslated public DirListingMetadata listChildren(final Path path) throws IOException { checkPath(path); LOG.debug("Listing table {} in region {}: {}", tableName, region, path); // find the children in the table - return Invoker.once("listChildren", path.toString(), + return readOp.retry( + "listChildren", + path.toString(), + true, () -> { final QuerySpec spec = new QuerySpec() .withHashKey(pathToParentKeyAttribute(path)) @@ -515,15 +617,22 @@ public DirListingMetadata listChildren(final Path path) throws IOException { final List metas = new ArrayList<>(); for (Item item : items) { - PathMetadata meta = itemToPathMetadata(item, username); + DDBPathMetadata meta = itemToPathMetadata(item, username); metas.add(meta); } + + DDBPathMetadata dirPathMeta = get(path); + boolean isAuthoritative = false; + if(dirPathMeta != null) { + isAuthoritative = dirPathMeta.isAuthoritativeDir(); + } + LOG.trace("Listing table {} in region {} for {} returning {}", tableName, region, path, metas); - return (metas.isEmpty() && get(path) == null) + return (metas.isEmpty() && dirPathMeta == null) ? null - : new DirListingMetadata(path, metas, false); + : new DirListingMetadata(path, metas, isAuthoritative); }); } @@ -532,24 +641,25 @@ public DirListingMetadata listChildren(final Path path) throws IOException { * @param pathsToCreate paths to create * @return the full ancestry paths */ - Collection completeAncestry( - Collection pathsToCreate) { + Collection completeAncestry( + Collection pathsToCreate) { // Key on path to allow fast lookup - Map ancestry = new HashMap<>(); + Map ancestry = new HashMap<>(); - for (PathMetadata meta : pathsToCreate) { + for (DDBPathMetadata meta : pathsToCreate) { Preconditions.checkArgument(meta != null); Path path = meta.getFileStatus().getPath(); if (path.isRoot()) { break; } - ancestry.put(path, meta); + ancestry.put(path, new DDBPathMetadata(meta)); Path parent = path.getParent(); while (!parent.isRoot() && !ancestry.containsKey(parent)) { LOG.debug("auto-create ancestor path {} for child path {}", parent, path); final FileStatus status = makeDirStatus(parent, username); - ancestry.put(parent, new PathMetadata(status, Tristate.FALSE, false)); + ancestry.put(parent, new DDBPathMetadata(status, Tristate.FALSE, + false)); parent = parent.getParent(); } } @@ -557,7 +667,7 @@ public DirListingMetadata listChildren(final Path path) throws IOException { } @Override - @Retries.OnceTranslated + @Retries.RetryTranslated public void move(Collection pathsToDelete, Collection pathsToCreate) throws IOException { if (pathsToDelete == null && pathsToCreate == null) { @@ -576,35 +686,35 @@ public void move(Collection pathsToDelete, // Following code is to maintain this invariant by putting all ancestor // directories of the paths to create. 
// ancestor paths that are not explicitly added to paths to create - Collection newItems = new ArrayList<>(); + Collection newItems = new ArrayList<>(); if (pathsToCreate != null) { - newItems.addAll(completeAncestry(pathsToCreate)); + newItems.addAll(completeAncestry(pathMetaToDDBPathMeta(pathsToCreate))); } if (pathsToDelete != null) { for (Path meta : pathsToDelete) { - newItems.add(PathMetadata.tombstone(meta)); + newItems.add(new DDBPathMetadata(PathMetadata.tombstone(meta))); } } - Invoker.once("move", tableName, - () -> processBatchWriteRequest(null, pathMetadataToItem(newItems))); + processBatchWriteRequest(null, pathMetadataToItem(newItems)); } /** * Helper method to issue a batch write request to DynamoDB. * - * The retry logic here is limited to repeating the write operations - * until all items have been written; there is no other attempt - * at recovery/retry. Throttling is handled internally. + * As well as retrying on the operation invocation, incomplete + * batches are retried until all have been deleted. * @param keysToDelete primary keys to be deleted; can be null * @param itemsToPut new items to be put; can be null + * @return the number of iterations needed to complete the call. */ - @Retries.OnceRaw("Outstanding batch items are updated with backoff") - private void processBatchWriteRequest(PrimaryKey[] keysToDelete, + @Retries.RetryTranslated("Outstanding batch items are updated with backoff") + private int processBatchWriteRequest(PrimaryKey[] keysToDelete, Item[] itemsToPut) throws IOException { final int totalToDelete = (keysToDelete == null ? 0 : keysToDelete.length); final int totalToPut = (itemsToPut == null ? 0 : itemsToPut.length); int count = 0; + int batches = 0; while (count < totalToDelete + totalToPut) { final TableWriteItems writeItems = new TableWriteItems(tableName); int numToDelete = 0; @@ -629,34 +739,66 @@ private void processBatchWriteRequest(PrimaryKey[] keysToDelete, count += numToPut; } - BatchWriteItemOutcome res = dynamoDB.batchWriteItem(writeItems); + // if there's a retry and another process updates things then it's not + // quite idempotent, but this was the case anyway + batches++; + BatchWriteItemOutcome res = writeOp.retry( + "batch write", + "", + true, + () -> dynamoDB.batchWriteItem(writeItems)); // Check for unprocessed keys in case of exceeding provisioned throughput Map> unprocessed = res.getUnprocessedItems(); int retryCount = 0; while (!unprocessed.isEmpty()) { - retryBackoff(retryCount++); - res = dynamoDB.batchWriteItemUnprocessed(unprocessed); + batchWriteCapacityExceededEvents.incrementAndGet(); + batches++; + retryBackoffOnBatchWrite(retryCount++); + // use a different reference to keep the compiler quiet + final Map> upx = unprocessed; + res = writeOp.retry( + "batch write", + "", + true, + () -> dynamoDB.batchWriteItemUnprocessed(upx)); unprocessed = res.getUnprocessedItems(); } } + return batches; } /** * Put the current thread to sleep to implement exponential backoff * depending on retryCount. If max retries are exceeded, throws an * exception instead. + * * @param retryCount number of retries so far * @throws IOException when max retryCount is exceeded. */ - private void retryBackoff(int retryCount) throws IOException { + private void retryBackoffOnBatchWrite(int retryCount) throws IOException { try { // Our RetryPolicy ignores everything but retryCount here. 
- RetryPolicy.RetryAction action = dataAccessRetryPolicy.shouldRetry(null, + RetryPolicy.RetryAction action = batchWriteRetryPolicy.shouldRetry( + null, retryCount, 0, true); if (action.action == RetryPolicy.RetryAction.RetryDecision.FAIL) { - throw new IOException( - String.format("Max retries exceeded (%d) for DynamoDB", - retryCount)); + // Create an AWSServiceThrottledException, with a fake inner cause + // which we fill in to look like a real exception so + // error messages look sensible + AmazonServiceException cause = new AmazonServiceException( + "Throttling"); + cause.setServiceName("S3Guard"); + cause.setStatusCode(AWSServiceThrottledException.STATUS_CODE); + cause.setErrorCode(THROTTLING); // used in real AWS errors + cause.setErrorType(AmazonServiceException.ErrorType.Service); + cause.setErrorMessage(THROTTLING); + cause.setRequestId("n/a"); + throw new AWSServiceThrottledException( + String.format("Max retries during batch write exceeded" + + " (%d) for DynamoDB." + + HINT_DDB_IOPS_TOO_LOW, + retryCount), + cause); } else { LOG.debug("Sleeping {} msec before next retry", action.delayMillis); Thread.sleep(action.delayMillis); @@ -666,12 +808,12 @@ private void retryBackoff(int retryCount) throws IOException { } catch (IOException e) { throw e; } catch (Exception e) { - throw new IOException("Unexpected exception", e); + throw new IOException("Unexpected exception " + e, e); } } @Override - @Retries.OnceRaw + @Retries.RetryTranslated public void put(PathMetadata meta) throws IOException { // For a deeply nested path, this method will automatically create the full // ancestry and save respective item in DynamoDB table. @@ -687,9 +829,13 @@ public void put(PathMetadata meta) throws IOException { } @Override - @Retries.OnceRaw + @Retries.RetryTranslated public void put(Collection metas) throws IOException { + innerPut(pathMetaToDDBPathMeta(metas)); + } + @Retries.OnceRaw + private void innerPut(Collection metas) throws IOException { Item[] items = pathMetadataToItem(completeAncestry(metas)); LOG.debug("Saving batch of {} items to table {}, region {}", items.length, tableName, region); @@ -699,11 +845,12 @@ public void put(Collection metas) throws IOException { /** * Helper method to get full path of ancestors that are nonexistent in table. 
*/ - @Retries.OnceRaw - private Collection fullPathsToPut(PathMetadata meta) + @VisibleForTesting + @Retries.RetryTranslated + Collection fullPathsToPut(DDBPathMetadata meta) throws IOException { checkPathMetadata(meta); - final Collection metasToPut = new ArrayList<>(); + final Collection metasToPut = new ArrayList<>(); // root path is not persisted if (!meta.getFileStatus().getPath().isRoot()) { metasToPut.add(meta); @@ -713,10 +860,11 @@ public void put(Collection metas) throws IOException { // first existent ancestor Path path = meta.getFileStatus().getPath().getParent(); while (path != null && !path.isRoot()) { - final Item item = getConsistentItem(pathToKey(path)); + final Item item = getConsistentItem(path); if (!itemExists(item)) { final FileStatus status = makeDirStatus(path, username); - metasToPut.add(new PathMetadata(status, Tristate.FALSE, false)); + metasToPut.add(new DDBPathMetadata(status, Tristate.FALSE, false, + meta.isAuthoritativeDir())); path = path.getParent(); } else { break; @@ -751,25 +899,23 @@ static FileStatus makeDirStatus(Path f, String owner) { * @throws IOException IO problem */ @Override - @Retries.OnceTranslated("retry(listFullPaths); once(batchWrite)") + @Retries.RetryTranslated public void put(DirListingMetadata meta) throws IOException { LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta); // directory path Path path = meta.getPath(); - PathMetadata p = new PathMetadata(makeDirStatus(path, username), - meta.isEmpty(), false); + DDBPathMetadata ddbPathMeta = + new DDBPathMetadata(makeDirStatus(path, username), meta.isEmpty(), + false, meta.isAuthoritative()); // First add any missing ancestors... - final Collection metasToPut = invoker.retry( - "paths to put", path.toString(), true, - () -> fullPathsToPut(p)); + final Collection metasToPut = fullPathsToPut(ddbPathMeta); // next add all children of the directory - metasToPut.addAll(meta.getListing()); + metasToPut.addAll(pathMetaToDDBPathMeta(meta.getListing())); - Invoker.once("put", path.toString(), - () -> processBatchWriteRequest(null, pathMetadataToItem(metasToPut))); + processBatchWriteRequest(null, pathMetadataToItem(metasToPut)); } @Override @@ -777,15 +923,20 @@ public synchronized void close() { if (instrumentation != null) { instrumentation.storeClosed(); } - if (dynamoDB != null) { - LOG.debug("Shutting down {}", this); - dynamoDB.shutdown(); - dynamoDB = null; + try { + if (dynamoDB != null) { + LOG.debug("Shutting down {}", this); + dynamoDB.shutdown(); + dynamoDB = null; + } + } finally { + closeAutocloseables(LOG, credentials); + credentials = null; } } @Override - @Retries.OnceTranslated + @Retries.RetryTranslated public void destroy() throws IOException { if (table == null) { LOG.info("In destroy(): no table to delete"); @@ -794,10 +945,11 @@ public void destroy() throws IOException { LOG.info("Deleting DynamoDB table {} in region {}", tableName, region); Preconditions.checkNotNull(dynamoDB, "Not connected to DynamoDB"); try { - table.delete(); + invoker.retry("delete", null, true, + () -> table.delete()); table.waitForDelete(); - } catch (ResourceNotFoundException rnfe) { - LOG.info("ResourceNotFoundException while deleting DynamoDB table {} in " + } catch (FileNotFoundException rnfe) { + LOG.info("FileNotFoundException while deleting DynamoDB table {} in " + "region {}. 
This may indicate that the table does not exist, " + "or has been deleted by another concurrent thread or process.", tableName, region); @@ -807,53 +959,82 @@ public void destroy() throws IOException { tableName, ie); throw new InterruptedIOException("Table " + tableName + " in region " + region + " has not been deleted"); - } catch (AmazonClientException e) { - throw translateException("destroy", tableName, e); } } - @Retries.OnceRaw + @Retries.RetryTranslated private ItemCollection expiredFiles(long modTime, - String keyPrefix) { + String keyPrefix) throws IOException { String filterExpression = "mod_time < :mod_time and begins_with(parent, :parent)"; String projectionExpression = "parent,child"; ValueMap map = new ValueMap() .withLong(":mod_time", modTime) .withString(":parent", keyPrefix); - return table.scan(filterExpression, projectionExpression, null, map); + return readOp.retry( + "scan", + keyPrefix, + true, + () -> table.scan(filterExpression, projectionExpression, null, map)); } @Override - @Retries.OnceRaw("once(batchWrite)") + @Retries.RetryTranslated public void prune(long modTime) throws IOException { prune(modTime, "/"); } + /** + * Prune files, in batches. There's a sleep between each batch. + * @param modTime Oldest modification time to allow + * @param keyPrefix The prefix for the keys that should be removed + * @throws IOException Any IO/DDB failure. + * @throws InterruptedIOException if the prune was interrupted + */ @Override - @Retries.OnceRaw("once(batchWrite)") + @Retries.RetryTranslated public void prune(long modTime, String keyPrefix) throws IOException { int itemCount = 0; try { Collection deletionBatch = new ArrayList<>(S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT); - int delay = conf.getInt(S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY, - S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_DEFAULT); + long delay = conf.getTimeDuration( + S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY, + S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_DEFAULT, + TimeUnit.MILLISECONDS); + Set parentPathSet = new HashSet<>(); for (Item item : expiredFiles(modTime, keyPrefix)) { - PathMetadata md = PathMetadataDynamoDBTranslation + DDBPathMetadata md = PathMetadataDynamoDBTranslation .itemToPathMetadata(item, username); Path path = md.getFileStatus().getPath(); deletionBatch.add(path); + + // add parent path of what we remove + Path parentPath = path.getParent(); + if (parentPath != null) { + parentPathSet.add(parentPath); + } + itemCount++; if (deletionBatch.size() == S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT) { Thread.sleep(delay); processBatchWriteRequest(pathToKey(deletionBatch), null); + + // set authoritative false for each pruned dir listing + removeAuthoritativeDirFlag(parentPathSet); + parentPathSet.clear(); + deletionBatch.clear(); } } - if (deletionBatch.size() > 0) { + // final batch of deletes + if (!deletionBatch.isEmpty()) { Thread.sleep(delay); processBatchWriteRequest(pathToKey(deletionBatch), null); + + // set authoritative false for each pruned dir listing + removeAuthoritativeDirFlag(parentPathSet); + parentPathSet.clear(); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); @@ -863,6 +1044,71 @@ public void prune(long modTime, String keyPrefix) throws IOException { S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT); } + private void removeAuthoritativeDirFlag(Set pathSet) + throws IOException { + AtomicReference rIOException = new AtomicReference<>(); + + Set metas = pathSet.stream().map(path -> { + try { + DDBPathMetadata ddbPathMetadata = get(path); + if(ddbPathMetadata == null) { + return null; + } + 
LOG.debug("Setting false isAuthoritativeDir on {}", ddbPathMetadata); + ddbPathMetadata.setAuthoritativeDir(false); + return ddbPathMetadata; + } catch (IOException e) { + String msg = String.format("IOException while getting PathMetadata " + + "on path: %s.", path); + LOG.error(msg, e); + rIOException.set(e); + return null; + } + }).filter(Objects::nonNull).collect(Collectors.toSet()); + + try { + LOG.debug("innerPut on metas: {}", metas); + innerPut(metas); + } catch (IOException e) { + String msg = String.format("IOException while setting false " + + "authoritative directory flag on: %s.", metas); + LOG.error(msg, e); + rIOException.set(e); + } + + if (rIOException.get() != null) { + throw rIOException.get(); + } + } + + /** + * Add tags from configuration to the existing DynamoDB table. + */ + @Retries.OnceRaw + public void tagTable() { + List tags = new ArrayList<>(); + Map tagProperties = + conf.getPropsWithPrefix(S3GUARD_DDB_TABLE_TAG); + for (Map.Entry tagMapEntry : tagProperties.entrySet()) { + Tag tag = new Tag().withKey(tagMapEntry.getKey()) + .withValue(tagMapEntry.getValue()); + tags.add(tag); + } + if (tags.isEmpty()) { + return; + } + + TagResourceRequest tagResourceRequest = new TagResourceRequest() + .withResourceArn(table.getDescription().getTableArn()) + .withTags(tags); + getAmazonDynamoDB().tagResource(tagResourceRequest); + } + + @VisibleForTesting + public AmazonDynamoDB getAmazonDynamoDB() { + return amazonDynamoDB; + } + @Override public String toString() { return getClass().getSimpleName() + '{' @@ -946,19 +1192,34 @@ void initTable() throws IOException { * Get the version mark item in the existing DynamoDB table. * * As the version marker item may be created by another concurrent thread or - * process, we sleep and retry a limited times before we fail to get it. - * This does not include handling any failure other than "item not found", - * so this method is tagged as "OnceRaw" + * process, we sleep and retry a limited number times if the lookup returns + * with a null value. + * DDB throttling is always retried. */ - @Retries.OnceRaw - private Item getVersionMarkerItem() throws IOException { + @VisibleForTesting + @Retries.RetryTranslated + Item getVersionMarkerItem() throws IOException { final PrimaryKey versionMarkerKey = createVersionMarkerPrimaryKey(VERSION_MARKER); int retryCount = 0; - Item versionMarker = table.getItem(versionMarkerKey); + // look for a version marker, with usual throttling/failure retries. + Item versionMarker = queryVersionMarker(versionMarkerKey); while (versionMarker == null) { + // The marker was null. + // Two possibilities + // 1. This isn't a S3Guard table. + // 2. This is a S3Guard table in construction; another thread/process + // is about to write/actively writing the version marker. + // So that state #2 is handled, batchWriteRetryPolicy is used to manage + // retries. + // This will mean that if the cause is actually #1, failure will not + // be immediate. As this will ultimately result in a failure to + // init S3Guard and the S3A FS, this isn't going to be a performance + // bottleneck -simply a slightly slower failure report than would otherwise + // be seen. 
+ // "if your settings are broken, performance is not your main issue" try { - RetryPolicy.RetryAction action = dataAccessRetryPolicy.shouldRetry(null, + RetryPolicy.RetryAction action = batchWriteRetryPolicy.shouldRetry(null, retryCount, 0, true); if (action.action == RetryPolicy.RetryAction.RetryDecision.FAIL) { break; @@ -967,14 +1228,29 @@ private Item getVersionMarkerItem() throws IOException { Thread.sleep(action.delayMillis); } } catch (Exception e) { - throw new IOException("initTable: Unexpected exception", e); + throw new IOException("initTable: Unexpected exception " + e, e); } retryCount++; - versionMarker = table.getItem(versionMarkerKey); + versionMarker = queryVersionMarker(versionMarkerKey); } return versionMarker; } + /** + * Issue the query to get the version marker, with throttling for overloaded + * DDB tables. + * @param versionMarkerKey key to look up + * @return the marker + * @throws IOException failure + */ + @Retries.RetryTranslated + private Item queryVersionMarker(final PrimaryKey versionMarkerKey) + throws IOException { + return readOp.retry("getVersionMarkerItem", + VERSION_MARKER, true, + () -> table.getItem(versionMarkerKey)); + } + /** * Verify that a table version is compatible with this S3Guard client. * @param tableName name of the table (for error messages) @@ -1051,6 +1327,7 @@ private void createTable(ProvisionedThroughput capacity) throws IOException { final Item marker = createVersionMarker(VERSION_MARKER, VERSION, System.currentTimeMillis()); putItem(marker); + tagTable(); } /** @@ -1059,7 +1336,7 @@ private void createTable(ProvisionedThroughput capacity) throws IOException { * @return the outcome. */ @Retries.OnceRaw - PutItemOutcome putItem(Item item) { + private PutItemOutcome putItem(Item item) { LOG.debug("Putting item {}", item); return table.putItem(item); } @@ -1106,6 +1383,11 @@ String getRegion() { return region; } + @VisibleForTesting + public String getTableName() { + return tableName; + } + @VisibleForTesting DynamoDB getDynamoDB() { return dynamoDB; @@ -1156,7 +1438,7 @@ private static void checkPathMetadata(PathMetadata meta) { map.put(WRITE_CAPACITY, throughput.getWriteCapacityUnits().toString()); map.put(TABLE, desc.toString()); map.put(MetadataStoreCapabilities.PERSISTS_AUTHORITATIVE_BIT, - Boolean.toString(false)); + Boolean.toString(true)); } else { map.put("name", "DynamoDB Metadata Store"); map.put(TABLE, "none"); @@ -1164,8 +1446,8 @@ private static void checkPathMetadata(PathMetadata meta) { } map.put("description", DESCRIPTION); map.put("region", region); - if (dataAccessRetryPolicy != null) { - map.put("retryPolicy", dataAccessRetryPolicy.toString()); + if (batchWriteRetryPolicy != null) { + map.put("retryPolicy", batchWriteRetryPolicy.toString()); } return map; } @@ -1220,6 +1502,38 @@ private long getLongParam(Map parameters, } } + /** + * Callback on a read operation retried. + * @param text text of the operation + * @param ex exception + * @param attempts number of attempts + * @param idempotent is the method idempotent (this is assumed to be true) + */ + void readRetryEvent( + String text, + IOException ex, + int attempts, + boolean idempotent) { + readThrottleEvents.incrementAndGet(); + retryEvent(text, ex, attempts, true); + } + + /** + * Callback on a write operation retried. 
+ * @param text text of the operation + * @param ex exception + * @param attempts number of attempts + * @param idempotent is the method idempotent (this is assumed to be true) + */ + void writeRetryEvent( + String text, + IOException ex, + int attempts, + boolean idempotent) { + writeThrottleEvents.incrementAndGet(); + retryEvent(text, ex, attempts, idempotent); + } + /** * Callback from {@link Invoker} when an operation is retried. * @param text text of the operation @@ -1262,4 +1576,31 @@ void retryEvent( } } + /** + * Get the count of read throttle events. + * @return the current count of read throttle events. + */ + @VisibleForTesting + public long getReadThrottleEventCount() { + return readThrottleEvents.get(); + } + + /** + * Get the count of write throttle events. + * @return the current count of write throttle events. + */ + @VisibleForTesting + public long getWriteThrottleEventCount() { + return writeThrottleEvents.get(); + } + + @VisibleForTesting + public long getBatchWriteCapacityExceededCount() { + return batchWriteCapacityExceededEvents.get(); + } + + @VisibleForTesting + public Invoker getInvoker() { + return invoker; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java index f0ffb446238..1a7f02896c6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java @@ -24,7 +24,6 @@ import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -41,6 +40,8 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import static org.apache.hadoop.fs.s3a.Constants.*; + /** * This is a local, in-memory implementation of MetadataStore. * This is not a coherent cache across processes. It is only @@ -60,23 +61,6 @@ public class LocalMetadataStore implements MetadataStore { public static final Logger LOG = LoggerFactory.getLogger(MetadataStore.class); - public static final int DEFAULT_MAX_RECORDS = 256; - public static final int DEFAULT_CACHE_ENTRY_TTL_MSEC = 10 * 1000; - - /** - * Maximum number of records. - */ - @InterfaceStability.Evolving - public static final String CONF_MAX_RECORDS = - "fs.metadatastore.local.max_records"; - - /** - * Time to live in milliseconds. If zero, time-based expiration is - * disabled. - */ - @InterfaceStability.Evolving - public static final String CONF_CACHE_ENTRY_TTL = - "fs.metadatastore.local.ttl"; /** Contains directory and file listings. 
*/ private Cache localCache; @@ -101,11 +85,13 @@ public void initialize(FileSystem fileSystem) throws IOException { @Override public void initialize(Configuration conf) throws IOException { Preconditions.checkNotNull(conf); - int maxRecords = conf.getInt(CONF_MAX_RECORDS, DEFAULT_MAX_RECORDS); + int maxRecords = conf.getInt(S3GUARD_METASTORE_LOCAL_MAX_RECORDS, + DEFAULT_S3GUARD_METASTORE_LOCAL_MAX_RECORDS); if (maxRecords < 4) { maxRecords = 4; } - int ttl = conf.getInt(CONF_CACHE_ENTRY_TTL, DEFAULT_CACHE_ENTRY_TTL_MSEC); + int ttl = conf.getInt(S3GUARD_METASTORE_LOCAL_ENTRY_TTL, + DEFAULT_S3GUARD_METASTORE_LOCAL_ENTRY_TTL); CacheBuilder builder = CacheBuilder.newBuilder().maximumSize(maxRecords); if (ttl >= 0) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java index 8515bfbad4d..46f406fd3ec 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java @@ -22,6 +22,8 @@ import java.net.URI; import java.util.Arrays; import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; import com.amazonaws.services.dynamodbv2.document.Item; import com.amazonaws.services.dynamodbv2.document.KeyAttribute; @@ -64,6 +66,7 @@ @VisibleForTesting static final String BLOCK_SIZE = "block_size"; static final String IS_DELETED = "is_deleted"; + static final String IS_AUTHORITATIVE = "is_authoritative"; /** Table version field {@value} in version marker item. */ @VisibleForTesting @@ -99,12 +102,27 @@ } /** - * Converts a DynamoDB item to a {@link PathMetadata}. + * Converts a DynamoDB item to a {@link DDBPathMetadata}. * * @param item DynamoDB item to convert - * @return {@code item} converted to a {@link PathMetadata} + * @return {@code item} converted to a {@link DDBPathMetadata} */ - static PathMetadata itemToPathMetadata(Item item, String username) + static DDBPathMetadata itemToPathMetadata(Item item, String username) + throws IOException { + return itemToPathMetadata(item, username, false); + } + + /** + * Converts a DynamoDB item to a {@link DDBPathMetadata}. + * Can ignore {@code IS_AUTHORITATIVE} flag if {@code ignoreIsAuthFlag} is + * true. + * + * @param item DynamoDB item to convert + * @param ignoreIsAuthFlag if true, ignore the authoritative flag on item + * @return {@code item} converted to a {@link DDBPathMetadata} + */ + static DDBPathMetadata itemToPathMetadata(Item item, String username, + boolean ignoreIsAuthFlag) throws IOException { if (item == null) { return null; @@ -125,8 +143,13 @@ static PathMetadata itemToPathMetadata(Item item, String username) Path path = new Path(parent, childStr); boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR); + boolean isAuthoritativeDir = false; final FileStatus fileStatus; if (isDir) { + if (!ignoreIsAuthFlag) { + isAuthoritativeDir = item.hasAttribute(IS_AUTHORITATIVE) + && item.getBoolean(IS_AUTHORITATIVE); + } fileStatus = DynamoDBMetadataStore.makeDirStatus(path, username); } else { long len = item.hasAttribute(FILE_LENGTH) ? 
item.getLong(FILE_LENGTH) : 0; @@ -138,21 +161,40 @@ static PathMetadata itemToPathMetadata(Item item, String username) boolean isDeleted = item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED); - return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted); + return new DDBPathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted, + isAuthoritativeDir); + } + + /** + * Converts a {@link DDBPathMetadata} to a DynamoDB item. + * + * @param meta {@link DDBPathMetadata} to convert + * @return {@code meta} converted to DynamoDB item + */ + static Item pathMetadataToItem(DDBPathMetadata meta) { + return pathMetadataToItem(meta, false); } /** - * Converts a {@link PathMetadata} to a DynamoDB item. + * Converts a {@link DDBPathMetadata} to a DynamoDB item. + * + * Can ignore {@code IS_AUTHORITATIVE} flag if {@code ignoreIsAuthFlag} is + * true. * - * @param meta {@link PathMetadata} to convert + * @param meta {@link DDBPathMetadata} to convert + * @param ignoreIsAuthFlag if true, ignore the authoritative flag on item * @return {@code meta} converted to DynamoDB item */ - static Item pathMetadataToItem(PathMetadata meta) { + static Item pathMetadataToItem(DDBPathMetadata meta, + boolean ignoreIsAuthFlag) { Preconditions.checkNotNull(meta); final FileStatus status = meta.getFileStatus(); final Item item = new Item().withPrimaryKey(pathToKey(status.getPath())); if (status.isDirectory()) { item.withBoolean(IS_DIR, true); + if (!ignoreIsAuthFlag) { + item.withBoolean(IS_AUTHORITATIVE, meta.isAuthoritativeDir()); + } } else { item.withLong(FILE_LENGTH, status.getLen()) .withLong(MOD_TIME, status.getModificationTime()) @@ -214,18 +256,19 @@ static Long extractCreationTimeFromMarker(Item marker) throws IOException { } /** - * Converts a collection {@link PathMetadata} to a collection DynamoDB items. + * Converts a collection {@link DDBPathMetadata} to a collection DynamoDB + * items. * - * @see #pathMetadataToItem(PathMetadata) + * @see #pathMetadataToItem(DDBPathMetadata) */ - static Item[] pathMetadataToItem(Collection metas) { + static Item[] pathMetadataToItem(Collection metas) { if (metas == null) { return null; } final Item[] items = new Item[metas.size()]; int i = 0; - for (PathMetadata meta : metas) { + for (DDBPathMetadata meta : metas) { items[i++] = pathMetadataToItem(meta); } return items; @@ -301,4 +344,10 @@ static PrimaryKey pathToKey(Path path) { private PathMetadataDynamoDBTranslation() { } + static List pathMetaToDDBPathMeta( + Collection pathMetadatas) { + return pathMetadatas.stream().map(p -> new DDBPathMetadata(p)) + .collect(Collectors.toList()); + } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index 19cfe1b34f1..cc55951869a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -235,6 +235,10 @@ public static S3AFileStatus putAndReturn(MetadataStore ms, changed = changed || updated; } + // If dirMeta is not authoritative, but isAuthoritative is true the + // directory metadata should be updated. Treat it as a change. 
+ changed = changed || (!dirMeta.isAuthoritative() && isAuthoritative); + if (changed && isAuthoritative) { dirMeta.setAuthoritative(true); // This is the full directory contents ms.put(dirMeta); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardDataAccessRetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardDataAccessRetryPolicy.java new file mode 100644 index 00000000000..915b94a0b7b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardDataAccessRetryPolicy.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3ARetryPolicy; +import org.apache.hadoop.io.retry.RetryPolicy; + +import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.io.retry.RetryPolicies.exponentialBackoffRetry; + +/** + * A Retry policy whose throttling comes from the S3Guard config options. + */ +public class S3GuardDataAccessRetryPolicy extends S3ARetryPolicy { + + public S3GuardDataAccessRetryPolicy(final Configuration conf) { + super(conf); + } + + protected RetryPolicy createThrottleRetryPolicy(final Configuration conf) { + return exponentialBackoffRetry( + conf.getInt(S3GUARD_DDB_MAX_RETRIES, S3GUARD_DDB_MAX_RETRIES_DEFAULT), + conf.getTimeDuration(S3GUARD_DDB_THROTTLE_RETRY_INTERVAL, + S3GUARD_DDB_THROTTLE_RETRY_INTERVAL_DEFAULT, + TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 19dc32a896e..13161212ac1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -118,6 +118,7 @@ public static final String REGION_FLAG = "region"; public static final String READ_FLAG = "read"; public static final String WRITE_FLAG = "write"; + public static final String TAG_FLAG = "tag"; /** * Constructor a S3Guard tool with HDFS configuration. 
@@ -382,6 +383,7 @@ public final int run(String[] args) throws Exception { " -" + REGION_FLAG + " REGION - Service region for connections\n" + " -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" + " -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" + + " -" + TAG_FLAG + " key=value; list of tags to tag dynamo table\n" + "\n" + " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" + " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" + @@ -393,6 +395,8 @@ public final int run(String[] args) throws Exception { getCommandFormat().addOptionWithValue(READ_FLAG); // write capacity. getCommandFormat().addOptionWithValue(WRITE_FLAG); + // tag + getCommandFormat().addOptionWithValue(TAG_FLAG); } @Override @@ -420,6 +424,23 @@ public int run(String[] args, PrintStream out) throws Exception { getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity); } + String tags = getCommandFormat().getOptValue(TAG_FLAG); + if (tags != null && !tags.isEmpty()) { + String[] stringList = tags.split(";"); + Map tagsKV = new HashMap<>(); + for(String kv : stringList) { + if(kv.isEmpty() || !kv.contains("=")){ + continue; + } + String[] kvSplit = kv.split("="); + tagsKV.put(kvSplit[0], kvSplit[1]); + } + + for (Map.Entry kv : tagsKV.entrySet()) { + getConf().set(S3GUARD_DDB_TABLE_TAG + kv.getKey(), kv.getValue()); + } + } + // Validate parameters. try { parseDynamoDBRegion(paths); @@ -479,6 +500,20 @@ public String getUsage() { public int run(String[] args, PrintStream out) throws Exception { List paths = parseArgs(args); Map options = new HashMap<>(); + String s3Path = paths.get(0); + + // Check if DynamoDB url is set from arguments. + String metadataStoreUri = getCommandFormat().getOptValue(META_FLAG); + if(metadataStoreUri == null || metadataStoreUri.isEmpty()) { + // If not set, check if filesystem is guarded by creating an + // S3AFileSystem and check if hasMetadataStore is true + try (S3AFileSystem s3AFileSystem = (S3AFileSystem) + S3AFileSystem.newInstance(toUri(s3Path), getConf())){ + Preconditions.checkState(s3AFileSystem.hasMetadataStore(), + "The S3 bucket is unguarded. 
" + getName() + + " can not be used on an unguarded bucket."); + } + } String readCap = getCommandFormat().getOptValue(READ_FLAG); if (StringUtils.isNotEmpty(readCap)) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java index 9e2f34def31..60d4b764070 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java @@ -18,56 +18,57 @@ package org.apache.hadoop.fs.s3native; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Objects; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.UnsupportedEncodingException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URLDecoder; -import java.util.Objects; import static org.apache.commons.lang3.StringUtils.equalsIgnoreCase; /** * Class to aid logging in to S3 endpoints. * It is in S3N so that it can be used across all S3 filesystems. + * + * The core function of this class was the extraction and decoding of user:secret + * information from filesystems URIs. As this is no longer supported, + * its role has been reduced to checking for secrets in the URI and rejecting + * them where found. */ +@InterfaceAudience.Private +@InterfaceStability.Evolving public final class S3xLoginHelper { - private static final Logger LOG = - LoggerFactory.getLogger(S3xLoginHelper.class); private S3xLoginHelper() { } public static final String LOGIN_WARNING = "The Filesystem URI contains login details." - +" This is insecure and may be unsupported in future."; - - public static final String PLUS_WARNING = - "Secret key contains a special character that should be URL encoded! " + - "Attempting to resolve..."; - - public static final String PLUS_UNENCODED = "+"; - public static final String PLUS_ENCODED = "%2B"; + +" This authentication mechanism is no longer supported."; /** - * Build the filesystem URI. This can include stripping down of part - * of the URI. + * Build the filesystem URI. * @param uri filesystem uri * @return the URI to use as the basis for FS operation and qualifying paths. * @throws IllegalArgumentException if the URI is in some way invalid. */ public static URI buildFSURI(URI uri) { + // look for login secrets and fail if they are present. + rejectSecretsInURIs(uri); Objects.requireNonNull(uri, "null uri"); Objects.requireNonNull(uri.getScheme(), "null uri.getScheme()"); if (uri.getHost() == null && uri.getAuthority() != null) { - Objects.requireNonNull(uri.getHost(), "null uri host." 
+ - " This can be caused by unencoded / in the password string"); + Objects.requireNonNull(uri.getHost(), "null uri host."); } Objects.requireNonNull(uri.getHost(), "null uri host."); return URI.create(uri.getScheme() + "://" + uri.getHost()); @@ -86,17 +87,14 @@ public static String toString(URI pathUri) { } /** - * Extract the login details from a URI, logging a warning if - * the URI contains these. + * Extract the login details from a URI, raising an exception if + * the URI contains them. * @param name URI of the filesystem, can be null - * @return a login tuple, possibly empty. + * @throws IllegalArgumentException if there is a secret in the URI. */ - public static Login extractLoginDetailsWithWarnings(URI name) { + public static void rejectSecretsInURIs(URI name) { Login login = extractLoginDetails(name); - if (login.hasLogin()) { - LOG.warn(LOGIN_WARNING); - } - return login; + Preconditions.checkArgument(!login.hasLogin(), LOGIN_WARNING); } /** @@ -104,43 +102,34 @@ public static Login extractLoginDetailsWithWarnings(URI name) { * @param name URI of the filesystem, may be null * @return a login tuple, possibly empty. */ - public static Login extractLoginDetails(URI name) { + @VisibleForTesting + static Login extractLoginDetails(URI name) { if (name == null) { return Login.EMPTY; } - try { - String authority = name.getAuthority(); - if (authority == null) { - return Login.EMPTY; - } - int loginIndex = authority.indexOf('@'); - if (loginIndex < 0) { - // no login - return Login.EMPTY; - } - String login = authority.substring(0, loginIndex); - int loginSplit = login.indexOf(':'); - if (loginSplit > 0) { - String user = login.substring(0, loginSplit); - String encodedPassword = login.substring(loginSplit + 1); - if (encodedPassword.contains(PLUS_UNENCODED)) { - LOG.warn(PLUS_WARNING); - encodedPassword = encodedPassword.replaceAll("\\" + PLUS_UNENCODED, - PLUS_ENCODED); - } - String password = URLDecoder.decode(encodedPassword, - "UTF-8"); - return new Login(user, password); - } else if (loginSplit == 0) { - // there is no user, just a password. In this case, there's no login - return Login.EMPTY; - } else { - return new Login(login, ""); - } - } catch (UnsupportedEncodingException e) { - // this should never happen; translate it if it does. - throw new RuntimeException(e); + String authority = name.getAuthority(); + if (authority == null) { + return Login.EMPTY; + } + int loginIndex = authority.indexOf('@'); + if (loginIndex < 0) { + // no login + return Login.EMPTY; + } + String login = authority.substring(0, loginIndex); + int loginSplit = login.indexOf(':'); + if (loginSplit > 0) { + String user = login.substring(0, loginSplit); + String encodedPassword = login.substring(loginSplit + 1); + return new Login(user, encodedPassword.isEmpty()? "": "password removed"); + } else if (loginSplit == 0) { + // there is no user, just a password. In this case, there's no login + return Login.EMPTY; + } else { + // loginSplit < 0: there is no ":". + // return a login with a null password + return new Login(login, ""); } } @@ -159,7 +148,7 @@ public static URI canonicalizeUri(URI uri, int defaultPort) { // reconstruct the uri with the default port set try { uri = new URI(uri.getScheme(), - null, + uri.getUserInfo(), uri.getHost(), defaultPort, uri.getPath(), @@ -262,10 +251,10 @@ public Login(String user, String password) { /** * Predicate to verify login details are defined. - * @return true if the username is defined (not null, not empty). 
+ * @return true if the instance contains login information. */ public boolean hasLogin() { - return StringUtils.isNotEmpty(user); + return StringUtils.isNotEmpty(password) || StringUtils.isNotEmpty(user); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/resources/META-INF/org.apache.hadoop.fs.MultipartUploaderFactory b/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory similarity index 100% rename from hadoop-tools/hadoop-aws/src/main/resources/META-INF/org.apache.hadoop.fs.MultipartUploaderFactory rename to hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.MultipartUploaderFactory diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md index 3afd63fbd8e..8af045776c3 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md @@ -29,7 +29,7 @@ assumed roles for different buckets. *IAM Assumed Roles are unlikely to be supported by third-party systems supporting the S3 APIs.* -## Using IAM Assumed Roles +## Using IAM Assumed Roles ### Before You Begin @@ -40,6 +40,8 @@ are, how to configure their policies, etc. * You need a pair of long-lived IAM User credentials, not the root account set. * Have the AWS CLI installed, and test that it works there. * Give the role access to S3, and, if using S3Guard, to DynamoDB. +* For working with data encrypted with SSE-KMS, the role must +have access to the appropriate KMS keys. Trying to learn how IAM Assumed Roles work by debugging stack traces from the S3A client is "suboptimal". @@ -51,7 +53,7 @@ To use assumed roles, the client must be configured to use the in the configuration option `fs.s3a.aws.credentials.provider`. This AWS Credential provider will read in the `fs.s3a.assumed.role` options needed to connect to the -Session Token Service [Assumed Role API](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html), +Security Token Service [Assumed Role API](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html), first authenticating with the full credentials, then assuming the specific role specified. It will then refresh this login at the configured rate of `fs.s3a.assumed.role.session.duration` @@ -69,7 +71,7 @@ which uses `fs.s3a.access.key` and `fs.s3a.secret.key`. Note: although you can list other AWS credential providers in to the Assumed Role Credential Provider, it can only cause confusion. -### Using Assumed Roles +### Configuring Assumed Roles To use assumed roles, the S3A client credentials provider must be set to the `AssumedRoleCredentialProvider`, and `fs.s3a.assumed.role.arn` to @@ -78,7 +80,6 @@ the previously created ARN. ```xml fs.s3a.aws.credentials.provider - org.apache.hadoop.fs.s3a.AssumedRoleCredentialProvider org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider @@ -159,7 +160,18 @@ Here are the full set of configuration options. fs.s3a.assumed.role.sts.endpoint - AWS Simple Token Service Endpoint. If unset, uses the default endpoint. + AWS Security Token Service Endpoint. If unset, uses the default endpoint. + Only used if AssumedRoleCredentialProvider is the AWS credential provider. + +
+ + + fs.s3a.assumed.role.sts.endpoint.region + us-west-1 + + AWS Security Token Service Endpoint's region; + Needed if fs.s3a.assumed.role.sts.endpoint points to an endpoint + other than the default one and the v4 signature is used. Only used if AssumedRoleCredentialProvider is the AWS credential provider. @@ -194,39 +206,101 @@ These lists represent the minimum actions to which the client's principal must have in order to work with a bucket. -### Read Access Permissions +### Read Access Permissions Permissions which must be granted when reading from a bucket: -| Action | S3A operations | -|--------|----------| -| `s3:ListBucket` | `listStatus()`, `getFileStatus()` and elsewhere | -| `s3:GetObject` | `getFileStatus()`, `open()` and elsewhere | -| `s3:ListBucketMultipartUploads` | Aborting/cleaning up S3A commit operations| +``` +s3:Get* +s3:ListBucket +``` + +When using S3Guard, the client needs the appropriate +DynamoDB access permissions + +To use SSE-KMS encryption, the client needs the +SSE-KMS Permissions to access the +KMS key(s). + +### Write Access Permissions + +These permissions must all be granted for write access: + +``` +s3:Get* +s3:Delete* +s3:Put* +s3:ListBucket +s3:ListBucketMultipartUploads +s3:AbortMultipartUpload +``` + +### SSE-KMS Permissions + +When to read data encrypted using SSE-KMS, the client must have + `kms:Decrypt` permission for the specific key a file was encrypted with. + +``` +kms:Decrypt +``` + +To write data using SSE-KMS, the client must have all the following permissions. + +``` +kms:Decrypt +kms:GenerateDataKey +``` +This includes renaming: renamed files are encrypted with the encryption key +of the current S3A client; it must decrypt the source file first. -The `s3:ListBucketMultipartUploads` is only needed when committing work -via the [S3A committers](committers.html). -However, it must be granted to the root path in order to safely clean up jobs. -It is simplest to permit this in all buckets, even if it is only actually -needed when writing data. +If the caller doesn't have these permissions, the operation will fail with an +`AccessDeniedException`: the S3 Store does not provide the specifics of +the cause of the failure. +### S3Guard Permissions -### Write Access Permissions +To use S3Guard, all clients must have a subset of the +[AWS DynamoDB Permissions](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/api-permissions-reference.html). -These permissions must *also* be granted for write access: +To work with buckets protected with S3Guard, the client must have +all the following rights on the DynamoDB Table used to protect that bucket. +``` +dynamodb:BatchGetItem +dynamodb:BatchWriteItem +dynamodb:DeleteItem +dynamodb:DescribeTable +dynamodb:GetItem +dynamodb:PutItem +dynamodb:Query +dynamodb:UpdateItem +``` -| Action | S3A operations | -|--------|----------| -| `s3:PutObject` | `mkdir()`, `create()`, `rename()`, `delete()` | -| `s3:DeleteObject` | `mkdir()`, `create()`, `rename()`, `delete()` | -| `s3:AbortMultipartUpload` | S3A committer `abortJob()` and `cleanup()` operations | -| `s3:ListMultipartUploadParts` | S3A committer `abortJob()` and `cleanup()` operations | +This is true, *even if the client only has read access to the data*. 
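
A quick way to verify that a role actually holds these rights is to issue a `DescribeTable` call with the same credentials, since `dynamodb:DescribeTable` is the first DynamoDB operation S3Guard performs. The sketch below is illustrative only and not part of Hadoop; it assumes the AWS SDK for Java v1 is on the classpath, that the role's credentials are resolved from the default provider chain, and that the table name and region are passed as arguments.

```java
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;

/** Probe the minimum DynamoDB permission needed by S3Guard. */
public class DescribeTableProbe {
  public static void main(String[] args) {
    String table = args[0];    // name of the S3Guard table
    String region = args[1];   // e.g. "us-west-1"
    AmazonDynamoDB ddb = AmazonDynamoDBClientBuilder.standard()
        .withRegion(region)
        .build();
    try {
      // dynamodb:DescribeTable is the first permission exercised by S3Guard,
      // so a rejection here predicts failure of every guarded operation.
      System.out.println("Table status: "
          + ddb.describeTable(table).getTable().getTableStatus());
    } catch (AmazonServiceException e) {
      System.err.println("No DynamoDB access: " + e.getErrorMessage());
    }
  }
}
```

If this probe is rejected, all attempts to work with the guarded bucket will fail with a similar `AmazonDynamoDBException`.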
+For the `hadoop s3guard` table management commands, _extra_ permissions are required: -### Mixed Permissions in a single S3 Bucket +``` +dynamodb:CreateTable +dynamodb:DescribeLimits +dynamodb:DeleteTable +dynamodb:Scan +dynamodb:TagResource +dynamodb:UntagResource +dynamodb:UpdateTable +``` + +Without these permissions, tables cannot be created, destroyed or have their IO capacity +changed through the `s3guard set-capacity` call. +The `dynamodb:Scan` permission is needed for `s3guard prune` + +The `dynamodb:CreateTable` permission is needed by a client it tries to +create the DynamoDB table on startup, that is +`fs.s3a.s3guard.ddb.table.create` is `true` and the table does not already exist. + +### Mixed Permissions in a single S3 Bucket Mixing permissions down the "directory tree" is limited only to the extent of supporting writeable directories under @@ -274,7 +348,7 @@ This example has the base bucket read only, and a directory underneath, "Action" : [ "s3:ListBucket", "s3:ListBucketMultipartUploads", - "s3:GetObject" + "s3:Get*" ], "Resource" : "arn:aws:s3:::example-bucket/*" }, { @@ -320,7 +394,7 @@ the command line before trying to use the S3A client. `hadoop fs -mkdirs -p s3a://bucket/path/p1/` -### IOException: "Unset property fs.s3a.assumed.role.arn" +### IOException: "Unset property fs.s3a.assumed.role.arn" The Assumed Role Credential Provider is enabled, but `fs.s3a.assumed.role.arn` is unset. @@ -339,7 +413,7 @@ java.io.IOException: Unset property fs.s3a.assumed.role.arn at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:474) ``` -### "Not authorized to perform sts:AssumeRole" +### "Not authorized to perform sts:AssumeRole" This can arise if the role ARN set in `fs.s3a.assumed.role.arn` is invalid or one to which the caller has no access. @@ -399,7 +473,8 @@ Caused by: com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceExc The value of `fs.s3a.assumed.role.session.duration` is out of range. ``` -java.lang.IllegalArgumentException: Assume Role session duration should be in the range of 15min - 1Hr +java.lang.IllegalArgumentException: Assume Role session duration should be in the range of 15min +- 1Hr at com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider$Builder.withRoleSessionDurationSeconds(STSAssumeRoleSessionCredentialsProvider.java:437) at org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider.(AssumedRoleCredentialProvider.java:86) ``` @@ -603,7 +678,7 @@ Caused by: com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceExc ### `AccessDeniedException/InvalidClientTokenId`: "The security token included in the request is invalid" -The credentials used to authenticate with the AWS Simple Token Service are invalid. +The credentials used to authenticate with the AWS Security Token Service are invalid. 
``` [ERROR] Failures: @@ -682,26 +757,7 @@ org.apache.hadoop.fs.s3a.AWSBadRequestException: Instantiate org.apache.hadoop.f at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3354) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:474) at org.apache.hadoop.fs.Path.getFileSystem(Path.java:361) - at org.apache.hadoop.fs.s3a.ITestAssumeRole.lambda$expectFileSystemFailure$0(ITestAssumeRole.java:70) - at org.apache.hadoop.fs.s3a.ITestAssumeRole.lambda$interceptC$1(ITestAssumeRole.java:84) - at org.apache.hadoop.test.LambdaTestUtils.intercept(LambdaTestUtils.java:491) - at org.apache.hadoop.test.LambdaTestUtils.intercept(LambdaTestUtils.java:377) - at org.apache.hadoop.test.LambdaTestUtils.intercept(LambdaTestUtils.java:446) - at org.apache.hadoop.fs.s3a.ITestAssumeRole.interceptC(ITestAssumeRole.java:82) - at org.apache.hadoop.fs.s3a.ITestAssumeRole.expectFileSystemFailure(ITestAssumeRole.java:68) - at org.apache.hadoop.fs.s3a.ITestAssumeRole.testAssumeRoleBadSession(ITestAssumeRole.java:216) - at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) - at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) - at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) - at java.lang.reflect.Method.invoke(Method.java:498) - at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) - at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) - at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) - at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) - at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) - at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) - at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55) - at org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74) + Caused by: com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException: 1 validation error detected: Value 'Session Names cannot Hava Spaces!' at 'roleSessionName' failed to satisfy constraint: @@ -742,10 +798,11 @@ Caused by: com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceExc ### `java.nio.file.AccessDeniedException` within a FileSystem API call If an operation fails with an `AccessDeniedException`, then the role does not have -the permission for the S3 Operation invoked during the call +the permission for the S3 Operation invoked during the call. ``` -java.nio.file.AccessDeniedException: s3a://bucket/readonlyDir: rename(s3a://bucket/readonlyDir, s3a://bucket/renameDest) +java.nio.file.AccessDeniedException: s3a://bucket/readonlyDir: + rename(s3a://bucket/readonlyDir, s3a://bucket/renameDest) on s3a://bucket/readonlyDir: com.amazonaws.services.s3.model.AmazonS3Exception: Access Denied (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: 2805F2ABF5246BB1; @@ -795,3 +852,33 @@ check the path for the operation. Make sure that all the read and write permissions are allowed for any bucket/path to which data is being written to, and read permissions for all buckets read from. + +If the bucket is using SSE-KMS to encrypt data: + +1. The caller must have the `kms:Decrypt` permission to read the data. +1. The caller needs `kms:Decrypt` and `kms:GenerateDataKey`. 
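
The encryption settings themselves are only client-side configuration; whether a read or write succeeds is decided by the caller's KMS permissions. A minimal sketch of the settings involved, with a placeholder bucket and a placeholder key ARN:

```java
import java.net.URI;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Sketch: SSE-KMS is configured on the client; KMS permissions decide the outcome. */
public class SseKmsProbe {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.server-side-encryption-algorithm", "SSE-KMS");
    // Placeholder key ARN: the caller needs kms:GenerateDataKey to write
    // and kms:Decrypt to read (or rename) data encrypted with this key.
    conf.set("fs.s3a.server-side-encryption.key",
        "arn:aws:kms:us-west-1:000000000000:key/example-key-id");
    try (FileSystem fs = FileSystem.newInstance(
        new URI("s3a://example-bucket/"), conf)) {
      Path path = new Path("/tmp/sse-kms-probe");
      try (FSDataOutputStream out = fs.create(path, true)) {
        out.write("test".getBytes(StandardCharsets.UTF_8));  // needs kms:GenerateDataKey
      }
      try (FSDataInputStream in = fs.open(path)) {
        in.read();                                           // needs kms:Decrypt
      }
    }
  }
}
```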
+ +Without permissions, the request fails *and there is no explicit message indicating +that this is an encryption-key issue*. + +### `AccessDeniedException` + `AmazonDynamoDBException` + +``` +java.nio.file.AccessDeniedException: bucket1: + com.amazonaws.services.dynamodbv2.model.AmazonDynamoDBException: + User: arn:aws:sts::980678866538:assumed-role/s3guard-test-role/test is not authorized to perform: + dynamodb:DescribeTable on resource: arn:aws:dynamodb:us-west-1:980678866538:table/bucket1 + (Service: AmazonDynamoDBv2; Status Code: 400; +``` + +The caller is trying to access an S3 bucket which uses S3Guard, but the caller +lacks the relevant DynamoDB access permissions. + +The `dynamodb:DescribeTable` operation is the first one used in S3Guard to access, +the DynamoDB table, so it is often the first to fail. It can be a sign +that the role has no permissions at all to access the table named in the exception, +or just that this specific permission has been omitted. + +If the role policy requested for the assumed role didn't ask for any DynamoDB +permissions, this is where all attempts to work with a S3Guarded bucket will +fail. Check the value of `fs.s3a.assumed.role.policy` diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committer_architecture.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committer_architecture.md index e4ba75d98c9..3071754836c 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committer_architecture.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committer_architecture.md @@ -230,7 +230,6 @@ None: directories are created on demand. Rename task attempt path to task committed path. ```python - def needsTaskCommit(fs, jobAttemptPath, taskAttemptPath, dest): return fs.exists(taskAttemptPath) @@ -276,12 +275,12 @@ def commitJob(fs, jobAttemptDir, dest): (See below for details on `mergePaths()`) -A failure during job abort cannot be recovered from except by re-executing +A failure during job commit cannot be recovered from except by re-executing the entire query: ```python def isCommitJobRepeatable() : - return True + return False ``` Accordingly, it is a failure point in the protocol. With a low number of files @@ -307,12 +306,28 @@ def cleanupJob(fs, dest): ``` -### Job Recovery +### Job Recovery Before `commitJob()` -1. Data under task committed paths is retained -1. All directories under `$dest/_temporary/$appAttemptId/_temporary/` are deleted. +For all committers, the recovery process takes place in the application +master. +1. The job history file of the previous attempt is loaded and scanned +to determine which tasks were recorded as having succeeded. +1. For each successful task, the job committer has its `recoverTask()` method +invoked with a `TaskAttemptContext` built from the previous attempt's details. +1. If the method does not raise an exception, it is considered to have been +recovered, and not to be re-executed. +1. All other tasks are queued for execution. -Uncommitted/unexecuted tasks are (re)executed. +For the v1 committer, task recovery is straightforward. +The directory of the committed task from the previous attempt is +moved under the directory of the current application attempt. + +```python +def recoverTask(tac): + oldAttemptId = appAttemptId - 1 + fs.rename('$dest/_temporary/oldAttemptId/${tac.taskId}', + '$dest/_temporary/appAttemptId/${tac.taskId}') +``` This significantly improves time to recover from Job driver (here MR AM) failure. 
The only lost work is that of all tasks in progress -those which had generated @@ -330,6 +345,11 @@ failure simply by rerunning the entire job. This is implicitly the strategy in Spark, which does not attempt to recover any in-progress jobs. The faster your queries, the simpler your recovery strategy needs to be. +### Job Recovery During `commitJob()` + +This is not possible; a failure during job commit requires the entire job +to be re-executed after cleaning up the destination directory. + ### `mergePaths(FileSystem fs, FileStatus src, Path dest)` Algorithm `mergePaths()` is the core algorithm to merge data; it is somewhat confusing @@ -352,24 +372,23 @@ def mergePathsV1(fs, src, dest) : fs.delete(dest, recursive = True) fs.rename(src.getPath, dest) else : - # destination is directory, choose action on source type - if src.isDirectory : - if not toStat is None : - if not toStat.isDirectory : - # Destination exists and is not a directory - fs.delete(dest) - fs.rename(src.getPath(), dest) - else : - # Destination exists and is a directory - # merge all children under destination directory - for child in fs.listStatus(src.getPath) : - mergePathsV1(fs, child, dest + child.getName) - else : - # destination does not exist + # src is directory, choose action on dest type + if not toStat is None : + if not toStat.isDirectory : + # Destination exists and is not a directory + fs.delete(dest) fs.rename(src.getPath(), dest) + else : + # Destination exists and is a directory + # merge all children under destination directory + for child in fs.listStatus(src.getPath) : + mergePathsV1(fs, child, dest + child.getName) + else : + # destination does not exist + fs.rename(src.getPath(), dest) ``` -## v2 commit algorithm +## The v2 Commit Algorithm The v2 algorithm directly commits task output into the destination directory. @@ -506,12 +525,31 @@ Cost: `O(1)` for normal filesystems, `O(files)` for object stores. As no data is written to the destination directory, a task can be cleaned up by deleting the task attempt directory. -### v2 Job Recovery +### v2 Job Recovery Before `commitJob()` + + +Because the data has been renamed into the destination directory, all tasks +recorded as having being committed have no recovery needed at all: + +```python +def recoverTask(tac): +``` + +All active and queued tasks are scheduled for execution. + +There is a weakness here, the same one on a failure during `commitTask()`: +it is only safe to repeat a task which failed during that commit operation +if the name of all generated files are constant across all task attempts. + +If the Job AM fails while a task attempt has been instructed to commit, +and that commit is not recorded as having completed, the state of that +in-progress task is unknown...really it isn't be safe to recover the +job at this point. + -Because the data has been renamed into the destination directory, it is nominally -recoverable. However, this assumes that the number and name of generated -files are constant on retried tasks. +### v2 Job Recovery During `commitJob()` +This is straightforward: `commitJob()` is re-invoked. ## How MapReduce uses the committer in a task @@ -896,7 +934,7 @@ and metadata. POST bucket.s3.aws.com/path?uploads - An UploadId is returned + An `UploadId` is returned 1. Caller uploads one or more parts. 
@@ -994,7 +1032,7 @@ Task outputs are directed to the local FS by `getTaskAttemptPath` and `getWorkPa The single-directory and partitioned committers handle conflict resolution by checking whether target paths exist in S3 before uploading any data. -There are 3 conflict resolution modes, controlled by setting `fs.s3a.committer.staging.conflict-mode`: +There are three conflict resolution modes, controlled by setting `fs.s3a.committer.staging.conflict-mode`: * `fail`: Fail a task if an output directory or partition already exists. (Default) * `append`: Upload data files without checking whether directories or partitions already exist. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md index 392cde2f801..09e123d6eda 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md @@ -371,7 +371,7 @@ Put differently: start with the Directory Committer. To use an S3A committer, the property `mapreduce.outputcommitter.factory.scheme.s3a` must be set to the S3A committer factory, `org.apache.hadoop.fs.s3a.commit.staging.S3ACommitterFactory`. -This is done in `core-default.xml` +This is done in `mapred-default.xml` ```xml diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 7d0f67bb386..7ab3b0e0736 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -33,7 +33,7 @@ See also: * [Working with IAM Assumed Roles](./assumed_roles.html) * [Testing](./testing.html) -## Overview +## Overview Apache Hadoop's `hadoop-aws` module provides support for AWS integration. applications to easily use this support. @@ -88,7 +88,7 @@ maintain it. This connector is no longer available: users must migrate to the newer `s3a:` client. -## Getting Started +## Getting Started S3A depends upon two JARs, alongside `hadoop-common` and its dependencies. @@ -197,7 +197,7 @@ to safely save the output of queries directly into S3 object stores through the S3A filesystem. -### Warning #3: Object stores have differerent authorization models +### Warning #3: Object stores have different authorization models The object authorization model of S3 is much different from the file authorization model of HDFS and traditional file systems. @@ -222,13 +222,12 @@ Your AWS credentials not only pay for services, they offer read and write access to the data. Anyone with the credentials can not only read your datasets —they can delete them. -Do not inadvertently share these credentials through means such as +Do not inadvertently share these credentials through means such as: 1. Checking in to SCM any configuration files containing the secrets. 1. Logging them to a console, as they invariably end up being seen. -1. Defining filesystem URIs with the credentials in the URL, such as -`s3a://AK0010:secret@landsat-pds/`. They will end up in logs and error messages. 1. Including the secrets in bug reports. +1. Logging the `AWS_` environment variables. If you do any of these: change your credentials immediately! @@ -242,6 +241,11 @@ The client supports multiple authentication mechanisms and can be configured as which mechanisms to use, and their order of use. 
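
For example, a client can set the provider list (and hence the order in which each mechanism is tried) programmatically rather than in `core-site.xml`. This sketch uses the two standard providers described under "Authentication properties" below and a placeholder bucket name:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Sketch: select the credential providers and the order in which they are queried. */
public class CredentialChainExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Try the XML-configured access/secret key first,
    // then the AWS_* environment variables.
    conf.set("fs.s3a.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,"
            + "com.amazonaws.auth.EnvironmentVariableCredentialsProvider");
    try (FileSystem fs = FileSystem.newInstance(
        new URI("s3a://example-bucket/"), conf)) {
      fs.listStatus(new Path("/"));
    }
  }
}
```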
Custom implementations of `com.amazonaws.auth.AWSCredentialsProvider` may also be used. +*Important*: The S3A connector no longer supports username and secrets +in URLs of the form `s3a://key:secret@bucket/`. +It is near-impossible to stop those secrets being logged —which is why +a warning has been printed since Hadoop 2.8 whenever such a URL was used. + ### Authentication properties ```xml @@ -281,9 +285,8 @@ of `com.amazonaws.auth.AWSCredentialsProvider` may also be used. If unspecified, then the default list of credential provider classes, queried in sequence, is: - 1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports - static configuration of AWS access key ID and secret access key. - See also fs.s3a.access.key and fs.s3a.secret.key. + 1. org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider: + Uses the values of fs.s3a.access.key and fs.s3a.secret.key. 2. com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports configuration of AWS access key ID and secret access key in environment variables named AWS_ACCESS_KEY_ID and @@ -340,8 +343,6 @@ properties in the configuration file. The S3A client follows the following authentication chain: -1. If login details were provided in the filesystem URI, a warning is printed -and then the username and password extracted for the AWS key and secret respectively. 1. The `fs.s3a.access.key` and `fs.s3a.secret.key` are looked for in the Hadoop XML configuration. 1. The [AWS environment variables](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-environment), @@ -461,12 +462,11 @@ security and therefore is unsuitable for most use cases. then the Anonymous Credential provider *must* come last. If not, credential providers listed after it will be ignored. -*Simple name/secret credentials with `SimpleAWSCredentialsProvider`* +### Simple name/secret credentials with `SimpleAWSCredentialsProvider` -This is is the standard credential provider, which -supports the secret key in `fs.s3a.access.key` and token in `fs.s3a.secret.key` -values. It does not support authentication with logins credentials declared -in the URLs. +This is the standard credential provider, which supports an access +key ID in `fs.s3a.access.key` and the secret key in +`fs.s3a.secret.key`. ```xml @@ -475,9 +475,7 @@ in the URLs. ``` -Apart from its lack of support of user:password details being included in filesystem -URLs (a dangerous practise that is strongly discouraged), this provider acts -exactly at the basic authenticator used in the default authentication chain. +This is the basic authenticator used in the default authentication chain. This means that the default S3A authentication chain can be defined as @@ -1698,6 +1696,6 @@ as configured by the value `fs.s3a.multipart.size`. To disable checksum verification in `distcp`, use the `-skipcrccheck` option: ```bash -hadoop distcp -update -skipcrccheck /user/alice/datasets s3a://alice-backup/datasets +hadoop distcp -update -skipcrccheck -numListstatusThreads 40 /user/alice/datasets s3a://alice-backup/datasets ``` diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index 60d26e26cd0..a8c8d6cd2cd 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -100,7 +100,52 @@ More settings will may be added in the future.
Currently the only Metadata Store-independent setting, besides the implementation class above, is the *allow authoritative* flag. -It is recommended that you leave the default setting here: +The _authoritative_ concept in S3Guard is present in two different layers, for +two different reasons: + +* Authoritative S3Guard + * S3Guard can be set as authoritative, which means that an S3A client will + avoid round-trips to S3 when **getting directory listings** if there is a fully + cached version of the directory stored in the metadata store. + * This mode can be enabled with the configuration property + `fs.s3a.metadatastore.authoritative`. + * All interactions with the S3 bucket(s) must be through S3A clients sharing + the same metadata store. + * This is independent of which metadata store implementation is used. + +* Authoritative directory listings (isAuthoritative bit) + * Indicates whether the stored directory listing metadata is complete. + * This is set by the FileSystem client (e.g. s3a) via the `DirListingMetadata` + class (`org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata`). + (The MetadataStore only knows what the FS client tells it.) + * If set to `TRUE`, we know that the directory listing + (`DirListingMetadata`) is full and complete. + * If set to `FALSE`, the listing may not be complete. + * A metadata store may persist the isAuthoritative bit. + * Currently the `org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore` and + `org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore` implementations + support the authoritative bit. + +More on Authoritative S3Guard: + +* It does not treat the MetadataStore (e.g. DynamoDB) as the source of truth + in general. +* It is the ability to short-circuit S3 list objects and serve listings from +the MetadataStore in some circumstances. +* For S3A to skip S3's list objects on some path, and serve it directly from +the MetadataStore, the following things must all be true: + 1. The MetadataStore implementation persists the bit + `DirListingMetadata.isAuthoritative` set when calling + `MetadataStore#put` (`DirListingMetadata`). + 1. The S3A client is configured to allow the metadata store to be the authoritative + source of a directory listing (`fs.s3a.metadatastore.authoritative=true`). + 1. The MetadataStore has a **full listing for the path** stored in it. This only + happens if the FS client (s3a) has explicitly stored a full directory + listing with `DirListingMetadata.isAuthoritative=true` before that + listing request happens. + +This configuration only enables authoritative mode in the client layer. It is +recommended that you leave the default setting here: ```xml @@ -109,9 +154,8 @@ It is recommended that you leave the default setting here: ``` -Setting this to `true` is currently an experimental feature. When true, the -S3A client will avoid round-trips to S3 when getting directory listings, if -there is a fully-cached version of the directory stored in the Metadata Store. +Note that a MetadataStore MAY persist this bit (not MUST). +Setting this to `true` is currently an experimental feature. Note that if this is set to true, it may exacerbate or persist existing race conditions around multiple concurrent modifications and listings of a given @@ -254,8 +298,9 @@ rates. ``` -Attempting to perform more IO than the capacity requested simply throttles the -IO; small capacity numbers are recommended when initially experimenting +Attempting to perform more IO than the capacity requested throttles the +IO, and may result in operations failing.
Larger IO capacities cost more. +We recommend using small read and write capacities when initially experimenting with S3Guard. ## Authenticating with S3Guard @@ -283,7 +328,7 @@ to the options `fs.s3a.KEY` *for that bucket only*. As an example, here is a configuration to use different metadata stores and tables for different buckets -First, we define shortcuts for the metadata store classnames +First, we define shortcuts for the metadata store classnames: ```xml @@ -299,7 +344,7 @@ First, we define shortcuts for the metadata store classnames ``` Next, Amazon's public landsat database is configured with no -metadata store +metadata store: ```xml @@ -311,7 +356,7 @@ metadata store ``` Next the `ireland-2` and `ireland-offline` buckets are configured with -DynamoDB as the store, and a shared table `production-table` +DynamoDB as the store, and a shared table `production-table`: ```xml @@ -371,6 +416,13 @@ pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb [-write PROVISIONED_WRITES] [-read PROVISIONED_READS] ``` +A `-tag` argument can be added with a key=value list of tags. The table for the +metadata store will be created with these tags in DynamoDB. + +```bash +[-tag key=value;] +``` + Example 1 ```bash @@ -390,6 +442,14 @@ hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1 Creates a table "ireland-team" in the region "eu-west-1.amazonaws.com" +Example 3 + +```bash +hadoop s3guard init -meta dynamodb://ireland-team -tag tag1=first;tag2=second; +``` + +Creates a table "ireland-team" with tags "first" and "second". + ### Import a bucket: `s3guard import` ```bash @@ -657,10 +717,10 @@ Metadata Store Diagnostics: ``` After the update, the table status changes to `UPDATING`; this is a sign that -the capacity has been changed +the capacity has been changed. Repeating the same command will not change the capacity, as both read and -write values match that already in use +write values match that already in use. ``` 2017-08-30 16:24:35,337 [main] INFO s3guard.DynamoDBMetadataStore (DynamoDBMetadataStore.java:updateParameters(1090)) - Table capacity unchanged at read: 20, write: 20 @@ -677,6 +737,9 @@ Metadata Store Diagnostics: write-capacity=20 ``` +*Note*: There is a limit to how many times in a 24 hour period the capacity +of a table can be changed, either through this command or the AWS console. + ## Debugging and Error Handling If you run into network connectivity issues, or have a machine failure in the @@ -758,6 +821,97 @@ are only made after successful file creation, deletion and rename, the store is *unlikely* to get out of sync, it is still something which merits more testing before it could be considered reliable. +## Managing DynamoDB IO Capacity + +DynamoDB is not only billed on use (data and IO requests), it is billed +on allocated IO Capacity. + +When an application makes more requests than +the allocated capacity permits, the request is rejected; it is up to +the calling application to detect when it is being so throttled and +react. S3Guard does this, but as a result, when the client is being +throttled, operations are slower. This capacity throttling is averaged +over a few minutes: a briefly overloaded table will not be throttled, +but the rate cannot be sustained. + +The load on a table is visible in the AWS console: go to the +DynamoDB page for the table and select the "metrics" tab.
+If the graphs of throttled read or write +requests show that a lot of throttling has taken place, then there is not +enough allocated capacity for the applications making use of the table. + +Similarly, if the capacity graphs show that the read or write loads are +low compared to the allocated capacities, then the table *may* be overprovisioned +for the current workload. + +The S3Guard connector to DynamoDB can be configured to make +multiple attempts to repeat a throttled request, with an exponential +backoff between them. + +The relevant settings for managing retries in the connector are: + +```xml +<property> + <name>fs.s3a.s3guard.ddb.max.retries</name> + <value>9</value> + <description> + Max retries on throttled/incompleted DynamoDB operations + before giving up and throwing an IOException. + Each retry is delayed with an exponential + backoff timer which starts at 100 milliseconds and approximately + doubles each time. The minimum wait before throwing an exception is + sum(100, 200, 400, 800, .. 100*2^N-1 ) == 100 * ((2^N)-1) + </description> +</property> + +<property> + <name>fs.s3a.s3guard.ddb.throttle.retry.interval</name> + <value>100ms</value> + <description> + Initial interval to retry after a request is throttled events; + the back-off policy is exponential until the number of retries of + fs.s3a.s3guard.ddb.max.retries is reached. + </description> +</property> + +<property> + <name>fs.s3a.s3guard.ddb.background.sleep</name> + <value>25ms</value> + <description> + Length (in milliseconds) of pause between each batch of deletes when + pruning metadata. Prevents prune operations (which can typically be low + priority background operations) from overly interfering with other I/O + operations. + </description> +</property> +``` + +Having a large value for `fs.s3a.s3guard.ddb.max.retries` will ensure +that clients of an overloaded table will not fail immediately. However +queries may be unexpectedly slow. + +If operations, especially directory operations, are slow, check the AWS +console. It is also possible to set up AWS alerts for capacity limits +being exceeded. + +[DynamoDB Auto Scaling](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AutoScaling.html) +can automatically increase and decrease the allocated capacity. +This is good for keeping capacity high when needed, but avoiding large +bills when it is not. + +Experiments with S3Guard and DynamoDB Auto Scaling have shown that any Auto Scaling +operation will only take place after callers have been throttled for a period of +time. The clients will still need to be configured to retry when overloaded +until any extra capacity is allocated. Furthermore, as this retrying will +block the threads from performing other operations -including more IO, the +autoscale may not scale fast enough. + +We recommend experimenting with this, based on usage information collected +from previous days, and choosing a combination of +retry counts and an interval which allow the clients to cope with +some throttling, but not to time out other applications. + ## Troubleshooting ### Error: `S3Guard table lacks version marker.` @@ -798,12 +952,49 @@ or the configuration is preventing S3Guard from finding the table. region as the bucket being used. 1. Create the table if necessary. + ### Error `"The level of configured provisioned throughput for the table was exceeded"` +``` +org.apache.hadoop.fs.s3a.AWSServiceThrottledException: listFiles on s3a://bucket/10/d1/d2/d3: +com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException: +The level of configured provisioned throughput for the table was exceeded. +Consider increasing your provisioning level with the UpdateTable API.
+(Service: AmazonDynamoDBv2; Status Code: 400; +Error Code: ProvisionedThroughputExceededException; +``` The IO load of clients of the (shared) DynamoDB table was exceeded. -Currently S3Guard doesn't do any throttling and retries here; the way to address -this is to increase capacity via the AWS console or the `set-capacity` command. +1. Increase the capacity of the DynamoDB table. +1. Increase the retry count and/or sleep time of S3Guard on throttle events. +1. Enable capacity autoscaling for the table in the AWS console. + +### Error `Max retries exceeded` + +The I/O load of clients of the (shared) DynamoDB table was exceeded, and +the number of attempts to retry the operation exceeded the configured amount. + +1. Increase the capacity of the DynamoDB table. +1. Increase the retry count and/or sleep time of S3Guard on throttle events. +1. Enable capacity autoscaling for the table in the AWS console. + + +### Error when running `set-capacity`: `org.apache.hadoop.fs.s3a.AWSServiceThrottledException: ProvisionTable` + +``` +org.apache.hadoop.fs.s3a.AWSServiceThrottledException: ProvisionTable on s3guard-example: +com.amazonaws.services.dynamodbv2.model.LimitExceededException: +Subscriber limit exceeded: Provisioned throughput decreases are limited within a given UTC day. +After the first 4 decreases, each subsequent decrease in the same UTC day can be performed at most once every 3600 seconds. +Number of decreases today: 6. +Last decrease at Wednesday, July 25, 2018 8:48:14 PM UTC. +Next decrease can be made at Wednesday, July 25, 2018 9:48:14 PM UTC +``` + +There is a limit on how often you can change the capacity of a DynamoDB table; +if you call set-capacity too often, it fails. Wait until after the time indicated +and try again. + ## Other Topics diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 77ad0d00358..31d3a5f3af8 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -742,6 +742,54 @@ sequential one afterwards. The IO heavy ones must also be subclasses of This is invaluable for debugging test failures. +### Keeping AWS Costs down + +Most of the base S3 tests are designed to use public AWS data +(the landsat-pds bucket) for read IO, so you don't have to pay for bytes +downloaded or long term storage costs. The scale tests do work with more data, +so they will cost more and generally take more time to execute. + +You are however billed for: + +1. Data left in S3 after test runs. +2. DynamoDB capacity reserved by S3Guard tables. +3. HTTP operations on files (HEAD, LIST, GET). +4. In-progress multipart uploads from bulk IO or S3A committer tests. +5. Encryption/decryption using AWS KMS keys. + +The GET/decrypt costs are incurred on each partial read of a file, +so random IO can cost more than sequential IO; the speedup of queries with +columnar data usually justifies this. + +The DynamoDB costs come from the number of entries stored and the allocated capacity. + +How to keep costs down: + +* Don't run the scale tests with large datasets; keep `fs.s3a.scale.test.huge.filesize` unset, or a few MB (minimum: 5). +* Remove all files in the filesystem.
The root tests usually do this, but +it can be manually done: + + hadoop fs -rm -r -f -skipTrash s3a://test-bucket/\* +* Abort all outstanding uploads: + + hadoop s3guard uploads -abort -force s3a://test-bucket/ +* If you don't need it, destroy the S3Guard DDB table. + + hadoop s3guard destroy s3a://hwdev-steve-ireland-new/ + +The S3Guard tests will automatically create the DynamoDB table in runs with +`-Ds3guard -Ddynamodb` set; the default capacity of these test tables +is very small; it keeps costs down at the expense of IO performance +and, for test runs in or near the S3/DDB stores, throttling events. + +If you want to manage capacity, use `s3guard set-capacity` to increase it +(performance) or decrease it (costs). +For remote `hadoop-aws` test runs, read/write capacities of "10" each should suffice; +increase them if parallel test run logs warn of throttling. + +Tip: for agility, use DynamoDB autoscaling, setting the minimum to something very low (e.g. 5 units) and the maximum to the largest amount you are willing to pay. +This will automatically reduce capacity when you are not running tests against +the bucket, and slowly increase it over multiple test runs if the load justifies it. ## Tips @@ -985,9 +1033,13 @@ are included in the scale tests executed when `-Dscale` is passed to the maven command line. The two S3Guard scale tests are `ITestDynamoDBMetadataStoreScale` and -`ITestLocalMetadataStoreScale`. To run the DynamoDB test, you will need to -define your table name and region in your test configuration. For example, -the following settings allow us to run `ITestDynamoDBMetadataStoreScale` with +`ITestLocalMetadataStoreScale`. + +To run these tests, your DynamoDB table needs to be of limited capacity; +the values in `ITestDynamoDBMetadataStoreScale` currently require a read capacity +of 10 or less, and a write capacity of 15 or more. + +The following settings allow us to run `ITestDynamoDBMetadataStoreScale` with artificially low read and write capacity provisioned, so we can judge the effects of being throttled by the DynamoDB service: @@ -1008,24 +1060,49 @@ effects of being throttled by the DynamoDB service: <property> <name>fs.s3a.s3guard.ddb.table</name> <value>my-scale-test</value> </property> -<property> - <name>fs.s3a.s3guard.ddb.region</name> - <value>us-west-2</value> -</property> <property> <name>fs.s3a.s3guard.ddb.table.create</name> <value>true</value> </property> <property> <name>fs.s3a.s3guard.ddb.table.capacity.read</name> - <value>10</value> + <value>5</value> </property> <property> <name>fs.s3a.s3guard.ddb.table.capacity.write</name> - <value>10</value> + <value>5</value> </property> ``` +These tests verify that the invoked operations can trigger retries in the +S3Guard code, rather than just at the AWS SDK level, showing that if +SDK operations fail, they get retried. They also verify that the filesystem +statistics are updated to record that throttling took place. + +*Do not panic if these tests fail to detect throttling!* + +These tests are unreliable as they need certain conditions to be met +to repeatedly fail: + +1. You must have a low-enough latency connection to the DynamoDB store that, +for the capacity allocated, you can overload it. +1. The AWS Console can give you a view of what is happening here. +1. Running a single test on its own is less likely to trigger an overload +than trying to run the whole test suite. +1. Running the test suite more than once, back-to-back, can also help +overload the cluster. +1. Stepping through with a debugger will reduce load, so may not trigger +failures. + +If the tests fail, it *probably* just means you aren't putting enough load +on the table. + +These tests do not verify that the entire set of DynamoDB calls made +during the use of an S3Guarded S3A filesystem are wrapped by retry logic.
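The paragraph above notes that the filesystem statistics record that throttling took place. If you want to inspect those statistics yourself, here is a minimal sketch using only the public `FileSystem.getStorageStatistics()` API; it is not part of the test suite, the bucket URI is a placeholder, and the exact names of any throttle-related counters vary between Hadoop versions:

```java
import java.net.URI;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;

/** Dump the storage statistics of an S3A filesystem after a little IO. */
public class DumpS3AStatistics {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder bucket: point this at a test bucket you own.
    try (FileSystem fs = FileSystem.get(new URI("s3a://example-test-bucket/"), conf)) {
      fs.listStatus(new Path("/"));   // generate some requests to count
      StorageStatistics stats = fs.getStorageStatistics();
      Iterator<StorageStatistics.LongStatistic> it = stats.getLongStatistics();
      while (it.hasNext()) {
        StorageStatistics.LongStatistic s = it.next();
        // Throttle/retry counters, where the connector exposes them,
        // appear in this listing alongside the other IO statistics.
        System.out.println(s.getName() + " = " + s.getValue());
      }
    }
  }
}
```

This is only a diagnostic aid for watching a live run; the scale tests make their own assertions internally.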
+ +*The best way to verify resilience is to run the entire `hadoop-aws` test suite, +or even a real application, with throttling enabled.* + ### Testing only: Local Metadata Store There is an in-memory Metadata Store for testing. @@ -1081,3 +1158,160 @@ thorough test, by switching to the credentials provider. The usual credentials needed to log in to the bucket will be used, but now the credentials used to interact with S3 and DynamoDB will be temporary role credentials, rather than the full credentials. + +## Qualifying an AWS SDK Update + +Updating the AWS SDK is something which does need to be done regularly, +but is rarely without complications, major or minor. + +Assume that the version of the SDK will remain constant for an X.Y release, +excluding security fixes, so it's good to have an update before each release +— as long as that update doesn't trigger any regressions. + + +1. Don't make this a last minute action. +1. The upgrade patch should focus purely on the SDK update, so it can be cherry +picked and reverted easily. +1. Do not mix in an SDK update with any other piece of work, for the same reason. +1. Plan for an afternoon's work, including before/after testing, log analysis +and any manual tests. +1. Make sure all the integration tests are running (including s3guard, ARN, encryption, scale) + *before you start the upgrade*. +1. Create a JIRA for updating the SDK. Don't include the version (yet), +as it may take a couple of SDK updates before it is ready. +1. Identify the latest AWS SDK [available for download](https://aws.amazon.com/sdk-for-java/). +1. Create a private git branch of trunk for the JIRA, and in + `hadoop-project/pom.xml` update the `aws-java-sdk.version` to the new SDK version. +1. Do a clean build and rerun all the `hadoop-aws` tests, with and without the `-Ds3guard -Ddynamodb` options. + This includes the `-Pscale` set, with a role defined in `fs.s3a.assumed.role.arn` + for testing assumed roles, and `fs.s3a.server-side-encryption.key` set for + encryption, for full coverage. If you can, scale up the scale tests. +1. Create the site with `mvn site -DskipTests`; look in `target/site` for the report. +1. Review *every single* `-output.txt` file in `hadoop-tools/hadoop-aws/target/failsafe-reports`, + paying particular attention to + `org.apache.hadoop.fs.s3a.scale.ITestS3AInputStreamPerformance-output.txt`, + as that is where changes in stream close/abort logic will surface. +1. Run `mvn install` to install the artifacts, then in + `hadoop-cloud-storage-project/hadoop-cloud-storage` run + `mvn dependency:tree -Dverbose > target/dependencies.txt`. + Examine the `target/dependencies.txt` file to verify that no new + artifacts have unintentionally been declared as dependencies + of the shaded `aws-java-sdk-bundle` artifact. + +### Basic command line regression testing + +We need a run-through of the CLI to see whether there have been changes there +which cause problems, especially whether new log messages have surfaced, +or whether some packaging change breaks the CLI. + +From the root of the project, create a command line release with `mvn package -Pdist -DskipTests -Dmaven.javadoc.skip=true -DskipShade`, then: + +1. Change into the `hadoop/dist/target/hadoop-x.y.z-SNAPSHOT` dir. +1. Copy a `core-site.xml` file into `etc/hadoop`. +1. Set the `HADOOP_OPTIONAL_TOOLS` env var on the command line or `~/.hadoop-env`. + +```bash +export HADOOP_OPTIONAL_TOOLS="hadoop-aws" +``` + +Run some basic s3guard commands as well as file operations.
+ +```bash +export BUCKET=s3a://example-bucket-name + +bin/hadoop s3guard bucket-info $BUCKET +bin/hadoop s3guard set-capacity $BUCKET +bin/hadoop s3guard set-capacity -read 15 -write 15 $BUCKET +bin/hadoop s3guard uploads $BUCKET +bin/hadoop s3guard diff $BUCKET/ +bin/hadoop s3guard prune -minutes 10 $BUCKET/ +bin/hadoop s3guard import $BUCKET/ +bin/hadoop fs -ls $BUCKET/ +bin/hadoop fs -ls $BUCKET/file +bin/hadoop fs -rm -R -f $BUCKET/dir-no-trailing +bin/hadoop fs -rm -R -f $BUCKET/dir-trailing/ +bin/hadoop fs -rm $BUCKET/ +bin/hadoop fs -touchz $BUCKET/file +# expect I/O error as root dir is not empty +bin/hadoop fs -rm -r $BUCKET/ +bin/hadoop fs -rm -r $BUCKET/\* +# now success +bin/hadoop fs -rm -r $BUCKET/ + +bin/hadoop fs -mkdir $BUCKET/dir-no-trailing +# fails with S3Guard +bin/hadoop fs -mkdir $BUCKET/dir-trailing/ +bin/hadoop fs -touchz $BUCKET/file +bin/hadoop fs -ls $BUCKET/ +bin/hadoop fs -mv $BUCKET/file $BUCKET/file2 +# expect "No such file or directory" +bin/hadoop fs -stat $BUCKET/file +bin/hadoop fs -stat $BUCKET/file2 +bin/hadoop fs -mv $BUCKET/file2 $BUCKET/dir-no-trailing +bin/hadoop fs -stat $BUCKET/dir-no-trailing/file2 +# treated the same as the file stat +bin/hadoop fs -stat $BUCKET/dir-no-trailing/file2/ +bin/hadoop fs -ls $BUCKET/dir-no-trailing/file2/ +bin/hadoop fs -ls $BUCKET/dir-no-trailing +# expect a "0" here: +bin/hadoop fs -test -d $BUCKET/dir-no-trailing && echo $? +# expect a "1" here: +bin/hadoop fs -test -d $BUCKET/dir-no-trailing/file2 && echo $? +# will return NONE unless bucket has checksums enabled +bin/hadoop fs -checksum $BUCKET/dir-no-trailing/file2 +# expect "etag" + a long string +bin/hadoop fs -D fs.s3a.etag.checksum.enabled=true -checksum $BUCKET/dir-no-trailing/file2 +``` + +### Other tests + +* Whatever applications you have which use S3A: build and run them before the upgrade, +then see if they complete successfully in roughly the same time once the upgrade is applied. +* Test any third-party endpoints you have access to. +* Try different regions (especially a v4 only region), and encryption settings. +* Any performance tests you have can identify slowdowns, which can be a sign + of changed behavior in the SDK (especially on stream reads and writes). +* If you can, try to test in an environment where a proxy is needed to talk +to AWS services. +* Try to get other people, especially anyone with their own endpoints, + apps or different deployment environments, to run their own tests. + +### Dealing with Deprecated APIs and New Features + +A Jenkins run should tell you if there are new deprecations. +If so, you should think about how to deal with them. + +Moving to methods and APIs which weren't in the previous SDK release makes it +harder to roll back if there is a problem; but there may be good reasons +for the deprecation. + +At the same time, there may be good reasons for staying with the old code. + +* AWS have embraced the builder pattern for new operations; note that objects +constructed this way often have their (existing) setter methods disabled; this +may break existing code (see the sketch after this list). +* New versions of S3 calls (list v2, bucket existence checks, bulk operations) +may be better than the previous HTTP operations & APIs, but they may not work with +third-party endpoints, so can only be adopted if made optional, which then +adds a new configuration option (with docs, testing, ...). A change like that +must be done in its own patch, with its new tests which compare the old +vs new operations.
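As a concrete illustration of the builder-pattern point above, here is a hedged sketch; it is not taken from the Hadoop codebase, the credentials and region are placeholders, and it only shows the general AWS SDK for Java 1.11.x pattern of constructor-plus-setters versus builder-built, immutable clients:

```java
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;

public class BuilderPatternSketch {
  public static void main(String[] args) {
    AWSStaticCredentialsProvider creds = new AWSStaticCredentialsProvider(
        new BasicAWSCredentials("access-key", "secret-key")); // placeholders

    // Deprecated style: construct the client directly, then mutate it.
    @SuppressWarnings("deprecation")
    AmazonS3 oldStyle = new AmazonS3Client(creds);
    oldStyle.setEndpoint("s3.eu-west-1.amazonaws.com"); // setters still work here

    // Builder style: region is fixed at build time; calling setEndpoint()
    // or setRegion() on the built client throws an exception, which is
    // exactly the kind of behavioral change an SDK update can surface.
    AmazonS3 newStyle = AmazonS3ClientBuilder.standard()
        .withCredentials(creds)
        .withRegion(Regions.EU_WEST_1)
        .build();

    System.out.println("clients created: " + oldStyle + ", " + newStyle);
  }
}
```

If a deprecated call such as `setEndpoint()` has to go, the builder-side endpoint configuration is the usual replacement; treat any such migration as part of the same qualification exercise rather than folding it into the SDK version bump.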
+ +### Committing the patch + +When the patch is committed: update the JIRA to the version number actually +used; use that title in the commit message. + +Be prepared to roll-back, re-iterate or code your way out of a regression. + +There may be some problem which surfaces with wider use, which can get +fixed in a new AWS release, rolling back to an older one, +or just worked around [HADOOP-14596](https://issues.apache.org/jira/browse/HADOOP-14596). + +Don't be surprised if this happens, don't worry too much, and, +while that rollback option is there to be used, ideally try to work forwards. + +If the problem is with the SDK, file issues with the + [AWS SDK Bug tracker](https://github.com/aws/aws-sdk-java/issues). +If the problem can be fixed or worked around in the Hadoop code, do it there too. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 66ee8a3287c..da3e0da8c1b 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -149,19 +149,22 @@ credentials, through a command such as: Note the trailing "/" here; without that the shell thinks you are trying to list your home directory under the bucket, which will only exist if explicitly created. -Attempting to list a bucket using inline credentials is a -means of verifying that the key and secret can access a bucket; - - hadoop fs -ls s3a://key:secret@my-bucket/ - -Do escape any `+` or `/` symbols in the secret, as discussed below, and never -share the URL, logs generated using it, or use such an inline authentication -mechanism in production. - Finally, if you set the environment variables, you can take advantage of S3A's support of environment-variable authentication by attempting the same ls operation. That is: unset the `fs.s3a` secrets and rely on the environment variables. + +### Authentication failure "The Filesystem URI contains login details." + +``` +The Filesystem URI contains login details. This authentication mechanism is no longer supported. +``` + +The S3A connector no longer supports the dangerously insecure mechanism of +passing login details within the S3A URLs. + +Fix: use a more secure mechanism to pass down the secrets. + ### Authentication failure due to clock skew The timestamp is used in signing to S3, so as to @@ -173,29 +176,6 @@ read requests are allowed, but operations which write to the bucket are denied. Check the system clock. -### Authentication failure when using URLs with embedded secrets - -If using the (strongly discouraged) mechanism of including the -AWS Key and secret in a URL, then both "+" and "/" symbols need -to encoded in the URL. As many AWS secrets include these characters, -encoding problems are not uncommon. - -| symbol | encoded value| -|-----------|-------------| -| `+` | `%2B` | -| `/` | `%2F` | - - -As an example, a URL for `bucket` with AWS ID `user1` and secret `a+b/c` would -be represented as - -``` -s3a://user1:a%2Bb%2Fc@bucket/ -``` - -This technique is only needed when placing secrets in the URL. Again, -this is something users are strongly advised against using. 
- ### "Bad Request" exception when working with AWS S3 Frankfurt, Seoul, or other "V4" endpoint diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java index c2dc700a0f2..638a22786b8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractGetFileStatus.java @@ -41,7 +41,7 @@ protected AbstractFSContract createContract(Configuration conf) { @Override public void teardown() throws Exception { - getLog().info("FS details {}", getFileSystem()); + getLogger().info("FS details {}", getFileSystem()); super.teardown(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMultipartUploader.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMultipartUploader.java new file mode 100644 index 00000000000..d28f39bafdf --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMultipartUploader.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.contract.s3a; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.AbstractContractMultipartUploaderTest; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.WriteOperationHelper; + +import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; +import static org.apache.hadoop.fs.s3a.scale.AbstractSTestS3AHugeFiles.DEFAULT_HUGE_PARTITION_SIZE; + +/** + * Test MultipartUploader with S3A. + */ +public class ITestS3AContractMultipartUploader extends + AbstractContractMultipartUploaderTest { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestS3AContractMultipartUploader.class); + + private int partitionSize; + + /** + * S3 requires a minimum part size of 5MB (except the last part). + * @return 5MB + */ + @Override + protected int partSizeInBytes() { + return partitionSize; + } + + @Override + protected int getTestPayloadCount() { + return 3; + } + + @Override + public S3AFileSystem getFileSystem() { + return (S3AFileSystem) super.getFileSystem(); + } + + /** + * Create a configuration, possibly patching in S3Guard options. 
+ * @return a configuration + */ + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + maybeEnableS3Guard(conf); + return conf; + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + + @Override + public void setup() throws Exception { + super.setup(); + Configuration conf = getContract().getConf(); + boolean enabled = getTestPropertyBool( + conf, + KEY_SCALE_TESTS_ENABLED, + DEFAULT_SCALE_TESTS_ENABLED); + assume("Scale test disabled: to enable set property " + + KEY_SCALE_TESTS_ENABLED, + enabled); + partitionSize = (int) getTestPropertyBytes(conf, + KEY_HUGE_PARTITION_SIZE, + DEFAULT_HUGE_PARTITION_SIZE); + } + + /** + * Extend superclass teardown with actions to help clean up the S3 store, + * including aborting uploads under the test path. + */ + @Override + public void teardown() throws Exception { + Path teardown = path("teardown").getParent(); + S3AFileSystem fs = getFileSystem(); + WriteOperationHelper helper = fs.getWriteOperationHelper(); + try { + LOG.info("Teardown: aborting outstanding uploads under {}", teardown); + int count = helper.abortMultipartUploadsUnderPath(fs.pathToKey(teardown)); + LOG.info("Found {} incomplete uploads", count); + } catch (IOException e) { + LOG.warn("IOE in teardown", e); + } + super.teardown(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index 660123379f4..267646ca258 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -33,9 +33,7 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.InstanceProfileCredentialsProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -147,23 +145,6 @@ public void testBadCredentials() throws Exception { } } - static class GoodCredentialsProvider extends AWSCredentialsProviderChain { - - @SuppressWarnings("unused") - public GoodCredentialsProvider(Configuration conf) { - super(new BasicAWSCredentialsProvider(conf.get(ACCESS_KEY), - conf.get(SECRET_KEY)), - InstanceProfileCredentialsProvider.getInstance()); - } - } - - @Test - public void testGoodProvider() throws Exception { - Configuration conf = new Configuration(); - conf.set(AWS_CREDENTIALS_PROVIDER, GoodCredentialsProvider.class.getName()); - S3ATestUtils.createTestFileSystem(conf); - } - @Test public void testAnonymousProvider() throws Exception { Configuration conf = new Configuration(); @@ -178,4 +159,5 @@ public void testAnonymousProvider() throws Exception { assertNotNull(stat); assertEquals(testFile, stat.getPath()); } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index aa6b5d86596..423003b915c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -36,14 +36,6 @@ import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - import java.io.File; import java.net.URI; import java.security.PrivilegedExceptionAction; @@ -60,9 +52,12 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME; +import static org.junit.Assert.*; /** - * S3A tests for configuration. + * S3A tests for configuration, especially credentials. */ public class ITestS3AConfiguration { private static final String EXAMPLE_ID = "AKASOMEACCESSKEY"; @@ -134,12 +129,26 @@ public void testProxyConnection() throws Exception { conf.setInt(Constants.PROXY_PORT, 1); String proxy = conf.get(Constants.PROXY_HOST) + ":" + conf.get(Constants.PROXY_PORT); - try { - fs = S3ATestUtils.createTestFileSystem(conf); - fail("Expected a connection error for proxy server at " + proxy); - } catch (AWSClientIOException e) { - // expected - } + expectFSCreateFailure(AWSClientIOException.class, + conf, "when using proxy " + proxy); + } + + /** + * Expect a filesystem to not be created from a configuration + * @return the exception intercepted + * @throws Exception any other exception + */ + private E expectFSCreateFailure( + Class clazz, + Configuration conf, + String text) + throws Exception { + + return intercept(clazz, + () -> { + fs = S3ATestUtils.createTestFileSystem(conf); + return "expected failure creating FS " + text + " got " + fs; + }); } @Test @@ -148,15 +157,13 @@ public void testProxyPortWithoutHost() throws Exception { conf.unset(Constants.PROXY_HOST); conf.setInt(Constants.MAX_ERROR_RETRIES, 2); conf.setInt(Constants.PROXY_PORT, 1); - try { - fs = S3ATestUtils.createTestFileSystem(conf); - fail("Expected a proxy configuration error"); - } catch (IllegalArgumentException e) { - String msg = e.toString(); - if (!msg.contains(Constants.PROXY_HOST) && - !msg.contains(Constants.PROXY_PORT)) { - throw e; - } + IllegalArgumentException e = expectFSCreateFailure( + IllegalArgumentException.class, + conf, "Expected a connection error for proxy server"); + String msg = e.toString(); + if (!msg.contains(Constants.PROXY_HOST) && + !msg.contains(Constants.PROXY_PORT)) { + throw e; } } @@ -167,19 +174,11 @@ public void testAutomaticProxyPortSelection() throws Exception { conf.setInt(Constants.MAX_ERROR_RETRIES, 2); conf.set(Constants.PROXY_HOST, "127.0.0.1"); conf.set(Constants.SECURE_CONNECTIONS, "true"); - try { - fs = S3ATestUtils.createTestFileSystem(conf); - fail("Expected a connection error for proxy server"); - } catch (AWSClientIOException e) { - // expected - } + expectFSCreateFailure(AWSClientIOException.class, + conf, "Expected a connection error for proxy server"); conf.set(Constants.SECURE_CONNECTIONS, "false"); - try { - fs = S3ATestUtils.createTestFileSystem(conf); - fail("Expected a connection error for proxy server"); - } catch (AWSClientIOException e) { - // expected - } + expectFSCreateFailure(AWSClientIOException.class, + conf, "Expected a connection error for proxy server"); } @Test @@ -189,31 +188,31 @@ public void 
testUsernameInconsistentWithPassword() throws Exception { conf.set(Constants.PROXY_HOST, "127.0.0.1"); conf.setInt(Constants.PROXY_PORT, 1); conf.set(Constants.PROXY_USERNAME, "user"); - try { - fs = S3ATestUtils.createTestFileSystem(conf); - fail("Expected a connection error for proxy server"); - } catch (IllegalArgumentException e) { - String msg = e.toString(); - if (!msg.contains(Constants.PROXY_USERNAME) && - !msg.contains(Constants.PROXY_PASSWORD)) { - throw e; - } + IllegalArgumentException e = expectFSCreateFailure( + IllegalArgumentException.class, + conf, "Expected a connection error for proxy server"); + assertIsProxyUsernameError(e); + } + + private void assertIsProxyUsernameError(final IllegalArgumentException e) { + String msg = e.toString(); + if (!msg.contains(Constants.PROXY_USERNAME) && + !msg.contains(Constants.PROXY_PASSWORD)) { + throw e; } + } + + @Test + public void testUsernameInconsistentWithPassword2() throws Exception { conf = new Configuration(); conf.setInt(Constants.MAX_ERROR_RETRIES, 2); conf.set(Constants.PROXY_HOST, "127.0.0.1"); conf.setInt(Constants.PROXY_PORT, 1); conf.set(Constants.PROXY_PASSWORD, "password"); - try { - fs = S3ATestUtils.createTestFileSystem(conf); - fail("Expected a connection error for proxy server"); - } catch (IllegalArgumentException e) { - String msg = e.toString(); - if (!msg.contains(Constants.PROXY_USERNAME) && - !msg.contains(Constants.PROXY_PASSWORD)) { - throw e; - } - } + IllegalArgumentException e = expectFSCreateFailure( + IllegalArgumentException.class, + conf, "Expected a connection error for proxy server"); + assertIsProxyUsernameError(e); } @Test @@ -246,47 +245,6 @@ void provisionAccessKeys(final Configuration conf) throws Exception { provider.flush(); } - @Test - public void testCredsFromUserInfo() throws Exception { - // set up conf to have a cred provider - final Configuration conf = new Configuration(); - final File file = tempDir.newFile("test.jks"); - final URI jks = ProviderUtils.nestURIForLocalJavaKeyStoreProvider( - file.toURI()); - conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, - jks.toString()); - - provisionAccessKeys(conf); - - conf.set(Constants.ACCESS_KEY, EXAMPLE_ID + "LJM"); - URI uriWithUserInfo = new URI("s3a://123:456@foobar"); - S3xLoginHelper.Login creds = - S3AUtils.getAWSAccessKeys(uriWithUserInfo, conf); - assertEquals("AccessKey incorrect.", "123", creds.getUser()); - assertEquals("SecretKey incorrect.", "456", creds.getPassword()); - } - - @Test - public void testIDFromUserInfoSecretFromCredentialProvider() - throws Exception { - // set up conf to have a cred provider - final Configuration conf = new Configuration(); - final File file = tempDir.newFile("test.jks"); - final URI jks = ProviderUtils.nestURIForLocalJavaKeyStoreProvider( - file.toURI()); - conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, - jks.toString()); - - provisionAccessKeys(conf); - - conf.set(Constants.ACCESS_KEY, EXAMPLE_ID + "LJM"); - URI uriWithUserInfo = new URI("s3a://123@foobar"); - S3xLoginHelper.Login creds = - S3AUtils.getAWSAccessKeys(uriWithUserInfo, conf); - assertEquals("AccessKey incorrect.", "123", creds.getUser()); - assertEquals("SecretKey incorrect.", EXAMPLE_KEY, creds.getPassword()); - } - @Test public void testSecretFromCredentialProviderIDFromConfig() throws Exception { // set up conf to have a cred provider @@ -359,11 +317,11 @@ public void testExcludingS3ACredentialProvider() throws Exception { provisionAccessKeys(c); conf.set(Constants.ACCESS_KEY, EXAMPLE_ID + "LJM"); - 
URI uriWithUserInfo = new URI("s3a://123:456@foobar"); + URI uri2 = new URI("s3a://foobar"); S3xLoginHelper.Login creds = - S3AUtils.getAWSAccessKeys(uriWithUserInfo, conf); - assertEquals("AccessKey incorrect.", "123", creds.getUser()); - assertEquals("SecretKey incorrect.", "456", creds.getPassword()); + S3AUtils.getAWSAccessKeys(uri2, conf); + assertEquals("AccessKey incorrect.", EXAMPLE_ID, creds.getUser()); + assertEquals("SecretKey incorrect.", EXAMPLE_KEY, creds.getPassword()); } @@ -393,7 +351,7 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() // Catch/pass standard path style access behaviour when live bucket // isn't in the same region as the s3 client default. See // http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html - assertEquals(e.getStatusCode(), HttpStatus.SC_MOVED_PERMANENTLY); + assertEquals(HttpStatus.SC_MOVED_PERMANENTLY, e.getStatusCode()); } } @@ -428,8 +386,16 @@ public void testCustomUserAgent() throws Exception { public void testCloseIdempotent() throws Throwable { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); + AWSCredentialProviderList credentials = + fs.shareCredentials("testCloseIdempotent"); + credentials.close(); fs.close(); + assertTrue("Closing FS didn't close credentials " + credentials, + credentials.isClosed()); + assertEquals("refcount not zero in " + credentials, 0, credentials.getRefCount()); fs.close(); + // and the numbers should not change + assertEquals("refcount not zero in " + credentials, 0, credentials.getRefCount()); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContractGetFileStatusV1List.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContractGetFileStatusV1List.java index 527533682f8..42e905e416d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContractGetFileStatusV1List.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContractGetFileStatusV1List.java @@ -41,7 +41,7 @@ protected AbstractFSContract createContract(Configuration conf) { @Override public void teardown() throws Exception { - getLog().info("FS details {}", getFileSystem()); + getLogger().info("FS details {}", getFileSystem()); super.teardown(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACredentialsInURL.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACredentialsInURL.java deleted file mode 100644 index 4ee0fcb1f9e..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACredentialsInURL.java +++ /dev/null @@ -1,164 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3a; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.test.LambdaTestUtils; - -import org.junit.After; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.URI; -import java.net.URLEncoder; -import java.nio.file.AccessDeniedException; - -import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; -import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeS3GuardState; - -/** - * Tests that credentials can go into the URL. This includes a valid - * set, and a check that an invalid set do at least get stripped out - * of the final URI - */ -public class ITestS3ACredentialsInURL extends Assert { - private S3AFileSystem fs; - private static final Logger LOG = - LoggerFactory.getLogger(ITestS3ACredentialsInURL.class); - @Rule - public Timeout testTimeout = new Timeout(30 * 60 * 1000); - - @After - public void teardown() { - IOUtils.closeStream(fs); - } - - /** - * Test instantiation. - * @throws Throwable - */ - @Test - public void testInstantiateFromURL() throws Throwable { - - Configuration conf = new Configuration(); - - // Skip in the case of S3Guard with DynamoDB because it cannot get - // credentials for its own use if they're only in S3 URLs - assumeS3GuardState(false, conf); - - String accessKey = conf.get(Constants.ACCESS_KEY); - String secretKey = conf.get(Constants.SECRET_KEY); - String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, ""); - Assume.assumeNotNull(fsname, accessKey, secretKey); - URI original = new URI(fsname); - URI secretsURI = createUriWithEmbeddedSecrets(original, - accessKey, secretKey); - if (secretKey.contains("/")) { - assertTrue("test URI encodes the / symbol", secretsURI.toString(). - contains("%252F")); - } - if (secretKey.contains("+")) { - assertTrue("test URI encodes the + symbol", secretsURI.toString(). - contains("%252B")); - } - assertFalse("Does not contain secrets", original.equals(secretsURI)); - - conf.set(TEST_FS_S3A_NAME, secretsURI.toString()); - conf.unset(Constants.ACCESS_KEY); - conf.unset(Constants.SECRET_KEY); - fs = S3ATestUtils.createTestFileSystem(conf); - - String fsURI = fs.getUri().toString(); - assertFalse("FS URI contains a @ symbol", fsURI.contains("@")); - assertFalse("FS URI contains a % symbol", fsURI.contains("%")); - if (!original.toString().startsWith(fsURI)) { - fail("Filesystem URI does not match original"); - } - validate("original path", new Path(original)); - validate("bare path", new Path("/")); - validate("secrets path", new Path(secretsURI)); - } - - private void validate(String text, Path path) throws IOException { - try { - fs.canonicalizeUri(path.toUri()); - fs.checkPath(path); - assertTrue(text + " Not a directory", - fs.getFileStatus(new Path("/")).isDirectory()); - fs.globStatus(path); - } catch (AssertionError e) { - throw e; - } catch (Exception e) { - LOG.debug("{} failure: {}", text, e, e); - fail(text + " Test failed"); - } - } - - /** - * Set up some invalid credentials, verify login is rejected. 
- */ - @Test - public void testInvalidCredentialsFail() throws Throwable { - Configuration conf = new Configuration(); - // use the default credential provider chain - conf.unset(AWS_CREDENTIALS_PROVIDER); - String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, ""); - Assume.assumeNotNull(fsname); - assumeS3GuardState(false, conf); - URI original = new URI(fsname); - URI testURI = createUriWithEmbeddedSecrets(original, "user", "//"); - - conf.set(TEST_FS_S3A_NAME, testURI.toString()); - LambdaTestUtils.intercept(AccessDeniedException.class, - () -> { - fs = S3ATestUtils.createTestFileSystem(conf); - return fs.getFileStatus(new Path("/")); - }); - } - - private URI createUriWithEmbeddedSecrets(URI original, - String accessKey, - String secretKey) throws UnsupportedEncodingException { - String encodedSecretKey = URLEncoder.encode(secretKey, "UTF-8"); - String formattedString = String.format("%s://%s:%s@%s/%s/", - original.getScheme(), - accessKey, - encodedSecretKey, - original.getHost(), - original.getPath()); - URI testURI; - try { - testURI = new Path(formattedString).toUri(); - } catch (IllegalArgumentException e) { - // inner cause is stripped to keep any secrets out of stack traces - throw new IllegalArgumentException("Could not encode Path"); - } - return testURI; - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java index 27af23aa0cc..46d6ffc85e0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java @@ -54,6 +54,11 @@ private void nameThread() { Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); } + @Override + protected int getGlobalTimeout() { + return S3ATestConstants.S3A_TEST_TIMEOUT; + } + @Before public void setUp() throws Exception { nameThread(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java index 44a2beb0ea5..afc4086344f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java @@ -19,15 +19,14 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; -import java.net.URI; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient; +import com.amazonaws.services.securitytoken.AWSSecurityTokenService; +import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; import com.amazonaws.services.securitytoken.model.GetSessionTokenRequest; import com.amazonaws.services.securitytoken.model.GetSessionTokenResult; import com.amazonaws.services.securitytoken.model.Credentials; -import org.apache.hadoop.fs.s3native.S3xLoginHelper; +import org.apache.hadoop.fs.s3a.auth.STSClientFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.test.LambdaTestUtils; @@ -55,6 +54,14 @@ private static final long TEST_FILE_SIZE = 1024; + private AWSCredentialProviderList credentials; + + @Override + public void teardown() throws Exception { + S3AUtils.closeAutocloseables(LOG, credentials); + super.teardown(); + } + /** * Test use of STS for requesting 
temporary credentials. * @@ -63,7 +70,7 @@ * S3A tests to request temporary credentials, then attempt to use those * credentials instead. * - * @throws IOException + * @throws IOException failure */ @Test public void testSTS() throws IOException { @@ -71,21 +78,20 @@ public void testSTS() throws IOException { if (!conf.getBoolean(TEST_STS_ENABLED, true)) { skip("STS functional tests disabled"); } - - S3xLoginHelper.Login login = S3AUtils.getAWSAccessKeys( - URI.create("s3a://foobar"), conf); - if (!login.hasLogin()) { - skip("testSTS disabled because AWS credentials not configured"); - } - AWSCredentialsProvider parentCredentials = new BasicAWSCredentialsProvider( - login.getUser(), login.getPassword()); - - String stsEndpoint = conf.getTrimmed(TEST_STS_ENDPOINT, ""); - AWSSecurityTokenServiceClient stsClient; - stsClient = new AWSSecurityTokenServiceClient(parentCredentials); - if (!stsEndpoint.isEmpty()) { - LOG.debug("STS Endpoint ={}", stsEndpoint); - stsClient.setEndpoint(stsEndpoint); + S3AFileSystem testFS = getFileSystem(); + credentials = testFS.shareCredentials("testSTS"); + + String bucket = testFS.getBucket(); + AWSSecurityTokenServiceClientBuilder builder = STSClientFactory.builder( + conf, + bucket, + credentials, + conf.getTrimmed(TEST_STS_ENDPOINT, ""), ""); + AWSSecurityTokenService stsClient = builder.build(); + + if (!conf.getTrimmed(TEST_STS_ENDPOINT, "").isEmpty()) { + LOG.debug("STS Endpoint ={}", conf.getTrimmed(TEST_STS_ENDPOINT, "")); + stsClient.setEndpoint(conf.getTrimmed(TEST_STS_ENDPOINT, "")); } GetSessionTokenRequest sessionTokenRequest = new GetSessionTokenRequest(); sessionTokenRequest.setDurationSeconds(900); @@ -93,23 +99,28 @@ public void testSTS() throws IOException { sessionTokenResult = stsClient.getSessionToken(sessionTokenRequest); Credentials sessionCreds = sessionTokenResult.getCredentials(); - String childAccessKey = sessionCreds.getAccessKeyId(); - conf.set(ACCESS_KEY, childAccessKey); - String childSecretKey = sessionCreds.getSecretAccessKey(); - conf.set(SECRET_KEY, childSecretKey); - String sessionToken = sessionCreds.getSessionToken(); - conf.set(SESSION_TOKEN, sessionToken); + // clone configuration so changes here do not affect the base FS. + Configuration conf2 = new Configuration(conf); + S3AUtils.clearBucketOption(conf2, bucket, AWS_CREDENTIALS_PROVIDER); + S3AUtils.clearBucketOption(conf2, bucket, ACCESS_KEY); + S3AUtils.clearBucketOption(conf2, bucket, SECRET_KEY); + S3AUtils.clearBucketOption(conf2, bucket, SESSION_TOKEN); + + conf2.set(ACCESS_KEY, sessionCreds.getAccessKeyId()); + conf2.set(SECRET_KEY, sessionCreds.getSecretAccessKey()); + conf2.set(SESSION_TOKEN, sessionCreds.getSessionToken()); - conf.set(AWS_CREDENTIALS_PROVIDER, PROVIDER_CLASS); + conf2.set(AWS_CREDENTIALS_PROVIDER, PROVIDER_CLASS); - try(S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) { + // with valid credentials, we can set properties. 
+ try(S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf2)) { createAndVerifyFile(fs, path("testSTS"), TEST_FILE_SIZE); } // now create an invalid set of credentials by changing the session // token - conf.set(SESSION_TOKEN, "invalid-" + sessionToken); - try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) { + conf2.set(SESSION_TOKEN, "invalid-" + sessionCreds.getSessionToken()); + try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf2)) { createAndVerifyFile(fs, path("testSTSInvalidToken"), TEST_FILE_SIZE); fail("Expected an access exception, but file access to " + fs.getUri() + " was allowed: " + fs); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index 763819b2a4e..a1df1a5fb52 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -20,6 +20,7 @@ import com.amazonaws.services.s3.model.ListObjectsV2Request; import com.amazonaws.services.s3.model.ListObjectsV2Result; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -28,6 +29,7 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.s3a.S3AContract; + import org.junit.Assume; import org.junit.Test; @@ -37,6 +39,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.stream.Collectors; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; @@ -71,7 +74,9 @@ protected AbstractFSContract createContract(Configuration conf) { // Other configs would break test assumptions conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING); conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f); - conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC); + // this is a long value to guarantee that the inconsistency holds + // even over long-haul connections, and in the debugger too/ + conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, (long) (60 * 1000)); return new S3AContract(conf); } @@ -524,37 +529,60 @@ public void testInconsistentS3ClientDeletes() throws Throwable { ListObjectsV2Result postDeleteDelimited = listObjectsV2(fs, key, "/"); ListObjectsV2Result postDeleteUndelimited = listObjectsV2(fs, key, null); - - assertEquals("InconsistentAmazonS3Client added back objects incorrectly " + + assertListSizeEqual( + "InconsistentAmazonS3Client added back objects incorrectly " + "in a non-recursive listing", - preDeleteDelimited.getObjectSummaries().size(), - postDeleteDelimited.getObjectSummaries().size() - ); - assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " + + preDeleteDelimited.getObjectSummaries(), + postDeleteDelimited.getObjectSummaries()); + + assertListSizeEqual("InconsistentAmazonS3Client added back prefixes incorrectly " + "in a non-recursive listing", - preDeleteDelimited.getCommonPrefixes().size(), - postDeleteDelimited.getCommonPrefixes().size() + preDeleteDelimited.getCommonPrefixes(), + postDeleteDelimited.getCommonPrefixes() ); - assertEquals("InconsistentAmazonS3Client added back objects incorrectly " + + 
assertListSizeEqual("InconsistentAmazonS3Client added back objects incorrectly " + "in a recursive listing", - preDeleteUndelimited.getObjectSummaries().size(), - postDeleteUndelimited.getObjectSummaries().size() + preDeleteUndelimited.getObjectSummaries(), + postDeleteUndelimited.getObjectSummaries() ); - assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " + + + assertListSizeEqual("InconsistentAmazonS3Client added back prefixes incorrectly " + "in a recursive listing", - preDeleteUndelimited.getCommonPrefixes().size(), - postDeleteUndelimited.getCommonPrefixes().size() + preDeleteUndelimited.getCommonPrefixes(), + postDeleteUndelimited.getCommonPrefixes() ); } /** - * retrying v2 list. - * @param fs - * @param key - * @param delimiter - * @return + * Assert that the two list sizes match; failure message includes the lists. + * @param message text for the assertion + * @param expected expected list + * @param actual actual list + * @param type of list + */ + private void assertListSizeEqual(String message, + List expected, + List actual) { + String leftContents = expected.stream() + .map(n -> n.toString()) + .collect(Collectors.joining("\n")); + String rightContents = actual.stream() + .map(n -> n.toString()) + .collect(Collectors.joining("\n")); + String summary = "\nExpected:" + leftContents + + "\n-----------\nActual:" + rightContents; + assertEquals(message + summary, expected.size(), actual.size()); + } + + /** + * Retrying v2 list directly through the s3 client. + * @param fs filesystem + * @param key key to list under + * @param delimiter any delimiter + * @return the listing * @throws IOException on error */ + @Retries.RetryRaw private ListObjectsV2Result listObjectsV2(S3AFileSystem fs, String key, String delimiter) throws IOException { ListObjectsV2Request k = fs.createListObjectsRequest(key, delimiter) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java index c8a54b82ed4..d5cd4d4d9a0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java @@ -65,11 +65,12 @@ public void testListStatusWriteBack() throws Exception { // delete the existing directory (in case of last test failure) noS3Guard.delete(directory, true); // Create a directory on S3 only - noS3Guard.mkdirs(new Path(directory, "OnS3")); + Path onS3 = new Path(directory, "OnS3"); + noS3Guard.mkdirs(onS3); // Create a directory on both S3 and metadata store - Path p = new Path(directory, "OnS3AndMS"); - ContractTestUtils.assertPathDoesNotExist(noWriteBack, "path", p); - noWriteBack.mkdirs(p); + Path onS3AndMS = new Path(directory, "OnS3AndMS"); + ContractTestUtils.assertPathDoesNotExist(noWriteBack, "path", onS3AndMS); + noWriteBack.mkdirs(onS3AndMS); FileStatus[] fsResults; DirListingMetadata mdResults; @@ -83,6 +84,8 @@ public void testListStatusWriteBack() throws Exception { // Metadata store without write-back should still only contain /OnS3AndMS, // because newly discovered /OnS3 is not written back to metadata store mdResults = noWriteBack.getMetadataStore().listChildren(directory); + assertNotNull("No results from noWriteBack listChildren " + directory, + mdResults); assertEquals("Metadata store without write back should still only know " + "about /OnS3AndMS, but it has: " + mdResults, 1, mdResults.numEntries()); @@ 
-102,8 +105,7 @@ public void testListStatusWriteBack() throws Exception { // If we don't clean this up, the next test run will fail because it will // have recorded /OnS3 being deleted even after it's written to noS3Guard. - getFileSystem().getMetadataStore().forgetMetadata( - new Path(directory, "OnS3")); + getFileSystem().getMetadataStore().forgetMetadata(onS3); } /** @@ -118,26 +120,33 @@ private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard, // Create a FileSystem that is S3-backed only conf = createConfiguration(); - S3ATestUtils.disableFilesystemCaching(conf); String host = fsURI.getHost(); - if (disableS3Guard) { - conf.set(Constants.S3_METADATA_STORE_IMPL, - Constants.S3GUARD_METASTORE_NULL); - S3AUtils.setBucketOption(conf, host, - S3_METADATA_STORE_IMPL, - S3GUARD_METASTORE_NULL); - } else { - S3ATestUtils.maybeEnableS3Guard(conf); - conf.setBoolean(METADATASTORE_AUTHORITATIVE, authoritativeMeta); - S3AUtils.setBucketOption(conf, host, - METADATASTORE_AUTHORITATIVE, - Boolean.toString(authoritativeMeta)); - S3AUtils.setBucketOption(conf, host, - S3_METADATA_STORE_IMPL, - conf.get(S3_METADATA_STORE_IMPL)); + String metastore; + + metastore = S3GUARD_METASTORE_NULL; + if (!disableS3Guard) { + // pick up the metadata store used by the main test + metastore = getFileSystem().getConf().get(S3_METADATA_STORE_IMPL); + assertNotEquals(S3GUARD_METASTORE_NULL, metastore); } - FileSystem fs = FileSystem.get(fsURI, conf); - return asS3AFS(fs); + + conf.set(Constants.S3_METADATA_STORE_IMPL, metastore); + conf.setBoolean(METADATASTORE_AUTHORITATIVE, authoritativeMeta); + S3AUtils.setBucketOption(conf, host, + METADATASTORE_AUTHORITATIVE, + Boolean.toString(authoritativeMeta)); + S3AUtils.setBucketOption(conf, host, + S3_METADATA_STORE_IMPL, metastore); + + S3AFileSystem fs = asS3AFS(FileSystem.newInstance(fsURI, conf)); + // do a check to verify that everything got through + assertEquals("Metadata store should have been disabled: " + fs, + disableS3Guard, !fs.hasMetadataStore()); + assertEquals("metastore option did not propagate", + metastore, fs.getConf().get(S3_METADATA_STORE_IMPL)); + + return fs; + } private static S3AFileSystem asS3AFS(FileSystem fs) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index b746bfe5cc2..dbf228d4c7f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -23,6 +23,7 @@ import java.net.URI; import java.util.ArrayList; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.MultipartUploadListing; import com.amazonaws.services.s3.model.Region; @@ -34,8 +35,9 @@ public class MockS3ClientFactory implements S3ClientFactory { @Override - public AmazonS3 createS3Client(URI name) { - String bucket = name.getHost(); + public AmazonS3 createS3Client(URI name, + final String bucket, + final AWSCredentialsProvider credentialSet) { AmazonS3 s3 = mock(AmazonS3.class); when(s3.doesBucketExist(bucket)).thenReturn(true); // this listing is used in startup if purging is enabled, so diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index 0f7b418c1ec..ce2a98ecb23 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -105,6 +105,11 @@ */ String KEY_HUGE_PARTITION_SIZE = S3A_SCALE_TEST + "huge.partitionsize"; + /** + * Size of partitions to upload: {@value}. + */ + String DEFAULT_HUGE_PARTITION_SIZE = "8M"; + /** * The default huge size is small —full 5GB+ scale tests are something * to run in long test runs on EC2 VMs. {@value}. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index d731ae7ae7a..66f7e0a3d3c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -19,7 +19,9 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; +import java.io.InterruptedIOException; import java.net.URI; +import java.nio.file.AccessDeniedException; import java.util.Arrays; import java.util.List; @@ -34,11 +36,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider; +import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; +import org.apache.hadoop.io.retry.RetryPolicy; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.*; /** @@ -114,7 +119,7 @@ public void testDefaultChain() throws Exception { uri2, conf); List> expectedClasses = Arrays.asList( - BasicAWSCredentialsProvider.class, + SimpleAWSCredentialsProvider.class, EnvironmentVariableCredentialsProvider.class, InstanceProfileCredentialsProvider.class); assertCredentialProviders(expectedClasses, list1); @@ -208,27 +213,14 @@ public void refresh() { } } - /** - * Declare what exception to raise, and the text which must be found - * in it. 
- * @param exceptionClass class of exception - * @param text text in exception - */ - private void expectException(Class exceptionClass, - String text) { - exception.expect(exceptionClass); - exception.expectMessage(text); - } - - private void expectProviderInstantiationFailure(String option, - String expectedErrorText) throws IOException { + private IOException expectProviderInstantiationFailure(String option, + String expectedErrorText) throws Exception { Configuration conf = new Configuration(); conf.set(AWS_CREDENTIALS_PROVIDER, option); Path testFile = new Path( conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE)); - expectException(IOException.class, expectedErrorText); - URI uri = testFile.toUri(); - S3AUtils.createAWSCredentialProviderSet(uri, conf); + return intercept(IOException.class, expectedErrorText, + () -> S3AUtils.createAWSCredentialProviderSet(testFile.toUri(), conf)); } /** @@ -288,4 +280,98 @@ public void testAuthenticationContainsProbes() { authenticationContains(conf, AssumedRoleCredentialProvider.NAME)); } + @Test + public void testExceptionLogic() throws Throwable { + AWSCredentialProviderList providers + = new AWSCredentialProviderList(); + // verify you can't get credentials from it + NoAuthWithAWSException noAuth = intercept(NoAuthWithAWSException.class, + AWSCredentialProviderList.NO_AWS_CREDENTIAL_PROVIDERS, + () -> providers.getCredentials()); + // but that it closes safely + providers.close(); + + S3ARetryPolicy retryPolicy = new S3ARetryPolicy(new Configuration()); + assertEquals("Expected no retry on auth failure", + RetryPolicy.RetryAction.FAIL.action, + retryPolicy.shouldRetry(noAuth, 0, 0, true).action); + + try { + throw S3AUtils.translateException("login", "", noAuth); + } catch (AccessDeniedException expected) { + // this is what we want; other exceptions will be passed up + assertEquals("Expected no retry on AccessDeniedException", + RetryPolicy.RetryAction.FAIL.action, + retryPolicy.shouldRetry(expected, 0, 0, true).action); + } + + } + + @Test + public void testRefCounting() throws Throwable { + AWSCredentialProviderList providers + = new AWSCredentialProviderList(); + assertEquals("Ref count for " + providers, + 1, providers.getRefCount()); + AWSCredentialProviderList replicate = providers.share(); + assertEquals(providers, replicate); + assertEquals("Ref count after replication for " + providers, + 2, providers.getRefCount()); + assertFalse("Was closed " + providers, providers.isClosed()); + providers.close(); + assertFalse("Was closed " + providers, providers.isClosed()); + assertEquals("Ref count after close() for " + providers, + 1, providers.getRefCount()); + + // this should now close it + providers.close(); + assertTrue("Was not closed " + providers, providers.isClosed()); + assertEquals("Ref count after close() for " + providers, + 0, providers.getRefCount()); + assertEquals("Ref count after second close() for " + providers, + 0, providers.getRefCount()); + intercept(IllegalStateException.class, "closed", + () -> providers.share()); + // final call harmless + providers.close(); + assertEquals("Ref count after close() for " + providers, + 0, providers.getRefCount()); + providers.refresh(); + + intercept(NoAuthWithAWSException.class, + AWSCredentialProviderList.CREDENTIALS_REQUESTED_WHEN_CLOSED, + () -> providers.getCredentials()); + } + + /** + * Verify that IOEs are passed up without being wrapped. 
+ */ + @Test + public void testIOEInConstructorPropagation() throws Throwable { + IOException expected = expectProviderInstantiationFailure( + IOERaisingProvider.class.getName(), + "expected"); + if (!(expected instanceof InterruptedIOException)) { + throw expected; + } + } + + private static class IOERaisingProvider implements AWSCredentialsProvider { + + public IOERaisingProvider(URI uri, Configuration conf) + throws IOException { + throw new InterruptedIOException("expected"); + } + + @Override + public AWSCredentials getCredentials() { + return null; + } + + @Override + public void refresh() { + + } + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AMultipartUploaderSupport.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AMultipartUploaderSupport.java new file mode 100644 index 00000000000..35d04605262 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AMultipartUploaderSupport.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.EOFException; +import java.io.IOException; + +import org.junit.Test; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.test.HadoopTestBase; + +import static org.apache.hadoop.fs.s3a.S3AMultipartUploader.*; +import static org.apache.hadoop.fs.s3a.S3AMultipartUploader.parsePartHandlePayload; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test multipart upload support methods and classes. 
+ */ +public class TestS3AMultipartUploaderSupport extends HadoopTestBase { + + @Test + public void testRoundTrip() throws Throwable { + Pair<Long, String> result = roundTrip("tag", 1); + assertEquals("tag", result.getRight()); + assertEquals(1, result.getLeft().longValue()); + } + + @Test + public void testRoundTrip2() throws Throwable { + long len = 1L + Integer.MAX_VALUE; + Pair<Long, String> result = roundTrip("11223344", + len); + assertEquals("11223344", result.getRight()); + assertEquals(len, result.getLeft().longValue()); + } + + @Test + public void testNoEtag() throws Throwable { + intercept(IllegalArgumentException.class, + () -> buildPartHandlePayload("", 1)); + } + + @Test + public void testNoLen() throws Throwable { + intercept(IllegalArgumentException.class, + () -> buildPartHandlePayload("tag", 0)); + } + + @Test + public void testBadPayload() throws Throwable { + intercept(EOFException.class, + () -> parsePartHandlePayload(new byte[0])); + } + + @Test + public void testBadHeader() throws Throwable { + byte[] bytes = buildPartHandlePayload("tag", 1); + bytes[2]='f'; + intercept(IOException.class, "header", + () -> parsePartHandlePayload(bytes)); + } + + private Pair<Long, String> roundTrip(final String tag, final long len) throws IOException { + byte[] bytes = buildPartHandlePayload(tag, len); + return parsePartHandlePayload(bytes); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java index 20baacce34d..050f0a7197c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java @@ -76,17 +76,6 @@ public void testKMSGoodKey() throws Throwable { assertEquals(SSE_KMS, getAlgorithm(SSE_KMS, "kmskey")); } - @Test - public void testKMSGoodOldOptionName() throws Throwable { - Configuration conf = emptyConf(); - conf.set(SERVER_SIDE_ENCRYPTION_ALGORITHM, SSE_KMS.getMethod()); - conf.set(OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, "kmskeyID"); - // verify key round trip - assertEquals("kmskeyID", getServerSideEncryptionKey(BUCKET, conf)); - // and that KMS lookup finds it - assertEquals(SSE_KMS, getEncryptionAlgorithm(BUCKET, conf)); - } - @Test public void testAESKeySet() throws Throwable { assertGetAlgorithmFails(SSE_S3_WITH_KEY_ERROR, @@ -125,24 +114,6 @@ public void testSSEKeyFromCredentialProvider() throws Exception { assertEquals("Proxy password override did NOT work.", key, sseKey); } - /** - * Very that the old key is picked up via the properties. - * @throws Exception failure - */ - @Test - public void testOldKeyFromCredentialProvider() throws Exception { - // set up conf to have a cred provider - final Configuration conf = confWithProvider(); - String key = "provisioned"; - setProviderOption(conf, OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, key); - // let's set the password in config and ensure that it uses the credential - // provider provisioned value instead. - //conf.set(OLD_S3A_SERVER_SIDE_ENCRYPTION_KEY, "oldKeyInConf"); - String sseKey = getServerSideEncryptionKey(BUCKET, conf); - assertNotNull("Proxy password should not retrun null.", sseKey); - assertEquals("Proxy password override did NOT work.", key, sseKey); - } - /** * Add a temp file provider to the config. 
* @param conf config @@ -293,7 +264,7 @@ private void assertSecretKeyEquals(Configuration conf, String bucket, String expected, String overrideVal) throws IOException { assertEquals(expected, - S3AUtils.lookupPassword(bucket, conf, SECRET_KEY, overrideVal)); + S3AUtils.lookupPassword(bucket, conf, SECRET_KEY, overrideVal, null)); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index c6985b07d68..7451ef1641c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -61,6 +61,7 @@ import static org.apache.hadoop.fs.s3a.auth.RoleModel.*; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*; import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.forbidden; +import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; import static org.apache.hadoop.test.LambdaTestUtils.*; /** @@ -85,6 +86,24 @@ */ private S3AFileSystem roleFS; + /** + * Duration range exception text on SDKs which check client-side. + */ + protected static final String E_DURATION_RANGE_1 + = "Assume Role session duration should be in the range of 15min - 1Hr"; + + /** + * Duration range too high text on SDKs which check on the server. + */ + protected static final String E_DURATION_RANGE_2 + = "Member must have value less than or equal to 43200"; + + /** + * Duration range too low text on SDKs which check on the server. + */ + protected static final String E_DURATION_RANGE_3 + = "Member must have value greater than or equal to 900"; + @Override public void setup() throws Exception { super.setup(); @@ -112,13 +131,14 @@ private String getAssumedRoleARN() { * @param clazz class of exception to expect * @param text text in exception * @param <E> type of exception as inferred from clazz + * @return the caught exception if it was of the expected type and contents * @throws Exception if the exception was the wrong class */ - private <E extends Throwable> void expectFileSystemCreateFailure( + private <E extends Throwable> E expectFileSystemCreateFailure( Configuration conf, Class<E> clazz, String text) throws Exception { - interceptClosing(clazz, + return interceptClosing(clazz, text, () -> new Path(getFileSystem().getUri()).getFileSystem(conf)); } @@ -246,6 +266,60 @@ public void testAssumeRoleBadSession() throws Exception { "Member must satisfy regular expression pattern"); } + /** + * A duration >1h is forbidden client-side in AWS SDK 1.11.271; + * with the ability to extend durations deployed in March 2018, + * duration checks will need to go server-side, and, presumably, + * later SDKs will remove the client side checks. + * This code exists to see when this happens. + */ + @Test + public void testAssumeRoleThreeHourSessionDuration() throws Exception { + describe("Try to authenticate with a long session duration"); + + Configuration conf = createAssumedRoleConfig(); + // add a duration of three hours + conf.setInt(ASSUMED_ROLE_SESSION_DURATION, 3 * 60 * 60); + try { + new Path(getFileSystem().getUri()).getFileSystem(conf).close(); + LOG.info("Successfully created token of a duration >3h"); + } catch (IOException ioe) { + assertExceptionContains(E_DURATION_RANGE_1, ioe); + } + } + + /** + * A duration >1h is forbidden client-side in AWS SDK 1.11.271; + * with the ability to extend durations deployed in March 2018. 
+ * with the later SDKs, the checks go server-side and + * later SDKs will remove the client side checks. + * This test asks for a duration which will still be rejected, and + * looks for either of the error messages raised. + */ + @Test + public void testAssumeRoleThirtySixHourSessionDuration() throws Exception { + describe("Try to authenticate with a long session duration"); + + Configuration conf = createAssumedRoleConfig(); + conf.setInt(ASSUMED_ROLE_SESSION_DURATION, 36 * 60 * 60); + IOException ioe = expectFileSystemCreateFailure(conf, + IOException.class, null); + assertIsRangeException(ioe); + } + + /** + * Look for either the client-side or STS-side range exception + * @param e exception + * @throws Exception the exception, if its text doesn't match + */ + private void assertIsRangeException(final Exception e) throws Exception { + String message = e.toString(); + if (!message.contains(E_DURATION_RANGE_1) + && !message.contains(E_DURATION_RANGE_2) + && !message.contains(E_DURATION_RANGE_3)) { + throw e; + } + } /** * Create the assumed role configuration. @@ -280,11 +354,11 @@ public void testAssumedIllegalDuration() throws Throwable { describe("Expect the constructor to fail if the session is to short"); Configuration conf = new Configuration(); conf.set(ASSUMED_ROLE_SESSION_DURATION, "30s"); - interceptClosing(IllegalArgumentException.class, "", + Exception ex = interceptClosing(Exception.class, "", () -> new AssumedRoleCredentialProvider(uri, conf)); + assertIsRangeException(ex); } - @Test public void testAssumeRoleCreateFS() throws IOException { describe("Create an FS client with the role and do some basic IO"); @@ -296,24 +370,32 @@ public void testAssumeRoleCreateFS() throws IOException { conf.get(ACCESS_KEY), roleARN); try (FileSystem fs = path.getFileSystem(conf)) { - fs.getFileStatus(new Path("/")); + fs.getFileStatus(ROOT); fs.mkdirs(path("testAssumeRoleFS")); } } @Test public void testAssumeRoleRestrictedPolicyFS() throws Exception { - describe("Restrict the policy for this session; verify that reads fail"); + describe("Restrict the policy for this session; verify that reads fail."); + // there's some special handling of S3Guard here as operations + // which only go to DDB don't fail the way S3 would reject them. Configuration conf = createAssumedRoleConfig(); bindRolePolicy(conf, RESTRICTED_POLICY); Path path = new Path(getFileSystem().getUri()); + boolean guarded = getFileSystem().hasMetadataStore(); try (FileSystem fs = path.getFileSystem(conf)) { - forbidden("getFileStatus", - () -> fs.getFileStatus(new Path("/"))); - forbidden("getFileStatus", - () -> fs.listStatus(new Path("/"))); - forbidden("getFileStatus", + if (!guarded) { + // when S3Guard is enabled, the restricted policy still + // permits S3Guard record lookup, so getFileStatus calls + // will work iff the record is in the database. 
+ forbidden("getFileStatus", + () -> fs.getFileStatus(ROOT)); + } + forbidden("", + () -> fs.listStatus(ROOT)); + forbidden("", () -> fs.mkdirs(path("testAssumeRoleFS"))); } } @@ -333,7 +415,11 @@ public void testAssumeRolePoliciesOverrideRolePerms() throws Throwable { Configuration conf = createAssumedRoleConfig(); bindRolePolicy(conf, - policy(statement(false, S3_ALL_BUCKETS, S3_GET_OBJECT_TORRENT))); + policy( + statement(false, S3_ALL_BUCKETS, S3_GET_OBJECT_TORRENT), + ALLOW_S3_GET_BUCKET_LOCATION, + STATEMENT_S3GUARD_CLIENT, + STATEMENT_ALLOW_SSE_KMS_RW)); Path path = path("testAssumeRoleStillIncludesRolePerms"); roleFS = (S3AFileSystem) path.getFileSystem(conf); assertTouchForbidden(roleFS, path); @@ -342,6 +428,8 @@ public void testAssumeRolePoliciesOverrideRolePerms() throws Throwable { /** * After blocking all write verbs used by S3A, try to write data (fail) * and read data (succeed). + * For S3Guard: full DDB RW access is retained. + * SSE-KMS key access is set to decrypt only. */ @Test public void testReadOnlyOperations() throws Throwable { @@ -352,7 +440,9 @@ public void testReadOnlyOperations() throws Throwable { bindRolePolicy(conf, policy( statement(false, S3_ALL_BUCKETS, S3_PATH_WRITE_OPERATIONS), - STATEMENT_ALL_S3, STATEMENT_ALL_DDB)); + STATEMENT_ALL_S3, + STATEMENT_S3GUARD_CLIENT, + STATEMENT_ALLOW_SSE_KMS_READ)); Path path = methodPath(); roleFS = (S3AFileSystem) path.getFileSystem(conf); // list the root path, expect happy @@ -399,8 +489,9 @@ public void testRestrictedWriteSubdir() throws Throwable { Configuration conf = createAssumedRoleConfig(); bindRolePolicyStatements(conf, - STATEMENT_ALL_DDB, + STATEMENT_S3GUARD_CLIENT, statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS), + STATEMENT_ALLOW_SSE_KMS_RW, new Statement(Effects.Allow) .addActions(S3_ALL_OPERATIONS) .addResources(directory(restrictedDir))); @@ -447,7 +538,7 @@ public void testRestrictedSingleDeleteRename() throws Throwable { } /** - * Execute a sequence of rename operations. + * Execute a sequence of rename operations with access locked down. * @param conf FS configuration */ public void executeRestrictedRename(final Configuration conf) @@ -461,7 +552,8 @@ public void executeRestrictedRename(final Configuration conf) fs.delete(basePath, true); bindRolePolicyStatements(conf, - STATEMENT_ALL_DDB, + STATEMENT_S3GUARD_CLIENT, + STATEMENT_ALLOW_SSE_KMS_RW, statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS), new Statement(Effects.Allow) .addActions(S3_PATH_RW_OPERATIONS) @@ -502,6 +594,25 @@ public void testRestrictedRenameReadOnlySingleDelete() throws Throwable { executeRenameReadOnlyData(conf); } + /** + * Without simulation of STS failures, and with STS overload likely to + * be very rare, there'll be no implicit test coverage of + * {@link AssumedRoleCredentialProvider#operationRetried(String, Exception, int, boolean)}. + * This test simply invokes the callback for both the first and second retry event. + * + * If the handler ever adds more than logging, this test ensures that things + * don't break. + */ + @Test + public void testAssumedRoleRetryHandler() throws Throwable { + try(AssumedRoleCredentialProvider provider + = new AssumedRoleCredentialProvider(getFileSystem().getUri(), + createAssumedRoleConfig())) { + provider.operationRetried("retry", new IOException("failure"), 0, true); + provider.operationRetried("retry", new IOException("failure"), 1, true); + } + } + /** * Execute a sequence of rename operations where the source * data is read only to the client calling rename(). 
@@ -534,7 +645,7 @@ public void executeRenameReadOnlyData(final Configuration conf) touch(fs, readOnlyFile); bindRolePolicyStatements(conf, - STATEMENT_ALL_DDB, + STATEMENT_S3GUARD_CLIENT, statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS), new Statement(Effects.Allow) .addActions(S3_PATH_RW_OPERATIONS) @@ -614,7 +725,8 @@ public void testRestrictedCommitActions() throws Throwable { fs.mkdirs(readOnlyDir); bindRolePolicyStatements(conf, - STATEMENT_ALL_DDB, + STATEMENT_S3GUARD_CLIENT, + STATEMENT_ALLOW_SSE_KMS_RW, statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS), new Statement(Effects.Allow) .addActions(S3_PATH_RW_OPERATIONS) @@ -752,7 +864,8 @@ public void executePartialDelete(final Configuration conf) fs.delete(destDir, true); bindRolePolicyStatements(conf, - STATEMENT_ALL_DDB, + STATEMENT_S3GUARD_CLIENT, + STATEMENT_ALLOW_SSE_KMS_RW, statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS), new Statement(Effects.Deny) .addActions(S3_PATH_WRITE_OPERATIONS) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java index bb662680ff6..834826e447c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumedRoleCommitOperations.java @@ -72,7 +72,8 @@ public void setup() throws Exception { Configuration conf = newAssumedRoleConfig(getConfiguration(), getAssumedRoleARN()); bindRolePolicyStatements(conf, - STATEMENT_ALL_DDB, + STATEMENT_S3GUARD_CLIENT, + STATEMENT_ALLOW_SSE_KMS_RW, statement(true, S3_ALL_BUCKETS, S3_ROOT_READ_OPERATIONS), new RoleModel.Statement(RoleModel.Effects.Allow) .addActions(S3_PATH_RW_OPERATIONS) @@ -81,7 +82,6 @@ public void setup() throws Exception { roleFS = (S3AFileSystem) restrictedDir.getFileSystem(conf); } - @Override public void teardown() throws Exception { S3AUtils.closeAll(LOG, roleFS); @@ -122,7 +122,6 @@ protected Path path(String filepath) throws IOException { return new Path(restrictedDir, filepath); } - private String getAssumedRoleARN() { return getContract().getConf().getTrimmed(ASSUMED_ROLE_ARN, ""); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java index 9fa26002e40..854e7ec9816 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java @@ -58,14 +58,23 @@ /** Deny GET requests to all buckets. */ - public static final Statement DENY_GET_ALL = + public static final Statement DENY_S3_GET_OBJECT = statement(false, S3_ALL_BUCKETS, S3_GET_OBJECT); + public static final Statement ALLOW_S3_GET_BUCKET_LOCATION + = statement(true, S3_ALL_BUCKETS, S3_GET_BUCKET_LOCATION); + /** - * This is AWS policy removes read access. + * This is AWS policy removes read access from S3, leaves S3Guard access up. + * This will allow clients to use S3Guard list/HEAD operations, even + * the ability to write records, but not actually access the underlying + * data. + * The client does need {@link RolePolicies#S3_GET_BUCKET_LOCATION} to + * get the bucket location. 
*/ - public static final Policy RESTRICTED_POLICY = policy(DENY_GET_ALL); - + public static final Policy RESTRICTED_POLICY = policy( + DENY_S3_GET_OBJECT, STATEMENT_ALL_DDB, ALLOW_S3_GET_BUCKET_LOCATION + ); /** * Error message to get from the AWS SDK if you can't assume the role. @@ -145,7 +154,7 @@ public static Configuration newAssumedRoleConfig( Configuration conf = new Configuration(srcConf); conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME); conf.set(ASSUMED_ROLE_ARN, roleARN); - conf.set(ASSUMED_ROLE_SESSION_NAME, "valid"); + conf.set(ASSUMED_ROLE_SESSION_NAME, "test"); conf.set(ASSUMED_ROLE_SESSION_DURATION, "15m"); disableFilesystemCaching(conf); return conf; @@ -163,9 +172,8 @@ public static Configuration newAssumedRoleConfig( String contained, Callable eval) throws Exception { - AccessDeniedException ex = intercept(AccessDeniedException.class, eval); - GenericTestUtils.assertExceptionContains(contained, ex); - return ex; + return intercept(AccessDeniedException.class, + contained, eval); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java index 90e88945b39..246bf9d6134 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java @@ -173,6 +173,25 @@ public void setup() throws Exception { } } + /** + * Create a random Job ID using the fork ID as part of the number. + * @return fork ID string in a format parseable by Jobs + * @throws Exception failure + */ + protected String randomJobId() throws Exception { + String testUniqueForkId = System.getProperty(TEST_UNIQUE_FORK_ID, "0001"); + int l = testUniqueForkId.length(); + String trailingDigits = testUniqueForkId.substring(l - 4, l); + try { + int digitValue = Integer.valueOf(trailingDigits); + return String.format("20070712%04d_%04d", + (long)(Math.random() * 1000), + digitValue); + } catch (NumberFormatException e) { + throw new Exception("Failed to parse " + trailingDigits, e); + } + } + /** * Teardown waits for the consistency delay and resets failure count, so * FS is stable, before the superclass teardown is called. This diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java index 13dfd831b3e..161db8521de 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java @@ -38,7 +38,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -266,9 +265,9 @@ public int getTestFileCount() { /** * Override point to let implementations tune the MR Job conf. 
- * @param c configuration + * @param jobConf configuration */ - protected void applyCustomConfigOptions(Configuration c) { + protected void applyCustomConfigOptions(JobConf jobConf) throws IOException { } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java index 4d7f524d390..5ae8f545227 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java @@ -159,25 +159,6 @@ public void setup() throws Exception { cleanupDestDir(); } - /** - * Create a random Job ID using the fork ID as part of the number. - * @return fork ID string in a format parseable by Jobs - * @throws Exception failure - */ - private String randomJobId() throws Exception { - String testUniqueForkId = System.getProperty(TEST_UNIQUE_FORK_ID, "0001"); - int l = testUniqueForkId.length(); - String trailingDigits = testUniqueForkId.substring(l - 4, l); - try { - int digitValue = Integer.valueOf(trailingDigits); - return String.format("20070712%04d_%04d", - (long)(Math.random() * 1000), - digitValue); - } catch (NumberFormatException e) { - throw new Exception("Failed to parse " + trailingDigits, e); - } - } - @Override public void teardown() throws Exception { describe("teardown"); @@ -765,6 +746,7 @@ public void testCommitLifecycle() throws Exception { JobContext jContext = jobData.jContext; TaskAttemptContext tContext = jobData.tContext; AbstractS3ACommitter committer = jobData.committer; + validateTaskAttemptWorkingDirectory(committer, tContext); // write output describe("1. Writing output"); @@ -1360,12 +1342,55 @@ public void testParallelJobsToAdjacentPaths() throws Throwable { } + @Test + public void testS3ACommitterFactoryBinding() throws Throwable { + describe("Verify that the committer factory returns this " + + "committer when configured to do so"); + Job job = newJob(); + FileOutputFormat.setOutputPath(job, outDir); + Configuration conf = job.getConfiguration(); + conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt0); + conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); + TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, + taskAttempt0); + String name = getCommitterName(); + S3ACommitterFactory factory = new S3ACommitterFactory(); + assertEquals("Wrong committer from factory", + createCommitter(outDir, tContext).getClass(), + factory.createOutputCommitter(outDir, tContext).getClass()); + } + + /** + * Validate the path of a file being written to during the write + * itself. + * @param p path + * @throws IOException IO failure + */ protected void validateTaskAttemptPathDuringWrite(Path p) throws IOException { } + /** + * Validate the path of a file being written to after the write + * operation has completed. + * @param p path + * @throws IOException IO failure + */ protected void validateTaskAttemptPathAfterWrite(Path p) throws IOException { } + /** + * Perform any actions needed to validate the working directory of + * a committer. 
+ * For example: filesystem, path attributes + * @param committer committer instance + * @param context task attempt context + * @throws IOException IO failure + */ + protected void validateTaskAttemptWorkingDirectory( + AbstractS3ACommitter committer, + TaskAttemptContext context) throws IOException { + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestS3ACommitterFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestS3ACommitterFactory.java new file mode 100644 index 00000000000..a8547d67289 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestS3ACommitterFactory.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.commit; + +import java.io.IOException; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.commit.magic.MagicS3GuardCommitter; +import org.apache.hadoop.fs.s3a.commit.staging.DirectoryStagingCommitter; +import org.apache.hadoop.fs.s3a.commit.staging.PartitionedStagingCommitter; +import org.apache.hadoop.fs.s3a.commit.staging.StagingCommitter; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import org.apache.hadoop.test.LambdaTestUtils; + +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; + +/** + * Tests for some aspects of the committer factory. + * All tests are grouped into one single test so that only one + * S3A FS client is set up and used for the entire run. + * Saves time and money. + */ +public class ITestS3ACommitterFactory extends AbstractCommitITest { + + + protected static final String INVALID_NAME = "invalid-name"; + + /** + * Counter to guarantee that even in parallel test runs, no job has the same + * ID. + */ + + private String jobId; + + // A random task attempt id for testing. + private String attempt0; + + private TaskAttemptID taskAttempt0; + + private Path outDir; + + private S3ACommitterFactory factory; + + private TaskAttemptContext tContext; + + /** + * Parameterized list of bindings of committer name in config file to + * expected class instantiated. 
+ */ + private static final Object[][] bindings = { + {COMMITTER_NAME_FILE, FileOutputCommitter.class}, + {COMMITTER_NAME_DIRECTORY, DirectoryStagingCommitter.class}, + {COMMITTER_NAME_PARTITIONED, PartitionedStagingCommitter.class}, + {InternalCommitterConstants.COMMITTER_NAME_STAGING, + StagingCommitter.class}, + {COMMITTER_NAME_MAGIC, MagicS3GuardCommitter.class} + }; + + /** + * This is a ref to the FS conf, so changes here are visible + * to callers querying the FS config. + */ + private Configuration filesystemConfRef; + + private Configuration taskConfRef; + + @Override + public void setup() throws Exception { + super.setup(); + jobId = randomJobId(); + attempt0 = "attempt_" + jobId + "_m_000000_0"; + taskAttempt0 = TaskAttemptID.forName(attempt0); + + outDir = path(getMethodName()); + factory = new S3ACommitterFactory(); + Configuration conf = new Configuration(); + conf.set(FileOutputFormat.OUTDIR, outDir.toUri().toString()); + conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt0); + conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); + filesystemConfRef = getFileSystem().getConf(); + tContext = new TaskAttemptContextImpl(conf, taskAttempt0); + taskConfRef = tContext.getConfiguration(); + } + + @Test + public void testEverything() throws Throwable { + testImplicitFileBinding(); + testBindingsInTask(); + testBindingsInFSConfig(); + testInvalidFileBinding(); + testInvalidTaskBinding(); + } + + /** + * Verify that if all config options are unset, the FileOutputCommitter + * + * is returned. + */ + public void testImplicitFileBinding() throws Throwable { + taskConfRef.unset(FS_S3A_COMMITTER_NAME); + filesystemConfRef.unset(FS_S3A_COMMITTER_NAME); + assertFactoryCreatesExpectedCommitter(FileOutputCommitter.class); + } + + /** + * Verify that task bindings are picked up. + */ + public void testBindingsInTask() throws Throwable { + // set this to an invalid value to be confident it is not + // being checked. + filesystemConfRef.set(FS_S3A_COMMITTER_NAME, "INVALID"); + taskConfRef.set(FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); + assertFactoryCreatesExpectedCommitter(FileOutputCommitter.class); + for (Object[] binding : bindings) { + taskConfRef.set(FS_S3A_COMMITTER_NAME, + (String) binding[0]); + assertFactoryCreatesExpectedCommitter((Class) binding[1]); + } + } + + /** + * Verify that FS bindings are picked up. + */ + public void testBindingsInFSConfig() throws Throwable { + taskConfRef.unset(FS_S3A_COMMITTER_NAME); + filesystemConfRef.set(FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); + assertFactoryCreatesExpectedCommitter(FileOutputCommitter.class); + for (Object[] binding : bindings) { + taskConfRef.set(FS_S3A_COMMITTER_NAME, (String) binding[0]); + assertFactoryCreatesExpectedCommitter((Class) binding[1]); + } + } + + /** + * Create an invalid committer via the FS binding, + */ + public void testInvalidFileBinding() throws Throwable { + taskConfRef.unset(FS_S3A_COMMITTER_NAME); + filesystemConfRef.set(FS_S3A_COMMITTER_NAME, INVALID_NAME); + LambdaTestUtils.intercept(PathCommitException.class, INVALID_NAME, + () -> createCommitter()); + } + + /** + * Create an invalid committer via the task attempt. + */ + public void testInvalidTaskBinding() throws Throwable { + filesystemConfRef.unset(FS_S3A_COMMITTER_NAME); + taskConfRef.set(FS_S3A_COMMITTER_NAME, INVALID_NAME); + LambdaTestUtils.intercept(PathCommitException.class, INVALID_NAME, + () -> createCommitter()); + } + + /** + * Assert that the factory creates the expected committer. + * @param expected expected committer class. 
+ * @throws IOException IO failure. + */ + protected void assertFactoryCreatesExpectedCommitter( + final Class expected) + throws IOException { + assertEquals("Wrong Committer from factory", + expected, + createCommitter().getClass()); + } + + /** + * Create a committer. + * @return the committer + * @throws IOException IO failure. + */ + private PathOutputCommitter createCommitter() throws IOException { + return factory.createOutputCommitter(outDir, tContext); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITMagicCommitMRJob.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITMagicCommitMRJob.java index 57eb8b226fa..b7be17ad5ea 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITMagicCommitMRJob.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITMagicCommitMRJob.java @@ -18,10 +18,10 @@ package org.apache.hadoop.fs.s3a.commit.magic; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.commit.AbstractITCommitMRJob; import org.apache.hadoop.fs.s3a.commit.files.SuccessData; +import org.apache.hadoop.mapred.JobConf; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; @@ -30,7 +30,7 @@ * * There's no need to disable the committer setting for the filesystem here, * because the committers are being instantiated in their own processes; - * the settings in {@link #applyCustomConfigOptions(Configuration)} are + * the settings in {@link AbstractITCommitMRJob#applyCustomConfigOptions(JobConf)} are * passed down to these processes. */ public class ITMagicCommitMRJob extends AbstractITCommitMRJob { @@ -54,7 +54,7 @@ protected String committerName() { * @param conf configuration */ @Override - protected void applyCustomConfigOptions(Configuration conf) { + protected void applyCustomConfigOptions(JobConf conf) { conf.setBoolean(MAGIC_COMMITTER_ENABLED, true); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocol.java index 74c1d9de4ea..057adf5341b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocol.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/magic/ITestMagicCommitProtocol.java @@ -18,6 +18,9 @@ package org.apache.hadoop.fs.s3a.commit.magic; +import java.io.IOException; +import java.net.URI; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -32,9 +35,8 @@ import org.apache.hadoop.mapreduce.JobStatus; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import java.io.IOException; - import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; +import static org.hamcrest.CoreMatchers.containsString; /** * Test the magic committer's commit protocol. @@ -115,6 +117,25 @@ protected void validateTaskAttemptPathAfterWrite(Path p) throws IOException { assertPathExists("pending file", pendingFile); } + /** + * The magic committer paths are always on S3, and always have + * "__magic" in the path. 
+ * @param committer committer instance + * @param context task attempt context + * @throws IOException IO failure + */ + @Override + protected void validateTaskAttemptWorkingDirectory( + final AbstractS3ACommitter committer, + final TaskAttemptContext context) throws IOException { + URI wd = committer.getWorkPath().toUri(); + assertEquals("Wrong schema for working dir " + wd + + " with committer " + committer, + "s3a", wd.getScheme()); + assertThat(wd.getPath(), + containsString('/' + CommitConstants.MAGIC + '/')); + } + /** * The class provides a overridden implementation of commitJobInternal which * causes the commit failed for the first time then succeed. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedJobCommit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedJobCommit.java index 4df3912c0a6..55e4dc717a4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedJobCommit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedJobCommit.java @@ -83,7 +83,9 @@ private PartitionedStagingCommitterForTesting(TaskAttemptContext context) commit.setDestinationKey(key); commit.setUri("s3a://" + BUCKET + "/" + key); commit.setUploadId(UUID.randomUUID().toString()); - commit.setEtags(new ArrayList<>()); + ArrayList etags = new ArrayList<>(); + etags.add("tag1"); + commit.setEtags(etags); pending.add(commit); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITStagingCommitMRJobBadDest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITStagingCommitMRJobBadDest.java new file mode 100644 index 00000000000..be477a7de6e --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITStagingCommitMRJobBadDest.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.commit.staging.integration; + +import java.io.IOException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.commit.AbstractITCommitMRJob; +import org.apache.hadoop.fs.s3a.commit.staging.StagingCommitter; +import org.apache.hadoop.mapred.FileAlreadyExistsException; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.test.LambdaTestUtils; + +/** + * This is a test to verify that the committer will fail if the destination + * directory exists, and that this happens in job setup. 
+ */ +public class ITStagingCommitMRJobBadDest extends AbstractITCommitMRJob { + + @Override + protected String committerName() { + return StagingCommitter.NAME; + } + + /** + * create the destination directory and expect a failure. + * @param conf configuration + */ + @Override + protected void applyCustomConfigOptions(JobConf conf) throws IOException { + // This is the destination in the S3 FS + String outdir = conf.get(FileOutputFormat.OUTDIR); + S3AFileSystem fs = getFileSystem(); + Path outputPath = new Path(outdir); + fs.mkdirs(outputPath); + } + + @Override + public void testMRJob() throws Exception { + LambdaTestUtils.intercept(FileAlreadyExistsException.class, + "Output directory", + super::testMRJob); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocol.java index 08c572ef6ca..180e7435222 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocol.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestStagingCommitProtocol.java @@ -117,6 +117,19 @@ protected FileSystem getLocalFS() throws IOException { return FileSystem.getLocal(getConfiguration()); } + /** + * The staging committers always have the local FS for their work. + * @param committer committer instance + * @param context task attempt context + * @throws IOException IO failure + */ + @Override + protected void validateTaskAttemptWorkingDirectory(final AbstractS3ACommitter committer, + final TaskAttemptContext context) throws IOException { + Path wd = context.getWorkingDirectory(); + assertEquals("file", wd.toUri().getScheme()); + } + /** * The class provides a overridden implementation of commitJobInternal which * causes the commit failed for the first time then succeed. 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java index 7247c168c64..3b4eaf4a806 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java @@ -51,6 +51,12 @@ public void testCreateFlagCreateAppendExistingFile() throws IOException { //append not supported, so test removed } + @Test + @Ignore + public void testBuilderCreateAppendExistingFile() throws IOException { + // not supported + } + @Test @Ignore public void testSetVerifyChecksum() throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java index f591e3258cf..632676f002e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java @@ -29,9 +29,11 @@ import java.util.Collection; import java.util.HashSet; import java.util.Set; +import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.util.StopWatch; import com.google.common.base.Preconditions; import org.apache.hadoop.fs.FileSystem; @@ -51,9 +53,12 @@ import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.StringUtils; +import static org.apache.hadoop.fs.s3a.Constants.METADATASTORE_AUTHORITATIVE; +import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_CREATE_KEY; import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY; import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_METASTORE_NULL; import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; +import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption; import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.E_BAD_STATE; import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -140,12 +145,17 @@ protected MetadataStore getMetadataStore() { public void setup() throws Exception { super.setup(); S3ATestUtils.assumeS3GuardState(true, getConfiguration()); - ms = getFileSystem().getMetadataStore(); + S3AFileSystem fs = getFileSystem(); + ms = fs.getMetadataStore(); // Also create a "raw" fs without any MetadataStore configured Configuration conf = new Configuration(getConfiguration()); + clearBucketOption(conf, fs.getBucket(), S3_METADATA_STORE_IMPL); conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL); - URI fsUri = getFileSystem().getUri(); + URI fsUri = fs.getUri(); + S3AUtils.setBucketOption(conf,fsUri.getHost(), + METADATASTORE_AUTHORITATIVE, + S3GUARD_METASTORE_NULL); rawFs = (S3AFileSystem) FileSystem.newInstance(fsUri, conf); } @@ -288,22 +298,39 @@ public void testPruneCommandConf() throws Exception { } @Test - public void testSetCapacityFailFast() throws Exception{ + public void testSetCapacityFailFastOnReadWriteOfZero() throws Exception{ Configuration conf = getConfiguration(); + String bucket = getFileSystem().getBucket(); 
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, getFileSystem().getBucket()); S3GuardTool.SetCapacity cmdR = new S3GuardTool.SetCapacity(conf); - String[] argsR = new String[]{cmdR.getName(), "-read", "0", "s3a://bucket"}; + String[] argsR = + new String[]{cmdR.getName(), "-read", "0", "s3a://" + bucket}; intercept(IllegalArgumentException.class, S3GuardTool.SetCapacity.READ_CAP_INVALID, () -> cmdR.run(argsR)); S3GuardTool.SetCapacity cmdW = new S3GuardTool.SetCapacity(conf); - String[] argsW = new String[]{cmdW.getName(), "-write", "0", - "s3a://bucket"}; + String[] argsW = + new String[]{cmdW.getName(), "-write", "0", "s3a://" + bucket}; intercept(IllegalArgumentException.class, S3GuardTool.SetCapacity.WRITE_CAP_INVALID, () -> cmdW.run(argsW)); } + @Test + public void testSetCapacityFailFastIfNotGuarded() throws Exception{ + Configuration conf = getConfiguration(); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, UUID.randomUUID().toString()); + conf.set(S3GUARD_DDB_TABLE_CREATE_KEY, Boolean.FALSE.toString()); + conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL); + + S3GuardTool.SetCapacity cmdR = new S3GuardTool.SetCapacity(conf); + String[] argsR = new String[]{cmdR.getName(), + "s3a://" + getFileSystem().getBucket()}; + + intercept(IllegalStateException.class, "unguarded", + () -> run(argsR)); + } + @Test public void testDestroyNoBucket() throws Throwable { intercept(FileNotFoundException.class, @@ -389,13 +416,17 @@ protected void exec(S3GuardTool cmd, ByteArrayOutputStream buf, String...args) } @Test - public void testDiffCommand() throws Exception { + public void + testDiffCommand() throws Exception { S3AFileSystem fs = getFileSystem(); ms = getMetadataStore(); Set filesOnS3 = new HashSet<>(); // files on S3. Set filesOnMS = new HashSet<>(); // files on metadata store. Path testPath = path("test-diff"); + // clean up through the store and behind it. + fs.delete(testPath, true); + rawFs.delete(testPath, true); mkdirs(testPath, true, true); Path msOnlyPath = new Path(testPath, "ms_only"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java new file mode 100644 index 00000000000..53559107529 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java @@ -0,0 +1,691 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import com.amazonaws.services.dynamodbv2.document.DynamoDB; +import com.amazonaws.services.dynamodbv2.document.Item; +import com.amazonaws.services.dynamodbv2.document.PrimaryKey; +import com.amazonaws.services.dynamodbv2.document.Table; +import com.amazonaws.services.dynamodbv2.model.ListTagsOfResourceRequest; +import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription; +import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException; +import com.amazonaws.services.dynamodbv2.model.TableDescription; + +import com.amazonaws.services.dynamodbv2.model.Tag; +import com.google.common.collect.Lists; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.fs.contract.s3a.S3AContract; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.Tristate; + +import org.apache.hadoop.io.IOUtils; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.security.UserGroupInformation; + +import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; +import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*; +import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*; +import static org.apache.hadoop.test.LambdaTestUtils.*; + +/** + * Test that {@link DynamoDBMetadataStore} implements {@link MetadataStore}. + * + * In this integration test, we use a real AWS DynamoDB. A + * {@link DynamoDBMetadataStore} object is created in the @BeforeClass method, + * and shared for all test in the @BeforeClass method. You will be charged + * bills for AWS S3 or DynamoDB when you run these tests. + * + * According to the base class, every test case will have independent contract + * to create a new {@link S3AFileSystem} instance and initializes it. + * A table will be created and shared between the tests, + */ +public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase { + private static final Logger LOG = + LoggerFactory.getLogger(ITestDynamoDBMetadataStore.class); + public static final PrimaryKey + VERSION_MARKER_PRIMARY_KEY = createVersionMarkerPrimaryKey( + DynamoDBMetadataStore.VERSION_MARKER); + + private S3AFileSystem fileSystem; + private S3AContract s3AContract; + + private URI fsUri; + + private String bucket; + + private static DynamoDBMetadataStore ddbmsStatic; + + private static String testDynamoDBTableName; + + /** + * Create a path under the test path provided by + * the FS contract. 
+ * @param filepath path string in + * @return a path qualified by the test filesystem + */ + protected Path path(String filepath) { + return getFileSystem().makeQualified( + new Path(s3AContract.getTestPath(), filepath)); + } + + @Override + public void setUp() throws Exception { + Configuration conf = prepareTestConfiguration(new Configuration()); + assertThatDynamoMetadataStoreImpl(conf); + Assume.assumeTrue("Test DynamoDB table name should be set to run " + + "integration tests.", testDynamoDBTableName != null); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName); + + s3AContract = new S3AContract(conf); + s3AContract.init(); + + fileSystem = (S3AFileSystem) s3AContract.getTestFileSystem(); + assume("No test filesystem", s3AContract.isEnabled()); + assertNotNull("No test filesystem", fileSystem); + fsUri = fileSystem.getUri(); + bucket = fileSystem.getBucket(); + + try{ + super.setUp(); + } catch (FileNotFoundException e){ + LOG.warn("MetadataStoreTestBase setup failed. Waiting for table to be " + + "deleted before trying again."); + ddbmsStatic.getTable().waitForDelete(); + super.setUp(); + } + } + + + @BeforeClass + public static void beforeClassSetup() throws IOException { + Configuration conf = prepareTestConfiguration(new Configuration()); + assertThatDynamoMetadataStoreImpl(conf); + testDynamoDBTableName = conf.get(S3GUARD_DDB_TEST_TABLE_NAME_KEY); + Assume.assumeTrue("Test DynamoDB table name should be set to run " + + "integration tests.", testDynamoDBTableName != null); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName); + + LOG.debug("Creating static ddbms which will be shared between tests."); + ddbmsStatic = new DynamoDBMetadataStore(); + ddbmsStatic.initialize(conf); + } + + @AfterClass + public static void afterClassTeardown() { + LOG.debug("Destroying static DynamoDBMetadataStore."); + if (ddbmsStatic != null) { + try { + ddbmsStatic.destroy(); + } catch (Exception e) { + LOG.warn("Failed to destroy tables in teardown", e); + } + IOUtils.closeStream(ddbmsStatic); + ddbmsStatic = null; + } + } + + private static void assertThatDynamoMetadataStoreImpl(Configuration conf){ + Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard", + conf.get(Constants.S3_METADATA_STORE_IMPL).equals( + Constants.S3GUARD_METASTORE_DYNAMO)); + } + + + @Override + public void tearDown() throws Exception { + LOG.info("Removing data from ddbms table in teardown."); + // The following is a way to be sure the table will be cleared and there + // will be no leftovers after the test. + PathMetadata meta = ddbmsStatic.get(strToPath("/")); + if (meta != null){ + for (DescendantsIterator desc = new DescendantsIterator(ddbmsStatic, meta); + desc.hasNext();) { + ddbmsStatic.forgetMetadata(desc.next().getPath()); + } + } + + fileSystem.close(); + } + + /** + * Each contract has its own S3AFileSystem and DynamoDBMetadataStore objects. 
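+   * The metadata store handed back is the shared static instance created
+   * in the @BeforeClass method, so no per-test table is created.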
+ */ + private class DynamoDBMSContract extends AbstractMSContract { + + DynamoDBMSContract(Configuration conf) { + } + + DynamoDBMSContract() { + this(new Configuration()); + } + + @Override + public S3AFileSystem getFileSystem() { + return ITestDynamoDBMetadataStore.this.fileSystem; + } + + @Override + public DynamoDBMetadataStore getMetadataStore() { + return ITestDynamoDBMetadataStore.ddbmsStatic; + } + } + + @Override + public DynamoDBMSContract createContract() { + return new DynamoDBMSContract(); + } + + @Override + public DynamoDBMSContract createContract(Configuration conf) { + return new DynamoDBMSContract(conf); + } + + @Override + FileStatus basicFileStatus(Path path, int size, boolean isDir) + throws IOException { + String owner = UserGroupInformation.getCurrentUser().getShortUserName(); + return isDir + ? new S3AFileStatus(true, path, owner) + : new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner); + } + + private DynamoDBMetadataStore getDynamoMetadataStore() throws IOException { + return (DynamoDBMetadataStore) getContract().getMetadataStore(); + } + + private S3AFileSystem getFileSystem() { + return this.fileSystem; + } + + /** + * This tests that after initialize() using an S3AFileSystem object, the + * instance should have been initialized successfully, and tables are ACTIVE. + */ + @Test + public void testInitialize() throws IOException { + final S3AFileSystem s3afs = this.fileSystem; + final String tableName = "testInitialize"; + final Configuration conf = s3afs.getConf(); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(s3afs); + verifyTableInitialized(tableName, ddbms.getDynamoDB()); + assertNotNull(ddbms.getTable()); + assertEquals(tableName, ddbms.getTable().getTableName()); + String expectedRegion = conf.get(S3GUARD_DDB_REGION_KEY, + s3afs.getBucketLocation(bucket)); + assertEquals("DynamoDB table should be in configured region or the same" + + " region as S3 bucket", + expectedRegion, + ddbms.getRegion()); + ddbms.destroy(); + } + } + + /** + * This tests that after initialize() using a Configuration object, the + * instance should have been initialized successfully, and tables are ACTIVE. 
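+   * It also verifies that a missing table name or region is rejected with
+   * an IllegalArgumentException.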
+ */ + @Test + public void testInitializeWithConfiguration() throws IOException { + final String tableName = "testInitializeWithConfiguration"; + final Configuration conf = getFileSystem().getConf(); + conf.unset(S3GUARD_DDB_TABLE_NAME_KEY); + String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY, + getFileSystem().getBucketLocation()); + conf.unset(S3GUARD_DDB_REGION_KEY); + try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + fail("Should have failed because the table name is not set!"); + } catch (IllegalArgumentException ignored) { + } + // config table name + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + fail("Should have failed because as the region is not set!"); + } catch (IllegalArgumentException ignored) { + } + // config region + conf.set(S3GUARD_DDB_REGION_KEY, savedRegion); + try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + verifyTableInitialized(tableName, ddbms.getDynamoDB()); + assertNotNull(ddbms.getTable()); + assertEquals(tableName, ddbms.getTable().getTableName()); + assertEquals("Unexpected key schema found!", + keySchema(), + ddbms.getTable().describe().getKeySchema()); + ddbms.destroy(); + } + } + + /** + * Test that for a large batch write request, the limit is handled correctly. + */ + @Test + public void testBatchWrite() throws IOException { + final int[] numMetasToDeleteOrPut = { + -1, // null + 0, // empty collection + 1, // one path + S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT, // exact limit of a batch request + S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT + 1 // limit + 1 + }; + DynamoDBMetadataStore ms = getDynamoMetadataStore(); + for (int numOldMetas : numMetasToDeleteOrPut) { + for (int numNewMetas : numMetasToDeleteOrPut) { + doTestBatchWrite(numOldMetas, numNewMetas, ms); + } + } + } + + private void doTestBatchWrite(int numDelete, int numPut, + DynamoDBMetadataStore ms) throws IOException { + Path path = new Path( + "/ITestDynamoDBMetadataStore_testBatchWrite_" + numDelete + '_' + + numPut); + final Path root = fileSystem.makeQualified(path); + final Path oldDir = new Path(root, "oldDir"); + final Path newDir = new Path(root, "newDir"); + LOG.info("doTestBatchWrite: oldDir={}, newDir={}", oldDir, newDir); + + ms.put(new PathMetadata(basicFileStatus(oldDir, 0, true))); + ms.put(new PathMetadata(basicFileStatus(newDir, 0, true))); + + final List oldMetas = numDelete < 0 ? null : + new ArrayList<>(numDelete); + for (int i = 0; i < numDelete; i++) { + oldMetas.add(new PathMetadata( + basicFileStatus(new Path(oldDir, "child" + i), i, true))); + } + final List newMetas = numPut < 0 ? 
null : + new ArrayList<>(numPut); + for (int i = 0; i < numPut; i++) { + newMetas.add(new PathMetadata( + basicFileStatus(new Path(newDir, "child" + i), i, false))); + } + + Collection pathsToDelete = null; + if (oldMetas != null) { + // put all metadata of old paths and verify + ms.put(new DirListingMetadata(oldDir, oldMetas, false)); + assertEquals(0, ms.listChildren(newDir).withoutTombstones().numEntries()); + assertTrue(CollectionUtils + .isEqualCollection(oldMetas, ms.listChildren(oldDir).getListing())); + + pathsToDelete = new ArrayList<>(oldMetas.size()); + for (PathMetadata meta : oldMetas) { + pathsToDelete.add(meta.getFileStatus().getPath()); + } + } + + // move the old paths to new paths and verify + ms.move(pathsToDelete, newMetas); + assertEquals(0, ms.listChildren(oldDir).withoutTombstones().numEntries()); + if (newMetas != null) { + assertTrue(CollectionUtils + .isEqualCollection(newMetas, ms.listChildren(newDir).getListing())); + } + } + + @Test + public void testInitExistingTable() throws IOException { + final DynamoDBMetadataStore ddbms = getDynamoMetadataStore(); + final String tableName = ddbms.getTable().getTableName(); + verifyTableInitialized(tableName, ddbms.getDynamoDB()); + // create existing table + ddbms.initTable(); + verifyTableInitialized(tableName, ddbms.getDynamoDB()); + } + + /** + * Test the low level version check code. + */ + @Test + public void testItemVersionCompatibility() throws Throwable { + verifyVersionCompatibility("table", + createVersionMarker(VERSION_MARKER, VERSION, 0)); + } + + /** + * Test that a version marker entry without the version number field + * is rejected as incompatible with a meaningful error message. + */ + @Test + public void testItemLacksVersion() throws Throwable { + intercept(IOException.class, E_NOT_VERSION_MARKER, + () -> verifyVersionCompatibility("table", + new Item().withPrimaryKey( + createVersionMarkerPrimaryKey(VERSION_MARKER)))); + } + + /** + * Delete the version marker and verify that table init fails. + */ + @Test + public void testTableVersionRequired() throws Exception { + String tableName = "testTableVersionRequired"; + Configuration conf = getFileSystem().getConf(); + int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES, + S3GUARD_DDB_MAX_RETRIES_DEFAULT); + conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + + try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB()); + table.deleteItem(VERSION_MARKER_PRIMARY_KEY); + + // create existing table + intercept(IOException.class, E_NO_VERSION_MARKER, + () -> ddbms.initTable()); + + conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries); + ddbms.destroy(); + } + } + + /** + * Set the version value to a different number and verify that + * table init fails. 
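+   * The marker is rewritten to claim version 200, which initTable() must
+   * reject as incompatible.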
+ */ + @Test + public void testTableVersionMismatch() throws Exception { + String tableName = "testTableVersionMismatch"; + Configuration conf = getFileSystem().getConf(); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + + try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB()); + table.deleteItem(VERSION_MARKER_PRIMARY_KEY); + Item v200 = createVersionMarker(VERSION_MARKER, 200, 0); + table.putItem(v200); + + // create existing table + intercept(IOException.class, E_INCOMPATIBLE_VERSION, + () -> ddbms.initTable()); + ddbms.destroy(); + } + } + + + + + /** + * Test that initTable fails with IOException when table does not exist and + * table auto-creation is disabled. + */ + @Test + public void testFailNonexistentTable() throws IOException { + final String tableName = "testFailNonexistentTable"; + final S3AFileSystem s3afs = getFileSystem(); + final Configuration conf = s3afs.getConf(); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY); + try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(s3afs); + fail("Should have failed as table does not exist and table auto-creation" + + " is disabled"); + } catch (IOException ignored) { + } + } + + /** + * Test cases about root directory as it is not in the DynamoDB table. + */ + @Test + public void testRootDirectory() throws IOException { + final DynamoDBMetadataStore ddbms = getDynamoMetadataStore(); + Path rootPath = new Path(new Path(fsUri), "/"); + verifyRootDirectory(ddbms.get(rootPath), true); + + ddbms.put(new PathMetadata(new S3AFileStatus(true, + new Path(rootPath, "foo"), + UserGroupInformation.getCurrentUser().getShortUserName()))); + verifyRootDirectory(ddbms.get(rootPath), false); + } + + private void verifyRootDirectory(PathMetadata rootMeta, boolean isEmpty) { + assertNotNull(rootMeta); + final FileStatus status = rootMeta.getFileStatus(); + assertNotNull(status); + assertTrue(status.isDirectory()); + // UNKNOWN is always a valid option, but true / false should not contradict + if (isEmpty) { + assertNotSame("Should not be marked non-empty", + Tristate.FALSE, + rootMeta.isEmptyDirectory()); + } else { + assertNotSame("Should not be marked empty", + Tristate.TRUE, + rootMeta.isEmptyDirectory()); + } + } + + /** + * Test that when moving nested paths, all its ancestors up to destination + * root will also be created. + * Here is the directory tree before move: + *
+   * testMovePopulatesAncestors
+   * ├── a
+   * │   └── b
+   * │       └── src
+   * │           ├── dir1
+   * │           │   └── dir2
+   * │           └── file1.txt
+   * └── c
+   *     └── d
+   *         └── e
+   *             └── dest
+   *
+ * As part of rename(a/b/src, d/c/dest), S3A will enumerate the subtree at + * a/b/src. This test verifies that after the move, the new subtree at + * 'dest' is reachable from the root (i.e. c/ and c/d exist in the table. + * DynamoDBMetadataStore depends on this property to do recursive delete + * without a full table scan. + */ + @Test + public void testMovePopulatesAncestors() throws IOException { + final DynamoDBMetadataStore ddbms = getDynamoMetadataStore(); + final String testRoot = "/testMovePopulatesAncestors"; + final String srcRoot = testRoot + "/a/b/src"; + final String destRoot = testRoot + "/c/d/e/dest"; + + final Path nestedPath1 = strToPath(srcRoot + "/file1.txt"); + ddbms.put(new PathMetadata(basicFileStatus(nestedPath1, 1024, false))); + final Path nestedPath2 = strToPath(srcRoot + "/dir1/dir2"); + ddbms.put(new PathMetadata(basicFileStatus(nestedPath2, 0, true))); + + // We don't put the destRoot path here, since put() would create ancestor + // entries, and we want to ensure that move() does it, instead. + + // Build enumeration of src / dest paths and do the move() + final Collection fullSourcePaths = Lists.newArrayList( + strToPath(srcRoot), + strToPath(srcRoot + "/dir1"), + strToPath(srcRoot + "/dir1/dir2"), + strToPath(srcRoot + "/file1.txt") + ); + final Collection pathsToCreate = Lists.newArrayList( + new PathMetadata(basicFileStatus(strToPath(destRoot), + 0, true)), + new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1"), + 0, true)), + new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1/dir2"), + 0, true)), + new PathMetadata(basicFileStatus(strToPath(destRoot + "/file1.txt"), + 1024, false)) + ); + + ddbms.move(fullSourcePaths, pathsToCreate); + + // assert that all the ancestors should have been populated automatically + assertCached(testRoot + "/c"); + assertCached(testRoot + "/c/d"); + assertCached(testRoot + "/c/d/e"); + assertCached(destRoot /* /c/d/e/dest */); + + // Also check moved files while we're at it + assertCached(destRoot + "/dir1"); + assertCached(destRoot + "/dir1/dir2"); + assertCached(destRoot + "/file1.txt"); + } + + @Test + public void testProvisionTable() throws IOException { + final String tableName = "testProvisionTable"; + Configuration conf = getFileSystem().getConf(); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + + try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + DynamoDB dynamoDB = ddbms.getDynamoDB(); + final ProvisionedThroughputDescription oldProvision = + dynamoDB.getTable(tableName).describe().getProvisionedThroughput(); + ddbms.provisionTable(oldProvision.getReadCapacityUnits() * 2, + oldProvision.getWriteCapacityUnits() * 2); + ddbms.initTable(); + final ProvisionedThroughputDescription newProvision = + dynamoDB.getTable(tableName).describe().getProvisionedThroughput(); + LOG.info("Old provision = {}, new provision = {}", oldProvision, + newProvision); + assertEquals(oldProvision.getReadCapacityUnits() * 2, + newProvision.getReadCapacityUnits().longValue()); + assertEquals(oldProvision.getWriteCapacityUnits() * 2, + newProvision.getWriteCapacityUnits().longValue()); + ddbms.destroy(); + } + } + + @Test + public void testDeleteTable() throws Exception { + final String tableName = "testDeleteTable"; + Path testPath = new Path(new Path(fsUri), "/" + tableName); + final S3AFileSystem s3afs = getFileSystem(); + final Configuration conf = s3afs.getConf(); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + try (DynamoDBMetadataStore ddbms = new 
DynamoDBMetadataStore()) { + ddbms.initialize(s3afs); + // we can list the empty table + ddbms.listChildren(testPath); + DynamoDB dynamoDB = ddbms.getDynamoDB(); + ddbms.destroy(); + verifyTableNotExist(tableName, dynamoDB); + + // delete table once more; be ResourceNotFoundException swallowed silently + ddbms.destroy(); + verifyTableNotExist(tableName, dynamoDB); + try { + // we can no longer list the destroyed table + ddbms.listChildren(testPath); + fail("Should have failed after the table is destroyed!"); + } catch (IOException ignored) { + } + ddbms.destroy(); + } + } + + @Test + public void testTableTagging() throws IOException { + final Configuration conf = getFileSystem().getConf(); + + // clear all table tagging config before this test + conf.getPropsWithPrefix(S3GUARD_DDB_TABLE_TAG).keySet().forEach( + propKey -> conf.unset(S3GUARD_DDB_TABLE_TAG + propKey) + ); + + String tableName = "testTableTagging-" + UUID.randomUUID(); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + conf.set(S3GUARD_DDB_TABLE_CREATE_KEY, "true"); + + Map tagMap = new HashMap<>(); + tagMap.put("hello", "dynamo"); + tagMap.put("tag", "youre it"); + for (Map.Entry tagEntry : tagMap.entrySet()) { + conf.set(S3GUARD_DDB_TABLE_TAG + tagEntry.getKey(), tagEntry.getValue()); + } + + try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + assertNotNull(ddbms.getTable()); + assertEquals(tableName, ddbms.getTable().getTableName()); + ListTagsOfResourceRequest listTagsOfResourceRequest = + new ListTagsOfResourceRequest() + .withResourceArn(ddbms.getTable().getDescription().getTableArn()); + List tags = ddbms.getAmazonDynamoDB() + .listTagsOfResource(listTagsOfResourceRequest).getTags(); + assertEquals(tagMap.size(), tags.size()); + for (Tag tag : tags) { + Assert.assertEquals(tagMap.get(tag.getKey()), tag.getValue()); + } + } + } + + /** + * This validates the table is created and ACTIVE in DynamoDB. + * + * This should not rely on the {@link DynamoDBMetadataStore} implementation. + * Return the table + */ + private Table verifyTableInitialized(String tableName, DynamoDB dynamoDB) { + final Table table = dynamoDB.getTable(tableName); + final TableDescription td = table.describe(); + assertEquals(tableName, td.getTableName()); + assertEquals("ACTIVE", td.getTableStatus()); + return table; + } + + /** + * This validates the table is not found in DynamoDB. + * + * This should not rely on the {@link DynamoDBMetadataStore} implementation. 
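+   * A raw describe() call through the DynamoDB client must raise
+   * ResourceNotFoundException.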
+ */ + private void verifyTableNotExist(String tableName, DynamoDB dynamoDB) throws + Exception{ + intercept(ResourceNotFoundException.class, + () -> dynamoDB.getTable(tableName).describe()); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java index 02a896653a6..48dbce98a77 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java @@ -18,46 +18,176 @@ package org.apache.hadoop.fs.s3a.s3guard; +import javax.annotation.Nullable; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import javax.annotation.Nullable; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import com.amazonaws.services.dynamodbv2.document.DynamoDB; +import com.amazonaws.services.dynamodbv2.document.Table; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription; +import org.junit.FixMethodOrder; import org.junit.Test; +import org.junit.internal.AssumptionViolatedException; +import org.junit.runners.MethodSorters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageStatistics; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.AWSServiceThrottledException; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3AStorageStatistics; +import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.fs.s3a.scale.AbstractITestS3AMetadataStoreScale; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; -import static org.apache.hadoop.fs.s3a.s3guard.MetadataStoreTestBase.basicFileStatus; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.s3guard.MetadataStoreTestBase.basicFileStatus; import static org.junit.Assume.*; /** * Scale test for DynamoDBMetadataStore. + * + * The throttle tests aren't quite trying to verify that throttling can + * be recovered from, because that makes for very slow tests: you have + * to overload the system and them have them back of until they finally complete. + * Instead */ +@FixMethodOrder(MethodSorters.NAME_ASCENDING) public class ITestDynamoDBMetadataStoreScale extends AbstractITestS3AMetadataStoreScale { + private static final Logger LOG = LoggerFactory.getLogger( + ITestDynamoDBMetadataStoreScale.class); + private static final long BATCH_SIZE = 25; - private static final long SMALL_IO_UNITS = BATCH_SIZE / 4; + /** + * IO Units for batch size; this sets the size to use for IO capacity. + * Value: {@value}. 
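+   * Tables provisioned above these read/write limits are skipped in setup(),
+   * as throttling would then be unlikely to surface.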
+ */ + private static final long MAXIMUM_READ_CAPACITY = 10; + private static final long MAXIMUM_WRITE_CAPACITY = 15; + + private DynamoDBMetadataStore ddbms; + + private DynamoDB ddb; + + private Table table; + + private String tableName; + + /** was the provisioning changed in test_001_limitCapacity()? */ + private boolean isOverProvisionedForTest; + + private ProvisionedThroughputDescription originalCapacity; + + private static final int THREADS = 40; + + private static final int OPERATIONS_PER_THREAD = 50; + + /** + * Create the metadata store. The table and region are determined from + * the attributes of the FS used in the tests. + * @return a new metadata store instance + * @throws IOException failure to instantiate + * @throws AssumptionViolatedException if the FS isn't running S3Guard + DDB/ + */ @Override public MetadataStore createMetadataStore() throws IOException { - Configuration conf = getFileSystem().getConf(); - String ddbTable = conf.get(S3GUARD_DDB_TABLE_NAME_KEY); - assumeNotNull("DynamoDB table is configured", ddbTable); - String ddbEndpoint = conf.get(S3GUARD_DDB_REGION_KEY); - assumeNotNull("DynamoDB endpoint is configured", ddbEndpoint); + S3AFileSystem fs = getFileSystem(); + assumeTrue("S3Guard is disabled for " + fs.getUri(), + fs.hasMetadataStore()); + MetadataStore store = fs.getMetadataStore(); + assumeTrue("Metadata store for " + fs.getUri() + " is " + store + + " -not DynamoDBMetadataStore", + store instanceof DynamoDBMetadataStore); + + DynamoDBMetadataStore fsStore = (DynamoDBMetadataStore) store; + Configuration conf = new Configuration(fs.getConf()); + + tableName = fsStore.getTableName(); + assertTrue("Null/Empty tablename in " + fsStore, + StringUtils.isNotEmpty(tableName)); + String region = fsStore.getRegion(); + assertTrue("Null/Empty region in " + fsStore, + StringUtils.isNotEmpty(region)); + // create a new metastore configured to fail fast if throttling + // happens. + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + conf.set(S3GUARD_DDB_REGION_KEY, region); + conf.set(S3GUARD_DDB_THROTTLE_RETRY_INTERVAL, "50ms"); + conf.set(S3GUARD_DDB_MAX_RETRIES, "2"); + conf.set(MAX_ERROR_RETRIES, "1"); + conf.set(S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY, "5ms"); DynamoDBMetadataStore ms = new DynamoDBMetadataStore(); - ms.initialize(getFileSystem().getConf()); + ms.initialize(conf); + // wire up the owner FS so that we can make assertions about throttle + // events + ms.bindToOwnerFilesystem(fs); return ms; } + @Override + public void setup() throws Exception { + super.setup(); + ddbms = (DynamoDBMetadataStore) createMetadataStore(); + tableName = ddbms.getTableName(); + assertNotNull("table has no name", tableName); + ddb = ddbms.getDynamoDB(); + table = ddb.getTable(tableName); + originalCapacity = table.describe().getProvisionedThroughput(); + + // If you set the same provisioned I/O as already set it throws an + // exception, avoid that. + isOverProvisionedForTest = ( + originalCapacity.getReadCapacityUnits() > MAXIMUM_READ_CAPACITY + || originalCapacity.getWriteCapacityUnits() > MAXIMUM_WRITE_CAPACITY); + assumeFalse("Table has too much capacity: " + originalCapacity.toString(), + isOverProvisionedForTest); + } + + @Override + public void teardown() throws Exception { + IOUtils.cleanupWithLogger(LOG, ddbms); + super.teardown(); + } + + /** + * The subclass expects the superclass to be throttled; sometimes it is. 
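+   * If throttling does surface, the AWSServiceThrottledException must carry
+   * the "IOPS too low" hint; otherwise the inherited test simply passes.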
+ */ + @Test + @Override + public void test_020_Moves() throws Throwable { + ThrottleTracker tracker = new ThrottleTracker(); + try { + // if this doesn't throttle, all is well. + super.test_020_Moves(); + } catch (AWSServiceThrottledException ex) { + // if the service was throttled, we ex;ect the exception text + GenericTestUtils.assertExceptionContains( + DynamoDBMetadataStore.HINT_DDB_IOPS_TOO_LOW, + ex, + "Expected throttling message"); + } finally { + LOG.info("Statistics {}", tracker); + } + } /** * Though the AWS SDK claims in documentation to handle retries and @@ -70,92 +200,298 @@ public MetadataStore createMetadataStore() throws IOException { * correctly, retrying w/ smaller batch instead of surfacing exceptions. */ @Test - public void testBatchedWriteExceedsProvisioned() throws Exception { + public void test_030_BatchedWrite() throws Exception { - final long iterations = 5; - boolean isProvisionedChanged; - List toCleanup = new ArrayList<>(); + final int iterations = 15; + final ArrayList toCleanup = new ArrayList<>(); + toCleanup.ensureCapacity(THREADS * iterations); // Fail if someone changes a constant we depend on assertTrue("Maximum batch size must big enough to run this test", S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT >= BATCH_SIZE); - try (DynamoDBMetadataStore ddbms = - (DynamoDBMetadataStore)createMetadataStore()) { - - DynamoDB ddb = ddbms.getDynamoDB(); - String tableName = ddbms.getTable().getTableName(); - final ProvisionedThroughputDescription existing = - ddb.getTable(tableName).describe().getProvisionedThroughput(); - - // If you set the same provisioned I/O as already set it throws an - // exception, avoid that. - isProvisionedChanged = (existing.getReadCapacityUnits() != SMALL_IO_UNITS - || existing.getWriteCapacityUnits() != SMALL_IO_UNITS); - - if (isProvisionedChanged) { - // Set low provisioned I/O for dynamodb - describe("Provisioning dynamo tbl %s read/write -> %d/%d", tableName, - SMALL_IO_UNITS, SMALL_IO_UNITS); - // Blocks to ensure table is back to ready state before we proceed - ddbms.provisionTableBlocking(SMALL_IO_UNITS, SMALL_IO_UNITS); - } else { - describe("Skipping provisioning table I/O, already %d/%d", - SMALL_IO_UNITS, SMALL_IO_UNITS); + + // We know the dynamodb metadata store will expand a put of a path + // of depth N into a batch of N writes (all ancestors are written + // separately up to the root). 
(Ab)use this for an easy way to write + // a batch of stuff that is bigger than the provisioned write units + try { + describe("Running %d iterations of batched put, size %d", iterations, + BATCH_SIZE); + + ThrottleTracker result = execute("prune", + 1, + true, + () -> { + ThrottleTracker tracker = new ThrottleTracker(); + long pruneItems = 0; + for (long i = 0; i < iterations; i++) { + Path longPath = pathOfDepth(BATCH_SIZE, String.valueOf(i)); + FileStatus status = basicFileStatus(longPath, 0, false, 12345, + 12345); + PathMetadata pm = new PathMetadata(status); + synchronized (toCleanup) { + toCleanup.add(pm); + } + + ddbms.put(pm); + + pruneItems++; + + if (pruneItems == BATCH_SIZE) { + describe("pruning files"); + ddbms.prune(Long.MAX_VALUE /* all files */); + pruneItems = 0; + } + if (tracker.probe()) { + // fail fast + break; + } + } + }); + assertNotEquals("No batch retries in " + result, + 0, result.batchThrottles); + } finally { + describe("Cleaning up table %s", tableName); + for (PathMetadata pm : toCleanup) { + cleanupMetadata(ddbms, pm); } + } + } + + /** + * Test Get throttling including using + * {@link MetadataStore#get(Path, boolean)}, + * as that stresses more of the code. + */ + @Test + public void test_040_get() throws Throwable { + // attempt to create many many get requests in parallel. + Path path = new Path("s3a://example.org/get"); + S3AFileStatus status = new S3AFileStatus(true, path, "alice"); + PathMetadata metadata = new PathMetadata(status); + ddbms.put(metadata); + try { + execute("get", + OPERATIONS_PER_THREAD, + true, + () -> ddbms.get(path, true) + ); + } finally { + retryingDelete(path); + } + } + + /** + * Ask for the version marker, which is where table init can be overloaded. + */ + @Test + public void test_050_getVersionMarkerItem() throws Throwable { + execute("get", + OPERATIONS_PER_THREAD * 2, + true, + () -> ddbms.getVersionMarkerItem() + ); + } - try { - // We know the dynamodb metadata store will expand a put of a path - // of depth N into a batch of N writes (all ancestors are written - // separately up to the root). (Ab)use this for an easy way to write - // a batch of stuff that is bigger than the provisioned write units - try { - describe("Running %d iterations of batched put, size %d", iterations, - BATCH_SIZE); - long pruneItems = 0; - for (long i = 0; i < iterations; i++) { - Path longPath = pathOfDepth(BATCH_SIZE, String.valueOf(i)); - FileStatus status = basicFileStatus(longPath, 0, false, 12345, - 12345); - PathMetadata pm = new PathMetadata(status); - - ddbms.put(pm); - toCleanup.add(pm); - pruneItems++; - // Having hard time reproducing Exceeded exception with put, also - // try occasional prune, which was the only stack trace I've seen - // (on JIRA) - if (pruneItems == BATCH_SIZE) { - describe("pruning files"); - ddbms.prune(Long.MAX_VALUE /* all files */); - pruneItems = 0; + /** + * Cleanup with an extra bit of retry logic around it, in case things + * are still over the limit. + * @param path path + */ + private void retryingDelete(final Path path) { + try { + ddbms.getInvoker().retry("Delete ", path.toString(), true, + () -> ddbms.delete(path)); + } catch (IOException e) { + LOG.warn("Failed to delete {}: ", path, e); + } + } + + @Test + public void test_060_list() throws Throwable { + // attempt to create many many get requests in parallel. 
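+    // (here the parallel operations are listChildren() calls on the parent
+    // path, not gets)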
+ Path path = new Path("s3a://example.org/list"); + S3AFileStatus status = new S3AFileStatus(true, path, "alice"); + PathMetadata metadata = new PathMetadata(status); + ddbms.put(metadata); + try { + Path parent = path.getParent(); + execute("list", + OPERATIONS_PER_THREAD, + true, + () -> ddbms.listChildren(parent) + ); + } finally { + retryingDelete(path); + } + } + + @Test + public void test_070_putDirMarker() throws Throwable { + // attempt to create many many get requests in parallel. + Path path = new Path("s3a://example.org/putDirMarker"); + S3AFileStatus status = new S3AFileStatus(true, path, "alice"); + PathMetadata metadata = new PathMetadata(status); + ddbms.put(metadata); + DirListingMetadata children = ddbms.listChildren(path.getParent()); + try { + execute("list", + OPERATIONS_PER_THREAD, + true, + () -> ddbms.put(children) + ); + } finally { + retryingDelete(path); + } + } + + @Test + public void test_080_fullPathsToPut() throws Throwable { + // attempt to create many many get requests in parallel. + Path base = new Path("s3a://example.org/test_080_fullPathsToPut"); + Path child = new Path(base, "child"); + List pms = new ArrayList<>(); + ddbms.put(new PathMetadata(makeDirStatus(base))); + ddbms.put(new PathMetadata(makeDirStatus(child))); + ddbms.getInvoker().retry("set up directory tree", + base.toString(), + true, + () -> ddbms.put(pms)); + try { + DDBPathMetadata dirData = ddbms.get(child, true); + execute("list", + OPERATIONS_PER_THREAD, + true, + () -> ddbms.fullPathsToPut(dirData) + ); + } finally { + retryingDelete(base); + } + } + + @Test + public void test_900_instrumentation() throws Throwable { + describe("verify the owner FS gets updated after throttling events"); + // we rely on the FS being shared + S3AFileSystem fs = getFileSystem(); + String fsSummary = fs.toString(); + + S3AStorageStatistics statistics = fs.getStorageStatistics(); + for (StorageStatistics.LongStatistic statistic : statistics) { + LOG.info("{}", statistic.toString()); + } + String retryKey = Statistic.S3GUARD_METADATASTORE_RETRY.getSymbol(); + assertTrue("No increment of " + retryKey + " in " + fsSummary, + statistics.getLong(retryKey) > 0); + String throttledKey = Statistic.S3GUARD_METADATASTORE_THROTTLED.getSymbol(); + assertTrue("No increment of " + throttledKey + " in " + fsSummary, + statistics.getLong(throttledKey) > 0); + } + + /** + * Execute a set of operations in parallel, collect throttling statistics + * and return them. + * This execution will complete as soon as throttling is detected. + * This ensures that the tests do not run for longer than they should. + * @param operation string for messages. + * @param operationsPerThread number of times per thread to invoke the action. + * @param expectThrottling is throttling expected (and to be asserted on?) + * @param action action to invoke. 
+ * @return the throttle statistics + */ + public ThrottleTracker execute(String operation, + int operationsPerThread, + final boolean expectThrottling, + LambdaTestUtils.VoidCallable action) + throws Exception { + + final ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + final ThrottleTracker tracker = new ThrottleTracker(); + final ExecutorService executorService = Executors.newFixedThreadPool( + THREADS); + final List> tasks = new ArrayList<>(THREADS); + + final AtomicInteger throttleExceptions = new AtomicInteger(0); + for (int i = 0; i < THREADS; i++) { + tasks.add( + () -> { + final ExecutionOutcome outcome = new ExecutionOutcome(); + final ContractTestUtils.NanoTimer t + = new ContractTestUtils.NanoTimer(); + for (int j = 0; j < operationsPerThread; j++) { + if (tracker.isThrottlingDetected()) { + outcome.skipped = true; + return outcome; + } + try { + action.call(); + outcome.completed++; + } catch (AWSServiceThrottledException e) { + // this is possibly OK + LOG.info("Operation [{}] raised a throttled exception " + e, j, e); + LOG.debug(e.toString(), e); + throttleExceptions.incrementAndGet(); + // consider it completed + outcome.throttleExceptions.add(e); + outcome.throttled++; + } catch (Exception e) { + LOG.error("Failed to execute {}", operation, e); + outcome.exceptions.add(e); + break; + } + tracker.probe(); } + LOG.info("Thread completed {} with in {} ms with outcome {}: {}", + operation, t.elapsedTimeMs(), outcome, tracker); + return outcome; } - } finally { - describe("Cleaning up table %s", tableName); - for (PathMetadata pm : toCleanup) { - cleanupMetadata(ddbms, pm); - } - } - } finally { - if (isProvisionedChanged) { - long write = existing.getWriteCapacityUnits(); - long read = existing.getReadCapacityUnits(); - describe("Restoring dynamo tbl %s read/write -> %d/%d", tableName, - read, write); - ddbms.provisionTableBlocking(existing.getReadCapacityUnits(), - existing.getWriteCapacityUnits()); - } + ); + } + final List> futures = + executorService.invokeAll(tasks, + getTestTimeoutMillis(), TimeUnit.MILLISECONDS); + long elapsedMs = timer.elapsedTimeMs(); + LOG.info("Completed {} with {}", operation, tracker); + LOG.info("time to execute: {} millis", elapsedMs); + + for (Future future : futures) { + assertTrue("Future timed out", future.isDone()); + } + tracker.probe(); + + if (expectThrottling) { + tracker.assertThrottlingDetected(); + } + for (Future future : futures) { + + ExecutionOutcome outcome = future.get(); + if (!outcome.exceptions.isEmpty()) { + throw outcome.exceptions.get(0); + } + if (!outcome.skipped) { + assertEquals("Future did not complete all operations", + operationsPerThread, outcome.completed + outcome.throttled); } } + + return tracker; } - // Attempt do delete metadata, suppressing any errors + /** + * Attempt to delete metadata, suppressing any errors, and retrying on + * throttle events just in case some are still surfacing. + * @param ms store + * @param pm path to clean up + */ private void cleanupMetadata(MetadataStore ms, PathMetadata pm) { + Path path = pm.getFileStatus().getPath(); try { - ms.forgetMetadata(pm.getFileStatus().getPath()); + ddbms.getInvoker().retry("clean up", path.toString(), true, + () -> ms.forgetMetadata(path)); } catch (IOException ioe) { // Ignore. + LOG.info("Ignoring error while cleaning up {} in database", path, ioe); } } @@ -164,11 +500,114 @@ private Path pathOfDepth(long n, @Nullable String fileSuffix) { for (long i = 0; i < n; i++) { sb.append(i == 0 ? 
"/" + this.getClass().getSimpleName() : "lvl"); sb.append(i); - if (i == n-1 && fileSuffix != null) { + if (i == n - 1 && fileSuffix != null) { sb.append(fileSuffix); } sb.append("/"); } return new Path(getFileSystem().getUri().toString(), sb.toString()); } + + /** + * Something to track throttles. + * The constructor sets the counters to the current count in the + * DDB table; a call to {@link #reset()} will set it to the latest values. + * The {@link #probe()} will pick up the latest values to compare them with + * the original counts. + */ + private class ThrottleTracker { + + private long writeThrottleEventOrig = ddbms.getWriteThrottleEventCount(); + + private long readThrottleEventOrig = ddbms.getReadThrottleEventCount(); + + private long batchWriteThrottleCountOrig = + ddbms.getBatchWriteCapacityExceededCount(); + + private long readThrottles; + + private long writeThrottles; + + private long batchThrottles; + + ThrottleTracker() { + reset(); + } + + /** + * Reset the counters. + */ + private synchronized void reset() { + writeThrottleEventOrig + = ddbms.getWriteThrottleEventCount(); + + readThrottleEventOrig + = ddbms.getReadThrottleEventCount(); + + batchWriteThrottleCountOrig + = ddbms.getBatchWriteCapacityExceededCount(); + } + + /** + * Update the latest throttle count; synchronized. + * @return true if throttling has been detected. + */ + private synchronized boolean probe() { + readThrottles = ddbms.getReadThrottleEventCount() - readThrottleEventOrig; + writeThrottles = ddbms.getWriteThrottleEventCount() + - writeThrottleEventOrig; + batchThrottles = ddbms.getBatchWriteCapacityExceededCount() + - batchWriteThrottleCountOrig; + return isThrottlingDetected(); + } + + @Override + public String toString() { + return String.format( + "Tracker with read throttle events = %d;" + + " write events = %d;" + + " batch throttles = %d", + readThrottles, writeThrottles, batchThrottles); + } + + /** + * Assert that throttling has been detected. + */ + void assertThrottlingDetected() { + assertTrue("No throttling detected in " + this + + " against " + ddbms.toString(), + isThrottlingDetected()); + } + + /** + * Has there been any throttling on an operation? + * @return true iff read, write or batch operations were throttled. + */ + private boolean isThrottlingDetected() { + return readThrottles > 0 || writeThrottles > 0 || batchThrottles > 0; + } + } + + /** + * Outcome of a thread's execution operation. 
+ */ + private static class ExecutionOutcome { + private int completed; + private int throttled; + private boolean skipped; + private final List exceptions = new ArrayList<>(1); + private final List throttleExceptions = new ArrayList<>(1); + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "ExecutionOutcome{"); + sb.append("completed=").append(completed); + sb.append(", skipped=").append(skipped); + sb.append(", throttled=").append(throttled); + sb.append(", exception count=").append(exceptions.size()); + sb.append('}'); + return sb.toString(); + } + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardConcurrentOps.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardConcurrentOps.java index c6838a08c74..22a1efd2a46 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardConcurrentOps.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardConcurrentOps.java @@ -40,8 +40,10 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileSystem; import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY; @@ -80,81 +82,102 @@ private void deleteTable(DynamoDB db, String tableName) throws @Test public void testConcurrentTableCreations() throws Exception { - final Configuration conf = getConfiguration(); + S3AFileSystem fs = getFileSystem(); + final Configuration conf = fs.getConf(); Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard", conf.get(Constants.S3_METADATA_STORE_IMPL).equals( Constants.S3GUARD_METASTORE_DYNAMO)); - DynamoDBMetadataStore ms = new DynamoDBMetadataStore(); - ms.initialize(getFileSystem()); - DynamoDB db = ms.getDynamoDB(); - - String tableName = "testConcurrentTableCreations" + new Random().nextInt(); - conf.setBoolean(Constants.S3GUARD_DDB_TABLE_CREATE_KEY, true); - conf.set(Constants.S3GUARD_DDB_TABLE_NAME_KEY, tableName); + AWSCredentialProviderList sharedCreds = + fs.shareCredentials("testConcurrentTableCreations"); + // close that shared copy. + sharedCreds.close(); + // this is the original reference count. + int originalRefCount = sharedCreds.getRefCount(); - String region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY); - if (StringUtils.isEmpty(region)) { - // no region set, so pick it up from the test bucket - conf.set(S3GUARD_DDB_REGION_KEY, getFileSystem().getBucketLocation()); - } - int concurrentOps = 16; - int iterations = 4; + //now init the store; this should increment the ref count. 
+ DynamoDBMetadataStore ms = new DynamoDBMetadataStore(); + ms.initialize(fs); - failIfTableExists(db, tableName); + // the ref count should have gone up + assertEquals("Credential Ref count unchanged after initializing metastore " + + sharedCreds, + originalRefCount + 1, sharedCreds.getRefCount()); + try { + DynamoDB db = ms.getDynamoDB(); - for (int i = 0; i < iterations; i++) { - ExecutorService executor = Executors.newFixedThreadPool( - concurrentOps, new ThreadFactory() { - private AtomicInteger count = new AtomicInteger(0); + String tableName = "testConcurrentTableCreations" + new Random().nextInt(); + conf.setBoolean(Constants.S3GUARD_DDB_TABLE_CREATE_KEY, true); + conf.set(Constants.S3GUARD_DDB_TABLE_NAME_KEY, tableName); - public Thread newThread(Runnable r) { - return new Thread(r, - "testConcurrentTableCreations" + count.getAndIncrement()); + String region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY); + if (StringUtils.isEmpty(region)) { + // no region set, so pick it up from the test bucket + conf.set(S3GUARD_DDB_REGION_KEY, fs.getBucketLocation()); + } + int concurrentOps = 16; + int iterations = 4; + + failIfTableExists(db, tableName); + + for (int i = 0; i < iterations; i++) { + ExecutorService executor = Executors.newFixedThreadPool( + concurrentOps, new ThreadFactory() { + private AtomicInteger count = new AtomicInteger(0); + + public Thread newThread(Runnable r) { + return new Thread(r, + "testConcurrentTableCreations" + count.getAndIncrement()); + } + }); + ((ThreadPoolExecutor) executor).prestartAllCoreThreads(); + Future[] futures = new Future[concurrentOps]; + for (int f = 0; f < concurrentOps; f++) { + final int index = f; + futures[f] = executor.submit(new Callable() { + @Override + public Exception call() throws Exception { + + ContractTestUtils.NanoTimer timer = + new ContractTestUtils.NanoTimer(); + + Exception result = null; + try (DynamoDBMetadataStore store = new DynamoDBMetadataStore()) { + store.initialize(conf); + } catch (Exception e) { + LOG.error(e.getClass() + ": " + e.getMessage()); + result = e; + } + + timer.end("Parallel DynamoDB client creation %d", index); + LOG.info("Parallel DynamoDB client creation {} ran from {} to {}", + index, timer.getStartTime(), timer.getEndTime()); + return result; } }); - ((ThreadPoolExecutor) executor).prestartAllCoreThreads(); - Future[] futures = new Future[concurrentOps]; - for (int f = 0; f < concurrentOps; f++) { - final int index = f; - futures[f] = executor.submit(new Callable() { - @Override - public Exception call() throws Exception { - - ContractTestUtils.NanoTimer timer = - new ContractTestUtils.NanoTimer(); - - Exception result = null; - try (DynamoDBMetadataStore store = new DynamoDBMetadataStore()) { - store.initialize(conf); - } catch (Exception e) { - LOG.error(e.getClass() + ": " + e.getMessage()); - result = e; - } - - timer.end("Parallel DynamoDB client creation %d", index); - LOG.info("Parallel DynamoDB client creation {} ran from {} to {}", - index, timer.getStartTime(), timer.getEndTime()); - return result; + } + List exceptions = new ArrayList<>(concurrentOps); + for (int f = 0; f < concurrentOps; f++) { + Exception outcome = futures[f].get(); + if (outcome != null) { + exceptions.add(outcome); } - }); - } - List exceptions = new ArrayList<>(concurrentOps); - for (int f = 0; f < concurrentOps; f++) { - Exception outcome = futures[f].get(); - if (outcome != null) { - exceptions.add(outcome); + } + deleteTable(db, tableName); + int exceptionsThrown = exceptions.size(); + if (exceptionsThrown > 0) { + 
// at least one exception was thrown. Fail the test & nest the first + // exception caught + throw new AssertionError(exceptionsThrown + "/" + concurrentOps + + " threads threw exceptions while initializing on iteration " + i, + exceptions.get(0)); } } - deleteTable(db, tableName); - int exceptionsThrown = exceptions.size(); - if (exceptionsThrown > 0) { - // at least one exception was thrown. Fail the test & nest the first - // exception caught - throw new AssertionError(exceptionsThrown + "/" + concurrentOps + - " threads threw exceptions while initializing on iteration " + i, - exceptions.get(0)); - } + } finally { + ms.close(); } + assertEquals("Credential Ref count unchanged after closing metastore: " + + sharedCreds, + originalRefCount, sharedCreds.getRefCount()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java index 1a59bf1c935..65e2619fe75 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java @@ -19,15 +19,20 @@ package org.apache.hadoop.fs.s3a.s3guard; import java.io.IOException; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Random; +import java.util.UUID; import java.util.concurrent.Callable; -import java.util.concurrent.atomic.AtomicInteger; import com.amazonaws.services.dynamodbv2.document.DynamoDB; import com.amazonaws.services.dynamodbv2.document.Table; +import com.amazonaws.services.dynamodbv2.model.ListTagsOfResourceRequest; import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException; +import com.amazonaws.services.dynamodbv2.model.Tag; +import org.junit.Assert; import org.junit.Assume; import org.junit.Test; @@ -39,8 +44,12 @@ import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init; import org.apache.hadoop.test.LambdaTestUtils; +import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY; +import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY; +import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_TAG; import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*; import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.*; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test S3Guard related CLI commands against DynamoDB. @@ -50,9 +59,10 @@ @Override public void setup() throws Exception { super.setup(); - Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard", - getConfiguration().get(Constants.S3_METADATA_STORE_IMPL).equals( - Constants.S3GUARD_METASTORE_DYNAMO)); + MetadataStore ms = getMetadataStore(); + Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard;" + + "Store is " + (ms == null ? "none" : ms.toString()), + ms instanceof DynamoDBMetadataStore); } // Check the existence of a given DynamoDB table. @@ -91,6 +101,65 @@ public String call() throws Exception { }); } + @Test + public void testDynamoTableTagging() throws Exception { + Configuration conf = getConfiguration(); + // If the region is not set in conf, skip the test. 
+ String ddbRegion = conf.get(S3GUARD_DDB_REGION_KEY); + Assume.assumeTrue( + S3GUARD_DDB_REGION_KEY + " should be set to run this test", + ddbRegion != null && !ddbRegion.isEmpty() + ); + + // setup + // clear all table tagging config before this test + conf.getPropsWithPrefix(S3GUARD_DDB_TABLE_TAG).keySet().forEach( + propKey -> conf.unset(S3GUARD_DDB_TABLE_TAG + propKey) + ); + + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, + "testDynamoTableTagging-" + UUID.randomUUID()); + S3GuardTool.Init cmdR = new S3GuardTool.Init(conf); + Map tagMap = new HashMap<>(); + tagMap.put("hello", "dynamo"); + tagMap.put("tag", "youre it"); + + String[] argsR = new String[]{ + cmdR.getName(), + "-tag", tagMapToStringParams(tagMap) + }; + + // run + cmdR.run(argsR); + + // Check. Should create new metadatastore with the table name set. + try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + ddbms.initialize(conf); + ListTagsOfResourceRequest listTagsOfResourceRequest = new ListTagsOfResourceRequest() + .withResourceArn(ddbms.getTable().getDescription().getTableArn()); + List tags = ddbms.getAmazonDynamoDB().listTagsOfResource(listTagsOfResourceRequest).getTags(); + + // assert + assertEquals(tagMap.size(), tags.size()); + for (Tag tag : tags) { + Assert.assertEquals(tagMap.get(tag.getKey()), tag.getValue()); + } + // be sure to clean up - delete table + ddbms.destroy(); + } + } + + private String tagMapToStringParams(Map tagMap) { + StringBuilder stringBuilder = new StringBuilder(); + + for (Map.Entry kv : tagMap.entrySet()) { + stringBuilder.append(kv.getKey() + "=" + kv.getValue() + ";"); + } + + return stringBuilder.toString(); + } + + private static class Capacities { private final long read, write; @@ -219,38 +288,7 @@ public void testDynamoDBInitDestroyCycle() throws Throwable { // that call does not change the values original.checkEquals("unchanged", getCapacities()); - // now update the value - long readCap = original.getRead(); - long writeCap = original.getWrite(); - long rc2 = readCap + 1; - long wc2 = writeCap + 1; - Capacities desired = new Capacities(rc2, wc2); - capacityOut = exec(newSetCapacity(), - S3GuardTool.SetCapacity.NAME, - "-" + READ_FLAG, Long.toString(rc2), - "-" + WRITE_FLAG, Long.toString(wc2), - fsURI); - LOG.info("Set Capacity output=\n{}", capacityOut); - - // to avoid race conditions, spin for the state change - AtomicInteger c = new AtomicInteger(0); - LambdaTestUtils.eventually(60000, - new LambdaTestUtils.VoidCallable() { - @Override - public void call() throws Exception { - c.incrementAndGet(); - Map diags = getMetadataStore().getDiagnostics(); - Capacities updated = getCapacities(diags); - String tableInfo = String.format("[%02d] table state: %s", - c.intValue(), diags.get(STATUS)); - LOG.info("{}; capacities {}", - tableInfo, updated); - desired.checkEquals(tableInfo, updated); - } - }, - new LambdaTestUtils.ProportionalRetryInterval(500, 5000)); - - // Destroy MetadataStore + // Destroy MetadataStore Destroy destroyCmd = new Destroy(fs.getConf()); String destroyed = exec(destroyCmd, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java index f5c4b0316bb..1ee3cde80d9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java @@ -31,6 +31,8 @@ import 
org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.StringUtils; + +import org.junit.Assume; import org.junit.Test; import org.apache.hadoop.fs.FSDataOutputStream; @@ -52,6 +54,15 @@ private static final String[] ABORT_FORCE_OPTIONS = new String[] {"-abort", "-force", "-verbose"}; + @Override + public void setup() throws Exception { + super.setup(); + MetadataStore ms = getMetadataStore(); + Assume.assumeTrue("Test only applies when a local store is used for S3Guard;" + + "Store is " + (ms == null ? "none" : ms.toString()), + ms instanceof LocalMetadataStore); + } + @Test public void testImportCommand() throws Exception { S3AFileSystem fs = getFileSystem(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index 5a59400849f..45d6051ddb1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -727,6 +727,13 @@ public void testPruneUnsetsAuthoritative() throws Exception { new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)), Tristate.FALSE, false)); + // set parent dir as authoritative + if (!allowMissing()) { + DirListingMetadata parentDirMd = ms.listChildren(strToPath(parentDir)); + parentDirMd.setAuthoritative(true); + ms.put(parentDirMd); + } + ms.prune(time); DirListingMetadata listing; for (String directory : directories) { @@ -738,6 +745,48 @@ public void testPruneUnsetsAuthoritative() throws Exception { } } + @Test + public void testPrunePreservesAuthoritative() throws Exception { + String rootDir = "/unpruned-root-dir"; + String grandparentDir = rootDir + "/pruned-grandparent-dir"; + String parentDir = grandparentDir + "/pruned-parent-dir"; + String staleFile = parentDir + "/stale-file"; + String freshFile = rootDir + "/fresh-file"; + String[] directories = {rootDir, grandparentDir, parentDir}; + + // create dirs + createNewDirs(rootDir, grandparentDir, parentDir); + long time = System.currentTimeMillis(); + ms.put(new PathMetadata( + new FileStatus(0, false, 0, 0, time + 1, strToPath(staleFile)), + Tristate.FALSE, false)); + ms.put(new PathMetadata( + new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)), + Tristate.FALSE, false)); + + if (!allowMissing()) { + // set parent dir as authoritative + DirListingMetadata parentDirMd = ms.listChildren(strToPath(parentDir)); + parentDirMd.setAuthoritative(true); + ms.put(parentDirMd); + + // prune the ms + ms.prune(time); + + // get the directory listings + DirListingMetadata rootDirMd = ms.listChildren(strToPath(rootDir)); + DirListingMetadata grandParentDirMd = + ms.listChildren(strToPath(grandparentDir)); + parentDirMd = ms.listChildren(strToPath(parentDir)); + + // assert that parent dir is still authoritative (no removed elements + // during prune) + assertFalse(rootDirMd.isAuthoritative()); + assertFalse(grandParentDirMd.isAuthoritative()); + assertTrue(parentDirMd.isAuthoritative()); + } + } + @Test public void testPutDirListingMetadataPutsFileMetadata() throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java index 1678746abd4..70d4c3b0389 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java @@ -40,6 +40,7 @@ import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.LambdaTestUtils; +import org.mockito.Mockito; import static com.amazonaws.services.dynamodbv2.model.KeyType.HASH; import static com.amazonaws.services.dynamodbv2.model.KeyType.RANGE; @@ -50,6 +51,7 @@ import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*; import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION_MARKER; import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION; +import static org.mockito.Mockito.never; /** * Test the PathMetadataDynamoDBTranslation is able to translate between domain @@ -59,28 +61,30 @@ private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir"); private static final Item TEST_DIR_ITEM = new Item(); - private static PathMetadata testDirPathMetadata; + private static DDBPathMetadata testDirPathMetadata; private static final long TEST_FILE_LENGTH = 100; private static final long TEST_MOD_TIME = 9999; private static final long TEST_BLOCK_SIZE = 128; private static final Path TEST_FILE_PATH = new Path(TEST_DIR_PATH, "myFile"); private static final Item TEST_FILE_ITEM = new Item(); - private static PathMetadata testFilePathMetadata; + private static DDBPathMetadata testFilePathMetadata; @BeforeClass public static void setUpBeforeClass() throws IOException { String username = UserGroupInformation.getCurrentUser().getShortUserName(); - testDirPathMetadata = - new PathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username)); + testDirPathMetadata = new DDBPathMetadata(new S3AFileStatus(false, + TEST_DIR_PATH, username)); + TEST_DIR_ITEM .withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName()) .withBoolean(IS_DIR, true); - testFilePathMetadata = new PathMetadata( + testFilePathMetadata = new DDBPathMetadata( new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH, TEST_BLOCK_SIZE, username)); + TEST_FILE_ITEM .withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()), CHILD, TEST_FILE_PATH.getName()) @@ -235,4 +239,37 @@ public void testVersionMarkerNotStatusIllegalPath() throws Throwable { itemToPathMetadata(marker, "alice")); } + /** + * Test when translating an {@link Item} to {@link DDBPathMetadata} works + * if {@code IS_AUTHORITATIVE} flag is ignored. + */ + @Test + public void testIsAuthoritativeCompatibilityItemToPathMetadata() + throws Exception { + Item item = Mockito.spy(TEST_DIR_ITEM); + item.withBoolean(IS_AUTHORITATIVE, true); + + final String user = + UserGroupInformation.getCurrentUser().getShortUserName(); + DDBPathMetadata meta = itemToPathMetadata(item, user, true); + + Mockito.verify(item, Mockito.never()).getBoolean(IS_AUTHORITATIVE); + assertFalse(meta.isAuthoritativeDir()); + } + + /** + * Test when translating an {@link DDBPathMetadata} to {@link Item} works + * if {@code IS_AUTHORITATIVE} flag is ignored. 
+ */ + @Test + public void testIsAuthoritativeCompatibilityPathMetadataToItem() { + DDBPathMetadata meta = Mockito.spy(testFilePathMetadata); + meta.setAuthoritativeDir(true); + + Item item = pathMetadataToItem(meta, true); + + Mockito.verify(meta, never()).isAuthoritativeDir(); + assertFalse(item.hasAttribute(IS_AUTHORITATIVE)); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java index 876cc8020d3..0e6a1d8d092 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java @@ -22,7 +22,10 @@ import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; import org.apache.hadoop.fs.s3a.s3guard.PathMetadata; + +import org.junit.FixMethodOrder; import org.junit.Test; +import org.junit.runners.MethodSorters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,6 +41,7 @@ * Could be separated from S3A code, but we're using the S3A scale test * framework for convenience. */ +@FixMethodOrder(MethodSorters.NAME_ASCENDING) public abstract class AbstractITestS3AMetadataStoreScale extends S3AScaleTestBase { private static final Logger LOG = LoggerFactory.getLogger( @@ -60,7 +64,7 @@ public abstract MetadataStore createMetadataStore() throws IOException; @Test - public void testPut() throws Throwable { + public void test_010_Put() throws Throwable { describe("Test workload of put() operations"); // As described in hadoop-aws site docs, count parameter is used for @@ -83,7 +87,7 @@ public void testPut() throws Throwable { } @Test - public void testMoves() throws Throwable { + public void test_020_Moves() throws Throwable { describe("Test workload of batched move() operations"); // As described in hadoop-aws site docs, count parameter is used for @@ -140,7 +144,7 @@ public void testMoves() throws Throwable { * Create a copy of given list of PathMetadatas with the paths moved from * src to dest. 
*/ - private List moveMetas(List metas, Path src, + protected List moveMetas(List metas, Path src, Path dest) throws IOException { List moved = new ArrayList<>(metas.size()); for (PathMetadata srcMeta : metas) { @@ -151,7 +155,7 @@ public void testMoves() throws Throwable { return moved; } - private Path movePath(Path p, Path src, Path dest) { + protected Path movePath(Path p, Path src, Path dest) { String srcStr = src.toUri().getPath(); String pathStr = p.toUri().getPath(); // Strip off src dir @@ -160,7 +164,7 @@ private Path movePath(Path p, Path src, Path dest) { return new Path(dest, pathStr); } - private S3AFileStatus copyStatus(S3AFileStatus status) { + protected S3AFileStatus copyStatus(S3AFileStatus status) { if (status.isDirectory()) { return new S3AFileStatus(status.isEmptyDirectory(), status.getPath(), status.getOwner()); @@ -185,7 +189,7 @@ private long populateMetadataStore(Collection paths, return count; } - private void clearMetadataStore(MetadataStore ms, long count) + protected void clearMetadataStore(MetadataStore ms, long count) throws IOException { describe("Recursive deletion"); NanoTimer deleteTimer = new NanoTimer(); @@ -202,15 +206,15 @@ private static void printTiming(Logger log, String op, NanoTimer timer, msecPerOp, op, count)); } - private static S3AFileStatus makeFileStatus(Path path) throws IOException { + protected static S3AFileStatus makeFileStatus(Path path) throws IOException { return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER); } - private static S3AFileStatus makeDirStatus(Path p) throws IOException { + protected static S3AFileStatus makeDirStatus(Path p) throws IOException { return new S3AFileStatus(false, p, OWNER); } - private List metasToPaths(List metas) { + protected List metasToPaths(List metas) { List paths = new ArrayList<>(metas.size()); for (PathMetadata meta : metas) { paths.add(meta.getFileStatus().getPath()); @@ -225,7 +229,7 @@ private static S3AFileStatus makeDirStatus(Path p) throws IOException { * @param width Number of files (and directories, if depth > 0) per directory. * @param paths List to add generated paths to. 
*/ - private static void createDirTree(Path parent, int depth, int width, + protected static void createDirTree(Path parent, int depth, int width, Collection paths) throws IOException { // Create files diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java index 02236eba448..88a19d574cb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java @@ -64,7 +64,7 @@ private static final Logger LOG = LoggerFactory.getLogger( AbstractSTestS3AHugeFiles.class); public static final int DEFAULT_UPLOAD_BLOCKSIZE = 64 * _1KB; - public static final String DEFAULT_PARTITION_SIZE = "8M"; + private Path scaleTestDir; private Path hugefile; private Path hugefileRenamed; @@ -101,7 +101,7 @@ protected Configuration createScaleConfiguration() { Configuration conf = super.createScaleConfiguration(); partitionSize = (int) getTestPropertyBytes(conf, KEY_HUGE_PARTITION_SIZE, - DEFAULT_PARTITION_SIZE); + DEFAULT_HUGE_PARTITION_SIZE); assertTrue("Partition size too small: " + partitionSize, partitionSize > MULTIPART_MIN_SIZE); conf.setLong(SOCKET_SEND_BUFFER, _1MB); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3xLoginHelper.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3xLoginHelper.java index 3761cb76117..10409df99f1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3xLoginHelper.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3xLoginHelper.java @@ -18,12 +18,14 @@ package org.apache.hadoop.fs.s3native; -import org.apache.hadoop.fs.Path; +import java.net.URI; +import java.net.URISyntaxException; + import org.junit.Assert; import org.junit.Test; -import java.net.URI; -import java.net.URISyntaxException; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test how URIs and login details are extracted from URIs. @@ -35,14 +37,11 @@ public static final String P = "%2b"; public static final String P_RAW = "+"; public static final String USER = "user"; - public static final String PASS = "pass"; public static final String PASLASHSLASH = "pa" + S + S; public static final String PAPLUS = "pa" + P; public static final String PAPLUS_RAW = "pa" + P_RAW; public static final URI WITH_USER_AND_PASS = uri("s3a://user:pass@bucket"); - public static final Path PATH_WITH_LOGIN = - new Path(uri("s3a://user:pass@bucket/dest")); public static final URI WITH_SLASH_IN_PASS = uri( "s3a://user:" + PASLASHSLASH + "@bucket"); @@ -73,13 +72,24 @@ private static URI uri(String s) { /** * Assert that a built up FS URI matches the endpoint. - * @param uri URI to build the FS UIR from + * @param uri URI to build the FS URI from */ private void assertMatchesEndpoint(URI uri) { assertEquals("Source " + uri, ENDPOINT, S3xLoginHelper.buildFSURI(uri)); } + /** + * Assert that the supplied FS URI is invalid as it contains + * username:password secrets. + * @param uri URI to build the FS URI from + */ + private void assertInvalid(URI uri) throws Exception { + intercept(IllegalArgumentException.class, + S3xLoginHelper.LOGIN_WARNING, + () -> S3xLoginHelper.buildFSURI(uri)); + } + /** * Assert that the login/pass details from a URI match that expected. 
* @param user username @@ -89,10 +99,8 @@ private void assertMatchesEndpoint(URI uri) { */ private S3xLoginHelper.Login assertMatchesLogin(String user, String pass, URI uri) { - S3xLoginHelper.Login expected = new S3xLoginHelper.Login(user, - pass); - S3xLoginHelper.Login actual = S3xLoginHelper.extractLoginDetails( - uri); + S3xLoginHelper.Login expected = new S3xLoginHelper.Login(user, pass); + S3xLoginHelper.Login actual = S3xLoginHelper.extractLoginDetails(uri); if (!expected.equals(actual)) { Assert.fail("Source " + uri + " login expected=:" + toString(expected) @@ -112,28 +120,6 @@ public void testLoginSimple() throws Throwable { assertFalse("Login of " + login, login.hasLogin()); } - @Test - public void testLoginWithUserAndPass() throws Throwable { - S3xLoginHelper.Login login = assertMatchesLogin(USER, PASS, - WITH_USER_AND_PASS); - assertTrue("Login of " + login, login.hasLogin()); - } - - @Test - public void testLoginWithSlashInPass() throws Throwable { - assertMatchesLogin(USER, "pa//", WITH_SLASH_IN_PASS); - } - - @Test - public void testLoginWithPlusInPass() throws Throwable { - assertMatchesLogin(USER, "pa+", WITH_PLUS_IN_PASS); - } - - @Test - public void testLoginWithPlusRawInPass() throws Throwable { - assertMatchesLogin(USER, "pa+", WITH_PLUS_RAW_IN_PASS); - } - @Test public void testLoginWithUser() throws Throwable { assertMatchesLogin(USER, "", USER_NO_PASS); @@ -161,32 +147,32 @@ public void testLoginNoUserNoPassTwoColon() throws Throwable { @Test public void testFsUriWithUserAndPass() throws Throwable { - assertMatchesEndpoint(WITH_USER_AND_PASS); + assertInvalid(WITH_USER_AND_PASS); } @Test public void testFsUriWithSlashInPass() throws Throwable { - assertMatchesEndpoint(WITH_SLASH_IN_PASS); + assertInvalid(WITH_SLASH_IN_PASS); } @Test public void testFsUriWithPlusInPass() throws Throwable { - assertMatchesEndpoint(WITH_PLUS_IN_PASS); + assertInvalid(WITH_PLUS_IN_PASS); } @Test public void testFsUriWithPlusRawInPass() throws Throwable { - assertMatchesEndpoint(WITH_PLUS_RAW_IN_PASS); + assertInvalid(WITH_PLUS_RAW_IN_PASS); } @Test public void testFsUriWithUser() throws Throwable { - assertMatchesEndpoint(USER_NO_PASS); + assertInvalid(USER_NO_PASS); } @Test public void testFsUriWithUserAndColon() throws Throwable { - assertMatchesEndpoint(WITH_USER_AND_COLON); + assertInvalid(WITH_USER_AND_COLON); } @Test @@ -204,12 +190,6 @@ public void testFsUriNoUserNoPassTwoColon() throws Throwable { assertMatchesEndpoint(NO_USER_NO_PASS_TWO_COLON); } - @Test - public void testPathURIFixup() throws Throwable { - - } - - /** * Stringifier. Kept in the code to avoid accidental logging in production * code. 
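
The rewritten TestS3xLoginHelper cases above assert that building an FS URI from a URI carrying user:pass secrets now fails, using LambdaTestUtils.intercept(). As a rough, self-contained sketch of that pattern (rejectSecrets() below is an illustrative stand-in, not the real S3xLoginHelper.buildFSURI()):

    import java.net.URI;

    import org.apache.hadoop.test.LambdaTestUtils;
    import org.junit.Test;

    public class InterceptPatternSketch {

      /** Toy stand-in for the real helper: reject URIs that embed secrets. */
      private static URI rejectSecrets(URI uri) {
        if (uri.getUserInfo() != null) {
          throw new IllegalArgumentException("login details in URI");
        }
        return uri;
      }

      @Test
      public void testRejectsEmbeddedSecrets() throws Exception {
        // intercept() evaluates the lambda and fails the test unless an
        // IllegalArgumentException whose text contains "login details" is raised.
        LambdaTestUtils.intercept(IllegalArgumentException.class,
            "login details",
            () -> rejectSecrets(new URI("s3a://user:pass@bucket/")));
      }
    }

The new assertInvalid() helper in the hunk above wraps exactly this intercept() call, with S3xLoginHelper.LOGIN_WARNING as the expected message fragment.
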
diff --git a/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml b/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml index fe0af663fd3..ec4c54ae393 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml +++ b/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml @@ -107,6 +107,11 @@ true + + fs.contract.supports-multipartuploader + true + + fs.contract.supports-unix-permissions false diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml index b68f5593980..f3a47fe5c12 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml +++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml @@ -150,6 +150,16 @@ simple + + + fs.s3a.s3guard.ddb.table.capacity.read + 10 + + + fs.s3a.s3guard.ddb.table.capacity.write + 10 + + + + + + diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index 44b67a0b063..52b5b726a13 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -43,6 +43,8 @@ unset 7200 + 10 + 1000 @@ -298,6 +300,8 @@ ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} ${fs.azure.scale.test.timeout} + ${fs.azure.scale.test.list.performance.threads} + ${fs.azure.scale.test.list.performance.files} **/Test*.java @@ -326,6 +330,8 @@ ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} ${fs.azure.scale.test.timeout} + ${fs.azure.scale.test.list.performance.threads} + ${fs.azure.scale.test.list.performance.files} **/TestRollingWindowAverage*.java @@ -367,6 +373,8 @@ ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} ${fs.azure.scale.test.timeout} + ${fs.azure.scale.test.list.performance.threads} + ${fs.azure.scale.test.list.performance.files} @@ -412,6 +420,8 @@ ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.huge.partitionsize} ${fs.azure.scale.test.timeout} + ${fs.azure.scale.test.list.performance.threads} + ${fs.azure.scale.test.list.performance.files} **/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java @@ -454,6 +464,8 @@ ${fs.azure.scale.test.enabled} ${fs.azure.scale.test.huge.filesize} ${fs.azure.scale.test.timeout} + ${fs.azure.scale.test.list.performance.threads} + ${fs.azure.scale.test.list.performance.files} ${fs.azure.scale.test.timeout} false diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java index 197ab22be21..d2f9ca69947 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java @@ -30,7 +30,6 @@ import java.net.URLDecoder; import java.net.URLEncoder; import java.security.InvalidKeyException; -import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.EnumSet; @@ -128,6 +127,7 @@ // computed as min(2*cpu,8) private static final String KEY_CONCURRENT_CONNECTION_VALUE_OUT = "fs.azure.concurrentRequestCount.out"; + private static final String HADOOP_BLOCK_SIZE_PROPERTY_NAME = "fs.azure.block.size"; private static final String KEY_STREAM_MIN_READ_SIZE = "fs.azure.read.request.size"; private static final String KEY_STORAGE_CONNECTION_TIMEOUT = "fs.azure.storage.timeout"; private static final String KEY_WRITE_BLOCK_SIZE = 
"fs.azure.write.request.size"; @@ -252,6 +252,7 @@ // Default block sizes public static final int DEFAULT_DOWNLOAD_BLOCK_SIZE = 4 * 1024 * 1024; public static final int DEFAULT_UPLOAD_BLOCK_SIZE = 4 * 1024 * 1024; + public static final long DEFAULT_HADOOP_BLOCK_SIZE = 512 * 1024 * 1024L; private static final int DEFAULT_INPUT_STREAM_VERSION = 2; @@ -313,6 +314,7 @@ private boolean tolerateOobAppends = DEFAULT_READ_TOLERATE_CONCURRENT_APPEND; + private long hadoopBlockSize = DEFAULT_HADOOP_BLOCK_SIZE; private int downloadBlockSizeBytes = DEFAULT_DOWNLOAD_BLOCK_SIZE; private int uploadBlockSizeBytes = DEFAULT_UPLOAD_BLOCK_SIZE; private int inputStreamVersion = DEFAULT_INPUT_STREAM_VERSION; @@ -740,6 +742,8 @@ private void configureAzureStorageSession() throws AzureException { KEY_STREAM_MIN_READ_SIZE, DEFAULT_DOWNLOAD_BLOCK_SIZE); this.uploadBlockSizeBytes = sessionConfiguration.getInt( KEY_WRITE_BLOCK_SIZE, DEFAULT_UPLOAD_BLOCK_SIZE); + this.hadoopBlockSize = sessionConfiguration.getLong( + HADOOP_BLOCK_SIZE_PROPERTY_NAME, DEFAULT_HADOOP_BLOCK_SIZE); this.inputStreamVersion = sessionConfiguration.getInt( KEY_INPUT_STREAM_VERSION, DEFAULT_INPUT_STREAM_VERSION); @@ -1234,7 +1238,14 @@ public boolean isKeyForDirectorySet(String key, Set dirSet) { return false; } - + /** + * Returns the file block size. This is a fake value used for integration + * of the Azure store with Hadoop. + */ + @Override + public long getHadoopBlockSize() { + return hadoopBlockSize; + } /** * This should be called from any method that does any modifications to the @@ -2066,7 +2077,7 @@ public FileMetadata retrieveMetadata(String key) throws IOException { // The key refers to root directory of container. // Set the modification time for root to zero. return new FileMetadata(key, 0, defaultPermissionNoBlobMetadata(), - BlobMaterialization.Implicit); + BlobMaterialization.Implicit, hadoopBlockSize); } CloudBlobWrapper blob = getBlobReference(key); @@ -2086,7 +2097,7 @@ public FileMetadata retrieveMetadata(String key) throws IOException { if (retrieveFolderAttribute(blob)) { LOG.debug("{} is a folder blob.", key); return new FileMetadata(key, properties.getLastModified().getTime(), - getPermissionStatus(blob), BlobMaterialization.Explicit); + getPermissionStatus(blob), BlobMaterialization.Explicit, hadoopBlockSize); } else { LOG.debug("{} is a normal blob.", key); @@ -2095,7 +2106,7 @@ public FileMetadata retrieveMetadata(String key) throws IOException { key, // Always return denormalized key with metadata. 
getDataLength(blob, properties), properties.getLastModified().getTime(), - getPermissionStatus(blob)); + getPermissionStatus(blob), hadoopBlockSize); } } catch(StorageException e){ if (!NativeAzureFileSystemHelper.isFileNotFoundException(e)) { @@ -2129,7 +2140,7 @@ public FileMetadata retrieveMetadata(String key) throws IOException { BlobProperties properties = blob.getProperties(); return new FileMetadata(key, properties.getLastModified().getTime(), - getPermissionStatus(blob), BlobMaterialization.Implicit); + getPermissionStatus(blob), BlobMaterialization.Implicit, hadoopBlockSize); } } @@ -2178,46 +2189,13 @@ public InputStream retrieve(String key, long startByteOffset) } @Override - public PartialListing list(String prefix, final int maxListingCount, + public FileMetadata[] list(String prefix, final int maxListingCount, final int maxListingDepth) throws IOException { - return list(prefix, maxListingCount, maxListingDepth, null); - } - - @Override - public PartialListing list(String prefix, final int maxListingCount, - final int maxListingDepth, String priorLastKey) throws IOException { - return list(prefix, PATH_DELIMITER, maxListingCount, maxListingDepth, - priorLastKey); + return listInternal(prefix, maxListingCount, maxListingDepth); } - @Override - public PartialListing listAll(String prefix, final int maxListingCount, - final int maxListingDepth, String priorLastKey) throws IOException { - return list(prefix, null, maxListingCount, maxListingDepth, priorLastKey); - } - - /** - * Searches the given list of {@link FileMetadata} objects for a directory - * with the given key. - * - * @param list - * The list to search. - * @param key - * The key to search for. - * @return The wanted directory, or null if not found. - */ - private static FileMetadata getFileMetadataInList( - final Iterable list, String key) { - for (FileMetadata current : list) { - if (current.getKey().equals(key)) { - return current; - } - } - return null; - } - - private PartialListing list(String prefix, String delimiter, - final int maxListingCount, final int maxListingDepth, String priorLastKey) + private FileMetadata[] listInternal(String prefix, final int maxListingCount, + final int maxListingDepth) throws IOException { try { checkContainer(ContainerAccessType.PureRead); @@ -2241,7 +2219,8 @@ private PartialListing list(String prefix, String delimiter, objects = listRootBlobs(prefix, true, enableFlatListing); } - ArrayList fileMetadata = new ArrayList(); + HashMap fileMetadata = new HashMap<>(256); + for (ListBlobItem blobItem : objects) { // Check that the maximum listing count is not exhausted. // @@ -2261,25 +2240,37 @@ private PartialListing list(String prefix, String delimiter, FileMetadata metadata; if (retrieveFolderAttribute(blob)) { - metadata = new FileMetadata(blobKey, - properties.getLastModified().getTime(), - getPermissionStatus(blob), - BlobMaterialization.Explicit); + metadata = new FileMetadata(blobKey, + properties.getLastModified().getTime(), + getPermissionStatus(blob), + BlobMaterialization.Explicit, + hadoopBlockSize); } else { - metadata = new FileMetadata( - blobKey, - getDataLength(blob, properties), - properties.getLastModified().getTime(), - getPermissionStatus(blob)); + metadata = new FileMetadata( + blobKey, + getDataLength(blob, properties), + properties.getLastModified().getTime(), + getPermissionStatus(blob), + hadoopBlockSize); } + // Add the metadata but remove duplicates. 
Note that the azure + // storage java SDK returns two types of entries: CloudBlobWrappter + // and CloudDirectoryWrapper. In the case where WASB generated the + // data, there will be an empty blob for each "directory", and we will + // receive a CloudBlobWrapper. If there are also files within this + // "directory", we will also receive a CloudDirectoryWrapper. To + // complicate matters, the data may not be generated by WASB, in + // which case we may not have an empty blob for each "directory". + // So, sometimes we receive both a CloudBlobWrapper and a + // CloudDirectoryWrapper for each directory, and sometimes we receive + // one or the other but not both. We remove duplicates, but + // prefer CloudBlobWrapper over CloudDirectoryWrapper. + // Furthermore, it is very unfortunate that the list results are not + // ordered, and it is a partial list which uses continuation. So + // the HashMap is the best structure to remove the duplicates, despite + // its potential large size. + fileMetadata.put(blobKey, metadata); - // Add the metadata to the list, but remove any existing duplicate - // entries first that we may have added by finding nested files. - FileMetadata existing = getFileMetadataInList(fileMetadata, blobKey); - if (existing != null) { - fileMetadata.remove(existing); - } - fileMetadata.add(metadata); } else if (blobItem instanceof CloudBlobDirectoryWrapper) { CloudBlobDirectoryWrapper directory = (CloudBlobDirectoryWrapper) blobItem; // Determine format of directory name depending on whether an absolute @@ -2298,12 +2289,15 @@ private PartialListing list(String prefix, String delimiter, // inherit the permissions of the first non-directory blob. // Also, getting a proper value for last-modified is tricky. FileMetadata directoryMetadata = new FileMetadata(dirKey, 0, - defaultPermissionNoBlobMetadata(), BlobMaterialization.Implicit); + defaultPermissionNoBlobMetadata(), BlobMaterialization.Implicit, + hadoopBlockSize); // Add the directory metadata to the list only if it's not already - // there. - if (getFileMetadataInList(fileMetadata, dirKey) == null) { - fileMetadata.add(directoryMetadata); + // there. See earlier note, we prefer CloudBlobWrapper over + // CloudDirectoryWrapper because it may have additional metadata ( + // properties and ACLs). + if (!fileMetadata.containsKey(dirKey)) { + fileMetadata.put(dirKey, directoryMetadata); } if (!enableFlatListing) { @@ -2314,13 +2308,7 @@ private PartialListing list(String prefix, String delimiter, } } } - // Note: Original code indicated that this may be a hack. - priorLastKey = null; - PartialListing listing = new PartialListing(priorLastKey, - fileMetadata.toArray(new FileMetadata[] {}), - 0 == fileMetadata.size() ? new String[] {} - : new String[] { prefix }); - return listing; + return fileMetadata.values().toArray(new FileMetadata[fileMetadata.size()]); } catch (Exception e) { // Re-throw as an Azure storage exception. // @@ -2334,13 +2322,13 @@ private PartialListing list(String prefix, String delimiter, * the sorted order of the blob names. * * @param aCloudBlobDirectory Azure blob directory - * @param aFileMetadataList a list of file metadata objects for each + * @param metadataHashMap a map of file metadata objects for each * non-directory blob. * @param maxListingCount maximum length of the built up list. 
*/ private void buildUpList(CloudBlobDirectoryWrapper aCloudBlobDirectory, - ArrayList aFileMetadataList, final int maxListingCount, - final int maxListingDepth) throws Exception { + HashMap metadataHashMap, final int maxListingCount, + final int maxListingDepth) throws Exception { // Push the blob directory onto the stack. // @@ -2371,12 +2359,12 @@ private void buildUpList(CloudBlobDirectoryWrapper aCloudBlobDirectory, // (2) maxListingCount > 0 implies that the number of items in the // metadata list is less than the max listing count. while (null != blobItemIterator - && (maxListingCount <= 0 || aFileMetadataList.size() < maxListingCount)) { + && (maxListingCount <= 0 || metadataHashMap.size() < maxListingCount)) { while (blobItemIterator.hasNext()) { // Check if the count of items on the list exhausts the maximum // listing count. // - if (0 < maxListingCount && aFileMetadataList.size() >= maxListingCount) { + if (0 < maxListingCount && metadataHashMap.size() >= maxListingCount) { break; } @@ -2399,22 +2387,34 @@ private void buildUpList(CloudBlobDirectoryWrapper aCloudBlobDirectory, metadata = new FileMetadata(blobKey, properties.getLastModified().getTime(), getPermissionStatus(blob), - BlobMaterialization.Explicit); + BlobMaterialization.Explicit, + hadoopBlockSize); } else { metadata = new FileMetadata( blobKey, getDataLength(blob, properties), properties.getLastModified().getTime(), - getPermissionStatus(blob)); + getPermissionStatus(blob), + hadoopBlockSize); } - // Add the directory metadata to the list only if it's not already - // there. - FileMetadata existing = getFileMetadataInList(aFileMetadataList, blobKey); - if (existing != null) { - aFileMetadataList.remove(existing); - } - aFileMetadataList.add(metadata); + // Add the metadata but remove duplicates. Note that the azure + // storage java SDK returns two types of entries: CloudBlobWrappter + // and CloudDirectoryWrapper. In the case where WASB generated the + // data, there will be an empty blob for each "directory", and we will + // receive a CloudBlobWrapper. If there are also files within this + // "directory", we will also receive a CloudDirectoryWrapper. To + // complicate matters, the data may not be generated by WASB, in + // which case we may not have an empty blob for each "directory". + // So, sometimes we receive both a CloudBlobWrapper and a + // CloudDirectoryWrapper for each directory, and sometimes we receive + // one or the other but not both. We remove duplicates, but + // prefer CloudBlobWrapper over CloudDirectoryWrapper. + // Furthermore, it is very unfortunate that the list results are not + // ordered, and it is a partial list which uses continuation. So + // the HashMap is the best structure to remove the duplicates, despite + // its potential large size. + metadataHashMap.put(blobKey, metadata); } else if (blobItem instanceof CloudBlobDirectoryWrapper) { CloudBlobDirectoryWrapper directory = (CloudBlobDirectoryWrapper) blobItem; @@ -2439,7 +2439,12 @@ private void buildUpList(CloudBlobDirectoryWrapper aCloudBlobDirectory, // absolute path is being used or not. String dirKey = normalizeKey(directory); - if (getFileMetadataInList(aFileMetadataList, dirKey) == null) { + // Add the directory metadata to the list only if it's not already + // there. See earlier note, we prefer CloudBlobWrapper over + // CloudDirectoryWrapper because it may have additional metadata ( + // properties and ACLs). + if (!metadataHashMap.containsKey(dirKey)) { + // Reached the targeted listing depth. 
Return metadata for the // directory using default permissions. // @@ -2450,10 +2455,11 @@ private void buildUpList(CloudBlobDirectoryWrapper aCloudBlobDirectory, FileMetadata directoryMetadata = new FileMetadata(dirKey, 0, defaultPermissionNoBlobMetadata(), - BlobMaterialization.Implicit); + BlobMaterialization.Implicit, + hadoopBlockSize); // Add the directory metadata to the list. - aFileMetadataList.add(directoryMetadata); + metadataHashMap.put(dirKey, directoryMetadata); } } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/FileMetadata.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/FileMetadata.java index 5085a0f7dbc..cbf3ab96160 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/FileMetadata.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/FileMetadata.java @@ -19,6 +19,8 @@ package org.apache.hadoop.fs.azure; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.PermissionStatus; /** @@ -27,12 +29,9 @@ *

*/ @InterfaceAudience.Private -class FileMetadata { - private final String key; - private final long length; - private final long lastModified; - private final boolean isDir; - private final PermissionStatus permissionStatus; +class FileMetadata extends FileStatus { + // this is not final so that it can be cleared to save memory when not needed. + private String key; private final BlobMaterialization blobMaterialization; /** @@ -46,16 +45,19 @@ * The last modified date (milliseconds since January 1, 1970 UTC.) * @param permissionStatus * The permission for the file. + * @param blockSize + * The Hadoop file block size. */ public FileMetadata(String key, long length, long lastModified, - PermissionStatus permissionStatus) { + PermissionStatus permissionStatus, final long blockSize) { + super(length, false, 1, blockSize, lastModified, 0, + permissionStatus.getPermission(), + permissionStatus.getUserName(), + permissionStatus.getGroupName(), + null); this.key = key; - this.length = length; - this.lastModified = lastModified; - this.isDir = false; - this.permissionStatus = permissionStatus; - this.blobMaterialization = BlobMaterialization.Explicit; // File are never - // implicit. + // Files are never implicit. + this.blobMaterialization = BlobMaterialization.Explicit; } /** @@ -70,37 +72,42 @@ public FileMetadata(String key, long length, long lastModified, * @param blobMaterialization * Whether this is an implicit (no real blob backing it) or explicit * directory. + * @param blockSize + * The Hadoop file block size. */ public FileMetadata(String key, long lastModified, - PermissionStatus permissionStatus, BlobMaterialization blobMaterialization) { + PermissionStatus permissionStatus, BlobMaterialization blobMaterialization, + final long blockSize) { + super(0, true, 1, blockSize, lastModified, 0, + permissionStatus.getPermission(), + permissionStatus.getUserName(), + permissionStatus.getGroupName(), + null); this.key = key; - this.isDir = true; - this.length = 0; - this.lastModified = lastModified; - this.permissionStatus = permissionStatus; this.blobMaterialization = blobMaterialization; } - public boolean isDir() { - return isDir; + @Override + public Path getPath() { + Path p = super.getPath(); + if (p == null) { + // Don't store this yet to reduce memory usage, as it will + // stay in the Eden Space and later we will update it + // with the full canonicalized path. + p = NativeAzureFileSystem.keyToPath(key); + } + return p; } + /** + * Returns the Azure storage key for the file. Used internally by the framework. + * + * @return The key for the file. + */ public String getKey() { return key; } - public long getLength() { - return length; - } - - public long getLastModified() { - return lastModified; - } - - public PermissionStatus getPermissionStatus() { - return permissionStatus; - } - /** * Indicates whether this is an implicit directory (no real blob backing it) * or an explicit one. 
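
FileMetadata now extends FileStatus and only derives its Path from the Azure blob key when a caller asks for it, which the getPath() override above justifies as a memory saving during large listings. A minimal sketch of that lazy-Path idea, under illustrative names (LazyPathStatus is not part of the patch, and the key-to-path mapping is simplified):

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;

    class LazyPathStatus extends FileStatus {

      private String key;   // blob key; the real class can clear this later

      LazyPathStatus(String key, long length, long blockSize, long modTime) {
        // pass null as the Path: it is computed on demand in getPath()
        super(length, false, 1, blockSize, modTime, 0,
            FsPermission.getFileDefault(), "", "", null);
        this.key = key;
      }

      @Override
      public Path getPath() {
        Path p = super.getPath();
        if (p == null) {
          // simplified key-to-path conversion; callers may later call
          // setPath() with the fully qualified path
          p = new Path("/" + key);
        }
        return p;
      }
    }
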
@@ -112,9 +119,7 @@ public BlobMaterialization getBlobMaterialization() { return blobMaterialization; } - @Override - public String toString() { - return "FileMetadata[" + key + ", " + length + ", " + lastModified + ", " - + permissionStatus + "]"; + void removeKey() { + key = null; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index 52027621ef1..f8962d9b170 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -31,9 +31,7 @@ import java.util.ArrayList; import java.util.Date; import java.util.EnumSet; -import java.util.Set; import java.util.TimeZone; -import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Matcher; @@ -129,20 +127,12 @@ public FolderRenamePending(String srcKey, String dstKey, SelfRenewingLease lease this.dstKey = dstKey; this.folderLease = lease; this.fs = fs; - ArrayList fileMetadataList = new ArrayList(); // List all the files in the folder. long start = Time.monotonicNow(); - String priorLastKey = null; - do { - PartialListing listing = fs.getStoreInterface().listAll(srcKey, AZURE_LIST_ALL, - AZURE_UNBOUNDED_DEPTH, priorLastKey); - for(FileMetadata file : listing.getFiles()) { - fileMetadataList.add(file); - } - priorLastKey = listing.getPriorLastKey(); - } while (priorLastKey != null); - fileMetadata = fileMetadataList.toArray(new FileMetadata[fileMetadataList.size()]); + fileMetadata = fs.getStoreInterface().list(srcKey, AZURE_LIST_ALL, + AZURE_UNBOUNDED_DEPTH); + long end = Time.monotonicNow(); LOG.debug("Time taken to list {} blobs for rename operation is: {} ms", fileMetadata.length, (end - start)); @@ -669,7 +659,6 @@ public String getScheme() { public static final Logger LOG = LoggerFactory.getLogger(NativeAzureFileSystem.class); - static final String AZURE_BLOCK_SIZE_PROPERTY_NAME = "fs.azure.block.size"; /** * The time span in seconds before which we consider a temp blob to be * dangling (not being actively uploaded to) and up for reclamation. @@ -685,8 +674,6 @@ public String getScheme() { private static final int AZURE_LIST_ALL = -1; private static final int AZURE_UNBOUNDED_DEPTH = -1; - private static final long MAX_AZURE_BLOCK_SIZE = 512 * 1024 * 1024L; - /** * The configuration property that determines which group owns files created * in WASB. @@ -1196,7 +1183,6 @@ private void restoreKey() throws IOException { private NativeFileSystemStore store; private AzureNativeFileSystemStore actualStore; private Path workingDir; - private long blockSize = MAX_AZURE_BLOCK_SIZE; private AzureFileSystemInstrumentation instrumentation; private String metricsSourceName; private boolean isClosed = false; @@ -1361,13 +1347,10 @@ public void initialize(URI uri, Configuration conf) this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); this.workingDir = new Path("/user", UserGroupInformation.getCurrentUser() .getShortUserName()).makeQualified(getUri(), getWorkingDirectory()); - this.blockSize = conf.getLong(AZURE_BLOCK_SIZE_PROPERTY_NAME, - MAX_AZURE_BLOCK_SIZE); this.appendSupportEnabled = conf.getBoolean(APPEND_SUPPORT_ENABLE_PROPERTY_NAME, false); LOG.debug("NativeAzureFileSystem. 
Initializing."); - LOG.debug(" blockSize = {}", - conf.getLong(AZURE_BLOCK_SIZE_PROPERTY_NAME, MAX_AZURE_BLOCK_SIZE)); + LOG.debug(" blockSize = {}", store.getHadoopBlockSize()); // Initialize thread counts from user configuration deleteThreadCount = conf.getInt(AZURE_DELETE_THREADS, DEFAULT_AZURE_DELETE_THREADS); @@ -1491,7 +1474,7 @@ private static String removeTrailingSlash(String key) { } } - private static Path keyToPath(String key) { + static Path keyToPath(String key) { if (key.equals("/")) { return new Path("/"); // container } @@ -1599,7 +1582,7 @@ public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throw new FileNotFoundException(f.toString()); } - if (meta.isDir()) { + if (meta.isDirectory()) { throw new FileNotFoundException(f.toString() + " is a directory not a file."); } @@ -1815,7 +1798,7 @@ protected FSDataOutputStream createInternal(Path f, FsPermission permission, FileMetadata existingMetadata = store.retrieveMetadata(key); if (existingMetadata != null) { - if (existingMetadata.isDir()) { + if (existingMetadata.isDirectory()) { throw new FileAlreadyExistsException("Cannot create file " + f + "; already exists as a directory."); } @@ -1833,7 +1816,7 @@ protected FSDataOutputStream createInternal(Path f, FsPermission permission, // already exists. String parentKey = pathToKey(parentFolder); FileMetadata parentMetadata = store.retrieveMetadata(parentKey); - if (parentMetadata != null && parentMetadata.isDir() && + if (parentMetadata != null && parentMetadata.isDirectory() && parentMetadata.getBlobMaterialization() == BlobMaterialization.Explicit) { if (parentFolderLease != null) { store.updateFolderLastModifiedTime(parentKey, parentFolderLease); @@ -1850,7 +1833,7 @@ protected FSDataOutputStream createInternal(Path f, FsPermission permission, firstExisting = firstExisting.getParent(); metadata = store.retrieveMetadata(pathToKey(firstExisting)); } - mkdirs(parentFolder, metadata.getPermissionStatus().getPermission(), true); + mkdirs(parentFolder, metadata.getPermission(), true); } } @@ -1988,7 +1971,7 @@ private boolean deleteWithAuthEnabled(Path f, boolean recursive, + parentPath + " whose metadata cannot be retrieved. Can't resolve"); } - if (!parentMetadata.isDir()) { + if (!parentMetadata.isDirectory()) { // Invalid state: the parent path is actually a file. Throw. throw new AzureException("File " + f + " has a parent directory " + parentPath + " which is also a file. Can't resolve."); @@ -1997,7 +1980,7 @@ private boolean deleteWithAuthEnabled(Path f, boolean recursive, // The path exists, determine if it is a folder containing objects, // an empty folder, or a simple file and take the appropriate actions. - if (!metaFile.isDir()) { + if (!metaFile.isDirectory()) { // The path specifies a file. We need to check the parent path // to make sure it's a proper materialized directory before we // delete the file. Otherwise we may get into a situation where @@ -2114,9 +2097,9 @@ private boolean deleteWithAuthEnabled(Path f, boolean recursive, AzureFileSystemThreadTask task = new AzureFileSystemThreadTask() { @Override public boolean execute(FileMetadata file) throws IOException{ - if (!deleteFile(file.getKey(), file.isDir())) { + if (!deleteFile(file.getKey(), file.isDirectory())) { LOG.warn("Attempt to delete non-existent {} {}", - file.isDir() ? "directory" : "file", + file.isDirectory() ? 
"directory" : "file", file.getKey()); } return true; @@ -2138,7 +2121,7 @@ public boolean execute(FileMetadata file) throws IOException{ // Delete the current directory if all underlying contents are deleted if (isPartialDelete || (store.retrieveMetadata(metaFile.getKey()) != null - && !deleteFile(metaFile.getKey(), metaFile.isDir()))) { + && !deleteFile(metaFile.getKey(), metaFile.isDirectory()))) { LOG.error("Failed delete directory : {}", f); return false; } @@ -2191,7 +2174,7 @@ private boolean deleteWithoutAuth(Path f, boolean recursive, // The path exists, determine if it is a folder containing objects, // an empty folder, or a simple file and take the appropriate actions. - if (!metaFile.isDir()) { + if (!metaFile.isDirectory()) { // The path specifies a file. We need to check the parent path // to make sure it's a proper materialized directory before we // delete the file. Otherwise we may get into a situation where @@ -2234,7 +2217,7 @@ private boolean deleteWithoutAuth(Path f, boolean recursive, + parentPath + " whose metadata cannot be retrieved. Can't resolve"); } - if (!parentMetadata.isDir()) { + if (!parentMetadata.isDirectory()) { // Invalid state: the parent path is actually a file. Throw. throw new AzureException("File " + f + " has a parent directory " + parentPath + " which is also a file. Can't resolve."); @@ -2319,38 +2302,27 @@ private boolean deleteWithoutAuth(Path f, boolean recursive, } } - // List all the blobs in the current folder. - String priorLastKey = null; - // Start time for list operation long start = Time.monotonicNow(); - ArrayList fileMetadataList = new ArrayList(); + final FileMetadata[] contents; // List all the files in the folder with AZURE_UNBOUNDED_DEPTH depth. - do { - try { - PartialListing listing = store.listAll(key, AZURE_LIST_ALL, - AZURE_UNBOUNDED_DEPTH, priorLastKey); - for(FileMetadata file : listing.getFiles()) { - fileMetadataList.add(file); - } - priorLastKey = listing.getPriorLastKey(); - } catch (IOException e) { - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - return false; - } + try { + contents = store.list(key, AZURE_LIST_ALL, + AZURE_UNBOUNDED_DEPTH); + } catch (IOException e) { + Throwable innerException = checkForAzureStorageException(e); - throw e; + if (innerException instanceof StorageException + && isFileNotFoundException((StorageException) innerException)) { + return false; } - } while (priorLastKey != null); - long end = Time.monotonicNow(); - LOG.debug("Time taken to list {} blobs for delete operation: {} ms", fileMetadataList.size(), (end - start)); + throw e; + } - final FileMetadata[] contents = fileMetadataList.toArray(new FileMetadata[fileMetadataList.size()]); + long end = Time.monotonicNow(); + LOG.debug("Time taken to list {} blobs for delete operation: {} ms", contents.length, (end - start)); if (contents.length > 0) { if (!recursive) { @@ -2365,9 +2337,9 @@ private boolean deleteWithoutAuth(Path f, boolean recursive, AzureFileSystemThreadTask task = new AzureFileSystemThreadTask() { @Override public boolean execute(FileMetadata file) throws IOException{ - if (!deleteFile(file.getKey(), file.isDir())) { + if (!deleteFile(file.getKey(), file.isDirectory())) { LOG.warn("Attempt to delete non-existent {} {}", - file.isDir() ? "directory" : "file", + file.isDirectory() ? 
"directory" : "file", file.getKey()); } return true; @@ -2384,7 +2356,7 @@ public boolean execute(FileMetadata file) throws IOException{ // Delete the current directory if (store.retrieveMetadata(metaFile.getKey()) != null - && !deleteFile(metaFile.getKey(), metaFile.isDir())) { + && !deleteFile(metaFile.getKey(), metaFile.isDirectory())) { LOG.error("Failed delete directory : {}", f); return false; } @@ -2456,13 +2428,13 @@ private boolean getFolderContentsToDelete(FileMetadata folderToDelete, boolean isPartialDelete = false; - Path pathToDelete = makeAbsolute(keyToPath(folderToDelete.getKey())); + Path pathToDelete = makeAbsolute(folderToDelete.getPath()); foldersToProcess.push(folderToDelete); while (!foldersToProcess.empty()) { FileMetadata currentFolder = foldersToProcess.pop(); - Path currentPath = makeAbsolute(keyToPath(currentFolder.getKey())); + Path currentPath = makeAbsolute(currentFolder.getPath()); boolean canDeleteChildren = true; // If authorization is enabled, check for 'write' permission on current folder @@ -2478,8 +2450,8 @@ private boolean getFolderContentsToDelete(FileMetadata folderToDelete, if (canDeleteChildren) { // get immediate children list - ArrayList fileMetadataList = getChildrenMetadata(currentFolder.getKey(), - maxListingDepth); + FileMetadata[] fileMetadataList = store.list(currentFolder.getKey(), + AZURE_LIST_ALL, maxListingDepth); // Process children of currentFolder and add them to list of contents // that can be deleted. We Perform stickybit check on every file and @@ -2490,12 +2462,12 @@ private boolean getFolderContentsToDelete(FileMetadata folderToDelete, // This file/folder cannot be deleted and neither can the parent paths be deleted. // Remove parent paths from list of contents that can be deleted. canDeleteChildren = false; - Path filePath = makeAbsolute(keyToPath(childItem.getKey())); + Path filePath = makeAbsolute(childItem.getPath()); LOG.error("User does not have permissions to delete {}. " + "Parent directory has sticky bit set.", filePath); } else { // push the child directories to the stack to process their contents - if (childItem.isDir()) { + if (childItem.isDirectory()) { foldersToProcess.push(childItem); } // Add items to list of contents that can be deleted. 
@@ -2540,23 +2512,6 @@ private boolean getFolderContentsToDelete(FileMetadata folderToDelete, return isPartialDelete; } - private ArrayList getChildrenMetadata(String key, int maxListingDepth) - throws IOException { - - String priorLastKey = null; - ArrayList fileMetadataList = new ArrayList(); - do { - PartialListing listing = store.listAll(key, AZURE_LIST_ALL, - maxListingDepth, priorLastKey); - for (FileMetadata file : listing.getFiles()) { - fileMetadataList.add(file); - } - priorLastKey = listing.getPriorLastKey(); - } while (priorLastKey != null); - - return fileMetadataList; - } - private boolean isStickyBitCheckViolated(FileMetadata metaData, FileMetadata parentMetadata, boolean throwOnException) throws IOException { try { @@ -2602,13 +2557,13 @@ private boolean isStickyBitCheckViolated(FileMetadata metaData, } // stickybit is not set on parent and hence cannot be violated - if (!parentMetadata.getPermissionStatus().getPermission().getStickyBit()) { + if (!parentMetadata.getPermission().getStickyBit()) { return false; } String currentUser = UserGroupInformation.getCurrentUser().getShortUserName(); - String parentDirectoryOwner = parentMetadata.getPermissionStatus().getUserName(); - String currentFileOwner = metaData.getPermissionStatus().getUserName(); + String parentDirectoryOwner = parentMetadata.getOwner(); + String currentFileOwner = metaData.getOwner(); // Files/Folders with no owner set will not pass stickybit check if ((parentDirectoryOwner.equalsIgnoreCase(currentUser)) @@ -2687,7 +2642,15 @@ private FileStatus getFileStatusInternal(Path f) throws FileNotFoundException, I Path absolutePath = makeAbsolute(f); String key = pathToKey(absolutePath); if (key.length() == 0) { // root always exists - return newDirectory(null, absolutePath); + return new FileStatus( + 0, + true, + 1, + store.getHadoopBlockSize(), + 0, + 0, + FsPermission.getDefault(), "", "", + absolutePath.makeQualified(getUri(), getWorkingDirectory())); } // The path is either a folder or a file. Retrieve metadata to @@ -2709,7 +2672,7 @@ private FileStatus getFileStatusInternal(Path f) throws FileNotFoundException, I } if (meta != null) { - if (meta.isDir()) { + if (meta.isDirectory()) { // The path is a folder with files in it. // @@ -2723,14 +2686,14 @@ private FileStatus getFileStatusInternal(Path f) throws FileNotFoundException, I } // Return reference to the directory object. - return newDirectory(meta, absolutePath); + return updateFileStatusPath(meta, absolutePath); } // The path is a file. LOG.debug("Found the path: {} as a file.", f.toString()); // Return with reference to a file object. - return newFile(meta, absolutePath); + return updateFileStatusPath(meta, absolutePath); } // File not found. Throw exception no such file or directory. 
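The reworked isStickyBitCheckViolated above reads owner and permission directly from FileMetadata (getOwner, getPermission) rather than via getPermissionStatus(); the rule it enforces is unchanged. A simplified plain-Java restatement is sketched below, with the owners and user passed in as strings (a hypothetical simplification: the real method also handles missing owners and a throwOnException flag).

// Simplified restatement of the sticky-bit rule enforced above; inputs are
// plain values rather than FileMetadata, and null/empty-owner handling is omitted.
final class StickyBitRuleSketch {

  /** Returns true when deletion of the child entry should be refused. */
  static boolean violatesStickyBit(boolean parentHasStickyBit,
                                   String parentOwner,
                                   String childOwner,
                                   String currentUser) {
    if (!parentHasStickyBit) {
      // Sticky bit not set on the parent, so it cannot be violated.
      return false;
    }
    // With the sticky bit set, only the owner of the parent directory or the
    // owner of the entry itself may remove or rename it.
    return !(currentUser.equalsIgnoreCase(parentOwner)
        || currentUser.equalsIgnoreCase(childOwner));
  }
}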
@@ -2787,7 +2750,7 @@ public URI getUri() { performAuthCheck(absolutePath, WasbAuthorizationOperations.READ, "liststatus", absolutePath); String key = pathToKey(absolutePath); - Set status = new TreeSet(); + FileMetadata meta = null; try { meta = store.retrieveMetadata(key); @@ -2804,101 +2767,93 @@ public URI getUri() { throw ex; } - if (meta != null) { - if (!meta.isDir()) { - - LOG.debug("Found path as a file"); - - return new FileStatus[] { newFile(meta, absolutePath) }; - } - - String partialKey = null; - PartialListing listing = null; - - try { - listing = store.list(key, AZURE_LIST_ALL, 1, partialKey); - } catch (IOException ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { + if (meta == null) { + // There is no metadata found for the path. + LOG.debug("Did not find any metadata for path: {}", key); + throw new FileNotFoundException(f + " is not found"); + } - throw new FileNotFoundException(String.format("%s is not found", key)); - } + if (!meta.isDirectory()) { + LOG.debug("Found path as a file"); + return new FileStatus[] { updateFileStatusPath(meta, absolutePath) }; + } - throw ex; - } - // NOTE: We don't check for Null condition as the Store API should return - // an empty list if there are not listing. + FileMetadata[] listing; - // For any -RenamePending.json files in the listing, - // push the rename forward. - boolean renamed = conditionalRedoFolderRenames(listing); + listing = listWithErrorHandling(key, AZURE_LIST_ALL, 1); - // If any renames were redone, get another listing, - // since the current one may have changed due to the redo. - if (renamed) { - listing = null; - try { - listing = store.list(key, AZURE_LIST_ALL, 1, partialKey); - } catch (IOException ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); + // NOTE: We don't check for Null condition as the Store API should return + // an empty list if there are not listing. - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { + // For any -RenamePending.json files in the listing, + // push the rename forward. + boolean renamed = conditionalRedoFolderRenames(listing); - throw new FileNotFoundException(String.format("%s is not found", key)); - } + // If any renames were redone, get another listing, + // since the current one may have changed due to the redo. + if (renamed) { + listing = listWithErrorHandling(key, AZURE_LIST_ALL, 1); + } - throw ex; - } - } + // We only need to check for AZURE_TEMP_FOLDER if the key is the root, + // and if it is not the root we also know the exact size of the array + // of FileStatus. - // NOTE: We don't check for Null condition as the Store API should return - // and empty list if there are not listing. + FileMetadata[] result = null; - for (FileMetadata fileMetadata : listing.getFiles()) { - Path subpath = keyToPath(fileMetadata.getKey()); + if (key.equals("/")) { + ArrayList status = new ArrayList<>(listing.length); - // Test whether the metadata represents a file or directory and - // add the appropriate metadata object. - // - // Note: There was a very old bug here where directories were added - // to the status set as files flattening out recursive listings - // using "-lsr" down the file system hierarchy. 
- if (fileMetadata.isDir()) { + for (FileMetadata fileMetadata : listing) { + if (fileMetadata.isDirectory()) { // Make sure we hide the temp upload folder if (fileMetadata.getKey().equals(AZURE_TEMP_FOLDER)) { // Don't expose that. continue; } - status.add(newDirectory(fileMetadata, subpath)); + status.add(updateFileStatusPath(fileMetadata, fileMetadata.getPath())); } else { - status.add(newFile(fileMetadata, subpath)); + status.add(updateFileStatusPath(fileMetadata, fileMetadata.getPath())); } } + result = status.toArray(new FileMetadata[0]); + } else { + for (int i = 0; i < listing.length; i++) { + FileMetadata fileMetadata = listing[i]; + listing[i] = updateFileStatusPath(fileMetadata, fileMetadata.getPath()); + } + result = listing; + } - LOG.debug("Found path as a directory with {}" - + " files in it.", status.size()); + LOG.debug("Found path as a directory with {}" + + " files in it.", result.length); - } else { - // There is no metadata found for the path. - LOG.debug("Did not find any metadata for path: {}", key); + return result; + } - throw new FileNotFoundException(f + " is not found"); + private FileMetadata[] listWithErrorHandling(String prefix, final int maxListingCount, + final int maxListingDepth) throws IOException { + try { + return store.list(prefix, maxListingCount, maxListingDepth); + } catch (IOException ex) { + Throwable innerException + = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); + if (innerException instanceof StorageException + && NativeAzureFileSystemHelper.isFileNotFoundException( + (StorageException) innerException)) { + throw new FileNotFoundException(String.format("%s is not found", prefix)); + } + throw ex; } - - return status.toArray(new FileStatus[0]); } // Redo any folder renames needed if there are rename pending files in the // directory listing. Return true if one or more redo operations were done. - private boolean conditionalRedoFolderRenames(PartialListing listing) + private boolean conditionalRedoFolderRenames(FileMetadata[] listing) throws IllegalArgumentException, IOException { boolean renamed = false; - for (FileMetadata fileMetadata : listing.getFiles()) { - Path subpath = keyToPath(fileMetadata.getKey()); + for (FileMetadata fileMetadata : listing) { + Path subpath = fileMetadata.getPath(); if (isRenamePendingFile(subpath)) { FolderRenamePending pending = new FolderRenamePending(subpath, this); @@ -2914,32 +2869,11 @@ private boolean isRenamePendingFile(Path path) { return path.toString().endsWith(FolderRenamePending.SUFFIX); } - private FileStatus newFile(FileMetadata meta, Path path) { - return new FileStatus ( - meta.getLength(), - false, - 1, - blockSize, - meta.getLastModified(), - 0, - meta.getPermissionStatus().getPermission(), - meta.getPermissionStatus().getUserName(), - meta.getPermissionStatus().getGroupName(), - path.makeQualified(getUri(), getWorkingDirectory())); - } - - private FileStatus newDirectory(FileMetadata meta, Path path) { - return new FileStatus ( - 0, - true, - 1, - blockSize, - meta == null ? 0 : meta.getLastModified(), - 0, - meta == null ? FsPermission.getDefault() : meta.getPermissionStatus().getPermission(), - meta == null ? "" : meta.getPermissionStatus().getUserName(), - meta == null ? 
"" : meta.getPermissionStatus().getGroupName(), - path.makeQualified(getUri(), getWorkingDirectory())); + private FileMetadata updateFileStatusPath(FileMetadata meta, Path path) { + meta.setPath(path.makeQualified(getUri(), getWorkingDirectory())); + // reduce memory use by setting the internal-only key to null + meta.removeKey(); + return meta; } private static enum UMaskApplyMode { @@ -3000,8 +2934,8 @@ private Path getAncestor(Path f) throws IOException { String currentKey = pathToKey(current); FileMetadata currentMetadata = store.retrieveMetadata(currentKey); - if (currentMetadata != null && currentMetadata.isDir()) { - Path ancestor = keyToPath(currentMetadata.getKey()); + if (currentMetadata != null && currentMetadata.isDirectory()) { + Path ancestor = currentMetadata.getPath(); LOG.debug("Found ancestor {}, for path: {}", ancestor.toString(), f.toString()); return ancestor; } @@ -3052,7 +2986,7 @@ public boolean mkdirs(Path f, FsPermission permission, boolean noUmask) throws I current = parent, parent = current.getParent()) { String currentKey = pathToKey(current); FileMetadata currentMetadata = store.retrieveMetadata(currentKey); - if (currentMetadata != null && !currentMetadata.isDir()) { + if (currentMetadata != null && !currentMetadata.isDirectory()) { throw new FileAlreadyExistsException("Cannot create directory " + f + " because " + current + " is an existing file."); } else if (currentMetadata == null) { @@ -3099,7 +3033,7 @@ public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundExcepti if (meta == null) { throw new FileNotFoundException(f.toString()); } - if (meta.isDir()) { + if (meta.isDirectory()) { throw new FileNotFoundException(f.toString() + " is a directory not a file."); } @@ -3120,7 +3054,7 @@ public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundExcepti } return new FSDataInputStream(new BufferedFSInputStream( - new NativeAzureFsInputStream(inputStream, key, meta.getLength()), bufferSize)); + new NativeAzureFsInputStream(inputStream, key, meta.getLen()), bufferSize)); } @Override @@ -3196,7 +3130,7 @@ public boolean rename(Path src, Path dst) throws FileNotFoundException, IOExcept } } - if (dstMetadata != null && dstMetadata.isDir()) { + if (dstMetadata != null && dstMetadata.isDirectory()) { // It's an existing directory. performAuthCheck(absoluteDstPath, WasbAuthorizationOperations.WRITE, "rename", absoluteDstPath); @@ -3232,7 +3166,7 @@ public boolean rename(Path src, Path dst) throws FileNotFoundException, IOExcept LOG.debug("Parent of the destination {}" + " doesn't exist, failing the rename.", dst); return false; - } else if (!parentOfDestMetadata.isDir()) { + } else if (!parentOfDestMetadata.isDirectory()) { LOG.debug("Parent of the destination {}" + " is a file, failing the rename.", dst); return false; @@ -3261,7 +3195,7 @@ public boolean rename(Path src, Path dst) throws FileNotFoundException, IOExcept // Source doesn't exist LOG.debug("Source {} doesn't exist, failing the rename.", src); return false; - } else if (!srcMetadata.isDir()) { + } else if (!srcMetadata.isDirectory()) { LOG.debug("Source {} found as a file, renaming.", src); try { // HADOOP-15086 - file rename must ensure that the destination does @@ -3335,7 +3269,7 @@ private void updateParentFolderLastModifiedTime(String key) // single file. In this case, the parent folder no longer exists if the // file is renamed; so we can safely ignore the null pointer case. 
if (parentMetadata != null) { - if (parentMetadata.isDir() + if (parentMetadata.isDirectory() && parentMetadata.getBlobMaterialization() == BlobMaterialization.Implicit) { store.storeEmptyFolder(parentKey, createPermissionStatus(FsPermission.getDefault())); @@ -3511,7 +3445,7 @@ public void setPermission(Path p, FsPermission permission) throws FileNotFoundEx && !isAllowedUser(currentUgi.getShortUserName(), daemonUsers)) { //Check if the user is the owner of the file. - String owner = metadata.getPermissionStatus().getUserName(); + String owner = metadata.getOwner(); if (!currentUgi.getShortUserName().equals(owner)) { throw new WasbAuthorizationException( String.format("user '%s' does not have the privilege to " @@ -3522,16 +3456,16 @@ public void setPermission(Path p, FsPermission permission) throws FileNotFoundEx } permission = applyUMask(permission, - metadata.isDir() ? UMaskApplyMode.ChangeExistingDirectory + metadata.isDirectory() ? UMaskApplyMode.ChangeExistingDirectory : UMaskApplyMode.ChangeExistingFile); if (metadata.getBlobMaterialization() == BlobMaterialization.Implicit) { // It's an implicit folder, need to materialize it. store.storeEmptyFolder(key, createPermissionStatus(permission)); - } else if (!metadata.getPermissionStatus().getPermission(). + } else if (!metadata.getPermission(). equals(permission)) { store.changePermissionStatus(key, new PermissionStatus( - metadata.getPermissionStatus().getUserName(), - metadata.getPermissionStatus().getGroupName(), + metadata.getOwner(), + metadata.getGroup(), permission)); } } @@ -3579,10 +3513,10 @@ public void setOwner(Path p, String username, String groupname) PermissionStatus newPermissionStatus = new PermissionStatus( username == null ? - metadata.getPermissionStatus().getUserName() : username, + metadata.getOwner() : username, groupname == null ? - metadata.getPermissionStatus().getGroupName() : groupname, - metadata.getPermissionStatus().getPermission()); + metadata.getGroup() : groupname, + metadata.getPermission()); if (metadata.getBlobMaterialization() == BlobMaterialization.Implicit) { // It's an implicit folder, need to materialize it. store.storeEmptyFolder(key, newPermissionStatus); @@ -3778,30 +3712,26 @@ private void handleFilesWithDanglingTempData(Path root, AZURE_TEMP_EXPIRY_DEFAULT) * 1000; // Go over all the blobs under the given root and look for blobs to // recover. - String priorLastKey = null; - do { - PartialListing listing = store.listAll(pathToKey(root), AZURE_LIST_ALL, - AZURE_UNBOUNDED_DEPTH, priorLastKey); - - for (FileMetadata file : listing.getFiles()) { - if (!file.isDir()) { // We don't recover directory blobs - // See if this blob has a link in it (meaning it's a place-holder - // blob for when the upload to the temp blob is complete). - String link = store.getLinkInFileMetadata(file.getKey()); - if (link != null) { - // It has a link, see if the temp blob it is pointing to is - // existent and old enough to be considered dangling. - FileMetadata linkMetadata = store.retrieveMetadata(link); - if (linkMetadata != null - && linkMetadata.getLastModified() >= cutoffForDangling) { - // Found one! - handler.handleFile(file, linkMetadata); - } + FileMetadata[] listing = store.list(pathToKey(root), AZURE_LIST_ALL, + AZURE_UNBOUNDED_DEPTH); + + for (FileMetadata file : listing) { + if (!file.isDirectory()) { // We don't recover directory blobs + // See if this blob has a link in it (meaning it's a place-holder + // blob for when the upload to the temp blob is complete). 
+ String link = store.getLinkInFileMetadata(file.getKey()); + if (link != null) { + // It has a link, see if the temp blob it is pointing to is + // existent and old enough to be considered dangling. + FileMetadata linkMetadata = store.retrieveMetadata(link); + if (linkMetadata != null + && linkMetadata.getModificationTime() >= cutoffForDangling) { + // Found one! + handler.handleFile(file, linkMetadata); } } } - priorLastKey = listing.getPriorLastKey(); - } while (priorLastKey != null); + } } /** @@ -3888,7 +3818,7 @@ public String getOwnerForPath(Path absolutePath) throws IOException { meta = store.retrieveMetadata(key); if (meta != null) { - owner = meta.getPermissionStatus().getUserName(); + owner = meta.getOwner(); LOG.debug("Retrieved '{}' as owner for path - {}", owner, absolutePath); } else { // meta will be null if file/folder doen not exist diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java index b67ab1b297b..36e3819c32b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java @@ -58,20 +58,21 @@ DataOutputStream storefile(String keyEncoded, boolean isAtomicRenameKey(String key); + /** + * Returns the file block size. This is a fake value used for integration + * of the Azure store with Hadoop. + * @return The file block size. + */ + long getHadoopBlockSize(); + void storeEmptyLinkFile(String key, String tempBlobKey, PermissionStatus permissionStatus) throws AzureException; String getLinkInFileMetadata(String key) throws AzureException; - PartialListing list(String prefix, final int maxListingCount, + FileMetadata[] list(String prefix, final int maxListingCount, final int maxListingDepth) throws IOException; - PartialListing list(String prefix, final int maxListingCount, - final int maxListingDepth, String priorLastKey) throws IOException; - - PartialListing listAll(String prefix, final int maxListingCount, - final int maxListingDepth, String priorLastKey) throws IOException; - void changePermissionStatus(String key, PermissionStatus newPermission) throws AzureException; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java index 6e98755e77e..591c2ec50de 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java @@ -29,11 +29,13 @@ import java.io.IOException; import java.io.OutputStream; import java.util.Arrays; +import java.util.Locale; import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.fs.azure.StorageInterface.CloudPageBlobWrapper; import org.apache.commons.lang3.exception.ExceptionUtils; @@ -52,7 +54,7 @@ * An output stream that write file data to a page blob stored using ASV's * custom format. 
*/ -final class PageBlobOutputStream extends OutputStream implements Syncable { +final class PageBlobOutputStream extends OutputStream implements Syncable, StreamCapabilities { /** * The maximum number of raw bytes Azure Storage allows us to upload in a * single request (4 MB). @@ -195,6 +197,23 @@ private void checkStreamState() throws IOException { } } + /** + * Query the stream for a specific capability. + * + * @param capability string to query the stream support for. + * @return true for hsync and hflush. + */ + @Override + public boolean hasCapability(String capability) { + switch (capability.toLowerCase(Locale.ENGLISH)) { + case StreamCapabilities.HSYNC: + case StreamCapabilities.HFLUSH: + return true; + default: + return false; + } + } + /** * Closes this output stream and releases any system resources associated with * this stream. If any data remains in the buffer it is committed to the diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PartialListing.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PartialListing.java deleted file mode 100644 index 4a80d2ef8f0..00000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PartialListing.java +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - *

- * Holds information on a directory listing for a {@link NativeFileSystemStore}.
- * This includes the {@link FileMetadata files} and directories (their names)
- * contained in a directory.
- * </p>
- * <p>
- * This listing may be returned in chunks, so a <code>priorLastKey</code> is
- * provided so that the next chunk may be requested.
- * </p>
- * - * @see NativeFileSystemStore#list(String, int, String) - */ -@InterfaceAudience.Private -class PartialListing { - - private final String priorLastKey; - private final FileMetadata[] files; - private final String[] commonPrefixes; - - public PartialListing(String priorLastKey, FileMetadata[] files, - String[] commonPrefixes) { - this.priorLastKey = priorLastKey; - this.files = files; - this.commonPrefixes = commonPrefixes; - } - - public FileMetadata[] getFiles() { - return files; - } - - public String[] getCommonPrefixes() { - return commonPrefixes; - } - - public String getPriorLastKey() { - return priorLastKey; - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestListPerformance.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestListPerformance.java new file mode 100644 index 00000000000..e7a3fa88511 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestListPerformance.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import com.microsoft.azure.storage.blob.CloudBlobContainer; +import com.microsoft.azure.storage.blob.CloudBlockBlob; +import org.junit.Assume; +import org.junit.FixMethodOrder; +import org.junit.Test; +import org.junit.runners.MethodSorters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; +import org.apache.hadoop.fs.contract.ContractTestUtils; + +/** + * Test list performance. 
+ */ +@FixMethodOrder(MethodSorters.NAME_ASCENDING) + +public class ITestListPerformance extends AbstractAzureScaleTest { + private static final Logger LOG = LoggerFactory.getLogger( + ITestListPerformance.class); + + private static final Path TEST_DIR_PATH = new Path( + "DirectoryWithManyFiles"); + + private static final int NUMBER_OF_THREADS = 10; + private static final int NUMBER_OF_FILES_PER_THREAD = 1000; + + private int threads; + + private int filesPerThread; + + private int expectedFileCount; + + @Override + public void setUp() throws Exception { + super.setUp(); + Configuration conf = getConfiguration(); + // fail fast + threads = AzureTestUtils.getTestPropertyInt(conf, + "fs.azure.scale.test.list.performance.threads", NUMBER_OF_THREADS); + filesPerThread = AzureTestUtils.getTestPropertyInt(conf, + "fs.azure.scale.test.list.performance.files", NUMBER_OF_FILES_PER_THREAD); + expectedFileCount = threads * filesPerThread; + LOG.info("Thread = {}, Files per Thread = {}, expected files = {}", + threads, filesPerThread, expectedFileCount); + conf.set("fs.azure.io.retry.max.retries", "1"); + conf.set("fs.azure.delete.threads", "16"); + createTestAccount(); + } + + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create( + "itestlistperformance", + EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), + null, + true); + } + + @Test + public void test_0101_CreateDirectoryWithFiles() throws Exception { + Assume.assumeFalse("Test path exists; skipping", fs.exists(TEST_DIR_PATH)); + + ExecutorService executorService = Executors.newFixedThreadPool(threads); + CloudBlobContainer container = testAccount.getRealContainer(); + + final String basePath = (fs.getWorkingDirectory().toUri().getPath() + "/" + TEST_DIR_PATH + "/").substring(1); + + ArrayList> tasks = new ArrayList<>(threads); + fs.mkdirs(TEST_DIR_PATH); + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + for (int i = 0; i < threads; i++) { + tasks.add( + new Callable() { + public Integer call() { + int written = 0; + for (int j = 0; j < filesPerThread; j++) { + String blobName = basePath + UUID.randomUUID().toString(); + try { + CloudBlockBlob blob = container.getBlockBlobReference( + blobName); + blob.uploadText(""); + written ++; + } catch (Exception e) { + LOG.error("Filed to write {}", blobName, e); + break; + } + } + LOG.info("Thread completed with {} files written", written); + return written; + } + } + ); + } + + List> futures = executorService.invokeAll(tasks, + getTestTimeoutMillis(), TimeUnit.MILLISECONDS); + long elapsedMs = timer.elapsedTimeMs(); + LOG.info("time to create files: {} millis", elapsedMs); + + for (Future future : futures) { + assertTrue("Future timed out", future.isDone()); + assertEquals("Future did not write all files timed out", + filesPerThread, future.get().intValue()); + } + } + + @Test + public void test_0200_ListStatusPerformance() throws Exception { + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + FileStatus[] fileList = fs.listStatus(TEST_DIR_PATH); + long elapsedMs = timer.elapsedTimeMs(); + LOG.info(String.format( + "files=%1$d, elapsedMs=%2$d", + fileList.length, + elapsedMs)); + Map foundInList =new HashMap<>(expectedFileCount); + + for (FileStatus fileStatus : fileList) { + foundInList.put(fileStatus.getPath(), fileStatus); + LOG.info("{}: {}", fileStatus.getPath(), + fileStatus.isDirectory() ? 
"dir" : "file"); + } + assertEquals("Mismatch between expected files and actual", + expectedFileCount, fileList.length); + + + // now do a listFiles() recursive + ContractTestUtils.NanoTimer initialStatusCallTimer + = new ContractTestUtils.NanoTimer(); + RemoteIterator listing + = fs.listFiles(TEST_DIR_PATH, true); + long initialListTime = initialStatusCallTimer.elapsedTimeMs(); + timer = new ContractTestUtils.NanoTimer(); + while (listing.hasNext()) { + FileStatus fileStatus = listing.next(); + Path path = fileStatus.getPath(); + FileStatus removed = foundInList.remove(path); + assertNotNull("Did not find " + path + "{} in the previous listing", + removed); + } + elapsedMs = timer.elapsedTimeMs(); + LOG.info("time for listFiles() initial call: {} millis;" + + " time to iterate: {} millis", initialListTime, elapsedMs); + assertEquals("Not all files from listStatus() were found in listFiles()", + 0, foundInList.size()); + + } + + @Test + public void test_0300_BulkDeletePerformance() throws Exception { + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + fs.delete(TEST_DIR_PATH,true); + long elapsedMs = timer.elapsedTimeMs(); + LOG.info("time for delete(): {} millis; {} nanoS per file", + elapsedMs, timer.nanosPerOperation(expectedFileCount)); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java index 87cac15d9cd..1c868ea0ff1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java @@ -39,7 +39,7 @@ extends AbstractWasbTestBase { private static final int THREAD_COUNT = 102; - private static final int TEST_EXECUTION_TIMEOUT = 5000; + private static final int TEST_EXECUTION_TIMEOUT = 30000; @Override protected AzureBlobStorageTestAccount createTestAccount() throws Exception { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java index 9ac1f734014..b8edc4b7d65 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StreamCapabilities; import org.hamcrest.core.IsEqual; import org.hamcrest.core.IsNot; import org.junit.Test; @@ -186,6 +187,20 @@ public void testPageBlobClose() throws IOException { } } + // Page Blobs have StreamCapabilities.HFLUSH and StreamCapabilities.HSYNC. 
+ @Test + public void testPageBlobCapabilities() throws IOException { + Path path = getBlobPathWithTestName(PAGE_BLOB_DIR); + try (FSDataOutputStream stream = fs.create(path)) { + assertTrue(stream.hasCapability(StreamCapabilities.HFLUSH)); + assertTrue(stream.hasCapability(StreamCapabilities.HSYNC)); + assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); + assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); + assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + stream.write(getRandomBytes()); + } + } + // Verify flush does not write data to storage for Block Blobs @Test public void testBlockBlobFlush() throws Exception { @@ -265,6 +280,20 @@ public void testBlockBlobClose() throws IOException { } } + // Block Blobs do not have any StreamCapabilities. + @Test + public void testBlockBlobCapabilities() throws IOException { + Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR); + try (FSDataOutputStream stream = fs.create(path)) { + assertFalse(stream.hasCapability(StreamCapabilities.HFLUSH)); + assertFalse(stream.hasCapability(StreamCapabilities.HSYNC)); + assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); + assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); + assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + stream.write(getRandomBytes()); + } + } + // Verify flush writes data to storage for Block Blobs with compaction @Test public void testBlockBlobCompactionFlush() throws Exception { @@ -347,6 +376,20 @@ public void testBlockBlobCompactionClose() throws IOException { } } + // Block Blobs with Compaction have StreamCapabilities.HFLUSH and HSYNC. + @Test + public void testBlockBlobCompactionCapabilities() throws IOException { + Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR); + try (FSDataOutputStream stream = fs.create(path)) { + assertTrue(stream.hasCapability(StreamCapabilities.HFLUSH)); + assertTrue(stream.hasCapability(StreamCapabilities.HSYNC)); + assertFalse(stream.hasCapability(StreamCapabilities.DROPBEHIND)); + assertFalse(stream.hasCapability(StreamCapabilities.READAHEAD)); + assertFalse(stream.hasCapability(StreamCapabilities.UNBUFFER)); + stream.write(getRandomBytes()); + } + } + // A small write does not write data to storage for Page Blobs @Test public void testPageBlobSmallWrite() throws IOException { diff --git a/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java b/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java index 9ef21b3085e..7267fdecac4 100644 --- a/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java +++ b/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java @@ -23,8 +23,8 @@ import java.util.Map.Entry; import java.util.Iterator; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.Reducer; @@ -36,7 +36,7 @@ */ public abstract class JobBase implements Mapper, Reducer { - public static final Log LOG = LogFactory.getLog("datajoin.job"); + public static final Logger LOG = LoggerFactory.getLogger("datajoin.job"); private SortedMap longCounters = null; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java 
b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java index 9a40a4933ab..e018b0b9573 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java @@ -27,8 +27,8 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.tools.util.DistCpUtils; import org.apache.hadoop.security.Credentials; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.lang.reflect.Constructor; @@ -48,7 +48,7 @@ public abstract class CopyListing extends Configured { private Credentials credentials; - static final Log LOG = LogFactory.getLog(DistCp.class); + static final Logger LOG = LoggerFactory.getLogger(DistCp.class); /** * Build listing function creates the input listing that distcp uses to * perform the copy. diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java index 909494b4bb3..4f79975ede6 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java @@ -22,8 +22,8 @@ import java.util.Random; import com.google.common.base.Preconditions; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -64,7 +64,7 @@ */ static final int SHUTDOWN_HOOK_PRIORITY = 30; - static final Log LOG = LogFactory.getLog(DistCp.class); + static final Logger LOG = LoggerFactory.getLogger(DistCp.class); @VisibleForTesting DistCpContext context; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java index 9db0eb549c2..aca5d0e414d 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java @@ -387,7 +387,10 @@ public void appendToConf(Configuration conf) { DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.TRACK_MISSING, String.valueOf(trackPath)); } - + if (numListstatusThreads > 0) { + DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.NUM_LISTSTATUS_THREADS, + Integer.toString(numListstatusThreads)); + } } /** diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java index 63c6f436e59..c97f5c52773 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -35,7 +35,7 @@ * listing-file by "globbing" all specified source paths 
(wild-cards and all.) */ public class GlobbedCopyListing extends CopyListing { - private static final Log LOG = LogFactory.getLog(GlobbedCopyListing.class); + private static final Logger LOG = LoggerFactory.getLogger(GlobbedCopyListing.class); private final CopyListing simpleListing; /** diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java index e49feb5f69f..83c6ff3e40a 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java @@ -29,8 +29,8 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import com.google.common.base.Preconditions; @@ -41,7 +41,7 @@ */ public class OptionsParser { - static final Log LOG = LogFactory.getLog(OptionsParser.class); + static final Logger LOG = LoggerFactory.getLogger(OptionsParser.class); private static final Options cliOptions = new Options(); diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java index f6c496e76c6..d91b28253d1 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; @@ -43,7 +43,7 @@ */ public class RegexCopyFilter extends CopyFilter { - private static final Log LOG = LogFactory.getLog(RegexCopyFilter.class); + private static final Logger LOG = LoggerFactory.getLogger(RegexCopyFilter.class); private File filtersFile; private List filters; @@ -77,7 +77,7 @@ public void initialize() { LOG.error("An error occurred while attempting to read from " + filtersFile); } finally { - IOUtils.cleanup(LOG, reader); + IOUtils.cleanupWithLogger(LOG, reader); } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index 8111b047571..a908e1223ae 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -20,8 +20,8 @@ import com.google.common.collect.Lists; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileStatus; @@ -60,7 +60,7 @@ * Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths. 
*/ public class SimpleCopyListing extends CopyListing { - private static final Log LOG = LogFactory.getLog(SimpleCopyListing.class); + private static final Logger LOG = LoggerFactory.getLogger(SimpleCopyListing.class); public static final int DEFAULT_FILE_STATUS_SIZE = 1000; public static final boolean DEFAULT_RANDOMIZE_FILE_LISTING = true; @@ -309,7 +309,7 @@ protected void doBuildListingWithSnapshotDiff( fileListWriter.close(); fileListWriter = null; } finally { - IOUtils.cleanup(LOG, fileListWriter); + IOUtils.cleanupWithLogger(LOG, fileListWriter); } } @@ -402,7 +402,7 @@ protected void doBuildListing(SequenceFile.Writer fileListWriter, LOG.info("Build file listing completed."); fileListWriter = null; } finally { - IOUtils.cleanup(LOG, fileListWriter); + IOUtils.cleanupWithLogger(LOG, fileListWriter); } } diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java index 07eacb0483e..38106fae9b9 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java @@ -392,6 +392,9 @@ private void deleteMissing(Configuration conf) throws IOException { Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH)); FileSystem clusterFS = sourceListing.getFileSystem(conf); Path sortedSourceListing = DistCpUtils.sortListing(conf, sourceListing); + long sourceListingCompleted = System.currentTimeMillis(); + LOG.info("Source listing completed in {}", + formatDuration(sourceListingCompleted - listingStart)); // Similarly, create the listing of target-files. Sort alphabetically. Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq"); @@ -409,8 +412,8 @@ private void deleteMissing(Configuration conf) throws IOException { // Walk both source and target file listings. // Delete all from target that doesn't also exist on source. 
long deletionStart = System.currentTimeMillis(); - LOG.info("Listing completed in {}", - formatDuration(deletionStart - listingStart)); + LOG.info("Destination listing completed in {}", + formatDuration(deletionStart - sourceListingCompleted)); long deletedEntries = 0; long filesDeleted = 0; @@ -545,9 +548,15 @@ private Path listTargetFiles(final Configuration conf, // Set up options to be the same from the CopyListing.buildListing's // perspective, so to collect similar listings as when doing the copy // + // thread count is picked up from the job + int threads = conf.getInt(DistCpConstants.CONF_LABEL_LISTSTATUS_THREADS, + DistCpConstants.DEFAULT_LISTSTATUS_THREADS); + LOG.info("Scanning destination directory {} with thread count: {}", + targetFinalPath, threads); DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath) .withOverwrite(overwrite) .withSyncFolder(syncFolder) + .withNumListstatusThreads(threads) .build(); DistCpContext distCpContext = new DistCpContext(options); distCpContext.setTargetPathExists(targetPathExists); diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java index c486bdbc231..63a61b861f9 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java @@ -23,8 +23,8 @@ import java.util.EnumSet; import org.apache.commons.lang3.exception.ExceptionUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileStatus; @@ -74,7 +74,7 @@ OVERWRITE, // Overwrite the whole file } - private static Log LOG = LogFactory.getLog(CopyMapper.class); + private static Logger LOG = LoggerFactory.getLogger(CopyMapper.class); private Configuration conf; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java index 55f90d032c8..51579bc437c 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java @@ -23,8 +23,8 @@ import java.io.OutputStream; import java.util.EnumSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; @@ -53,7 +53,7 @@ */ public class RetriableFileCopyCommand extends RetriableCommand { - private static Log LOG = LogFactory.getLog(RetriableFileCopyCommand.class); + private static Logger LOG = LoggerFactory.getLogger(RetriableFileCopyCommand.class); private boolean skipCrc = false; private FileAction action; @@ -297,7 +297,7 @@ long copyBytes(CopyListingFileStatus source2, long sourceOffset, outStream.close(); outStream = null; } finally { - IOUtils.cleanup(LOG, outStream, inStream); + IOUtils.cleanupWithLogger(LOG, outStream, inStream); } return totalBytesRead; } diff --git 
a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java index d1c18ea8d16..bee3c7b3399 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.mapred; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.IOUtils; @@ -50,8 +50,8 @@ */ public class UniformSizeInputFormat extends InputFormat { - private static final Log LOG - = LogFactory.getLog(UniformSizeInputFormat.class); + private static final Logger LOG + = LoggerFactory.getLogger(UniformSizeInputFormat.class); /** * Implementation of InputFormat::getSplits(). Returns a list of InputSplits, diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java index 23358547f5f..638c2b2c696 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.tools.mapred.lib; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; @@ -42,7 +42,7 @@ * consumed. */ class DynamicInputChunk { - private static Log LOG = LogFactory.getLog(DynamicInputChunk.class); + private static Logger LOG = LoggerFactory.getLogger(DynamicInputChunk.class); private Path chunkFilePath; private SequenceFileRecordReader reader; private SequenceFile.Writer writer; @@ -78,7 +78,7 @@ public void write(Text key, CopyListingFileStatus value) throws IOException { * Closes streams opened to the chunk-file. 
*/ public void close() { - IOUtils.cleanup(LOG, reader, writer); + IOUtils.cleanupWithLogger(LOG, reader, writer); } /** diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunkContext.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunkContext.java index 043ff1ca354..82208bdfd7a 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunkContext.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunkContext.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.mapred.lib; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.Log; +import org.slf4j.LoggerFactory; +import org.slf4j.Logger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; @@ -34,7 +34,7 @@ */ class DynamicInputChunkContext { - private static Log LOG = LogFactory.getLog(DynamicInputChunkContext.class); + private static Logger LOG = LoggerFactory.getLogger(DynamicInputChunkContext.class); private Configuration configuration; private Path chunkRootPath = null; private String chunkFilePrefix; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java index fe8604a745e..a197b47f0a9 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.mapred.lib; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.tools.DistCpConstants; @@ -49,7 +49,7 @@ * performance characteristics. */ public class DynamicInputFormat extends InputFormat { - private static final Log LOG = LogFactory.getLog(DynamicInputFormat.class); + private static final Logger LOG = LoggerFactory.getLogger(DynamicInputFormat.class); private static final String CONF_LABEL_LISTING_SPLIT_RATIO = "mapred.listing.split.ratio"; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java index 87b8f089b06..33b678aa45e 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.mapred.lib; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.tools.util.DistCpUtils; import org.apache.hadoop.tools.DistCpConstants; import org.apache.hadoop.mapreduce.*; @@ -37,7 +37,7 @@ * transparently. 
*/ public class DynamicRecordReader extends RecordReader { - private static final Log LOG = LogFactory.getLog(DynamicRecordReader.class); + private static final Logger LOG = LoggerFactory.getLogger(DynamicRecordReader.class); private TaskAttemptContext taskAttemptContext; private Configuration configuration; private DynamicInputChunk chunk; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java index 2a60e801124..2da525134b7 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java @@ -20,8 +20,8 @@ import com.google.common.collect.Maps; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileChecksum; @@ -56,7 +56,7 @@ */ public class DistCpUtils { - private static final Log LOG = LogFactory.getLog(DistCpUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(DistCpUtils.class); /** * Retrieves size of the file at the specified path. diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java index 906e1ea4dcb..6c348515946 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.util; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -34,7 +34,7 @@ * WorkReport{@literal } to the outputQueue. 
*/ public class ProducerConsumer { - private Log LOG = LogFactory.getLog(ProducerConsumer.class); + private Logger LOG = LoggerFactory.getLogger(ProducerConsumer.class); private LinkedBlockingQueue> inputQueue; private LinkedBlockingQueue> outputQueue; private ExecutorService executor; diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java index c27b2e1758e..17a80adaeb8 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java @@ -19,8 +19,8 @@ package org.apache.hadoop.tools.util; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; import org.apache.hadoop.io.retry.RetryPolicies; @@ -35,7 +35,7 @@ */ public abstract class RetriableCommand { - private static Log LOG = LogFactory.getLog(RetriableCommand.class); + private static Logger LOG = LoggerFactory.getLogger(RetriableCommand.class); private static final long DELAY_MILLISECONDS = 500; private static final int MAX_RETRIES = 3; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java index 97a6f62444b..ed7650f9659 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java @@ -20,8 +20,8 @@ import static org.mockito.Mockito.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.conf.Configuration; @@ -51,7 +51,7 @@ @RunWith(value = Parameterized.class) public class TestCopyListing extends SimpleCopyListing { - private static final Log LOG = LogFactory.getLog(TestCopyListing.class); + private static final Logger LOG = LoggerFactory.getLogger(TestCopyListing.class); private static final Credentials CREDENTIALS = new Credentials(); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java index b2266b3344d..14cce42e0f8 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSystem.java @@ -31,8 +31,8 @@ import java.util.List; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; @@ -57,8 +57,8 @@ */ public class TestDistCpSystem { - private static final Log LOG = - LogFactory.getLog(TestDistCpSystem.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestDistCpSystem.class); @Rule public Timeout globalTimeout = new Timeout(30000); diff --git 
a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpViewFs.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpViewFs.java index d6d05421a57..401cf5d942d 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpViewFs.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpViewFs.java @@ -18,9 +18,9 @@ package org.apache.hadoop.tools; -import org.apache.commons.logging.Log; +import org.slf4j.Logger; import org.apache.hadoop.fs.viewfs.*; -import org.apache.commons.logging.LogFactory; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -37,7 +37,7 @@ import java.net.URISyntaxException; public class TestDistCpViewFs { - private static final Log LOG = LogFactory.getLog(TestDistCpViewFs.class); + private static final Logger LOG = LoggerFactory.getLogger(TestDistCpViewFs.class); private static FileSystem fs; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestExternalCall.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestExternalCall.java index 8d6e28ba649..06122e6428d 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestExternalCall.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestExternalCall.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -37,7 +37,7 @@ public class TestExternalCall { - private static final Log LOG = LogFactory.getLog(TestExternalCall.class); + private static final Logger LOG = LoggerFactory.getLogger(TestExternalCall.class); private static FileSystem fs; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java index 203de1a2adf..0ec58f2a7a8 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -40,7 +40,7 @@ import java.util.Map; public class TestFileBasedCopyListing { - private static final Log LOG = LogFactory.getLog(TestFileBasedCopyListing.class); + private static final Logger LOG = LoggerFactory.getLogger(TestFileBasedCopyListing.class); private static final Credentials CREDENTIALS = new Credentials(); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java index 7574dedea2e..73cdf24789a 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools; -import 
org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -43,7 +43,7 @@ @RunWith(value = Parameterized.class) public class TestIntegration { - private static final Log LOG = LogFactory.getLog(TestIntegration.class); + private static final Logger LOG = LoggerFactory.getLogger(TestIntegration.class); private static FileSystem fs; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java index a5e0a033576..0757a66223e 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -249,7 +249,7 @@ void assertCounterInRange(Job job, Enum counter, long min, long max) Counter c = job.getCounters().findCounter(counter); long value = c.getValue(); String description = - String.format("%s value %s", c.getDisplayName(), value); + String.format("%s value %s", c.getDisplayName(), value, false); if (min >= 0) { assertTrue(description + " too below minimum " + min, @@ -523,7 +523,7 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB, DEFAULT_DISTCP_SIZE_KB); int fileSizeMb = fileSizeKb / 1024; - getLog().info("{} with file size {}", testName.getMethodName(), fileSizeMb); + getLogger().info("{} with file size {}", testName.getMethodName(), fileSizeMb); byte[] data1 = dataset((fileSizeMb + 1) * MB, 33, 43); createFile(srcFS, largeFile1, true, data1); byte[] data2 = dataset((fileSizeMb + 2) * MB, 43, 53); @@ -572,7 +572,7 @@ private Job runDistCp(final DistCpOptions options) throws Exception { private DistCpOptions buildWithStandardOptions( DistCpOptions.Builder builder) { return builder - .withNumListstatusThreads(8) + .withNumListstatusThreads(DistCpOptions.MAX_NUM_LISTSTATUS_THREADS) .build(); } diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java index e00241bafbf..8d9f8676c2c 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.mapred; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -47,7 +47,7 @@ import static org.apache.hadoop.tools.util.TestDistCpUtils.*; public class TestCopyCommitter { - private static final Log LOG = LogFactory.getLog(TestCopyCommitter.class); + private static final Logger LOG = LoggerFactory.getLogger(TestCopyCommitter.class); private static final Random rand = new Random(); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java index 
30e46836842..b4a267db6c3 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java @@ -27,8 +27,8 @@ import java.util.List; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; @@ -62,7 +62,7 @@ import static org.apache.hadoop.test.MetricsAsserts.getMetrics; public class TestCopyMapper { - private static final Log LOG = LogFactory.getLog(TestCopyMapper.class); + private static final Logger LOG = LoggerFactory.getLogger(TestCopyMapper.class); private static List pathList = new ArrayList(); private static int nFiles = 0; private static final int DEFAULT_FILE_SIZE = 1024; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java index 69cec31fcc9..4fccfe6e417 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.mapred; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.apache.hadoop.mapreduce.task.JobContextImpl; @@ -32,7 +32,7 @@ import java.io.IOException; public class TestCopyOutputFormat { - private static final Log LOG = LogFactory.getLog(TestCopyOutputFormat.class); + private static final Logger LOG = LoggerFactory.getLogger(TestCopyOutputFormat.class); @Test public void testSetCommitDirectory() { diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java index 87290caa8a9..61a17459752 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java @@ -21,8 +21,8 @@ import org.apache.hadoop.tools.DistCpConstants; import org.apache.hadoop.tools.DistCpContext; import org.junit.Assert; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -46,7 +46,7 @@ import java.util.List; public class TestDynamicInputFormat { - private static final Log LOG = LogFactory.getLog(TestDynamicInputFormat.class); + private static final Logger LOG = LoggerFactory.getLogger(TestDynamicInputFormat.class); private static MiniDFSCluster cluster; private static final int N_FILES = 1000; private static final int NUM_SPLITS = 7; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java index 
311c1b3a728..54804eb82b4 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.util; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -50,7 +50,7 @@ import static org.junit.Assert.assertTrue; public class TestDistCpUtils { - private static final Log LOG = LogFactory.getLog(TestDistCpUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(TestDistCpUtils.class); private static final Configuration config = new Configuration(); private static MiniDFSCluster cluster; diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java index 0ac89980c58..6a572177192 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java @@ -18,8 +18,8 @@ package org.apache.hadoop.tools.util; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.IOUtils; import org.junit.Assert; import org.junit.Test; @@ -27,7 +27,7 @@ import java.io.*; public class TestThrottledInputStream { - private static final Log LOG = LogFactory.getLog(TestThrottledInputStream.class); + private static final Logger LOG = LoggerFactory.getLogger(TestThrottledInputStream.class); private static final int BUFF_SIZE = 1024; private enum CB {ONE_C, BUFFER, BUFF_OFFSET} @@ -89,7 +89,7 @@ private long copyAndAssert(File tmpFile, File outFile, copyByteByByte(in, out); } - LOG.info(in); + LOG.info("{}", in); bandwidth = in.getBytesPerSec(); Assert.assertEquals(in.getTotalBytesRead(), tmpFile.length()); Assert.assertTrue(in.getBytesPerSec() > maxBandwidth / (factor * 1.2)); diff --git a/hadoop-tools/hadoop-extras/src/main/java/org/apache/hadoop/tools/DistTool.java b/hadoop-tools/hadoop-extras/src/main/java/org/apache/hadoop/tools/DistTool.java index cdd7caceaa5..fb56b90186c 100644 --- a/hadoop-tools/hadoop-extras/src/main/java/org/apache/hadoop/tools/DistTool.java +++ b/hadoop-tools/hadoop-extras/src/main/java/org/apache/hadoop/tools/DistTool.java @@ -27,8 +27,8 @@ import java.util.List; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -40,7 +40,7 @@ * An abstract class for distributed tool for file related operations. 
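
A note on the TestThrottledInputStream hunk above: this is the one spot in the DistCp changes where the migration is more than a type swap. commons-logging accepted any Object as a message, so LOG.info(in) compiled; SLF4J's Logger takes a String message, so the call becomes LOG.info("{}", in). A minimal, self-contained sketch of the idiom, assuming slf4j-api (plus a binding) on the classpath; the class and variable names here are illustrative only, not part of the patch:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Slf4jMigrationSketch {
  private static final Logger LOG =
      LoggerFactory.getLogger(Slf4jMigrationSketch.class);

  public static void main(String[] args) {
    Object in = new StringBuilder("some stream state");
    // Old commons-logging style: LOG.info(in);  -- not valid for SLF4J.
    // SLF4J wants a String message; "{}" placeholders also defer toString()
    // and string building until the INFO level is actually enabled.
    LOG.info("{}", in);
    LOG.debug("read {} bytes in {} ms", 1024, 7);
  }
}
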
*/ abstract class DistTool implements org.apache.hadoop.util.Tool { - protected static final Log LOG = LogFactory.getLog(DistTool.class); + protected static final Logger LOG = LoggerFactory.getLogger(DistTool.class); protected JobConf jobconf; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ClusterSummarizer.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ClusterSummarizer.java index cf6da253116..4aa9d715cb6 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ClusterSummarizer.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ClusterSummarizer.java @@ -18,8 +18,8 @@ package org.apache.hadoop.mapred.gridmix; import org.apache.commons.lang3.time.FastDateFormat; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; @@ -40,7 +40,7 @@ * addresses are also recorded in the summary. */ class ClusterSummarizer implements StatListener { - static final Log LOG = LogFactory.getLog(ClusterSummarizer.class); + static final Logger LOG = LoggerFactory.getLogger(ClusterSummarizer.class); private int numBlacklistedTrackers; private int numActiveTrackers; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java index bd7e878497b..71db9bfb25c 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java @@ -25,8 +25,8 @@ import java.util.HashMap; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -58,7 +58,7 @@ * This is a utility class for all the compression related modules. */ class CompressionEmulationUtil { - static final Log LOG = LogFactory.getLog(CompressionEmulationUtil.class); + static final Logger LOG = LoggerFactory.getLogger(CompressionEmulationUtil.class); /** * Enable compression usage in GridMix runs. 
diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java index 72027c1eab5..56f67e5a73c 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java @@ -18,8 +18,8 @@ package org.apache.hadoop.mapred.gridmix; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -83,8 +83,8 @@ @InterfaceAudience.Private @InterfaceStability.Evolving class DistributedCacheEmulator { - private static final Log LOG = - LogFactory.getLog(DistributedCacheEmulator.class); + private static final Logger LOG = + LoggerFactory.getLogger(DistributedCacheEmulator.class); static final long AVG_BYTES_PER_MAP = 128 * 1024 * 1024L;// 128MB diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/EchoUserResolver.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/EchoUserResolver.java index 2fcb39dcc05..43f54bc96e9 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/EchoUserResolver.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/EchoUserResolver.java @@ -22,14 +22,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Echos the UGI offered. 
*/ public class EchoUserResolver implements UserResolver { - public static final Log LOG = LogFactory.getLog(Gridmix.class); + public static final Logger LOG = LoggerFactory.getLogger(Gridmix.class); public EchoUserResolver() { LOG.info(" Current user resolver is EchoUserResolver "); diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java index 973838acb97..9ecd9e8e5da 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java @@ -20,8 +20,8 @@ import java.io.IOException; import org.apache.commons.lang3.time.FastDateFormat; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -47,7 +47,7 @@ * */ class ExecutionSummarizer implements StatListener { - static final Log LOG = LogFactory.getLog(ExecutionSummarizer.class); + static final Logger LOG = LoggerFactory.getLogger(ExecutionSummarizer.class); private static final FastDateFormat UTIL = FastDateFormat.getInstance(); private int numJobsInInputTrace; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java index ba83bd9dbbf..9a0cca380bc 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java @@ -37,8 +37,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.gridmix.RandomAlgorithms.Selector; /** @@ -47,7 +47,7 @@ */ class FilePool { - public static final Log LOG = LogFactory.getLog(FilePool.class); + public static final Logger LOG = LoggerFactory.getLogger(FilePool.class); /** * The minimum file size added to the pool. Default 128MiB. diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java index 07d8878b88e..325c15c9971 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java @@ -49,8 +49,8 @@ import org.apache.hadoop.tools.rumen.JobStory; import static org.apache.hadoop.tools.rumen.datatypes.util.MapReduceJobPropertiesParser.extractMaxHeapOpts; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Synthetic job generated from a trace description. 
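
One detail worth calling out in the EchoUserResolver hunk above (and in CommonJobTest further down): the logger is still resolved from Gridmix.class rather than the declaring class, so after the migration these messages keep appearing under the shared Gridmix logger category. A small sketch of what that category choice means in SLF4J terms; the sketch class is hypothetical and assumes slf4j-api and hadoop-gridmix on the classpath:

import org.apache.hadoop.mapred.gridmix.Gridmix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggerCategorySketch {
  // Attributed to "org.apache.hadoop.mapred.gridmix.Gridmix", so these lines
  // are filtered and grouped together with the Gridmix driver's own output.
  private static final Logger SHARED = LoggerFactory.getLogger(Gridmix.class);

  // Attributed to this class's own name instead.
  private static final Logger OWN =
      LoggerFactory.getLogger(LoggerCategorySketch.class);

  public static void main(String[] args) {
    SHARED.info("logged under the Gridmix category");
    OWN.info("logged under LoggerCategorySketch");
  }
}
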
@@ -59,7 +59,7 @@ // Gridmix job name format is GRIDMIX<6 digit sequence number> public static final String JOB_NAME_PREFIX = "GRIDMIX"; - public static final Log LOG = LogFactory.getLog(GridmixJob.class); + public static final Logger LOG = LoggerFactory.getLogger(GridmixJob.class); private static final ThreadLocal nameFormat = new ThreadLocal() { diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/InputStriper.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/InputStriper.java index 6cdcc4e4b10..4867fa4b8c1 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/InputStriper.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/InputStriper.java @@ -33,15 +33,15 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Given a {@link #FilePool}, obtain a set of files capable of satisfying * a full set of splits, then iterate over each source to fill the request. */ class InputStriper { - public static final Log LOG = LogFactory.getLog(InputStriper.class); + public static final Logger LOG = LoggerFactory.getLogger(InputStriper.class); int idx; long currentStart; FileStatus current; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobFactory.java index 73662bf8aa7..4536e513361 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobFactory.java @@ -18,8 +18,8 @@ package org.apache.hadoop.mapred.gridmix; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; @@ -52,7 +52,7 @@ */ abstract class JobFactory implements Gridmix.Component,StatListener { - public static final Log LOG = LogFactory.getLog(JobFactory.class); + public static final Logger LOG = LoggerFactory.getLogger(JobFactory.class); protected final Path scratch; protected final float rateFactor; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java index 341c49a9c99..0b06911be08 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java @@ -29,8 +29,8 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.gridmix.Statistics.JobStats; import org.apache.hadoop.mapreduce.Job; @@ -54,7 +54,7 @@ */ class JobMonitor implements Gridmix.Component { - public static final Log LOG = LogFactory.getLog(JobMonitor.class); + public static final Logger LOG = LoggerFactory.getLogger(JobMonitor.class); 
private final Queue mJobs; private ExecutorService executor; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobSubmitter.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobSubmitter.java index 868ba234811..ac41256da9e 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobSubmitter.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobSubmitter.java @@ -26,8 +26,8 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.gridmix.Statistics.JobStats; /** @@ -39,7 +39,7 @@ */ class JobSubmitter implements Gridmix.Component { - public static final Log LOG = LogFactory.getLog(JobSubmitter.class); + public static final Logger LOG = LoggerFactory.getLogger(JobSubmitter.class); private final Semaphore sem; private final Statistics statistics; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java index e50b230c487..d1229ce2d8f 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.mapred.gridmix; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; @@ -54,7 +54,7 @@ */ class LoadJob extends GridmixJob { - public static final Log LOG = LogFactory.getLog(LoadJob.class); + public static final Logger LOG = LoggerFactory.getLogger(LoadJob.class); public LoadJob(final Configuration conf, long submissionMillis, final JobStory jobdesc, Path outRoot, UserGroupInformation ugi, diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java index 494b9a11c9b..d5b7ad8c4b9 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java @@ -22,15 +22,15 @@ import java.util.Random; import org.apache.commons.lang3.RandomStringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; /** * A random text generator. The words are simply sequences of alphabets. */ class RandomTextDataGenerator { - static final Log LOG = LogFactory.getLog(RandomTextDataGenerator.class); + static final Logger LOG = LoggerFactory.getLogger(RandomTextDataGenerator.class); /** * Configuration key for random text data generator's list size. 
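
The RandomTextDataGenerator hunk above only swaps the logger, but since the class's whole purpose is to fabricate word lists from commons-lang's RandomStringUtils (imported a few lines earlier in the hunk), a rough sketch of that idea may help. This is an illustration under assumed sizes and names, not the class's actual implementation:

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.commons.lang3.RandomStringUtils;

public class RandomWordsSketch {
  // Build a fixed-size pool of random alphabetic "words" from a seed,
  // so the same seed reproduces the same word list.
  static List<String> randomWords(int count, int wordLength, long seed) {
    Random random = new Random(seed);
    List<String> words = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
      // letters only, no digits, characters drawn using the seeded Random
      words.add(
          RandomStringUtils.random(wordLength, 0, 0, true, false, null, random));
    }
    return words;
  }

  public static void main(String[] args) {
    System.out.println(randomWords(5, 10, 42L));
  }
}
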
diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java index d1b14817454..bdbfc3beec7 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/ReplayJobFactory.java @@ -23,15 +23,15 @@ import org.apache.hadoop.tools.rumen.JobStory; import org.apache.hadoop.tools.rumen.JobStoryProducer; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; class ReplayJobFactory extends JobFactory { - public static final Log LOG = LogFactory.getLog(ReplayJobFactory.class); + public static final Logger LOG = LoggerFactory.getLogger(ReplayJobFactory.class); /** * Creating a new instance does not start the thread. diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java index c06b0a24a25..28379ae6639 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.mapred.gridmix; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -34,7 +34,7 @@ import java.util.List; public class RoundRobinUserResolver implements UserResolver { - public static final Log LOG = LogFactory.getLog(RoundRobinUserResolver.class); + public static final Logger LOG = LoggerFactory.getLogger(RoundRobinUserResolver.class); private int uidx = 0; private List users = Collections.emptyList(); diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java index 471d140149e..e8c7d61a64d 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java @@ -24,8 +24,8 @@ import org.apache.hadoop.tools.rumen.JobStoryProducer; import org.apache.hadoop.mapred.gridmix.Statistics.JobStats; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.concurrent.CountDownLatch; @@ -33,7 +33,7 @@ public class SerialJobFactory extends JobFactory { - public static final Log LOG = LogFactory.getLog(SerialJobFactory.class); + public static final Logger LOG = LoggerFactory.getLogger(SerialJobFactory.class); private final Condition jobCompleted = lock.newCondition(); /** diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java 
b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java index 87a67f19a86..50261139f94 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java @@ -50,12 +50,12 @@ import org.apache.hadoop.tools.rumen.ReduceTaskAttemptInfo; import org.apache.hadoop.tools.rumen.TaskAttemptInfo; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.util.Time; public class SleepJob extends GridmixJob { - public static final Log LOG = LogFactory.getLog(SleepJob.class); + public static final Logger LOG = LoggerFactory.getLogger(SleepJob.class); private static final ThreadLocal rand = new ThreadLocal () { @Override protected Random initialValue() { diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Statistics.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Statistics.java index 915788bac17..bf73f2a1faa 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Statistics.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Statistics.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.mapred.gridmix; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; @@ -50,7 +50,7 @@ * notified either on every job completion event or some fixed time interval. 
*/ public class Statistics implements Component { - public static final Log LOG = LogFactory.getLog(Statistics.class); + public static final Logger LOG = LoggerFactory.getLogger(Statistics.class); private final StatCollector statistics = new StatCollector(); private JobClient cluster; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java index 4dcc1a2561f..bd15c2999e5 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.mapred.gridmix; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; @@ -38,7 +38,7 @@ import java.util.concurrent.atomic.AtomicBoolean; public class StressJobFactory extends JobFactory { - public static final Log LOG = LogFactory.getLog(StressJobFactory.class); + public static final Logger LOG = LoggerFactory.getLogger(StressJobFactory.class); private final LoadStatus loadStatus = new LoadStatus(); /** diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java index d0d552ad243..32ddad9913a 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java @@ -21,14 +21,14 @@ import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Resolves all UGIs to the submitting user. 
*/ public class SubmitterUserResolver implements UserResolver { - public static final Log LOG = LogFactory.getLog(SubmitterUserResolver.class); + public static final Logger LOG = LoggerFactory.getLogger(SubmitterUserResolver.class); private UserGroupInformation ugi = null; diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/CommonJobTest.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/CommonJobTest.java index 6a45ba7fb6b..73c03fd948a 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/CommonJobTest.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/CommonJobTest.java @@ -33,8 +33,8 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; @@ -57,7 +57,7 @@ import org.apache.hadoop.util.ToolRunner; public class CommonJobTest { - public static final Log LOG = LogFactory.getLog(Gridmix.class); + public static final Logger LOG = LoggerFactory.getLogger(Gridmix.class); protected static int NJOBS = 2; protected static final long GENDATA = 1; // in megabytes diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java index e9d23a3d233..8c109fff254 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java @@ -40,12 +40,12 @@ import java.util.concurrent.TimeUnit; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class DebugJobProducer implements JobStoryProducer { - public static final Log LOG = LogFactory.getLog(DebugJobProducer.class); + public static final Logger LOG = LoggerFactory.getLogger(DebugJobProducer.class); final ArrayList submitted; private final Configuration conf; private final AtomicInteger numJobs; diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java index 3da5d9d934e..b3dbd545710 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java @@ -16,8 +16,8 @@ */ package org.apache.hadoop.mapred.gridmix; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.permission.FsPermission; @@ -35,7 +35,7 @@ * This is a test class. 
*/ public class GridmixTestUtils { - private static final Log LOG = LogFactory.getLog(GridmixTestUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(GridmixTestUtils.class); static final Path DEST = new Path("/gridmix"); static FileSystem dfs = null; static MiniDFSCluster dfsCluster = null; diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFilePool.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFilePool.java index a75414accc0..f4a4ee60647 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFilePool.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFilePool.java @@ -28,8 +28,8 @@ import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -40,7 +40,7 @@ public class TestFilePool { - static final Log LOG = LogFactory.getLog(TestFileQueue.class); + static final Logger LOG = LoggerFactory.getLogger(TestFileQueue.class); static final int NFILES = 26; static final Path base = getBaseDir(); diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFileQueue.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFileQueue.java index e68e83f6c6b..41925655d13 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFileQueue.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestFileQueue.java @@ -26,8 +26,8 @@ import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -36,7 +36,7 @@ public class TestFileQueue { - static final Log LOG = LogFactory.getLog(TestFileQueue.class); + static final Logger LOG = LoggerFactory.getLogger(TestFileQueue.class); static final int NFILES = 4; static final int BLOCK = 256; static final Path[] paths = new Path[NFILES]; diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridMixClasses.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridMixClasses.java index 397494cf7fc..362b35e5de6 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridMixClasses.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridMixClasses.java @@ -30,8 +30,8 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.CustomOutputCommitter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -85,7 +85,7 @@ import static org.junit.Assert.*; public class TestGridMixClasses { - private static final Log LOG = LogFactory.getLog(TestGridMixClasses.class); + private static final Logger LOG = LoggerFactory.getLogger(TestGridMixClasses.class); /* * simple test LoadSplit (getters,copy, write, read...) 
diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixRecord.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixRecord.java index b3b72778f77..ddd98df94cc 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixRecord.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixRecord.java @@ -23,8 +23,8 @@ import org.junit.Test; import static org.junit.Assert.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; @@ -32,7 +32,7 @@ import org.apache.hadoop.io.WritableUtils; public class TestGridmixRecord { - private static final Log LOG = LogFactory.getLog(TestGridmixRecord.class); + private static final Logger LOG = LoggerFactory.getLogger(TestGridmixRecord.class); static void lengthTest(GridmixRecord x, GridmixRecord y, int min, int max) throws Exception { diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestRecordFactory.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestRecordFactory.java index 2ab244472c4..5050e133c07 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestRecordFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestRecordFactory.java @@ -22,14 +22,14 @@ import org.junit.Test; import static org.junit.Assert.*; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataOutputBuffer; public class TestRecordFactory { - private static final Log LOG = LogFactory.getLog(TestRecordFactory.class); + private static final Logger LOG = LoggerFactory.getLogger(TestRecordFactory.class); public static void testFactory(long targetBytes, long targetRecs) throws Exception { diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/DeskewedJobTraceReader.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/DeskewedJobTraceReader.java index e651ae8e05e..369c54b6ca4 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/DeskewedJobTraceReader.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/DeskewedJobTraceReader.java @@ -27,8 +27,8 @@ import java.util.TreeMap; import java.util.TreeSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class DeskewedJobTraceReader implements Closeable { // underlying engine @@ -57,8 +57,8 @@ private final PriorityQueue skewBuffer; - static final private Log LOG = - LogFactory.getLog(DeskewedJobTraceReader.class); + static final private Logger LOG = + LoggerFactory.getLogger(DeskewedJobTraceReader.class); static private class JobComparator implements Comparator, Serializable { diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java index cf6643cb63e..424405aa7e6 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java +++ 
b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Folder.java @@ -30,8 +30,8 @@ import java.util.Random; import java.util.Set; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; @@ -52,7 +52,7 @@ private int skewBufferLength = 0; private long startsAfter = -1; - static final private Log LOG = LogFactory.getLog(Folder.class); + static final private Logger LOG = LoggerFactory.getLogger(Folder.class); private DeskewedJobTraceReader reader = null; private Outputter outGen = null; diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HadoopLogsAnalyzer.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HadoopLogsAnalyzer.java index eceb98d2cc2..381199a0b6a 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HadoopLogsAnalyzer.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HadoopLogsAnalyzer.java @@ -36,8 +36,8 @@ import java.util.regex.Pattern; import com.fasterxml.jackson.core.JsonProcessingException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; @@ -139,7 +139,7 @@ private Histogram successfulNthReducerAttempts; private Histogram mapperLocality; - static final private Log LOG = LogFactory.getLog(HadoopLogsAnalyzer.class); + static final private Logger LOG = LoggerFactory.getLogger(HadoopLogsAnalyzer.class); private int[] attemptTimesPercentiles; diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HistoryEventEmitter.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HistoryEventEmitter.java index 2103709f54e..ab4ea71f9ba 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HistoryEventEmitter.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HistoryEventEmitter.java @@ -22,13 +22,13 @@ import java.util.List; import java.util.Queue; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent; abstract class HistoryEventEmitter { - static final private Log LOG = LogFactory.getLog(HistoryEventEmitter.class); + static final private Logger LOG = LoggerFactory.getLogger(HistoryEventEmitter.class); abstract List nonFinalSEEs(); diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java index 890f388d4a3..1213e6a46f4 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java @@ -26,8 +26,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.TaskStatus; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.TaskType; @@ -69,7 +69,7 
@@ private static final long BYTES_IN_MEG = StringUtils.TraditionalBinaryPrefix.string2long("1m"); - static final private Log LOG = LogFactory.getLog(JobBuilder.class); + static final private Logger LOG = LoggerFactory.getLogger(JobBuilder.class); private String jobID; diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedJob.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedJob.java index dcd854968ac..0d19ee4eb3c 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedJob.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedJob.java @@ -25,8 +25,8 @@ import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.security.authorize.AccessControlList; @@ -37,7 +37,7 @@ */ public class ParsedJob extends LoggedJob { - private static final Log LOG = LogFactory.getLog(ParsedJob.class); + private static final Logger LOG = LoggerFactory.getLogger(ParsedJob.class); private Map totalCountersMap = new HashMap(); private Map mapCountersMap = new HashMap(); diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTask.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTask.java index 11cf12d042f..a05af643fd8 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTask.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTask.java @@ -22,8 +22,8 @@ import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapreduce.jobhistory.JhCounters; /** @@ -33,7 +33,7 @@ */ public class ParsedTask extends LoggedTask { - private static final Log LOG = LogFactory.getLog(ParsedTask.class); + private static final Logger LOG = LoggerFactory.getLogger(ParsedTask.class); private String diagnosticInfo; private String failedDueToAttempt; diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTaskAttempt.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTaskAttempt.java index 6374368b720..f0e7bdd6d00 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTaskAttempt.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedTaskAttempt.java @@ -21,8 +21,8 @@ import java.util.HashMap; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.mapreduce.jobhistory.JhCounters; /** @@ -32,7 +32,7 @@ */ public class ParsedTaskAttempt extends LoggedTaskAttempt { - private static final Log LOG = LogFactory.getLog(ParsedTaskAttempt.class); + private static final Logger LOG = LoggerFactory.getLogger(ParsedTaskAttempt.class); private String diagnosticInfo; private String trackerName; diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/RandomSeedGenerator.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/RandomSeedGenerator.java index 014fb6c33d2..ecd5f0bbfc1 100644 --- 
a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/RandomSeedGenerator.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/RandomSeedGenerator.java @@ -21,8 +21,8 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * The purpose of this class is to generate new random seeds from a master @@ -42,7 +42,7 @@ * http://www.iro.umontreal.ca/~lecuyer/myftp/streams00/ */ public class RandomSeedGenerator { - private static Log LOG = LogFactory.getLog(RandomSeedGenerator.class); + private static Logger LOG = LoggerFactory.getLogger(RandomSeedGenerator.class); private static final Charset UTF_8 = Charset.forName("UTF-8"); /** MD5 algorithm instance, one for each thread. */ diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TraceBuilder.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TraceBuilder.java index 2fb52931232..b402a16cee0 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TraceBuilder.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TraceBuilder.java @@ -30,8 +30,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileStatus; @@ -49,7 +49,7 @@ * The main driver of the Rumen Parser. */ public class TraceBuilder extends Configured implements Tool { - static final private Log LOG = LogFactory.getLog(TraceBuilder.class); + static final private Logger LOG = LoggerFactory.getLogger(TraceBuilder.class); static final int RUN_METHOD_FAILED_EXIT_CODE = 3; @@ -310,6 +310,6 @@ void processJobHistory(JobHistoryParser parser, JobBuilder jobBuilder) } void finish() { - IOUtils.cleanup(LOG, traceWriter, topologyWriter); + IOUtils.cleanupWithLogger(LOG, traceWriter, topologyWriter); } } diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ZombieJob.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ZombieJob.java index 64008403d7e..36625aa0c77 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ZombieJob.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ZombieJob.java @@ -23,8 +23,8 @@ import java.util.Random; import java.util.HashMap; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TaskStatus.State; @@ -49,7 +49,7 @@ */ @SuppressWarnings("deprecation") public class ZombieJob implements JobStory { - static final Log LOG = LogFactory.getLog(ZombieJob.class); + static final Logger LOG = LoggerFactory.getLogger(ZombieJob.class); private final LoggedJob job; private Map loggedTaskMap; private Map loggedTaskAttemptMap; diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java index e85973258a0..1e83e405597 100644 --- 
a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java @@ -60,6 +60,7 @@ import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.api.records.Resource; @@ -298,30 +299,20 @@ private void startNM() throws YarnException, IOException, SLSConfiguration.NM_RESOURCE_UTILIZATION_RATIO, SLSConfiguration.NM_RESOURCE_UTILIZATION_RATIO_DEFAULT); // nm information (fetch from topology file, or from sls/rumen json file) - Map nodeResourceMap = new HashMap<>(); - Set nodeSet; + Set nodeSet = null; if (nodeFile.isEmpty()) { for (String inputTrace : inputTraces) { switch (inputType) { case SLS: nodeSet = SLSUtils.parseNodesFromSLSTrace(inputTrace); - for (String node : nodeSet) { - nodeResourceMap.put(node, null); - } break; case RUMEN: nodeSet = SLSUtils.parseNodesFromRumenTrace(inputTrace); - for (String node : nodeSet) { - nodeResourceMap.put(node, null); - } break; case SYNTH: stjp = new SynthTraceJobProducer(getConf(), new Path(inputTraces[0])); nodeSet = SLSUtils.generateNodes(stjp.getNumNodes(), stjp.getNumNodes()/stjp.getNodesPerRack()); - for (String node : nodeSet) { - nodeResourceMap.put(node, null); - } break; default: throw new YarnException("Input configuration not recognized, " @@ -329,11 +320,11 @@ private void startNM() throws YarnException, IOException, } } } else { - nodeResourceMap = SLSUtils.parseNodesFromNodeFile(nodeFile, + nodeSet = SLSUtils.parseNodesFromNodeFile(nodeFile, nodeManagerResource); } - if (nodeResourceMap.size() == 0) { + if (nodeSet == null || nodeSet.isEmpty()) { throw new YarnException("No node! Please configure nodes."); } @@ -344,20 +335,21 @@ private void startNM() throws YarnException, IOException, SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT); ExecutorService executorService = Executors. 
newFixedThreadPool(threadPoolSize); - for (Map.Entry entry : nodeResourceMap.entrySet()) { + for (NodeDetails nodeDetails : nodeSet) { executorService.submit(new Runnable() { @Override public void run() { try { // we randomize the heartbeat start time from zero to 1 interval NMSimulator nm = new NMSimulator(); Resource nmResource = nodeManagerResource; - String hostName = entry.getKey(); - if (entry.getValue() != null) { - nmResource = entry.getValue(); + String hostName = nodeDetails.getHostname(); + if (nodeDetails.getNodeResource() != null) { + nmResource = nodeDetails.getNodeResource(); } + Set nodeLabels = nodeDetails.getLabels(); nm.init(hostName, nmResource, random.nextInt(heartbeatInterval), - heartbeatInterval, rm, resourceUtilizationRatio); + heartbeatInterval, rm, resourceUtilizationRatio, nodeLabels); nmMap.put(nm.getNode().getNodeID(), nm); runner.schedule(nm); rackSet.add(nm.getNode().getRackName()); @@ -452,6 +444,11 @@ private void createAMForJob(Map jsonJob) throws YarnException { jsonJob.get(SLSConfiguration.JOB_END_MS).toString()); } + String jobLabelExpr = null; + if (jsonJob.containsKey(SLSConfiguration.JOB_LABEL_EXPR)) { + jobLabelExpr = jsonJob.get(SLSConfiguration.JOB_LABEL_EXPR).toString(); + } + String user = (String) jsonJob.get(SLSConfiguration.JOB_USER); if (user == null) { user = "default"; @@ -481,7 +478,8 @@ private void createAMForJob(Map jsonJob) throws YarnException { for (int i = 0; i < jobCount; i++) { runNewAM(amType, user, queue, oldAppId, jobStartTime, jobFinishTime, - getTaskContainers(jsonJob), getAMContainerResource(jsonJob)); + getTaskContainers(jsonJob), getAMContainerResource(jsonJob), + jobLabelExpr); } } @@ -730,7 +728,7 @@ private void startAMFromSynthGenerator() throws YarnException, IOException { runNewAM(job.getType(), user, jobQueue, oldJobId, jobStartTimeMS, jobFinishTimeMS, containerList, reservationId, - job.getDeadline(), getAMContainerResource(null), + job.getDeadline(), getAMContainerResource(null), null, job.getParams()); } } @@ -775,15 +773,24 @@ private void runNewAM(String jobType, String user, Resource amContainerResource) { runNewAM(jobType, user, jobQueue, oldJobId, jobStartTimeMS, jobFinishTimeMS, containerList, null, -1, - amContainerResource, null); + amContainerResource, null, null); } private void runNewAM(String jobType, String user, String jobQueue, String oldJobId, long jobStartTimeMS, long jobFinishTimeMS, List containerList, - ReservationId reservationId, long deadline, Resource amContainerResource, - Map params) { + Resource amContainerResource, String labelExpr) { + runNewAM(jobType, user, jobQueue, oldJobId, jobStartTimeMS, + jobFinishTimeMS, containerList, null, -1, + amContainerResource, labelExpr, null); + } + @SuppressWarnings("checkstyle:parameternumber") + private void runNewAM(String jobType, String user, + String jobQueue, String oldJobId, long jobStartTimeMS, + long jobFinishTimeMS, List containerList, + ReservationId reservationId, long deadline, Resource amContainerResource, + String labelExpr, Map params) { AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance( amClassMap.get(jobType), new Configuration()); @@ -799,7 +806,7 @@ private void runNewAM(String jobType, String user, AM_ID++; amSim.init(heartbeatInterval, containerList, rm, this, jobStartTimeMS, jobFinishTimeMS, user, jobQueue, isTracked, oldJobId, - runner.getStartTimeMS(), amContainerResource, params); + runner.getStartTimeMS(), amContainerResource, labelExpr, params); if(reservationId != null) { // if we have a ReservationId, 
delegate reservation creation to // AMSim (reservation shape is impl specific) @@ -985,4 +992,42 @@ static void printUsage() { System.err.println(); } + /** + * Class to encapsulate all details about the node. + */ + @Private + @Unstable + public static class NodeDetails { + private String hostname; + private Resource nodeResource; + private Set labels; + + public NodeDetails(String nodeHostname) { + this.hostname = nodeHostname; + } + + public String getHostname() { + return hostname; + } + + public void setHostname(String hostname) { + this.hostname = hostname; + } + + public Resource getNodeResource() { + return nodeResource; + } + + public void setNodeResource(Resource nodeResource) { + this.nodeResource = nodeResource; + } + + public Set getLabels() { + return labels; + } + + public void setLabels(Set labels) { + this.labels = labels; + } + } } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java index 8e1c256c636..5f34cfccfb8 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java @@ -88,6 +88,8 @@ private int responseId = 0; // user name private String user; + // nodelabel expression + private String nodeLabelExpression; // queue name protected String queue; // am type @@ -123,7 +125,8 @@ public void init(int heartbeatInterval, List containerList, ResourceManager resourceManager, SLSRunner slsRunnner, long startTime, long finishTime, String simUser, String simQueue, boolean tracked, String oldApp, long baseTimeMS, - Resource amResource, Map params) { + Resource amResource, String nodeLabelExpr, + Map params) { super.init(startTime, startTime + 1000000L * heartbeatInterval, heartbeatInterval); this.user = simUser; @@ -136,6 +139,7 @@ public void init(int heartbeatInterval, this.traceStartTimeMS = startTime; this.traceFinishTimeMS = finishTime; this.amContainerResource = amResource; + this.nodeLabelExpression = nodeLabelExpr; } /** @@ -327,6 +331,9 @@ private void submitApp(ReservationId reservationId) conLauContext.setServiceData(new HashMap<>()); appSubContext.setAMContainerSpec(conLauContext); appSubContext.setResource(amContainerResource); + if (nodeLabelExpression != null) { + appSubContext.setNodeLabelExpression(nodeLabelExpression); + } if(reservationId != null) { appSubContext.setReservationID(reservationId); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java index 6f0f85ff904..71fc5b2772d 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java @@ -126,10 +126,11 @@ public void init(int heartbeatInterval, List containerList, ResourceManager rm, SLSRunner se, long traceStartTime, long traceFinishTime, String user, String queue, boolean isTracked, String oldAppId, long baselineStartTimeMS, - Resource amContainerResource, Map params) { + Resource amContainerResource, String nodeLabelExpr, + Map params) { super.init(heartbeatInterval, containerList, rm, se, traceStartTime, traceFinishTime, user, queue, isTracked, oldAppId, - baselineStartTimeMS, amContainerResource, params); + 
baselineStartTimeMS, amContainerResource, nodeLabelExpr, params); amtype = "mapreduce"; // get map/reduce tasks diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/StreamAMSimulator.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/StreamAMSimulator.java index b41f5f20296..862e5ec0ac0 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/StreamAMSimulator.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/StreamAMSimulator.java @@ -96,10 +96,11 @@ public void init(int heartbeatInterval, List containerList, ResourceManager rm, SLSRunner se, long traceStartTime, long traceFinishTime, String user, String queue, boolean isTracked, String oldAppId, long baselineStartTimeMS, - Resource amContainerResource, Map params) { + Resource amContainerResource, String nodeLabelExpr, + Map params) { super.init(heartbeatInterval, containerList, rm, se, traceStartTime, traceFinishTime, user, queue, isTracked, oldAppId, baselineStartTimeMS, - amContainerResource, params); + amContainerResource, nodeLabelExpr, params); amtype = "stream"; allStreams.addAll(containerList); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/conf/SLSConfiguration.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/conf/SLSConfiguration.java index ea73befc170..09f653f375f 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/conf/SLSConfiguration.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/conf/SLSConfiguration.java @@ -104,6 +104,7 @@ public static Resource getAMContainerResource(Configuration conf) { public static final String JOB_START_MS = JOB_PREFIX + "start.ms"; public static final String JOB_END_MS = JOB_PREFIX + "end.ms"; public static final String JOB_QUEUE_NAME = JOB_PREFIX + "queue.name"; + public static final String JOB_LABEL_EXPR = JOB_PREFIX + "label.expression"; public static final String JOB_USER = JOB_PREFIX + "user"; public static final String JOB_COUNT = JOB_PREFIX + "count"; public static final String JOB_TASKS = JOB_PREFIX + "tasks"; diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java index 428a8397ed1..6a8430ef416 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.DelayQueue; @@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -78,7 +80,7 @@ public void init(String nodeIdStr, Resource nodeResource, int dispatchTime, int heartBeatInterval, ResourceManager pRm, - float pResourceUtilizationRatio) + float pResourceUtilizationRatio, Set labels) throws IOException, YarnException { super.init(dispatchTime, dispatchTime + 1000000L * 
heartBeatInterval, heartBeatInterval); @@ -102,6 +104,7 @@ public void init(String nodeIdStr, Resource nodeResource, int dispatchTime, Records.newRecord(RegisterNodeManagerRequest.class); req.setNodeId(node.getNodeID()); req.setResource(node.getTotalCapability()); + req.setNodeLabels(labels); req.setHttpPort(80); RegisterNodeManagerResponse response = this.rm.getResourceTrackerService() .registerNodeManager(req); @@ -109,6 +112,14 @@ public void init(String nodeIdStr, Resource nodeResource, int dispatchTime, this.resourceUtilizationRatio = pResourceUtilizationRatio; } + public void init(String nodeIdStr, Resource nodeResource, int dispatchTime, + int heartBeatInterval, ResourceManager pRm, + float pResourceUtilizationRatio) + throws IOException, YarnException { + init(nodeIdStr, nodeResource, dispatchTime, heartBeatInterval, pRm, + pResourceUtilizationRatio, null); + } + @Override public void firstStep() { // do nothing diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index 0c99139d547..2eee3517170 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.sls.nodemanager; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -32,12 +33,14 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NodeAttribute; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode @@ -218,6 +221,16 @@ public Integer getDecommissioningTimeout() { return null; } + @Override + public Set getAllNodeAttributes() { + return Collections.emptySet(); + } + + @Override + public RMContext getRMContext() { + return null; + } + @Override public Resource getPhysicalResource() { return null; diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/resourcemanager/MockAMLauncher.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/resourcemanager/MockAMLauncher.java index b4ffb617c65..9fb83ec24db 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/resourcemanager/MockAMLauncher.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/resourcemanager/MockAMLauncher.java @@ -18,8 +18,8 @@ package org.apache.hadoop.yarn.sls.resourcemanager; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.token.Token; import 
org.apache.hadoop.yarn.api.records.ApplicationId; @@ -42,7 +42,7 @@ public class MockAMLauncher extends ApplicationMasterLauncher implements EventHandler { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( MockAMLauncher.class); Map amMap; diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index 78645e98d68..c73fb15be00 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -24,12 +24,14 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeAttribute; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode @@ -206,6 +208,16 @@ public Integer getDecommissioningTimeout() { return node.getAllocationTagsWithCount(); } + @Override + public Set getAllNodeAttributes() { + return node.getAllNodeAttributes(); + } + + @Override + public RMContext getRMContext() { + return node.getRMContext(); + } + @Override public Resource getPhysicalResource() { return null; diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerMetrics.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerMetrics.java index b8bc8be4315..2957d239f13 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerMetrics.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerMetrics.java @@ -32,7 +32,6 @@ import java.util.SortedMap; import java.util.Locale; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.Lock; @@ -48,6 +47,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.concurrent.HadoopScheduledThreadPoolExecutor; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; @@ -169,7 +169,7 @@ void init(ResourceScheduler resourceScheduler, Configuration config) web.start(); // a thread to update histogram timer - pool = new ScheduledThreadPoolExecutor(2); + pool = new HadoopScheduledThreadPoolExecutor(2); pool.scheduleAtFixedRate(new HistogramsRunnable(), 0, 1000, TimeUnit.MILLISECONDS); @@ -518,7 +518,8 @@ public 
void run() { @Override public void run() { - if(running) { + SchedulerWrapper wrapper = (SchedulerWrapper) scheduler; + if(running && wrapper.getTracker().getQueueSet() != null) { // all WebApp to get real tracking json String trackingMetrics = web.generateRealTimeTrackingMetrics(); // output diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthJob.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthJob.java index 21dec9658f5..86a1dcf8b53 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthJob.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthJob.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.yarn.sls.synthetic; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.commons.math3.random.JDKRandomGenerator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.JobConf; @@ -51,7 +51,7 @@ public class SynthJob implements JobStory { @SuppressWarnings("StaticVariableName") - private static Log LOG = LogFactory.getLog(SynthJob.class); + private static Logger LOG = LoggerFactory.getLogger(SynthJob.class); private static final long MIN_MEMORY = 1024; private static final long MIN_VCORES = 1; diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java index fa6f1fc69d1..7a3e22bd4cb 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.yarn.sls.synthetic; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.commons.math3.distribution.AbstractRealDistribution; import org.apache.commons.math3.random.JDKRandomGenerator; import org.apache.hadoop.conf.Configuration; @@ -53,7 +53,7 @@ public class SynthTraceJobProducer implements JobStoryProducer { @SuppressWarnings("StaticVariableName") - private static final Log LOG = LogFactory.getLog(SynthTraceJobProducer.class); + private static final Logger LOG = LoggerFactory.getLogger(SynthTraceJobProducer.class); private final Configuration conf; private final AtomicInteger numJobs; diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java index f2129d0141f..8bb4871e5b4 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java @@ -23,7 +23,6 @@ import java.io.InputStreamReader; import java.io.Reader; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -41,8 +40,11 @@ import org.apache.hadoop.tools.rumen.LoggedJob; import org.apache.hadoop.tools.rumen.LoggedTask; import org.apache.hadoop.tools.rumen.LoggedTaskAttempt; +import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceInformation; +import 
org.apache.hadoop.yarn.client.util.YarnClientUtils; +import org.apache.hadoop.yarn.sls.SLSRunner.NodeDetails; import org.apache.hadoop.yarn.sls.conf.SLSConfiguration; import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.resource.Resources; @@ -52,6 +54,10 @@ public class SLSUtils { public final static String DEFAULT_JOB_TYPE = "mapreduce"; + private static final String LABEL_FORMAT_ERR_MSG = + "Input format for adding node-labels is not correct, it should be " + + "labelName1[(exclusive=true/false)],labelName2[] .."; + // hostname includes the network path and the host name. for example // "/default-rack/hostFoo" or "/coreSwitchA/TORSwitchB/hostBar". // the function returns two Strings, the first element is the network @@ -66,9 +72,9 @@ /** * parse the rumen trace file, return each host name */ - public static Set parseNodesFromRumenTrace(String jobTrace) - throws IOException { - Set nodeSet = new HashSet(); + public static Set parseNodesFromRumenTrace( + String jobTrace) throws IOException { + Set nodeSet = new HashSet<>(); File fin = new File(jobTrace); Configuration conf = new Configuration(); @@ -85,7 +91,8 @@ } LoggedTaskAttempt taskAttempt = mapTask.getAttempts() .get(mapTask.getAttempts().size() - 1); - nodeSet.add(taskAttempt.getHostName().getValue()); + nodeSet.add(new NodeDetails( + taskAttempt.getHostName().getValue())); } for(LoggedTask reduceTask : job.getReduceTasks()) { if (reduceTask.getAttempts().size() == 0) { @@ -93,7 +100,8 @@ } LoggedTaskAttempt taskAttempt = reduceTask.getAttempts() .get(reduceTask.getAttempts().size() - 1); - nodeSet.add(taskAttempt.getHostName().getValue()); + nodeSet.add(new NodeDetails( + taskAttempt.getHostName().getValue())); } } } finally { @@ -106,9 +114,9 @@ /** * parse the sls trace file, return each host name */ - public static Set parseNodesFromSLSTrace(String jobTrace) - throws IOException { - Set nodeSet = new HashSet<>(); + public static Set parseNodesFromSLSTrace( + String jobTrace) throws IOException { + Set nodeSet = new HashSet<>(); JsonFactory jsonF = new JsonFactory(); ObjectMapper mapper = new ObjectMapper(); Reader input = @@ -124,7 +132,8 @@ return nodeSet; } - private static void addNodes(Set nodeSet, Map jsonEntry) { + private static void addNodes(Set nodeSet, + Map jsonEntry) { if (jsonEntry.containsKey(SLSConfiguration.NUM_NODES)) { int numNodes = Integer.parseInt( jsonEntry.get(SLSConfiguration.NUM_NODES).toString()); @@ -142,7 +151,7 @@ private static void addNodes(Set nodeSet, Map jsonEntry) { Map jsonTask = (Map) o; String hostname = (String) jsonTask.get(SLSConfiguration.TASK_HOST); if (hostname != null) { - nodeSet.add(hostname); + nodeSet.add(new NodeDetails(hostname)); } } } @@ -150,10 +159,11 @@ private static void addNodes(Set nodeSet, Map jsonEntry) { /** * parse the input node file, return each host name + * sample input: label1(exclusive=true),label2(exclusive=false),label3 */ - public static Map parseNodesFromNodeFile(String nodeFile, - Resource nmDefaultResource) throws IOException { - Map nodeResourceMap = new HashMap<>(); + public static Set parseNodesFromNodeFile( + String nodeFile, Resource nmDefaultResource) throws IOException { + Set nodeSet = new HashSet<>(); JsonFactory jsonF = new JsonFactory(); ObjectMapper mapper = new ObjectMapper(); Reader input = @@ -166,6 +176,8 @@ private static void addNodes(Set nodeSet, Map jsonEntry) { List tasks = (List) jsonE.get("nodes"); for (Object o : tasks) { Map jsonNode = (Map) o; + NodeDetails nodeDetails = new NodeDetails( 
+ rack + "/" + jsonNode.get("node")); Resource nodeResource = Resources.clone(nmDefaultResource); ResourceInformation[] infors = ResourceUtils.getResourceTypesArray(); for (ResourceInformation info : infors) { @@ -174,18 +186,25 @@ private static void addNodes(Set nodeSet, Map jsonEntry) { Integer.parseInt(jsonNode.get(info.getName()).toString())); } } - nodeResourceMap.put(rack + "/" + jsonNode.get("node"), nodeResource); + nodeDetails.setNodeResource(nodeResource); + if (jsonNode.get("labels") != null) { + Set nodeLabels = new HashSet<>( + YarnClientUtils.buildNodeLabelsFromStr( + jsonNode.get("labels").toString())); + nodeDetails.setLabels(nodeLabels); + } + nodeSet.add(nodeDetails); } } } finally { input.close(); } - return nodeResourceMap; + return nodeSet; } - public static Set generateNodes(int numNodes, + public static Set generateNodes(int numNodes, int numRacks){ - Set nodeSet = new HashSet<>(); + Set nodeSet = new HashSet<>(); if (numRacks < 1) { numRacks = 1; } @@ -195,7 +214,8 @@ private static void addNodes(Set nodeSet, Map jsonEntry) { } for (int i = 0; i < numNodes; i++) { - nodeSet.add("/rack" + i % numRacks + "/node" + i); + nodeSet.add(new NodeDetails( + "/rack" + i % numRacks + "/node" + i)); } return nodeSet; } diff --git a/hadoop-tools/hadoop-sls/src/site/markdown/SchedulerLoadSimulator.md b/hadoop-tools/hadoop-sls/src/site/markdown/SchedulerLoadSimulator.md index 9df49985507..e487267e02e 100644 --- a/hadoop-tools/hadoop-sls/src/site/markdown/SchedulerLoadSimulator.md +++ b/hadoop-tools/hadoop-sls/src/site/markdown/SchedulerLoadSimulator.md @@ -27,7 +27,7 @@ YARN Scheduler Load Simulator (SLS) * [Metrics](#Metrics) * [Real-time Tracking](#Real-time_Tracking) * [Offline Analysis](#Offline_Analysis) - * [Synthetic Load Generator](#SynthGen) + * [Synthetic Load Generator](#Synthetic_Load_Generator) * [Appendix](#Appendix) * [Resources](#Resources) * [SLS JSON input file format](#SLS_JSON_input_file_format) diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/TestAMSimulator.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/TestAMSimulator.java index bc8ea70e46b..2efa8464417 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/TestAMSimulator.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/TestAMSimulator.java @@ -19,10 +19,13 @@ import com.codahale.metrics.MetricRegistry; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ReservationId; +import org.apache.hadoop.yarn.client.cli.RMAdminCLI; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.sls.conf.SLSConfiguration; @@ -42,6 +45,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.concurrent.ConcurrentMap; @RunWith(Parameterized.class) public class TestAMSimulator { @@ -73,6 +77,7 @@ public void setup() { conf.set(SLSConfiguration.METRICS_OUTPUT_DIR, metricOutputDir.toString()); conf.set(YarnConfiguration.RM_SCHEDULER, slsScheduler.getName()); 
conf.set(SLSConfiguration.RM_SCHEDULER, scheduler.getName()); + conf.set(YarnConfiguration.NODE_LABELS_ENABLED, "true"); conf.setBoolean(SLSConfiguration.METRICS_SWITCH, true); rm = new ResourceManager(); rm.init(conf); @@ -140,7 +145,7 @@ public void testAMSimulator() throws Exception { String queue = "default"; List containers = new ArrayList<>(); app.init(1000, containers, rm, null, 0, 1000000L, "user1", queue, true, - appId, 0, SLSConfiguration.getAMContainerResource(conf), null); + appId, 0, SLSConfiguration.getAMContainerResource(conf), null, null); app.firstStep(); verifySchedulerMetrics(appId); @@ -152,6 +157,34 @@ public void testAMSimulator() throws Exception { app.lastStep(); } + @Test + public void testAMSimulatorWithNodeLabels() throws Exception { + if (scheduler.equals(CapacityScheduler.class)) { + // add label to the cluster + RMAdminCLI rmAdminCLI = new RMAdminCLI(conf); + String[] args = {"-addToClusterNodeLabels", "label1"}; + rmAdminCLI.run(args); + + MockAMSimulator app = new MockAMSimulator(); + String appId = "app1"; + String queue = "default"; + List containers = new ArrayList<>(); + app.init(1000, containers, rm, null, 0, 1000000L, "user1", queue, true, + appId, 0, SLSConfiguration.getAMContainerResource(conf), + "label1", null); + app.firstStep(); + + verifySchedulerMetrics(appId); + + ConcurrentMap rmApps = + rm.getRMContext().getRMApps(); + Assert.assertEquals(1, rmApps.size()); + RMApp rmApp = rmApps.get(app.appId); + Assert.assertNotNull(rmApp); + Assert.assertEquals("label1", rmApp.getAmNodeLabelExpression()); + } + } + @After public void tearDown() { if (rm != null) { diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/utils/TestSLSUtils.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/utils/TestSLSUtils.java index 5e586b13bec..c59c2af81bf 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/utils/TestSLSUtils.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/utils/TestSLSUtils.java @@ -18,13 +18,13 @@ package org.apache.hadoop.yarn.sls.utils; -import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.NodeLabel; +import org.apache.hadoop.yarn.sls.SLSRunner.NodeDetails; import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.Assert; import org.junit.Test; import java.util.HashSet; -import java.util.Map; import java.util.Set; public class TestSLSUtils { @@ -45,28 +45,54 @@ public void testGetRackHostname() { @Test public void testParseNodesFromNodeFile() throws Exception { String nodeFile = "src/test/resources/nodes.json"; - Map nodeResourceMap = SLSUtils.parseNodesFromNodeFile( + Set nodeDetails = SLSUtils.parseNodesFromNodeFile( nodeFile, Resources.createResource(1024, 2)); - Assert.assertEquals(20, nodeResourceMap.size()); + Assert.assertEquals(20, nodeDetails.size()); nodeFile = "src/test/resources/nodes-with-resources.json"; - nodeResourceMap = SLSUtils.parseNodesFromNodeFile( + nodeDetails = SLSUtils.parseNodesFromNodeFile( nodeFile, Resources.createResource(1024, 2)); - Assert.assertEquals(4, - nodeResourceMap.size()); - Assert.assertEquals(2048, - nodeResourceMap.get("/rack1/node1").getMemorySize()); - Assert.assertEquals(6, - nodeResourceMap.get("/rack1/node1").getVirtualCores()); - Assert.assertEquals(1024, - nodeResourceMap.get("/rack1/node2").getMemorySize()); - Assert.assertEquals(2, - nodeResourceMap.get("/rack1/node2").getVirtualCores()); + Assert.assertEquals(4, nodeDetails.size()); + for 
(NodeDetails nodeDetail : nodeDetails) { + if (nodeDetail.getHostname().equals("/rack1/node1")) { + Assert.assertEquals(2048, + nodeDetail.getNodeResource().getMemorySize()); + Assert.assertEquals(6, + nodeDetail.getNodeResource().getVirtualCores()); + } else if (nodeDetail.getHostname().equals("/rack1/node2")) { + Assert.assertEquals(1024, + nodeDetail.getNodeResource().getMemorySize()); + Assert.assertEquals(2, + nodeDetail.getNodeResource().getVirtualCores()); + Assert.assertNull(nodeDetail.getLabels()); + } else if (nodeDetail.getHostname().equals("/rack1/node3")) { + Assert.assertEquals(1024, + nodeDetail.getNodeResource().getMemorySize()); + Assert.assertEquals(2, + nodeDetail.getNodeResource().getVirtualCores()); + Assert.assertEquals(2, nodeDetail.getLabels().size()); + for (NodeLabel nodeLabel : nodeDetail.getLabels()) { + if (nodeLabel.getName().equals("label1")) { + Assert.assertTrue(nodeLabel.isExclusive()); + } else if(nodeLabel.getName().equals("label2")) { + Assert.assertFalse(nodeLabel.isExclusive()); + } else { + Assert.assertTrue("Unexepected label", false); + } + } + } else if (nodeDetail.getHostname().equals("/rack1/node4")) { + Assert.assertEquals(6144, + nodeDetail.getNodeResource().getMemorySize()); + Assert.assertEquals(12, + nodeDetail.getNodeResource().getVirtualCores()); + Assert.assertEquals(2, nodeDetail.getLabels().size()); + } + } } @Test public void testGenerateNodes() { - Set nodes = SLSUtils.generateNodes(3, 3); + Set nodes = SLSUtils.generateNodes(3, 3); Assert.assertEquals("Number of nodes is wrong.", 3, nodes.size()); Assert.assertEquals("Number of racks is wrong.", 3, getNumRack(nodes)); @@ -83,10 +109,10 @@ public void testGenerateNodes() { Assert.assertEquals("Number of racks is wrong.", 1, getNumRack(nodes)); } - private int getNumRack(Set nodes) { + private int getNumRack(Set nodes) { Set racks = new HashSet<>(); - for (String node : nodes) { - String[] rackHostname = SLSUtils.getRackHostName(node); + for (NodeDetails node : nodes) { + String[] rackHostname = SLSUtils.getRackHostName(node.getHostname()); racks.add(rackHostname[0]); } return racks.size(); diff --git a/hadoop-tools/hadoop-sls/src/test/resources/nodes-with-resources.json b/hadoop-tools/hadoop-sls/src/test/resources/nodes-with-resources.json index 003918114ae..dc5f0203c56 100644 --- a/hadoop-tools/hadoop-sls/src/test/resources/nodes-with-resources.json +++ b/hadoop-tools/hadoop-sls/src/test/resources/nodes-with-resources.json @@ -10,10 +10,14 @@ "node": "node2" }, { - "node": "node3" + "node": "node3", + "labels": "label1, label2(exclusive=false)" }, { - "node": "node4" + "node": "node4", + "labels": "label1, label2(exclusive=false)", + "memory-mb" : 6144, + "vcores" : 12 } ] } diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java index 77c72529770..ef62505c4c6 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java @@ -24,7 +24,8 @@ import java.util.ArrayList; import java.util.Properties; -import org.apache.commons.logging.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; @@ -44,7 +45,7 @@ */ public abstract class PipeMapRed { - protected static final Log LOG = LogFactory.getLog(PipeMapRed.class.getName()); + protected 
static final Logger LOG = LoggerFactory.getLogger(PipeMapRed.class.getName()); /** * Returns the Configuration. @@ -397,7 +398,7 @@ public void run() { } } catch (Throwable th) { outerrThreadsThrowable = th; - LOG.warn(th); + LOG.warn("{}", th); } finally { try { if (clientIn_ != null) { @@ -405,7 +406,7 @@ public void run() { clientIn_ = null; } } catch (IOException io) { - LOG.info(io); + LOG.info("{}", io); } } } @@ -466,7 +467,7 @@ public void run() { } } catch (Throwable th) { outerrThreadsThrowable = th; - LOG.warn(th); + LOG.warn("{}", th); try { if (lineReader != null) { lineReader.close(); @@ -476,7 +477,7 @@ public void run() { clientErr_ = null; } } catch (IOException io) { - LOG.info(io); + LOG.info("{}", io); } } } @@ -531,13 +532,13 @@ public void mapRedFinished() { clientOut_.flush(); clientOut_.close(); } catch (IOException io) { - LOG.warn(io); + LOG.warn("{}", io); } } try { waitOutputThreads(); } catch (IOException io) { - LOG.warn(io); + LOG.warn("{}", io); } if (sim != null) sim.destroy(); LOG.info("mapRedFinished"); diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java index 85e5ab3e7bd..063ea51dac6 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java @@ -30,7 +30,8 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.JobConf; -import org.apache.commons.logging.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Shared functionality for hadoopStreaming formats. 
@@ -40,7 +41,7 @@ */ public abstract class StreamBaseRecordReader implements RecordReader { - protected static final Log LOG = LogFactory.getLog(StreamBaseRecordReader.class.getName()); + protected static final Logger LOG = LoggerFactory.getLogger(StreamBaseRecordReader.class.getName()); // custom JobConf properties for this class are prefixed with this namespace final static String CONF_NS = "stream.recordreader."; diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index 1fe8710f013..551a5f2dc40 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -36,8 +36,8 @@ import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.mapreduce.MRConfig; @@ -79,7 +79,7 @@ */ public class StreamJob implements Tool { - protected static final Log LOG = LogFactory.getLog(StreamJob.class.getName()); + protected static final Logger LOG = LoggerFactory.getLogger(StreamJob.class.getName()); final static String REDUCE_NONE = "NONE"; /** -----------Streaming CLI Implementation **/ diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/mapreduce/StreamBaseRecordReader.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/mapreduce/StreamBaseRecordReader.java index d71c20d23a1..43c1b1bec0a 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/mapreduce/StreamBaseRecordReader.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/mapreduce/StreamBaseRecordReader.java @@ -20,8 +20,8 @@ import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -40,8 +40,8 @@ */ public abstract class StreamBaseRecordReader extends RecordReader { - protected static final Log LOG = LogFactory - .getLog(StreamBaseRecordReader.class.getName()); + protected static final Logger LOG = LoggerFactory + .getLogger(StreamBaseRecordReader.class.getName()); // custom JobConf properties for this class are prefixed with this namespace final static String CONF_NS = "stream.recordreader."; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java index 8275c7e68cd..752268de3dc 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java @@ -25,8 +25,8 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -42,7 +42,7 @@ */ public class TestMultipleArchiveFiles extends TestStreaming { - private static final Log LOG = LogFactory.getLog(TestMultipleArchiveFiles.class); + private static final Logger LOG = LoggerFactory.getLogger(TestMultipleArchiveFiles.class); private StreamJob job; private String INPUT_DIR = "multiple-archive-files/"; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlMultipleRecords.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlMultipleRecords.java index 4d9017bb15c..fc8c20dd695 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlMultipleRecords.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlMultipleRecords.java @@ -23,8 +23,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.junit.Before; import org.junit.Test; @@ -38,7 +38,7 @@ */ public class TestStreamXmlMultipleRecords extends TestStreaming { - private static final Log LOG = LogFactory.getLog( + private static final Logger LOG = LoggerFactory.getLogger( TestStreamXmlMultipleRecords.class); private boolean hasPerl = false; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingBadRecords.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingBadRecords.java index 860fb89cfcf..0ef1ff0b328 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingBadRecords.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingBadRecords.java @@ -31,8 +31,8 @@ import java.util.Properties; import java.util.StringTokenizer; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.ClusterMapReduceTestCase; @@ -51,8 +51,8 @@ public class TestStreamingBadRecords extends ClusterMapReduceTestCase { - private static final Log LOG = - LogFactory.getLog(TestStreamingBadRecords.class); + private static final Logger LOG = + LoggerFactory.getLogger(TestStreamingBadRecords.class); private static final List MAPPER_BAD_RECORDS = Arrays.asList("hey022","hey023","hey099"); diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java index 31e4905423b..2378c7b4149 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java @@ -26,13 +26,13 @@ import java.util.Iterator; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.util.Shell.ShellCommandExecutor; class UtilTest { - private static final Log LOG = LogFactory.getLog(UtilTest.class); + private static final Logger LOG = LoggerFactory.getLogger(UtilTest.class); /** * Utility routine to recurisvely delete a 
directory. diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index 69afe6f88a7..8290fcda8d9 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -55,6 +55,7 @@ function hadoop_usage hadoop_add_subcommand "timelinereader" client "run the timeline reader server" hadoop_add_subcommand "timelineserver" daemon "run the timeline server" hadoop_add_subcommand "top" client "view cluster information" + hadoop_add_subcommand "nodeattributes" client "node attributes cli client" hadoop_add_subcommand "version" client "print the version" hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" true } @@ -186,6 +187,10 @@ ${HADOOP_COMMON_HOME}/${HADOOP_COMMON_LIB_JARS_DIR}" hadoop_add_classpath "$HADOOP_YARN_HOME/$YARN_DIR/timelineservice/lib/*" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderServer' ;; + nodeattributes) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="false" + HADOOP_CLASSNAME='org.apache.hadoop.yarn.client.cli.NodeAttributesCLI' + ;; timelineserver) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer' diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 5cc81e57ec6..216c3bdc06e 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -468,6 +468,10 @@ + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.1.1.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.1.1.xml new file mode 100644 index 00000000000..e3dbe6ac5f6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.1.1.xml @@ -0,0 +1,2920 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

+ If the user does not have VIEW_APP access then the following + fields in the report will be set to stubbed values: +

    +
  • host - set to "N/A"
  • +
  • RPC port - set to -1
  • +
  • client token - set to "N/A"
  • +
  • diagnostics - set to "N/A"
  • +
  • tracking URL - set to "N/A"
  • +
  • original tracking URL - set to "N/A"
  • +
  • resource usage report - all values are -1
  • +
+ + @param appId + {@link ApplicationId} of the application that needs a report + @return application report + @throws YarnException + @throws IOException]]> +
+
+ + + + + + Get a report (ApplicationReport) of all Applications in the cluster. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @return a list of reports for all applications + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a report of the given ApplicationAttempt. +

+ +

+ In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

+ + @param applicationAttemptId + {@link ApplicationAttemptId} of the application attempt that needs + a report + @return application attempt report + @throws YarnException + @throws ApplicationAttemptNotFoundException if application attempt + not found + @throws IOException]]> +
+
+ + + + + + + Get a report of all (ApplicationAttempts) of Application in the cluster. +

+ + @param applicationId + @return a list of reports for all application attempts for specified + application + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a report of the given Container. +

+ +

+ In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

+ + @param containerId + {@link ContainerId} of the container that needs a report + @return container report + @throws YarnException + @throws ContainerNotFoundException if container not found + @throws IOException]]> +
+
+ + + + + + + Get a report of all (Containers) of ApplicationAttempt in the cluster. +

+ + @param applicationAttemptId + @return a list of reports of all containers for specified application + attempt + @throws YarnException + @throws IOException]]> +
+
+
+ + + + + + + + + {@code + AMRMClient.createAMRMClientContainerRequest() + } + @return the newly create AMRMClient instance.]]> + + + + + + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + addContainerRequest are sent to the + ResourceManager. New containers assigned to the master are + retrieved. Status of completed containers and node health updates are also + retrieved. This also doubles up as a heartbeat to the ResourceManager and + must be made periodically. The call may not always return any new + allocations of containers. App should not make concurrent allocate + requests. May cause request loss. + +

+ Note : If the user has not removed container requests that have already + been satisfied, then the re-register may end up sending the entire + container requests to the RM (including matched requests). Which would mean + the RM could end up giving it a lot of new allocated containers. +

+ + @param progressIndicator Indicates progress made by the master + @return the response of the allocate request + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + + + + + + + + allocate + @param req Resource request]]> + + + + + + + + + + + + + allocate. + Any previous pending resource change request of the same container will be + removed. + + Application that calls this method is expected to maintain the + Containers that are returned from previous successful + allocations or resource changes. By passing in the existing container and a + target resource capability to this method, the application requests the + ResourceManager to change the existing resource allocation to the target + resource allocation. + + @deprecated use + {@link #requestContainerUpdate(Container, UpdateContainerRequest)} + + @param container The container returned from the last successful resource + allocation or resource change + @param capability The target resource capability of the container]]> + + + + + + + allocate. + Any previous pending update request of the same container will be + removed. + + @param container The container returned from the last successful resource + allocation or update + @param updateContainerRequest The UpdateContainerRequest.]]> + + + + + + + + + + + + + + + + + + + + + + + + ContainerRequests matching the given + parameters. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + Each collection in the list contains requests with identical + Resource size that fit in the given capability. In a + collection, requests will be returned in the same order as they were added. + + NOTE: This API only matches Container requests that were created by the + client WITHOUT the allocationRequestId being set. + + @return Collection of request matching the parameters]]> + + + + + + + + + ContainerRequests matching the given + parameters. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + Each collection in the list contains requests with identical + Resource size that fit in the given capability. In a + collection, requests will be returned in the same order as they were added. + specify an ExecutionType. + + NOTE: This API only matches Container requests that were created by the + client WITHOUT the allocationRequestId being set. + + @param priority Priority + @param resourceName Location + @param executionType ExecutionType + @param capability Capability + @return Collection of request matching the parameters]]> + + + + + + + + + + + + + ContainerRequests matching the given + allocationRequestId. These ContainerRequests should have been added via + addContainerRequest earlier in the lifecycle. For performance, + the AMRMClient may return its internal collection directly without creating + a copy. Users should not perform mutable operations on the return value. + + NOTE: This API only matches Container requests that were created by the + client WITH the allocationRequestId being set to a non-default value. + + @param allocationRequestId Allocation Request Id + @return Collection of request matching the parameters]]> + + + + + + + + + + + + + AMRMClient. This cache must + be shared with the {@link NMClient} used to manage containers for the + AMRMClient +

+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @param nmTokenCache the NM token cache to use.]]> + + + + + AMRMClient. This cache must be + shared with the {@link NMClient} used to manage containers for the + AMRMClient. +

+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @return the NM token cache.]]> + + + + + + + + + + + + + + + + + + check to return true for each 1000 ms. + See also {@link #waitFor(java.util.function.Supplier, int)} + and {@link #waitFor(java.util.function.Supplier, int, int)} + @param check the condition for which it should wait]]> + + + + + + + + check to return true for each + checkEveryMillis ms. + See also {@link #waitFor(java.util.function.Supplier, int, int)} + @param check user defined checker + @param checkEveryMillis interval to call check]]> + + + + + + + + + check to return true for each + checkEveryMillis ms. In the main loop, this method will log + the message "waiting in main loop" for each logInterval times + iteration to confirm the thread is alive. + @param check user defined checker + @param checkEveryMillis interval to call check + @param logInterval interval to log for each]]> + + + + + + + + + + + + + + + + + + + + + + + + + + Start an allocated container.

+ +

The ApplicationMaster or other applications that use the + client must provide the details of the allocated container, including the + Id, the assigned node's Id and the token via {@link Container}. In + addition, the AM needs to provide the {@link ContainerLaunchContext} as + well.

+ + @param container the allocated container + @param containerLaunchContext the context information needed by the + NodeManager to launch the + container + @return a map between the auxiliary service names and their outputs + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + Increase the resource of a container.

+ +

The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

+ + @param container the container with updated token. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + Update the resources of a container.

+ +

The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

+ + @param container the container with updated token. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + + Stop an started container.

+ + @param containerId the Id of the started container + @param nodeId the Id of the NodeManager + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + + Query the status of a container.

+ + @param containerId the Id of the started container + @param nodeId the Id of the NodeManager + + @return the status of a container. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + + + Re-Initialize the Container.

+ + @param containerId the Id of the container to Re-Initialize. + @param containerLaunchContex the updated ContainerLaunchContext. + @param autoCommit commit re-initialization automatically ? + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + Restart the specified container.

+ + @param containerId the Id of the container to restart. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + Rollback last reInitialization of the specified container.

+ + @param containerId the Id of the container to roll back. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
+ + + + + + Commit last reInitialization of the specified container.

+ + @param containerId the Id of the container to commit reInitialize. + + @throws YarnException YarnException. + @throws IOException IOException.]]> +
+
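A hedged sketch of the re-initialize / rollback / commit cycle described above, assuming newCtx is an updated ContainerLaunchContext prepared by the caller.
<pre>{@code
// Sketch only: autoCommit=false keeps the option to roll back later.
nmClient.reInitializeContainer(containerId, newCtx, false);
// ... verify the re-initialized container, then either commit:
nmClient.commitLastReInitialization(containerId);
// ... or, if verification failed, roll back instead:
// nmClient.rollbackLastReInitialization(containerId);
}</pre>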
+ + + + Set whether the containers that are started by this client, and are + still running, should be stopped when the client stops. By default, the + feature is enabled.

However, containers will be stopped only + when service is stopped. i.e. after {@link NMClient#stop()}. + + @param enabled whether the feature is enabled or not]]> +
+
+ + + + NMClient. This cache must be + shared with the {@link AMRMClient} that requested the containers managed + by this NMClient +

+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @param nmTokenCache the NM token cache to use.]]> + + + + + NMClient. This cache must be + shared with the {@link AMRMClient} that requested the containers managed + by this NMClient +

+ If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} + singleton instance will be used. + + @return the NM token cache]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + By default YARN client libraries {@link AMRMClient} and {@link NMClient} use + {@link #getSingleton()} instance of the cache. +

    +
  • + Using the singleton instance of the cache is appropriate when running a + single ApplicationMaster in the same JVM. +
  • +
  • + When using the singleton, users don't need to do anything special, + {@link AMRMClient} and {@link NMClient} are already set up to use the + default singleton {@link NMTokenCache} +
  • +
+ If running multiple Application Masters in the same JVM, a different cache + instance should be used for each Application Master. +
    +
  • + If using the {@link AMRMClient} and the {@link NMClient}, setting up + and using an instance cache is as follows: +
    +   NMTokenCache nmTokenCache = new NMTokenCache();
    +   AMRMClient rmClient = AMRMClient.createAMRMClient();
    +   NMClient nmClient = NMClient.createNMClient();
    +   nmClient.setNMTokenCache(nmTokenCache);
    +   ...
    + 
    +
  • +
  • + If using the {@link AMRMClientAsync} and the {@link NMClientAsync}, + setting up and using an instance cache is as follows: +
    +   NMTokenCache nmTokenCache = new NMTokenCache();
    +   AMRMClient rmClient = AMRMClient.createAMRMClient();
    +   NMClient nmClient = NMClient.createNMClient();
    +   nmClient.setNMTokenCache(nmTokenCache);
    +   AMRMClientAsync rmClientAsync = new AMRMClientAsync(rmClient, 1000, [AMRM_CALLBACK]);
    +   NMClientAsync nmClientAsync = new NMClientAsync("nmClient", nmClient, [NM_CALLBACK]);
    +   ...
    + 
    +
  • +
  • + If using {@link ApplicationMasterProtocol} and + {@link ContainerManagementProtocol} directly, setting up and using an + instance cache is as follows: +
    +   NMTokenCache nmTokenCache = new NMTokenCache();
    +   ...
    +   ApplicationMasterProtocol amPro = ClientRMProxy.createRMProxy(conf, ApplicationMasterProtocol.class);
    +   ...
    +   AllocateRequest allocateRequest = ...
    +   ...
    +   AllocateResponse allocateResponse = rmClient.allocate(allocateRequest);
    +   for (NMToken token : allocateResponse.getNMTokens()) {
    +     nmTokenCache.setToken(token.getNodeId().toString(), token.getToken());
    +   }
    +   ...
    +   ContainerManagementProtocolProxy nmPro = ContainerManagementProtocolProxy(conf, nmTokenCache);
    +   ...
    +   nmPro.startContainer(container, containerContext);
    +   ...
    + 
    +
  • +
+ It is also possible to mix the usage of a client ({@code AMRMClient} or + {@code NMClient}, or the async versions of them) with a protocol proxy + ({@code ContainerManagementProtocolProxy} or + {@code ApplicationMasterProtocol}).]]> +
+
+ + + + + + + + + + + + + + The method to claim a resource with the SharedCacheManager. + The client uses a checksum to identify the resource and an + {@link ApplicationId} to identify which application will be using the + resource. +

+ +

+ The SharedCacheManager responds with whether or not the + resource exists in the cache. If the resource exists, a URL to + the resource in the shared cache is returned. If the resource does not + exist, null is returned instead. +

+ +

+ Once a URL has been returned for a resource, that URL is safe to use for + the lifetime of the application that corresponds to the provided + ApplicationId. +

+ + @param applicationId ApplicationId of the application using the resource + @param resourceKey the key (i.e. checksum) that identifies the resource + @return URL to the resource, or null if it does not exist]]> +
+
+ + + + + + + The method to release a resource with the SharedCacheManager. + This method is called once an application is no longer using a claimed + resource in the shared cache. The client uses a checksum to identify the + resource and an {@link ApplicationId} to identify which application is + releasing the resource. +

+ +

+ Note: This method is an optimization and the client is not required to call + it for correctness. +

+ + @param applicationId ApplicationId of the application releasing the + resource + @param resourceKey the key (i.e. checksum) that identifies the resource]]> +
+
+ + + + + + + + + + +
+ + + + + + + + + + + + + + + + Obtain a {@link YarnClientApplication} for a new application, + which in turn contains the {@link ApplicationSubmissionContext} and + {@link org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse} + objects. +

+ + @return {@link YarnClientApplication} built for a new application + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Submit a new application to YARN. It is a blocking call - it + will not return {@link ApplicationId} until the submitted application has + been successfully submitted to and accepted by the ResourceManager. +

+ +

+ Users should provide an {@link ApplicationId} as part of the parameter + {@link ApplicationSubmissionContext} when submitting a new application, + otherwise it will throw the {@link ApplicationIdNotProvidedException}. +

+ +

This internally calls {@link ApplicationClientProtocol#submitApplication + (SubmitApplicationRequest)}, and after that, it internally invokes + {@link ApplicationClientProtocol#getApplicationReport + (GetApplicationReportRequest)} and waits until it can confirm that the + application has been properly submitted. If the RM fails over or restarts + before the ResourceManager saves the application's state, + {@link ApplicationClientProtocol + #getApplicationReport(GetApplicationReportRequest)} will throw + the {@link ApplicationNotFoundException}. This API automatically resubmits + the application with the same {@link ApplicationSubmissionContext} when it + catches the {@link ApplicationNotFoundException}.

+ + @param appContext + {@link ApplicationSubmissionContext} containing all the details + needed to submit a new application + @return {@link ApplicationId} of the accepted application + @throws YarnException + @throws IOException + @see #createApplication()]]> +
+
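As a rough sketch of the createApplication/submitApplication flow (the application name, resource size and amContainerSpec are illustrative assumptions):
<pre>{@code
// Sketch only: 'conf' is a YarnConfiguration and 'amContainerSpec' is a
// ContainerLaunchContext that starts the ApplicationMaster.
YarnClient yarnClient = YarnClient.createYarnClient();
yarnClient.init(conf);
yarnClient.start();

YarnClientApplication app = yarnClient.createApplication();
ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
appContext.setApplicationName("my-app");
appContext.setAMContainerSpec(amContainerSpec);
appContext.setResource(Resource.newInstance(1024, 1)); // 1 GB, 1 vcore for the AM

ApplicationId appId = yarnClient.submitApplication(appContext);
}</pre>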
+ + + + + + + Fail an application attempt identified by given ID. +

+ + @param applicationAttemptId + {@link ApplicationAttemptId} of the attempt to fail. + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException + @see #getQueueAclsInfo()]]> +
+
+ + + + + + + Kill an application identified by given ID. +

+ + @param applicationId + {@link ApplicationId} of the application that needs to be killed + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException + @see #getQueueAclsInfo()]]> +
+
+ + + + + + + + Kill an application identified by given ID. +

+ @param applicationId {@link ApplicationId} of the application that needs to + be killed + @param diagnostics for killing an application. + @throws YarnException in case of errors or if YARN rejects the request due + to access-control restrictions. + @throws IOException]]> +
+
+ + + + + + + Get a report of the given Application. +

+ +

+ In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

+ +

+ If the user does not have VIEW_APP access then the following + fields in the report will be set to stubbed values: +

    +
  • host - set to "N/A"
  • +
  • RPC port - set to -1
  • +
  • client token - set to "N/A"
  • +
  • diagnostics - set to "N/A"
  • +
  • tracking URL - set to "N/A"
  • +
  • original tracking URL - set to "N/A"
  • +
  • resource usage report - all values are -1
  • +
+ + @param appId + {@link ApplicationId} of the application that needs a report + @return application report + @throws YarnException + @throws IOException]]> +
+
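For instance, a submitter might poll the report until the application reaches a terminal state; this is only a sketch and omits timeouts and InterruptedException handling.
<pre>{@code
// Sketch only: poll the report of a previously submitted application.
ApplicationReport report = yarnClient.getApplicationReport(appId);
while (report.getYarnApplicationState() != YarnApplicationState.FINISHED
    && report.getYarnApplicationState() != YarnApplicationState.KILLED
    && report.getYarnApplicationState() != YarnApplicationState.FAILED) {
  Thread.sleep(1000);
  report = yarnClient.getApplicationReport(appId);
}
System.out.println("Final status: " + report.getFinalApplicationStatus()
    + ", tracking URL: " + report.getTrackingUrl());
}</pre>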
+ + + + + + + The AMRM token is required for AM to RM scheduling operations. For + managed Application Masters YARN takes care of injecting it. For unmanaged + Applications Masters, the token must be obtained via this method and set + in the {@link org.apache.hadoop.security.UserGroupInformation} of the + current user. +

+ The AMRM token will be returned only if all the following conditions are + met: +

    +
  • the requester is the owner of the ApplicationMaster
  • +
  • the application master is an unmanaged ApplicationMaster
  • +
  • the application master is in ACCEPTED state
  • +
+ Else this method returns NULL. + + @param appId {@link ApplicationId} of the application to get the AMRM token + @return the AMRM token if available + @throws YarnException + @throws IOException]]> +
+
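For an unmanaged ApplicationMaster running in the submitter's JVM, a hedged sketch of wiring the token into the current user is shown below; it assumes the application has already reached the ACCEPTED state.
<pre>{@code
// Sketch only: fetch the AMRM token and attach it to the current UGI.
Token<AMRMTokenIdentifier> amrmToken = yarnClient.getAMRMToken(appId);
if (amrmToken != null) {
  UserGroupInformation.getCurrentUser().addToken(amrmToken);
}
}</pre>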
+ + + + + + Get a report (ApplicationReport) of all Applications in the cluster. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @return a list of reports of all running applications + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a report (ApplicationReport) of Applications + matching the given application types in the cluster. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @param applicationTypes set of application types you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a report (ApplicationReport) of Applications matching the given + application states in the cluster. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application types and application states in the cluster. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + + + Get a report (ApplicationReport) of Applications matching the given + application types, application states and application tags in the cluster. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @param applicationTags set of application tags you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + + + + Get a report (ApplicationReport) of Applications matching the given users, + queues, application types and application states in the cluster. If any of + the params is set to null, it is not used when filtering. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @param queues set of queues you are interested in + @param users set of users you are interested in + @param applicationTypes set of application types you are interested in + @param applicationStates set of application states you are interested in + @return a list of reports of applications + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a list of ApplicationReports that match the given + {@link GetApplicationsRequest}. +

+ +

+ If the user does not have VIEW_APP access for an application + then the corresponding report will be filtered as described in + {@link #getApplicationReport(ApplicationId)}. +

+ + @param request the request object to get the list of applications. + @return The list of ApplicationReports that match the request + @throws YarnException Exception specific to YARN. + @throws IOException Exception mostly related to connection errors.]]> +
+
+ + + + + + Get metrics ({@link YarnClusterMetrics}) about the cluster. +

+ + @return cluster metrics + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a report of nodes ({@link NodeReport}) in the cluster. +

+ + @param states The {@link NodeState}s to filter on. If no filter states are + given, nodes in all states will be returned. + @return A list of node reports + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a delegation token so as to be able to talk to YARN using those tokens. + + @param renewer + Address of the renewer who can renew these tokens when needed by + securely talking to YARN. + @return a delegation token ({@link Token}) that can be used to + talk to YARN + @throws YarnException + @throws IOException]]> + + + + + + + + + Get information ({@link QueueInfo}) about a given queue. +

+ + @param queueName + Name of the queue whose information is needed + @return queue information + @throws YarnException + in case of errors or if YARN rejects the request due to + access-control restrictions. + @throws IOException]]> +
+
+ + + + + + Get information ({@link QueueInfo}) about all queues, recursively if there + is a hierarchy +

+ + @return a list of queue-information for all queues + @throws YarnException + @throws IOException]]> +
+
+ + + + + + Get information ({@link QueueInfo}) about top level queues. +

+ + @return a list of queue-information for all the top-level queues + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get information ({@link QueueInfo}) about all the immediate children queues + of the given queue +

+ + @param parent + Name of the queue whose child-queues' information is needed + @return a list of queue-information for all queues who are direct children + of the given parent queue. + @throws YarnException + @throws IOException]]> +
+
+ + + + + + Get information about acls for current user on all the + existing queues. +

+ + @return a list of queue acls ({@link QueueUserACLInfo}) for + current user + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a report of the given ApplicationAttempt. +

+ +

+ In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

+ + @param applicationAttemptId + {@link ApplicationAttemptId} of the application attempt that needs + a report + @return application attempt report + @throws YarnException + @throws ApplicationAttemptNotFoundException if application attempt + not found + @throws IOException]]> +
+
+ + + + + + + Get a report of all (ApplicationAttempts) of Application in the cluster. +

+ + @param applicationId application id of the app + @return a list of reports for all application attempts for specified + application. + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + Get a report of the given Container. +

+ +

+ In secure mode, YARN verifies access to the application, queue + etc. before accepting the request. +

+ + @param containerId + {@link ContainerId} of the container that needs a report + @return container report + @throws YarnException + @throws ContainerNotFoundException if container not found. + @throws IOException]]> +
+
+ + + + + + + Get a report of all (Containers) of ApplicationAttempt in the cluster. +

+ + @param applicationAttemptId application attempt id + @return a list of reports of all containers for specified application + attempts + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + + Attempts to move the given application to the given queue. +

+ + @param appId + Application to move. + @param queue + Queue to place it in to. + @throws YarnException + @throws IOException]]> +
+
+ + + + + + Obtain a {@link GetNewReservationResponse} for a new reservation, + which contains the {@link ReservationId} object. +

+ + @return The {@link GetNewReservationResponse} containing a new + {@link ReservationId} object. + @throws YarnException if reservation cannot be created. + @throws IOException if reservation cannot be created.]]> +
+
+ + + + + + + The interface used by clients to submit a new reservation to the + {@code ResourceManager}. +

+ +

+ The client packages all details of its request in a + {@link ReservationSubmissionRequest} object. This contains information + about the amount of capacity, temporal constraints, and gang needs. + Furthermore, the reservation might be composed of multiple stages, with + ordering dependencies among them. +

+ +

+ In order to respond, a new admission control component in the + {@code ResourceManager} performs an analysis of the resources that have + been committed over the period of time the user is requesting, verifies that + the user's request can be fulfilled, and that it respects a sharing policy + (e.g., {@code CapacityOverTimePolicy}). Once it has positively determined + that the ReservationRequest is satisfiable, the {@code ResourceManager} + answers with a {@link ReservationSubmissionResponse} that includes a + {@link ReservationId}. Upon failure to find a valid allocation, the response + is an exception with a message detailing the reason for the failure. +

+ +

+ The semantics guarantee that the {@link ReservationId} returned + corresponds to a valid reservation existing in the time range requested by + the user. The amount of capacity dedicated to such a reservation can vary + over time, depending on the allocation that has been determined, but it is + guaranteed to satisfy all the constraints expressed by the user in the + {@link ReservationDefinition}. +

+ + @param request request to submit a new Reservation + @return response contains the {@link ReservationId} on accepting the + submission + @throws YarnException if the reservation cannot be created successfully + @throws IOException]]> +
+
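A hedged sketch of the full submission flow; the sizes, times and queue name are illustrative, and the newInstance overloads used here are assumptions about the current record factories.
<pre>{@code
// Sketch only: reserve 10 containers of 1 GB / 1 vcore for one hour,
// starting five minutes from now, in the reservable queue "dedicated".
ReservationId reservationId =
    yarnClient.createReservation().getReservationId();

ReservationRequest rr = ReservationRequest.newInstance(
    Resource.newInstance(1024, 1), 10, 1, 60 * 60 * 1000);
ReservationRequests requests = ReservationRequests.newInstance(
    Collections.singletonList(rr), ReservationRequestInterpreter.R_ALL);
long now = System.currentTimeMillis();
ReservationDefinition definition = ReservationDefinition.newInstance(
    now + 5 * 60 * 1000, now + 65 * 60 * 1000, requests, "my-reservation");

ReservationSubmissionRequest request = ReservationSubmissionRequest
    .newInstance(definition, "dedicated", reservationId);
yarnClient.submitReservation(request);
}</pre>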
+ + + + + + + The interface used by clients to update an existing Reservation. This is + referred to as a re-negotiation process, in which a user that has + previously submitted a Reservation asks to have it satisfy a new + {@link ReservationDefinition}. +

+ +

+ The allocation is attempted by virtually substituting all previous + allocations related to this Reservation with new ones, that satisfy the new + {@link ReservationDefinition}. Upon success the previous allocation is + atomically substituted by the new one, and on failure (i.e., if the system + cannot find a valid allocation for the updated request), the previous + allocation remains valid. +

+ + @param request to update an existing Reservation (the + {@link ReservationUpdateRequest} should refer to an existing valid + {@link ReservationId}) + @return response empty on successfully updating the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + updated successfully + @throws IOException]]> +
+
+ + + + + + + The interface used by clients to remove an existing Reservation. +

+ + @param request to remove an existing Reservation (the + {@link ReservationDeleteRequest} should refer to an existing valid + {@link ReservationId}) + @return response empty on successfully deleting the existing reservation + @throws YarnException if the request is invalid or reservation cannot be + deleted successfully + @throws IOException]]> +
+
+ + + + + + + The interface used by clients to get the list of reservations in a plan. + The reservationId will be used to search for reservations to list if it is + provided. Otherwise, it will select active reservations within the + startTime and endTime (inclusive). +

+ + @param request to list reservations in a plan. Contains fields to select + String queue, ReservationId reservationId, long startTime, + long endTime, and a bool includeReservationAllocations. + + queue: Required. Cannot be null or empty. Refers to the + reservable queue in the scheduler that was selected when + creating a reservation submission + {@link ReservationSubmissionRequest}. + + reservationId: Optional. If provided, other fields will + be ignored. + + startTime: Optional. If provided, only reservations that + end after the startTime will be selected. This defaults + to 0 if an invalid number is used. + + endTime: Optional. If provided, only reservations that + start on or before endTime will be selected. This defaults + to Long.MAX_VALUE if an invalid number is used. + + includeReservationAllocations: Optional. Flag that + determines whether the entire reservation allocations are + to be returned. Reservation allocations are subject to + change in the event of re-planning as described by + {@link ReservationDefinition}. + + @return response that contains information about reservations that are + being searched for. + @throws YarnException if the request is invalid + @throws IOException if the request failed otherwise]]> +
+
+ + + + + + The interface used by client to get node to labels mappings in existing cluster +

+ + @return node to labels mappings + @throws YarnException + @throws IOException]]> +
+
+ + + + + + The interface used by client to get labels to nodes mapping + in existing cluster +

+ + @return labels to nodes mappings + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + The interface used by client to get labels to nodes mapping + for specified labels in existing cluster +

+ + @param labels labels for which labels to nodes mapping has to be retrieved + @return labels to nodes mappings for specific labels + @throws YarnException + @throws IOException]]> +
+
+ + + + + + The interface used by client to get node labels in the cluster +

+ + @return cluster node labels collection + @throws YarnException when there is a failure in + {@link ApplicationClientProtocol} + @throws IOException when there is a failure in + {@link ApplicationClientProtocol}]]> +
+
+ + + + + + + + The interface used by client to set priority of an application +

+ @param applicationId + @param priority + @return updated priority of an application. + @throws YarnException + @throws IOException]]> +
+
+ + + + + + + + Signal a container identified by given ID. +

+ + @param containerId + {@link ContainerId} of the container that needs to be signaled + @param command the signal container command + @throws YarnException + @throws IOException]]> +
+
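For example, requesting a thread dump from a running container (OUTPUT_THREAD_DUMP is just one of the available SignalContainerCommand values):
<pre>{@code
// Sketch only: signal a running container owned by this user.
yarnClient.signalToContainer(containerId,
    SignalContainerCommand.OUTPUT_THREAD_DUMP);
}</pre>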
+ + + + + + + + + + + Get the resource profiles available in the RM. +

+ @return a Map of the resource profile names to their capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
+
+ + + + + + + Get the details of a specific resource profile from the RM. +

+ @param profile the profile name + @return resource profile name with its capabilities + @throws YARNFeatureNotEnabledException if resource-profile is disabled + @throws YarnException if any error happens inside YARN + @throws IOException in case of other errors]]> +
+
+ + + + + + Get available resource types supported by RM. +

+ @return list of supported resource types with detailed information + @throws YarnException if any issue happens inside YARN + @throws IOException in case of other errors]]> +
+
+
+ + + + + + + + + + + +
+ + + + + + + + + + + + + + + + Create a new instance of AMRMClientAsync.

+ + @param intervalMs heartbeat interval in milliseconds between AM and RM + @param callbackHandler callback handler that processes responses from + the ResourceManager]]> +
+
+ + + + + + Create a new instance of AMRMClientAsync.

+ + @param client the AMRMClient instance + @param intervalMs heartbeat interval in milliseconds between AM and RM + @param callbackHandler callback handler that processes responses from + the ResourceManager]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RegisterApplicationMasterResponse + @throws YarnException + @throws IOException]]> + + + + + + + + + + + + + + + + allocate + @param req Resource request]]> + + + + + + + + + + + + + allocate. + Any previous pending resource change request of the same container will be + removed. + + Application that calls this method is expected to maintain the + Containers that are returned from previous successful + allocations or resource changes. By passing in the existing container and a + target resource capability to this method, the application requests the + ResourceManager to change the existing resource allocation to the target + resource allocation. + + @deprecated use + {@link #requestContainerUpdate(Container, UpdateContainerRequest)} + + @param container The container returned from the last successful resource + allocation or resource change + @param capability The target resource capability of the container]]> + + + + + + + allocate. + Any previous pending update request of the same container will be + removed. + + @param container The container returned from the last successful resource + allocation or update + @param updateContainerRequest The UpdateContainerRequest.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + check to return true for each 1000 ms. + See also {@link #waitFor(java.util.function.Supplier, int)} + and {@link #waitFor(java.util.function.Supplier, int, int)} + @param check the condition for which it should wait]]> + + + + + + + + check to return true for each + checkEveryMillis ms. + See also {@link #waitFor(java.util.function.Supplier, int, int)} + @param check user defined checker + @param checkEveryMillis interval to call check]]> + + + + + + + + + check to return true for each + checkEveryMillis ms. In the main loop, this method will log + the message "waiting in main loop" for each logInterval times + iteration to confirm the thread is alive. + @param check user defined checker + @param checkEveryMillis interval to call check + @param logInterval interval to log for each]]> + + + + + + + + + + AMRMClientAsync handles communication with the ResourceManager + and provides asynchronous updates on events such as container allocations and + completions. It contains a thread that sends periodic heartbeats to the + ResourceManager. + + It should be used by implementing a CallbackHandler: +
+ {@code
+ class MyCallbackHandler extends AMRMClientAsync.AbstractCallbackHandler {
+   public void onContainersAllocated(List containers) {
+     [run tasks on the containers]
+   }
+
+   public void onContainersUpdated(List containers) {
+     [determine if resource allocation of containers have been increased in
+      the ResourceManager, and if so, inform the NodeManagers to increase the
+      resource monitor/enforcement on the containers]
+   }
+
+   public void onContainersCompleted(List statuses) {
+     [update progress, check whether app is done]
+   }
+   
+   public void onNodesUpdated(List updated) {}
+   
+   public void onReboot() {}
+ }
+ }
+ 
+ + The client's lifecycle should be managed similarly to the following: + +
+ {@code
+ AMRMClientAsync asyncClient = 
+     createAMRMClientAsync(appAttId, 1000, new MyCallbackhandler());
+ asyncClient.init(conf);
+ asyncClient.start();
+ RegisterApplicationMasterResponse response = asyncClient
+    .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
+       appMasterTrackingUrl);
+ asyncClient.addContainerRequest(containerRequest);
+ [... wait for application to complete]
+ asyncClient.unregisterApplicationMaster(status, appMsg, trackingUrl);
+ asyncClient.stop();
+ }
+ 
]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Update the resources of a container.

+ +

The ApplicationMaster or other applications that use the + client must provide the details of the container, including the Id and + the target resource encapsulated in the updated container token via + {@link Container}. +

+ + @param container the container with updated token.]]> +
+
+ + + + + + Re-Initialize the Container.

+ + @param containerId the Id of the container to Re-Initialize. + @param containerLaunchContex the updated ContainerLaunchContext. + @param autoCommit commit re-initialization automatically ?]]> +
+
+ + + + Restart the specified container.

+ + @param containerId the Id of the container to restart.]]> +
+
+ + + + Rollback last reInitialization of the specified container.

+ + @param containerId the Id of the container to roll back.]]> +
+
+ + + + Commit last reInitialization of the specified container.

+ + @param containerId the Id of the container to commit reInitialize.]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + NMClientAsync handles communication with all the NodeManagers + and provides asynchronous updates on getting responses from them. It + maintains a thread pool to communicate with individual NMs where a number of + worker threads process requests to NMs by using {@link NMClientImpl}. The max + size of the thread pool is configurable through + {@link YarnConfiguration#NM_CLIENT_ASYNC_THREAD_POOL_MAX_SIZE}. + + It should be used in conjunction with a CallbackHandler. For example + +
+ {@code
+ class MyCallbackHandler extends NMClientAsync.AbstractCallbackHandler {
+   public void onContainerStarted(ContainerId containerId,
+       Map allServiceResponse) {
+     [post process after the container is started, process the response]
+   }
+
+   public void onContainerResourceIncreased(ContainerId containerId,
+       Resource resource) {
+     [post process after the container resource is increased]
+   }
+
+   public void onContainerStatusReceived(ContainerId containerId,
+       ContainerStatus containerStatus) {
+     [make use of the status of the container]
+   }
+
+   public void onContainerStopped(ContainerId containerId) {
+     [post process after the container is stopped]
+   }
+
+   public void onStartContainerError(
+       ContainerId containerId, Throwable t) {
+     [handle the raised exception]
+   }
+
+   public void onGetContainerStatusError(
+       ContainerId containerId, Throwable t) {
+     [handle the raised exception]
+   }
+
+   public void onStopContainerError(
+       ContainerId containerId, Throwable t) {
+     [handle the raised exception]
+   }
+ }
+ }
+ 
+ + The client's life-cycle should be managed like the following: + +
+ {@code
+ NMClientAsync asyncClient = 
+     NMClientAsync.createNMClientAsync(new MyCallbackhandler());
+ asyncClient.init(conf);
+ asyncClient.start();
+ asyncClient.startContainer(container, containerLaunchContext);
+ [... wait for container being started]
+ asyncClient.getContainerStatus(container.getId(), container.getNodeId(),
+     container.getContainerToken());
+ [... handle the status in the callback instance]
+ asyncClient.stopContainer(container.getId(), container.getNodeId(),
+     container.getContainerToken());
+ [... wait for container being stopped]
+ asyncClient.stop();
+ }
+ 
]]> +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.1.1.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.1.1.xml new file mode 100644 index 00000000000..d55be8d1358 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.1.1.xml @@ -0,0 +1,3327 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Type of proxy. + @return Proxy to the ResourceManager for the specified client protocol. + @throws IOException]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Create a new instance of AppAdminClient. +

+ + @param appType application type + @param conf configuration + @return app admin client]]> +
+
+ + + + + + + + + + Launch a new YARN application. +

+ + @param fileName specification of application + @param appName name of the application + @param lifetime lifetime of the application + @param queue queue of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
+
+ + + + + + + Stop a YARN application (attempt to stop gracefully before killing the + application). In the case of a long-running service, the service may be + restarted later. +

+ + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
+
+ + + + + + + Start a YARN application from a previously saved specification. In the + case of a long-running service, the service must have been previously + launched/started and then stopped, or previously saved but not started. +

+ + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
+
+ + + + + + + + + + Save the specification for a YARN application / long-running service. + The application may be started later. +

+ + @param fileName specification of application to save + @param appName name of the application + @param lifetime lifetime of the application + @param queue queue of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
+
+ + + + + + + Remove the specification and all application data for a YARN application. + The application cannot be running. +

+ + @param appName the name of the application + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
+
+ + + + + + + + Change the number of running containers for a component of a YARN + application / long-running service. +

+ + @param appName the name of the application + @param componentCounts map of component name to new component count or + amount to change existing component count (e.g. + 5, +5, -5) + @return exit code + @throws IOException IOException + @throws YarnException exception in client or server]]> +
+
+ + + + + + + Upload AM dependencies to HDFS. This makes future application launches + faster since the dependencies do not have to be uploaded on each launch. +

+ + @param destinationFolder + an optional HDFS folder where dependency tarball will be uploaded + @return exit code + @throws IOException + IOException + @throws YarnException + exception in client or server]]> +
+
+ + + + + + + Get detailed app specific status string for a YARN application. +

+ + @param appIdOrName appId or appName + @return status string + @throws IOException IOException + @throws YarnException exception in client or server]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
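Pulling the calls above together, a heavily hedged sketch follows; the "yarn-service" type string, file path, service and component names are all assumptions made for illustration.
<pre>{@code
// Sketch only: manage a long-running service through the app admin client.
AppAdminClient client =
    AppAdminClient.createAppAdminClient("yarn-service", conf);
// Launch from a JSON specification; -1 means no lifetime limit.
client.actionLaunch("/path/to/sleeper.json", "sleeper-service", -1L, null);
// Grow the "sleeper" component by two instances.
client.actionFlex("sleeper-service", Collections.singletonMap("sleeper", "+2"));
System.out.println(client.getStatusString("sleeper-service"));
client.actionStop("sleeper-service");
client.actionDestroy("sleeper-service");
}</pre>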
+ + + + + + + + + + + + + + + + + + Send the information of a number of conceptual entities to the timeline + server. It is a blocking API. The method will not return until it gets the + response from the timeline server. +

+ + @param entities + the collection of {@link TimelineEntity} + @return the error information if the sent entities are not correctly stored + @throws IOException if there are I/O errors + @throws YarnException if entities are incomplete/invalid]]> +
+
+ + + + + + + + + Send the information of a number of conceptual entities to the timeline + server. It is a blocking API. The method will not return until it gets the + response from the timeline server. + + This API is only for timeline service v1.5 +

+ + @param appAttemptId {@link ApplicationAttemptId} + @param groupId {@link TimelineEntityGroupId} + @param entities + the collection of {@link TimelineEntity} + @return the error information if the sent entities are not correctly stored + @throws IOException if there are I/O errors + @throws YarnException if entities are incomplete/invalid]]> +
+
+ + + + + + + Send the information of a domain to the timeline server. It is a + blocking API. The method will not return until it gets the response from + the timeline server. +

+ + @param domain + an {@link TimelineDomain} object + @throws IOException + @throws YarnException]]> +
+
+ + + + + + + + Send the information of a domain to the timeline server. It is a + blocking API. The method will not return until it gets the response from + the timeline server. + + This API is only for timeline service v1.5 +

+ + @param domain + an {@link TimelineDomain} object + @param appAttemptId {@link ApplicationAttemptId} + @throws IOException + @throws YarnException]]> +
+
+ + + + + + + Get a delegation token so as to be able to talk to the timeline server in a + secure way. +

+ + @param renewer + Address of the renewer who can renew these tokens when needed by + securely talking to the timeline server + @return a delegation token ({@link Token}) that can be used to talk to the + timeline server + @throws IOException + @throws YarnException]]> +
+
+ + + + + + + Renew a timeline delegation token. +

+ + @param timelineDT + the delegation token to renew + @return the new expiration time + @throws IOException + @throws YarnException]]> +
+
+ + + + + + + Cancel a timeline delegation token. +

+ + @param timelineDT + the delegation token to cancel + @throws IOException + @throws YarnException]]> +
+
+ + + +
+ +
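As an illustration of the blocking v1 publish path described above (the entity type and id are arbitrary):
<pre>{@code
// Sketch only: publish one entity to the v1 timeline server.
TimelineClient timelineClient = TimelineClient.createTimelineClient();
timelineClient.init(conf);
timelineClient.start();

TimelineEntity entity = new TimelineEntity();
entity.setEntityType("MY_APP_METRICS"); // illustrative type
entity.setEntityId("run_0001");         // illustrative id
entity.setStartTime(System.currentTimeMillis());

TimelinePutResponse response = timelineClient.putEntities(entity);
if (!response.getErrors().isEmpty()) {
  // Some entities were rejected; inspect response.getErrors().
}
timelineClient.stop();
}</pre>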
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + parameterized event of type T]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InputStream to be checksumed + @return the message digest of the input stream + @throws IOException]]> + + + + + + + + + + + + SharedCacheChecksum object based on the configurable + algorithm implementation + (see yarn.sharedcache.checksum.algo.impl) + + @return SharedCacheChecksum object]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The object type on which this state machine operates. + @param The state of the entity. + @param The external eventType to be handled. + @param The event object.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + When {@link #limit} would be reached on append, past messages will be + truncated from head, and a header telling the user about truncation will be + prepended, with ellipses in between header and messages. +

+ Note that header and ellipses are not counted against {@link #limit}. +

+ An example: + +

+ {@code
+   // At the beginning it's an empty string
+   final Appendable shortAppender = new BoundedAppender(80);
+   // The whole message fits into limit
+   shortAppender.append(
+       "message1 this is a very long message but fitting into limit\n");
+   // The first message is truncated, the second not
+   shortAppender.append("message2 this is shorter than the previous one\n");
+   // The first message is deleted, the second truncated, the third
+   // preserved
+   shortAppender.append("message3 this is even shorter message, maybe.\n");
+   // The first two are deleted, the third one truncated, the last preserved
+   shortAppender.append("message4 the shortest one, yet the greatest :)");
+   // Current contents are like this:
+   // Diagnostic messages truncated, showing last 80 chars out of 199:
+   // ...s is even shorter message, maybe.
+   // message4 the shortest one, yet the greatest :)
+ }
+ 
+

+ Note that null values are {@link #append(CharSequence) append}ed + just like in {@link StringBuilder#append(CharSequence) original + implementation}. +

+ Note that this class is not thread safe.]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java index 3c4e4d01002..941a688134f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java @@ -27,8 +27,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeLabelsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeLabelsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; @@ -39,6 +43,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; @@ -642,4 +648,53 @@ GetResourceProfileResponse getResourceProfile( @Unstable GetAllResourceTypeInfoResponse getResourceTypeInfo( GetAllResourceTypeInfoRequest request) throws YarnException, IOException; + + /** + *

+ * The interface used by client to get attributes to nodes mappings + * available in ResourceManager. + *

+ * + * @param request request to get details of attributes to nodes mapping. + * @return Response containing the details of attributes to nodes mappings. + * @throws YarnException if any error happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + GetAttributesToNodesResponse getAttributesToNodes( + GetAttributesToNodesRequest request) throws YarnException, IOException; + + /** + *

+ * The interface used by client to get node attributes available in + * ResourceManager. + *

+ * + * @param request request to get node attributes collection of this cluster. + * @return Response containing node attributes collection. + * @throws YarnException if any error happens inside YARN. + * @throws IOException in case of other errors. + */ + @Public + @Unstable + GetClusterNodeAttributesResponse getClusterNodeAttributes( + GetClusterNodeAttributesRequest request) + throws YarnException, IOException; + + /** + *

+ * The interface used by client to get node to attributes mappings + * in existing cluster. + *

+ * + * @param request request to get nodes to attributes mapping. + * @return nodes to attributes mappings. + * @throws YarnException if any error happens inside YARN. + * @throws IOException + */ + @Public + @Unstable + GetNodesToAttributesResponse getNodesToAttributes( + GetNodesToAttributesRequest request) throws YarnException, IOException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java index eee50e3c1e5..799088b5437 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java @@ -73,7 +73,21 @@ public static AllocateRequest newInstance(int responseID, float appProgress, .releaseList(containersToBeReleased) .resourceBlacklistRequest(resourceBlacklistRequest).build(); } - + + @Public + @Unstable + public static AllocateRequest newInstance(int responseID, float appProgress, + List resourceAsk, + List containersToBeReleased, + ResourceBlacklistRequest resourceBlacklistRequest, + String trackingUrl) { + return AllocateRequest.newBuilder().responseId(responseID) + .progress(appProgress).askList(resourceAsk) + .releaseList(containersToBeReleased) + .resourceBlacklistRequest(resourceBlacklistRequest) + .trackingUrl(trackingUrl).build(); + } + @Public @Unstable public static AllocateRequest newInstance(int responseID, float appProgress, @@ -240,6 +254,22 @@ public void setSchedulingRequests( List schedulingRequests) { } + /** + * Get the tracking url update for this heartbeat. + * @return tracking url to update this application with + */ + @Public + @Unstable + public abstract String getTrackingUrl(); + + /** + * Set the new tracking url for this application. + * @param trackingUrl the new tracking url + */ + @Public + @Unstable + public abstract void setTrackingUrl(String trackingUrl); + @Public @Unstable public static AllocateRequestBuilder newBuilder() { @@ -355,6 +385,19 @@ public AllocateRequestBuilder schedulingRequests( return this; } + /** + * Set the trackingUrl of the request. + * @see AllocateRequest#setTrackingUrl(String) + * @param trackingUrl new tracking url + * @return {@link AllocateRequestBuilder} + */ + @Public + @Unstable + public AllocateRequestBuilder trackingUrl(String trackingUrl) { + allocateRequest.setTrackingUrl(trackingUrl); + return this; + } + /** * Return generated {@link AllocateRequest} object. * @return {@link AllocateRequest} @@ -365,4 +408,4 @@ public AllocateRequest build() { return allocateRequest; } } -} \ No newline at end of file +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAttributesToNodesRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAttributesToNodesRequest.java new file mode 100644 index 00000000000..28e4db6ee83 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAttributesToNodesRequest.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.api.protocolrecords; + +import java.util.Set; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.records.NodeAttributeKey; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * The request from clients to get node to attribute value mapping for all or + * given set of Node AttributeKey's in the cluster from the + * ResourceManager. + *

+ * + * @see ApplicationClientProtocol#getAttributesToNodes + * (GetAttributesToNodesRequest) + */ +@Public +@Evolving +public abstract class GetAttributesToNodesRequest { + + public static GetAttributesToNodesRequest newInstance() { + return Records.newRecord(GetAttributesToNodesRequest.class); + } + + public static GetAttributesToNodesRequest newInstance( + Set attributes) { + GetAttributesToNodesRequest request = + Records.newRecord(GetAttributesToNodesRequest.class); + request.setNodeAttributes(attributes); + return request; + } + + /** + * Set node attributeKeys for which the mapping of hostname to attribute value + * is required. + * + * @param attributes Set of NodeAttributeKey provided. + */ + @Public + @Unstable + public abstract void setNodeAttributes(Set attributes); + + /** + * Get node attributeKeys for which mapping of hostname to attribute value is + * required. + * + * @return Set of NodeAttributeKey + */ + @Public + @Unstable + public abstract Set getNodeAttributes(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAttributesToNodesResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAttributesToNodesResponse.java new file mode 100644 index 00000000000..045deac7b82 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAttributesToNodesResponse.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.api.protocolrecords; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.records.NodeAttributeKey; +import org.apache.hadoop.yarn.api.records.NodeToAttributeValue; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * The response sent by the ResourceManager to a client requesting + * node to attribute value mapping for all or given set of Node AttributeKey's. + *

+ * + * @see ApplicationClientProtocol#getAttributesToNodes + * (GetAttributesToNodesRequest) + */ +@Public +@Evolving +public abstract class GetAttributesToNodesResponse { + public static GetAttributesToNodesResponse newInstance( + Map> map) { + GetAttributesToNodesResponse response = + Records.newRecord(GetAttributesToNodesResponse.class); + response.setAttributeToNodes(map); + return response; + } + + @Public + @Evolving + public abstract void setAttributeToNodes( + Map> map); + + /** + * Get mapping of NodeAttributeKey to its associated mapping of list of + * NodeToAttributeValue associated with attribute. + * + * @return Map of node attributes to list of NodeToAttributeValue. + */ + @Public + @Evolving + public abstract Map> getAttributesToNodes(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetClusterNodeAttributesRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetClusterNodeAttributesRequest.java new file mode 100644 index 00000000000..ca81f9a0841 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetClusterNodeAttributesRequest.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import static org.apache.hadoop.classification.InterfaceAudience.*; +import static org.apache.hadoop.classification.InterfaceStability.*; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * The request from clients to get node attributes in the cluster from the + * ResourceManager. + *

+ * + * @see ApplicationClientProtocol#getClusterNodeAttributes + * (GetClusterNodeAttributesRequest) + */ +@Public +@Evolving +public abstract class GetClusterNodeAttributesRequest { + + /** + * Create new instance of GetClusterNodeAttributesRequest. + * + * @return GetClusterNodeAttributesRequest is returned. + */ + public static GetClusterNodeAttributesRequest newInstance() { + return Records.newRecord(GetClusterNodeAttributesRequest.class); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetClusterNodeAttributesResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetClusterNodeAttributesResponse.java new file mode 100644 index 00000000000..b0ccd906a32 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetClusterNodeAttributesResponse.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.api.protocolrecords; + +import java.util.Set; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.records.NodeAttributeInfo; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * The response sent by the ResourceManager to a client requesting + * the node attributes available in the cluster. + *

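As a usage sketch (illustrative only, not part of this patch), with the dropped generic parameter assumed to be Set of NodeAttributeInfo and client standing for an ApplicationClientProtocol proxy:

    GetClusterNodeAttributesResponse response = client.getClusterNodeAttributes(
        GetClusterNodeAttributesRequest.newInstance());
    for (NodeAttributeInfo info : response.getNodeAttributes()) {
      // Prints e.g. "rm.yarn.io/OS_TYPE : STRING".
      System.out.println(info.getAttributeKey().getAttributePrefix() + "/"
          + info.getAttributeKey().getAttributeName() + " : "
          + info.getAttributeType());
    }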
+ * + * @see ApplicationClientProtocol#getClusterNodeAttributes + * (GetClusterNodeAttributesRequest) + */ +@Public +@Evolving +public abstract class GetClusterNodeAttributesResponse { + + /** + * Create instance of GetClusterNodeAttributesResponse. + * + * @param attributes + * @return GetClusterNodeAttributesResponse. + */ + public static GetClusterNodeAttributesResponse newInstance( + Set attributes) { + GetClusterNodeAttributesResponse response = + Records.newRecord(GetClusterNodeAttributesResponse.class); + response.setNodeAttributes(attributes); + return response; + } + + /** + * Set node attributes to the response. + * + * @param attributes Map of Node attributeKey to Type. + */ + @Public + @Unstable + public abstract void setNodeAttributes(Set attributes); + + /** + * Get node attributes from the response. + * + * @return Node attributes. + */ + @Public + @Unstable + public abstract Set getNodeAttributes(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToAttributesRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToAttributesRequest.java new file mode 100644 index 00000000000..4fcd8da6936 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToAttributesRequest.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.util.Records; + +import java.util.Set; + +/** + *

+ * The request from clients to get the mapping of nodes to attributes + * in the cluster from the ResourceManager. + *

+ * + * @see ApplicationClientProtocol#getNodesToAttributes + * (GetNodesToAttributesRequest) + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class GetNodesToAttributesRequest { + + public static GetNodesToAttributesRequest newInstance(Set hostNames) { + GetNodesToAttributesRequest request = + Records.newRecord(GetNodesToAttributesRequest.class); + request.setHostNames(hostNames); + return request; + } + + /** + * Set hostnames for which mapping is required. + * + * @param hostnames + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public abstract void setHostNames(Set hostnames); + + /** + * Get hostnames for which mapping is required. + * + * @return Set of hostnames. + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public abstract Set getHostNames(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToAttributesResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToAttributesResponse.java new file mode 100644 index 00000000000..a82a3f99be4 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToAttributesResponse.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.records.NodeAttribute; +import org.apache.hadoop.yarn.util.Records; + +import java.util.Map; +import java.util.Set; + +/** + *

+ * The response sent by the ResourceManager to a client requesting + * the mapping of nodes to attributes. + *

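A minimal sketch (illustrative only, not part of this patch), with the dropped generics assumed to be Set of String for the hostnames and Map of String to Set of NodeAttribute for the response; client again stands for an ApplicationClientProtocol proxy, the hostnames are made up, and java.util imports are omitted.

    Set<String> hosts = new HashSet<>(Arrays.asList("host1", "host2"));
    GetNodesToAttributesRequest request =
        GetNodesToAttributesRequest.newInstance(hosts);
    GetNodesToAttributesResponse response = client.getNodesToAttributes(request);
    Map<String, Set<NodeAttribute>> nodeToAttributes =
        response.getNodeToAttributes();
    // An empty attribute set for a host would indicate no attributes are mapped to it.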
+ * + * @see ApplicationClientProtocol#getNodesToAttributes + * (GetNodesToAttributesRequest) + */ +@Public +@Evolving +public abstract class GetNodesToAttributesResponse { + + public static GetNodesToAttributesResponse newInstance( + Map> map) { + GetNodesToAttributesResponse response = + Records.newRecord(GetNodesToAttributesResponse.class); + response.setNodeToAttributes(map); + return response; + } + + @Public + @Evolving + public abstract void setNodeToAttributes(Map> map); + + /** + * Get hostnames to NodeAttributes mapping. + * + * @return Map of host to attributes. + */ + @Public + @Evolving + public abstract Map> getNodeToAttributes(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttribute.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttribute.java new file mode 100644 index 00000000000..70649390821 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttribute.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * A Node Attribute is a kind of label which represents one of the + * attributes/features of a Node. It differs from a node partition label in that + * resource guarantees across queues are not maintained for these types + * of labels. + *

+ *

+ * A given Node can be mapped with any kind of attribute; a few examples are + * HAS_SSD=true, JAVA_VERSION=JDK1.8, OS_TYPE=WINDOWS. + *

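For illustration (not part of this patch), the factory methods defined further below would express the examples above roughly as:

    NodeAttribute hasSsd =
        NodeAttribute.newInstance("HAS_SSD", NodeAttributeType.STRING, "true");
    NodeAttribute javaVersion =
        NodeAttribute.newInstance("JAVA_VERSION", NodeAttributeType.STRING, "JDK1.8");
    // The three-argument form defaults the prefix to PREFIX_CENTRALIZED ("rm.yarn.io").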
+ *

+ * It is not compulsory for all attributes to have a value; the empty string is + * the default value for NodeAttributeType.STRING. + *

+ *

+ * The Node Attribute Prefix is used as a namespace to segregate attributes. + *

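For illustration (not part of this patch): the prefix selects the namespace, so the same attribute name can exist independently under the centralized and distributed prefixes. The reading of PREFIX_CENTRALIZED as RM-managed and PREFIX_DISTRIBUTED as NodeManager-provided is an assumption based on the constant names.

    NodeAttribute centralized = NodeAttribute.newInstance(
        "OS_TYPE", NodeAttributeType.STRING, "WINDOWS");           // prefix rm.yarn.io
    NodeAttribute distributed = NodeAttribute.newInstance(
        NodeAttribute.PREFIX_DISTRIBUTED, "OS_TYPE",
        NodeAttributeType.STRING, "WINDOWS");                      // prefix nm.yarn.io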
+ */ +@Public +@Unstable +public abstract class NodeAttribute { + + public static final String PREFIX_DISTRIBUTED = "nm.yarn.io"; + public static final String PREFIX_CENTRALIZED = "rm.yarn.io"; + + public static NodeAttribute newInstance(String attributeName, + NodeAttributeType attributeType, String attributeValue) { + return newInstance(PREFIX_CENTRALIZED, attributeName, attributeType, + attributeValue); + } + + public static NodeAttribute newInstance(String attributePrefix, + String attributeName, NodeAttributeType attributeType, + String attributeValue) { + NodeAttribute nodeAttribute = Records.newRecord(NodeAttribute.class); + NodeAttributeKey nodeAttributeKey = + NodeAttributeKey.newInstance(attributePrefix, attributeName); + nodeAttribute.setAttributeKey(nodeAttributeKey); + nodeAttribute.setAttributeType(attributeType); + nodeAttribute.setAttributeValue(attributeValue); + return nodeAttribute; + } + + @Public + @Unstable + public abstract NodeAttributeKey getAttributeKey(); + + @Public + @Unstable + public abstract void setAttributeKey(NodeAttributeKey attributeKey); + + @Public + @Unstable + public abstract String getAttributeValue(); + + @Public + @Unstable + public abstract void setAttributeValue(String attributeValue); + + @Public + @Unstable + public abstract NodeAttributeType getAttributeType(); + + @Public + @Unstable + public abstract void setAttributeType(NodeAttributeType attributeType); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeInfo.java new file mode 100644 index 00000000000..d294333ed1e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeInfo.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * Node Attribute Info describes a NodeAttribute. + *

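A small sketch (illustrative only, not part of this patch): a NodeAttributeInfo carries just the key and the type of an attribute, with no value. The attribute name used here is made up.

    NodeAttributeInfo info = NodeAttributeInfo.newInstance(
        NodeAttributeKey.newInstance("GPU_DRIVER"), NodeAttributeType.STRING);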
+ */ +@Public +@Unstable +public abstract class NodeAttributeInfo { + + public static NodeAttributeInfo newInstance(NodeAttribute nodeAttribute) { + return newInstance(nodeAttribute.getAttributeKey(), + nodeAttribute.getAttributeType()); + } + + public static NodeAttributeInfo newInstance(NodeAttributeKey nodeAttributeKey, + NodeAttributeType attributeType) { + NodeAttributeInfo nodeAttribute = + Records.newRecord(NodeAttributeInfo.class); + nodeAttribute.setAttributeKey(nodeAttributeKey); + nodeAttribute.setAttributeType(attributeType); + return nodeAttribute; + } + + @Public + @Unstable + public abstract NodeAttributeKey getAttributeKey(); + + @Public + @Unstable + public abstract void setAttributeKey(NodeAttributeKey attributeKey); + + @Public + @Unstable + public abstract NodeAttributeType getAttributeType(); + + @Public + @Unstable + public abstract void setAttributeType(NodeAttributeType attributeType); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeKey.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeKey.java new file mode 100644 index 00000000000..35ff26f07f1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeKey.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * A Node AttributeKey uniquely identifies a given Node Attribute. A Node + * Attribute is identified by its attribute prefix and name. + *

+ *

+ * The Node Attribute Prefix is used as a namespace to segregate attributes. + *

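A small sketch (illustrative only, not part of this patch): two keys with the same name but different prefixes identify different attributes, since the key is the (prefix, name) pair.

    NodeAttributeKey centralizedKey =
        NodeAttributeKey.newInstance("OS_TYPE");                   // rm.yarn.io/OS_TYPE
    NodeAttributeKey distributedKey = NodeAttributeKey.newInstance(
        NodeAttribute.PREFIX_DISTRIBUTED, "OS_TYPE");              // nm.yarn.io/OS_TYPE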
+ */ +@Public +@Unstable +public abstract class NodeAttributeKey { + + public static NodeAttributeKey newInstance(String attributeName) { + return newInstance(NodeAttribute.PREFIX_CENTRALIZED, attributeName); + } + + public static NodeAttributeKey newInstance(String attributePrefix, + String attributeName) { + NodeAttributeKey nodeAttributeKey = + Records.newRecord(NodeAttributeKey.class); + nodeAttributeKey.setAttributePrefix(attributePrefix); + nodeAttributeKey.setAttributeName(attributeName); + return nodeAttributeKey; + } + + @Public + @Unstable + public abstract String getAttributePrefix(); + + @Public + @Unstable + public abstract void setAttributePrefix(String attributePrefix); + + @Public + @Unstable + public abstract String getAttributeName(); + + @Public + @Unstable + public abstract void setAttributeName(String attributeName); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeOpCode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeOpCode.java new file mode 100644 index 00000000000..76db063eed5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeOpCode.java @@ -0,0 +1,43 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; + +/** + * Enumeration of various node attribute op codes. + */ +@Public +@Evolving +public enum NodeAttributeOpCode { + /** + * Default as No OP. + */ + NO_OP, + /** + * EQUALS op code for Attribute. + */ + EQ, + + /** + * NOT EQUALS op code for Attribute. + */ + NE +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeType.java new file mode 100644 index 00000000000..3f281c81b19 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeAttributeType.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; + +/** + *

+ * Type of a node Attribute. + *

+ * Based on this attribute expressions and values will be evaluated. + */ +@Public +@Unstable +public enum NodeAttributeType { + /** string type node attribute. */ + STRING +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeReport.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeReport.java index 3a80641bb6d..625ad234081 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeReport.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeReport.java @@ -258,4 +258,17 @@ public NodeUpdateType getNodeUpdateType() { * Set the node update type (null indicates absent node update type). * */ public void setNodeUpdateType(NodeUpdateType nodeUpdateType) {} + + /** + * Set the node attributes of node. + * + * @param nodeAttributes set of node attributes. + */ + public abstract void setNodeAttributes(Set nodeAttributes); + + /** + * Get node attributes of node. + * @return the set of node attributes. + */ + public abstract Set getNodeAttributes(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeToAttributeValue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeToAttributeValue.java new file mode 100644 index 00000000000..0bcb8b68b41 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/NodeToAttributeValue.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * Mapping of Attribute Value to a Node. + *

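A small sketch (illustrative only, not part of this patch): a NodeToAttributeValue simply pairs a hostname with the value an attribute holds on that host, as returned inside GetAttributesToNodesResponse. The hostname and value are made up.

    NodeToAttributeValue value =
        NodeToAttributeValue.newInstance("host1", "JDK1.8");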
+ */ +@Public +@Unstable +public abstract class NodeToAttributeValue { + public static NodeToAttributeValue newInstance(String hostname, + String attributeValue) { + NodeToAttributeValue nodeToAttributeValue = + Records.newRecord(NodeToAttributeValue.class); + nodeToAttributeValue.setAttributeValue(attributeValue); + nodeToAttributeValue.setHostname(hostname); + return nodeToAttributeValue; + } + + @Public + @Unstable + public abstract String getAttributeValue(); + + @Public + @Unstable + public abstract void setAttributeValue(String attributeValue); + + @Public + @Unstable + public abstract String getHostname(); + + @Public + @Unstable + public abstract void setHostname(String hostname); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java index 173d4c9e777..7740354c70b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java @@ -76,17 +76,6 @@ @Private public static final int VCORES_INDEX = 1; - /** - * Return a new {@link Resource} instance with all resource values - * initialized to {@code value}. - * @param value the value to use for all resources - * @return a new {@link Resource} instance - */ - @Private - @Unstable - public static Resource newInstance(long value) { - return new LightWeightResource(value); - } @Public @Stable @@ -268,18 +257,15 @@ public void setMemorySize(long memory) { * * @param resource name of the resource * @return the ResourceInformation object for the resource - * @throws ResourceNotFoundException if the resource can't be found */ @Public @InterfaceStability.Unstable - public ResourceInformation getResourceInformation(String resource) - throws ResourceNotFoundException { + public ResourceInformation getResourceInformation(String resource) { Integer index = ResourceUtils.getResourceTypeIndex().get(resource); if (index != null) { return resources[index]; } - throw new ResourceNotFoundException("Unknown resource '" + resource - + "'. 
Known resources are " + Arrays.toString(resources)); + throw new ResourceNotFoundException(this, resource); } /** @@ -310,12 +296,10 @@ public ResourceInformation getResourceInformation(int index) * * @param resource name of the resource * @return the value for the resource - * @throws ResourceNotFoundException if the resource can't be found */ @Public @InterfaceStability.Unstable - public long getResourceValue(String resource) - throws ResourceNotFoundException { + public long getResourceValue(String resource) { return getResourceInformation(resource).getValue(); } @@ -324,13 +308,11 @@ public long getResourceValue(String resource) * * @param resource the resource for which the ResourceInformation is provided * @param resourceInformation ResourceInformation object - * @throws ResourceNotFoundException if the resource is not found */ @Public @InterfaceStability.Unstable public void setResourceInformation(String resource, - ResourceInformation resourceInformation) - throws ResourceNotFoundException { + ResourceInformation resourceInformation) { if (resource.equals(ResourceInformation.MEMORY_URI)) { this.setMemorySize(resourceInformation.getValue()); return; @@ -359,8 +341,7 @@ public void setResourceInformation(int index, ResourceInformation resourceInformation) throws ResourceNotFoundException { if (index < 0 || index >= resources.length) { - throw new ResourceNotFoundException("Unknown resource at index '" + index - + "'. Valid resources are " + Arrays.toString(resources)); + throwExceptionWhenArrayOutOfBound(index); } ResourceInformation.copy(resourceInformation, resources[index]); } @@ -371,12 +352,10 @@ public void setResourceInformation(int index, * * @param resource the resource for which the value is provided. * @param value the value to set - * @throws ResourceNotFoundException if the resource is not found */ @Public @InterfaceStability.Unstable - public void setResourceValue(String resource, long value) - throws ResourceNotFoundException { + public void setResourceValue(String resource, long value) { if (resource.equals(ResourceInformation.MEMORY_URI)) { this.setMemorySize(value); return; @@ -418,7 +397,7 @@ public void setResourceValue(int index, long value) protected void throwExceptionWhenArrayOutOfBound(int index) { String exceptionMsg = String.format( "Trying to access ResourceInformation for given index=%d. 
" - + "Acceptable index range is [0,%d), please check double check " + + "Acceptable index range is [0,%d), please double check " + "configured resources in resource-types.xml", index, ResourceUtils.getNumberOfKnownResourceTypes()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java index 904ff4b4083..c83c3a22fda 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java @@ -202,7 +202,7 @@ public static ResourceInformation newInstance(String name, String units, ResourceInformation ret = new ResourceInformation(); ret.setName(name); ret.setResourceType(type); - ret.setUnits(units); + ret.setUnitsWithoutValidation(units); ret.setValue(value); ret.setMinimumAllocation(minimumAllocation); ret.setMaximumAllocation(maximumAllocation); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java index 77f77f312ce..02afe50cc23 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java @@ -64,22 +64,6 @@ private ResourceInformation memoryResInfo; private ResourceInformation vcoresResInfo; - /** - * Create a new {@link LightWeightResource} instance with all resource values - * initialized to {@code value}. 
- * @param value the value to use for all resources - */ - public LightWeightResource(long value) { - ResourceInformation[] types = ResourceUtils.getResourceTypesArray(); - initResourceInformations(value, value, types.length); - - for (int i = 2; i < types.length; i++) { - resources[i] = new ResourceInformation(); - ResourceInformation.copy(types[i], resources[i]); - resources[i].setValue(value); - } - } - public LightWeightResource(long memory, int vcores) { int numberOfKnownResourceTypes = ResourceUtils .getNumberOfKnownResourceTypes(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java index 0fe8273e6d7..79196fbf851 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java @@ -29,6 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.records.NodeAttributeOpCode; /** * {@code PlacementConstraint} represents a placement constraint for a resource @@ -155,13 +156,22 @@ public String toString() { private int minCardinality; private int maxCardinality; private Set targetExpressions; + private NodeAttributeOpCode attributeOpCode; public SingleConstraint(String scope, int minCardinality, - int maxCardinality, Set targetExpressions) { + int maxCardinality, NodeAttributeOpCode opCode, + Set targetExpressions) { this.scope = scope; this.minCardinality = minCardinality; this.maxCardinality = maxCardinality; this.targetExpressions = targetExpressions; + this.attributeOpCode = opCode; + } + + public SingleConstraint(String scope, int minCardinality, + int maxCardinality, Set targetExpressions) { + this(scope, minCardinality, maxCardinality, NodeAttributeOpCode.NO_OP, + targetExpressions); } public SingleConstraint(String scope, int minC, int maxC, @@ -169,6 +179,13 @@ public SingleConstraint(String scope, int minC, int maxC, this(scope, minC, maxC, new HashSet<>(Arrays.asList(targetExpressions))); } + public SingleConstraint(String scope, int minC, int maxC, + NodeAttributeOpCode opCode, + TargetExpression... targetExpressions) { + this(scope, minC, maxC, opCode, + new HashSet<>(Arrays.asList(targetExpressions))); + } + /** * Get the scope of the constraint. * @@ -205,6 +222,15 @@ public int getMaxCardinality() { return targetExpressions; } + /** + * Get the NodeAttributeOpCode of the constraint. 
+ * + * @return nodeAttribute Op Code + */ + public NodeAttributeOpCode getNodeAttributeOpCode() { + return attributeOpCode; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -225,6 +251,10 @@ public boolean equals(Object o) { if (!getScope().equals(that.getScope())) { return false; } + if (getNodeAttributeOpCode() != null && !getNodeAttributeOpCode() + .equals(that.getNodeAttributeOpCode())) { + return false; + } return getTargetExpressions().equals(that.getTargetExpressions()); } @@ -233,6 +263,7 @@ public int hashCode() { int result = getScope().hashCode(); result = 31 * result + getMinCardinality(); result = 31 * result + getMaxCardinality(); + result = 31 * result + getNodeAttributeOpCode().hashCode(); result = 31 * result + getTargetExpressions().hashCode(); return result; } @@ -259,6 +290,13 @@ public String toString() { .append(getScope()).append(",") .append(targetExpr) .toString()); + } else if (min == -1 && max == -1) { + // node attribute + targetConstraints.add(new StringBuilder() + .append(getScope()).append(",") + .append(getNodeAttributeOpCode()).append(",") + .append(targetExpr) + .toString()); } else { // cardinality targetConstraints.add(new StringBuilder() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraints.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraints.java index d22a6bd90c0..73fa328833f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraints.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraints.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.AllocationTagNamespaceType; +import org.apache.hadoop.yarn.api.records.NodeAttributeOpCode; import org.apache.hadoop.yarn.api.resource.PlacementConstraint.AbstractConstraint; import org.apache.hadoop.yarn.api.resource.PlacementConstraint.And; import org.apache.hadoop.yarn.api.resource.PlacementConstraint.DelayedOr; @@ -85,6 +86,24 @@ public static AbstractConstraint targetNotIn(String scope, return new SingleConstraint(scope, 0, 0, targetExpressions); } + /** + * Creates a constraint that requires allocations to be placed on nodes that + * belong to a scope (e.g., node or rack) that satisfy any of the + * target expressions based on node attribute op code. + * + * @param scope the scope within which the target expressions should not be + * true + * @param opCode Node Attribute code which could be equals, not equals. + * @param targetExpressions the expressions that need to not be true within + * the scope + * @return the resulting placement constraint + */ + public static AbstractConstraint targetNodeAttribute(String scope, + NodeAttributeOpCode opCode, + TargetExpression... targetExpressions) { + return new SingleConstraint(scope, -1, -1, opCode, targetExpressions); + } + /** * Creates a constraint that restricts the number of allocations within a * given scope (e.g., node or rack). 
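As a usage sketch (illustrative only, not part of this patch), the new op-code-aware SingleConstraint constructor is reachable through targetNodeAttribute above. The PlacementTargets.nodeAttribute factory used to build the TargetExpression is not part of this hunk and is assumed to exist alongside it.

    import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.NODE;
    import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets.nodeAttribute;
    import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetNodeAttribute;
    import org.apache.hadoop.yarn.api.records.NodeAttributeOpCode;
    import org.apache.hadoop.yarn.api.resource.PlacementConstraint;
    import org.apache.hadoop.yarn.api.resource.PlacementConstraints;

    // Request placement only on nodes whose "java" attribute equals "1.8".
    PlacementConstraint onJava18 = PlacementConstraints.build(
        targetNodeAttribute(NODE, NodeAttributeOpCode.EQ, nodeAttribute("java", "1.8")));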
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 9156c2dff05..a82801d620e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -167,7 +167,7 @@ private static void addDeprecatedKeys() { public static final String DEFAULT_IPC_SERVER_FACTORY_CLASS = "org.apache.hadoop.yarn.factories.impl.pb.RpcServerFactoryPBImpl"; - /** Factory to create serializeable records.*/ + /** Factory to create serializable records.*/ public static final String IPC_RECORD_FACTORY_CLASS = IPC_PREFIX + "record.factory.class"; public static final String DEFAULT_IPC_RECORD_FACTORY_CLASS = @@ -1151,7 +1151,7 @@ public static boolean isAclEnabled(Configuration conf) { public static final String DEFAULT_NM_ADDRESS = "0.0.0.0:" + DEFAULT_NM_PORT; - /** The actual bind address or the NM.*/ + /** The actual bind address for the NM.*/ public static final String NM_BIND_HOST = NM_PREFIX + "bind-host"; @@ -1216,7 +1216,7 @@ public static boolean isAclEnabled(Configuration conf) { NM_PREFIX + "collector-service.address"; public static final int DEFAULT_NM_COLLECTOR_SERVICE_PORT = 8048; public static final String DEFAULT_NM_COLLECTOR_SERVICE_ADDRESS = - "0.0.0.0:" + DEFAULT_NM_LOCALIZER_PORT; + "0.0.0.0:" + DEFAULT_NM_COLLECTOR_SERVICE_PORT; /** Interval in between cache cleanups.*/ public static final String NM_LOCALIZER_CACHE_CLEANUP_INTERVAL_MS = @@ -1865,7 +1865,7 @@ public static boolean isAclEnabled(Configuration conf) { /** * Comma separated list of runtimes that are allowed when using - * LinuxContainerExecutor. The allowed values are: + * LinuxContainerExecutor. The standard values are: *
 * <ul>
 *   <li>default</li>
 *   <li>docker</li>
  • @@ -1875,6 +1875,9 @@ public static boolean isAclEnabled(Configuration conf) { public static final String LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES = LINUX_CONTAINER_RUNTIME_PREFIX + "allowed-runtimes"; + public static final String LINUX_CONTAINER_RUNTIME_CLASS_FMT = + LINUX_CONTAINER_RUNTIME_PREFIX + "%s.class"; + /** The default list of allowed runtimes when using LinuxContainerExecutor. */ public static final String[] DEFAULT_LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES = {"default"}; @@ -1992,7 +1995,10 @@ public static boolean isAclEnabled(Configuration conf) { * A configurable value to pass to the Docker Stop command. This value * defines the number of seconds between the docker stop command sending * a SIGTERM and a SIGKILL. + * + * @deprecated use {@link YarnConfiguration#NM_SLEEP_DELAY_BEFORE_SIGKILL_MS} */ + @Deprecated public static final String NM_DOCKER_STOP_GRACE_PERIOD = DOCKER_CONTAINER_RUNTIME_PREFIX + "stop.grace-period"; @@ -2000,6 +2006,7 @@ public static boolean isAclEnabled(Configuration conf) { * The default value for the grace period between the SIGTERM and the * SIGKILL in the Docker Stop command. */ + @Deprecated public static final int DEFAULT_NM_DOCKER_STOP_GRACE_PERIOD = 10; /** The default list of read-only mounts to be bind-mounted into all @@ -2012,6 +2019,11 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_DOCKER_DEFAULT_RW_MOUNTS = DOCKER_CONTAINER_RUNTIME_PREFIX + "default-rw-mounts"; + /** The default list of tmpfs mounts to be mounted into all + * Docker containers that use DockerContainerRuntime. */ + public static final String NM_DOCKER_DEFAULT_TMPFS_MOUNTS = + DOCKER_CONTAINER_RUNTIME_PREFIX + "default-tmpfs-mounts"; + /** The mode in which the Java Container Sandbox should run detailed by * the JavaSandboxLinuxContainerRuntime. */ public static final String YARN_CONTAINER_SANDBOX = @@ -2248,6 +2260,9 @@ public static boolean isAclEnabled(Configuration conf) { public static final String YARN_SECURITY_SERVICE_AUTHORIZATION_APPLICATIONMASTER_PROTOCOL = "security.applicationmaster.protocol.acl"; + public static final String + YARN_SECURITY_SERVICE_AUTHORIZATION_DISTRIBUTEDSCHEDULING_PROTOCOL = + "security.distributedscheduling.protocol.acl"; public static final String YARN_SECURITY_SERVICE_AUTHORIZATION_CONTAINER_MANAGEMENT_PROTOCOL = @@ -2264,6 +2279,10 @@ public static boolean isAclEnabled(Configuration conf) { YARN_SECURITY_SERVICE_AUTHORIZATION_COLLECTOR_NODEMANAGER_PROTOCOL = "security.collector-nodemanager.protocol.acl"; + public static final String + YARN_SECURITY_SERVICE_AUTHORIZATION_APPLICATIONMASTER_NODEMANAGER_PROTOCOL = + "security.applicationmaster-nodemanager.applicationmaster.protocol.acl"; + /** No. of milliseconds to wait between sending a SIGTERM and SIGKILL * to a running container */ public static final String NM_SLEEP_DELAY_BEFORE_SIGKILL_MS = @@ -3202,8 +3221,14 @@ public static boolean isAclEnabled(Configuration conf) { "org.apache.hadoop.yarn.server.federation.resolver." + "DefaultSubClusterResolverImpl"; - public static final String DEFAULT_FEDERATION_POLICY_KEY = "*"; + // AMRMProxy split-merge timeout for active sub-clusters. We will not route + // new asks to expired sub-clusters. 
+ public static final String FEDERATION_AMRMPROXY_SUBCLUSTER_TIMEOUT = + FEDERATION_PREFIX + "amrmproxy.subcluster.timeout.ms"; + public static final long DEFAULT_FEDERATION_AMRMPROXY_SUBCLUSTER_TIMEOUT = + 60000; // one minute + public static final String DEFAULT_FEDERATION_POLICY_KEY = "*"; public static final String FEDERATION_POLICY_MANAGER = FEDERATION_PREFIX + "policy-manager"; @@ -3294,6 +3319,11 @@ public static boolean isAclEnabled(Configuration conf) { public static final String ROUTER_WEBAPP_PREFIX = ROUTER_PREFIX + "webapp."; + public static final String ROUTER_USER_CLIENT_THREADS_SIZE = + ROUTER_PREFIX + "interceptor.user.threadpool-size"; + + public static final int DEFAULT_ROUTER_USER_CLIENT_THREADS_SIZE = 5; + /** The address of the Router web application. */ public static final String ROUTER_WEBAPP_ADDRESS = ROUTER_WEBAPP_PREFIX + "address"; @@ -3450,6 +3480,22 @@ public static boolean isAclEnabled(Configuration conf) { public static final String FS_NODE_LABELS_STORE_ROOT_DIR = NODE_LABELS_PREFIX + "fs-store.root-dir"; + /** + * Node-attribute configurations. + */ + public static final String NODE_ATTRIBUTE_PREFIX = + YARN_PREFIX + "node-attribute."; + /** + * Node attribute store implementation class. + */ + public static final String FS_NODE_ATTRIBUTE_STORE_IMPL_CLASS = + NODE_ATTRIBUTE_PREFIX + "fs-store.impl.class"; + /** + * File system node attribute store directory. + */ + public static final String FS_NODE_ATTRIBUTE_STORE_ROOT_DIR = + NODE_ATTRIBUTE_PREFIX + "fs-store.root-dir"; + /** * Flag to indicate if the node labels feature enabled, by default it's * disabled @@ -3512,16 +3558,25 @@ public static boolean areNodeLabelsEnabled( private static final String NM_NODE_LABELS_PREFIX = NM_PREFIX + "node-labels."; + private static final String NM_NODE_ATTRIBUTES_PREFIX = NM_PREFIX + + "node-attributes."; + public static final String NM_NODE_LABELS_PROVIDER_CONFIG = NM_NODE_LABELS_PREFIX + "provider"; + public static final String NM_NODE_ATTRIBUTES_PROVIDER_CONFIG = + NM_NODE_ATTRIBUTES_PREFIX + "provider"; + // whitelist names for the yarn.nodemanager.node-labels.provider - public static final String CONFIG_NODE_LABELS_PROVIDER = "config"; - public static final String SCRIPT_NODE_LABELS_PROVIDER = "script"; + public static final String CONFIG_NODE_DESCRIPTOR_PROVIDER = "config"; + public static final String SCRIPT_NODE_DESCRIPTOR_PROVIDER = "script"; private static final String NM_NODE_LABELS_PROVIDER_PREFIX = NM_NODE_LABELS_PREFIX + "provider."; + private static final String NM_NODE_ATTRIBUTES_PROVIDER_PREFIX = + NM_NODE_ATTRIBUTES_PREFIX + "provider."; + public static final String NM_NODE_LABELS_RESYNC_INTERVAL = NM_NODE_LABELS_PREFIX + "resync-interval-ms"; @@ -3546,6 +3601,9 @@ public static boolean areNodeLabelsEnabled( public static final String NM_PROVIDER_CONFIGURED_NODE_PARTITION = NM_NODE_LABELS_PROVIDER_PREFIX + "configured-node-partition"; + public static final String NM_PROVIDER_CONFIGURED_NODE_ATTRIBUTES = + NM_NODE_ATTRIBUTES_PROVIDER_PREFIX + "configured-node-attributes"; + private static final String RM_NODE_LABELS_PREFIX = RM_PREFIX + "node-labels."; @@ -3593,6 +3651,33 @@ public static boolean areNodeLabelsEnabled( NM_SCRIPT_BASED_NODE_LABELS_PROVIDER_PREFIX + "opts"; /** + * Node attribute provider fetch attributes interval and timeout. 
+ */ + public static final String NM_NODE_ATTRIBUTES_PROVIDER_FETCH_INTERVAL_MS = + NM_NODE_ATTRIBUTES_PROVIDER_PREFIX + "fetch-interval-ms"; + + public static final long + DEFAULT_NM_NODE_ATTRIBUTES_PROVIDER_FETCH_INTERVAL_MS = 10 * 60 * 1000; + + public static final String NM_NODE_ATTRIBUTES_PROVIDER_FETCH_TIMEOUT_MS = + NM_NODE_ATTRIBUTES_PROVIDER_PREFIX + "fetch-timeout-ms"; + + public static final long DEFAULT_NM_NODE_ATTRIBUTES_PROVIDER_FETCH_TIMEOUT_MS + = DEFAULT_NM_NODE_ATTRIBUTES_PROVIDER_FETCH_INTERVAL_MS * 2; + + /** + * Script to collect node attributes. + */ + private static final String NM_SCRIPT_BASED_NODE_ATTRIBUTES_PROVIDER_PREFIX = + NM_NODE_ATTRIBUTES_PROVIDER_PREFIX + "script."; + + public static final String NM_SCRIPT_BASED_NODE_ATTRIBUTES_PROVIDER_PATH = + NM_SCRIPT_BASED_NODE_ATTRIBUTES_PROVIDER_PREFIX + "path"; + + public static final String NM_SCRIPT_BASED_NODE_ATTRIBUTES_PROVIDER_OPTS = + NM_SCRIPT_BASED_NODE_ATTRIBUTES_PROVIDER_PREFIX + "opts"; + + /* * Support to view apps for given user in secure cluster. * @deprecated This field is deprecated for {@link #FILTER_ENTITY_LIST_BY_USER} */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidResourceRequestException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidResourceRequestException.java index f4fd2fa38a1..1ea9eefd87f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidResourceRequestException.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidResourceRequestException.java @@ -30,19 +30,55 @@ * */ public class InvalidResourceRequestException extends YarnException { + public static final String LESS_THAN_ZERO_RESOURCE_MESSAGE_TEMPLATE = + "Invalid resource request! Cannot allocate containers as " + + "requested resource is less than 0! " + + "Requested resource type=[%s], " + "Requested resource=%s"; + + public static final String GREATER_THAN_MAX_RESOURCE_MESSAGE_TEMPLATE = + "Invalid resource request! Cannot allocate containers as " + + "requested resource is greater than " + + "maximum allowed allocation. " + + "Requested resource type=[%s], " + + "Requested resource=%s, maximum allowed allocation=%s, " + + "please note that maximum allowed allocation is calculated " + + "by scheduler based on maximum resource of registered " + + "NodeManagers, which might be less than configured " + + "maximum allocation=%s"; + + public static final String UNKNOWN_REASON_MESSAGE_TEMPLATE = + "Invalid resource request! " + + "Cannot allocate containers for an unknown reason! 
" + + "Requested resource type=[%s], Requested resource=%s"; + + public enum InvalidResourceType { + LESS_THAN_ZERO, GREATER_THEN_MAX_ALLOCATION, UNKNOWN; + } private static final long serialVersionUID = 13498237L; + private final InvalidResourceType invalidResourceType; public InvalidResourceRequestException(Throwable cause) { super(cause); + this.invalidResourceType = InvalidResourceType.UNKNOWN; } public InvalidResourceRequestException(String message) { + this(message, InvalidResourceType.UNKNOWN); + } + + public InvalidResourceRequestException(String message, + InvalidResourceType invalidResourceType) { super(message); + this.invalidResourceType = invalidResourceType; } public InvalidResourceRequestException(String message, Throwable cause) { super(message, cause); + this.invalidResourceType = InvalidResourceType.UNKNOWN; } + public InvalidResourceType getInvalidResourceType() { + return invalidResourceType; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java index b5fece7dc8c..3fddcff6c3d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java @@ -18,8 +18,10 @@ package org.apache.hadoop.yarn.exceptions; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.Resource; /** * This exception is thrown when details of an unknown resource type @@ -28,18 +30,31 @@ @InterfaceAudience.Public @InterfaceStability.Unstable public class ResourceNotFoundException extends YarnRuntimeException { - private static final long serialVersionUID = 10081982L; + private static final String MESSAGE = "The resource manager encountered a " + + "problem that should not occur under normal circumstances. " + + "Please report this error to the Hadoop community by opening a " + + "JIRA ticket at http://issues.apache.org/jira and including the " + + "following information:%n* Resource type requested: %s%n* Resource " + + "object: %s%n* The stack trace for this exception: %s%n" + + "After encountering this error, the resource manager is " + + "in an inconsistent state. It is safe for the resource manager " + + "to be restarted as the error encountered should be transitive. 
" + + "If high availability is enabled, failing over to " + + "a standby resource manager is also safe."; - public ResourceNotFoundException(String message) { - super(message); + public ResourceNotFoundException(Resource resource, String type) { + this(String.format(MESSAGE, type, resource, + ExceptionUtils.getStackTrace(new Exception()))); } - public ResourceNotFoundException(Throwable cause) { - super(cause); + public ResourceNotFoundException(Resource resource, String type, + Throwable cause) { + super(String.format(MESSAGE, type, resource, + ExceptionUtils.getStackTrace(cause)), cause); } - public ResourceNotFoundException(String message, Throwable cause) { - super(message, cause); + public ResourceNotFoundException(String message) { + super(message); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java index 852334245ce..4777cf8b62a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java @@ -30,6 +30,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodesToAttributesMappingRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodesToAttributesMappingResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.CheckForDecommissioningNodesRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.CheckForDecommissioningNodesResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest; @@ -37,6 +39,8 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshClusterMaxPriorityRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshClusterMaxPriorityResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesResponse; @@ -52,8 +56,6 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; -import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest; -import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse; @Private public interface ResourceManagerAdministrationProtocol extends GetUserMappingsProtocol { @@ -144,4 +146,11 @@ public CheckForDecommissioningNodesResponse checkForDecommissioningNodes( public RefreshClusterMaxPriorityResponse refreshClusterMaxPriority( RefreshClusterMaxPriorityRequest request) throws 
YarnException, IOException; + + + @Private + @Idempotent + NodesToAttributesMappingResponse mapAttributesToNodes( + NodesToAttributesMappingRequest request) throws YarnException, + IOException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AttributeMappingOperationType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AttributeMappingOperationType.java new file mode 100644 index 00000000000..5de15040503 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AttributeMappingOperationType.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; + +/** + *

    + * Type of node to attribute mapping operation. + *

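As a usage sketch (illustrative only, not part of this patch) of how these operation types combine with the mapping records added further below: the dropped generic parameters are assumed to be List of NodeToAttributes and List of NodeAttribute, the hostname is made up, adminProtocol stands for a ResourceManagerAdministrationProtocol proxy obtained elsewhere, and java.util imports are omitted.

    List<NodeToAttributes> mappings = Collections.singletonList(
        NodeToAttributes.newInstance("host1",
            Collections.singletonList(NodeAttribute.newInstance(
                "OS_TYPE", NodeAttributeType.STRING, "WINDOWS"))));
    NodesToAttributesMappingRequest request = NodesToAttributesMappingRequest
        .newInstance(AttributeMappingOperationType.REPLACE, mappings, true);
    // failOnUnknownNodes=true is expected to reject hostnames the RM does not know about.
    adminProtocol.mapAttributesToNodes(request);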
    + * + */ +@Public +@Unstable +public enum AttributeMappingOperationType { + /** Replaces the existing node to attribute mapping with new mapping.*/ + REPLACE, + + /** Add attribute(s) to a node and if it already exists will update the + * value.*/ + ADD, + + /** Removes attribute(s) mapped to a node. */ + REMOVE +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeToAttributes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeToAttributes.java new file mode 100644 index 00000000000..b2e38b4490f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeToAttributes.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.protocolrecords; + +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.records.NodeAttribute; +import org.apache.hadoop.yarn.util.Records; + +/** + * Represents a mapping of Node id to list of attributes. + */ +@Public +@Unstable +public abstract class NodeToAttributes { + + public static NodeToAttributes newInstance(String node, + List attributes) { + NodeToAttributes nodeIdToAttributes = + Records.newRecord(NodeToAttributes.class); + nodeIdToAttributes.setNode(node); + nodeIdToAttributes.setNodeAttributes(attributes); + return nodeIdToAttributes; + } + + @Public + @Unstable + public abstract String getNode(); + + @Public + @Unstable + public abstract void setNode(String node); + + @Public + @Unstable + public abstract List getNodeAttributes(); + + @Public + @Unstable + public abstract void setNodeAttributes(List attributes); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodesToAttributesMappingRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodesToAttributesMappingRequest.java new file mode 100644 index 00000000000..71421ed6665 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodesToAttributesMappingRequest.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.protocolrecords; + +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.util.Records; + +/** + * list of node-attribute mapping request info. + */ +@Public +@Unstable +public abstract class NodesToAttributesMappingRequest { + + public static NodesToAttributesMappingRequest newInstance( + AttributeMappingOperationType operation, + List nodesToAttributes, boolean failOnUnknownNodes) { + NodesToAttributesMappingRequest request = + Records.newRecord(NodesToAttributesMappingRequest.class); + request.setNodesToAttributes(nodesToAttributes); + request.setFailOnUnknownNodes(failOnUnknownNodes); + request.setOperation(operation); + return request; + } + + @Public + @Unstable + public abstract void setNodesToAttributes( + List nodesToAttributes); + + @Public + @Unstable + public abstract List getNodesToAttributes(); + + @Public + @Unstable + public abstract void setFailOnUnknownNodes(boolean failOnUnknownNodes); + + @Public + @Unstable + public abstract boolean getFailOnUnknownNodes(); + + @Public + @Unstable + public abstract void setOperation(AttributeMappingOperationType operation); + + @Public + @Unstable + public abstract AttributeMappingOperationType getOperation(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodesToAttributesMappingResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodesToAttributesMappingResponse.java new file mode 100644 index 00000000000..10081e1f928 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodesToAttributesMappingResponse.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.protocolrecords; + +import org.apache.hadoop.yarn.util.Records; + +/** + * NodesToAttributesMappingResponse holds response object for attribute + * mapping. 
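For orientation, a minimal usage sketch (not part of this patch) of the request records defined above, as an admin client might assemble them for the new ResourceManagerAdministrationProtocol#mapAttributesToNodes call. NodeToAttributes.newInstance and NodesToAttributesMappingRequest.newInstance are the factories shown in this hunk; NodeAttribute.newInstance, NodeAttributeType and the PREFIX_CENTRALIZED constant come from node-attribute records added elsewhere in this feature and are assumptions here.

import java.util.Arrays;

import org.apache.hadoop.yarn.api.records.NodeAttribute;
import org.apache.hadoop.yarn.api.records.NodeAttributeType;
import org.apache.hadoop.yarn.server.api.protocolrecords.AttributeMappingOperationType;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeToAttributes;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodesToAttributesMappingRequest;

public class NodeAttributeMappingSketch {
  /** Replace whatever is currently mapped to host1 with a single java=1.8 attribute. */
  static NodesToAttributesMappingRequest buildReplaceRequest() {
    // Assumed factory and constant from the NodeAttribute record of this feature.
    NodeAttribute java = NodeAttribute.newInstance(
        NodeAttribute.PREFIX_CENTRALIZED, "java", NodeAttributeType.STRING, "1.8");
    NodeToAttributes hostMapping =
        NodeToAttributes.newInstance("host1", Arrays.asList(java));
    // failOnUnknownNodes=true asks the RM to reject the call if host1 is not a
    // registered node; ADD or REMOVE could be passed instead of REPLACE.
    return NodesToAttributesMappingRequest.newInstance(
        AttributeMappingOperationType.REPLACE, Arrays.asList(hostMapping), true);
  }
}

The returned record is exactly the argument type that the mapAttributesToNodes RPC declared at the top of this patch expects.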
+ */ +public class NodesToAttributesMappingResponse { + public static NodesToAttributesMappingResponse newInstance() { + return Records.newRecord(NodesToAttributesMappingResponse.class); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/constraint/PlacementConstraintParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/constraint/PlacementConstraintParser.java index 2926c9d1de8..de9419ae619 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/constraint/PlacementConstraintParser.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/constraint/PlacementConstraintParser.java @@ -17,8 +17,10 @@ */ package org.apache.hadoop.yarn.util.constraint; +import com.google.common.base.Strings; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.NodeAttributeOpCode; import org.apache.hadoop.yarn.api.resource.PlacementConstraint; import org.apache.hadoop.yarn.api.resource.PlacementConstraint.AbstractConstraint; import org.apache.hadoop.yarn.api.resource.PlacementConstraints; @@ -44,11 +46,12 @@ @InterfaceStability.Unstable public final class PlacementConstraintParser { + public static final char EXPRESSION_VAL_DELIM = ','; private static final char EXPRESSION_DELIM = ':'; private static final char KV_SPLIT_DELIM = '='; - private static final char EXPRESSION_VAL_DELIM = ','; private static final char BRACKET_START = '('; private static final char BRACKET_END = ')'; + private static final String KV_NE_DELIM = "!="; private static final String IN = "in"; private static final String NOT_IN = "notin"; private static final String AND = "and"; @@ -349,6 +352,91 @@ public String nextElement() { } } + /** + * Constraint parser used to parse a given target expression. + */ + public static class NodeConstraintParser extends ConstraintParser { + + public NodeConstraintParser(String expression) { + super(new BaseStringTokenizer(expression, + String.valueOf(EXPRESSION_VAL_DELIM))); + } + + @Override + public AbstractConstraint parse() + throws PlacementConstraintParseException { + PlacementConstraint.AbstractConstraint placementConstraints = null; + String attributeName = ""; + NodeAttributeOpCode opCode = NodeAttributeOpCode.EQ; + String scope = SCOPE_NODE; + + Set constraintEntities = new TreeSet<>(); + while (hasMoreTokens()) { + String currentTag = nextToken(); + StringTokenizer attributeKV = getAttributeOpCodeTokenizer(currentTag); + + // Usually there will be only one k=v pair. However in case when + // multiple values are present for same attribute, it will also be + // coming as next token. for example, java=1.8,1.9 or python!=2. 
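To make the comment above concrete, a hedged, stand-alone sketch of the two expression shapes it mentions; parseExpression is the existing public entry point that is extended later in this file, and the comments describe the constraints this parser is intended to produce.

import org.apache.hadoop.yarn.api.resource.PlacementConstraint.AbstractConstraint;
import org.apache.hadoop.yarn.util.constraint.PlacementConstraintParseException;
import org.apache.hadoop.yarn.util.constraint.PlacementConstraintParser;

public class NodeAttributeExpressionSketch {
  public static void main(String[] args) throws PlacementConstraintParseException {
    // Multiple values for one attribute: nodes whose "java" attribute is 1.8 or 1.9.
    AbstractConstraint multiValue =
        PlacementConstraintParser.parseExpression("java=1.8,1.9");
    // Negation: nodes whose "python" attribute is anything but 2.
    AbstractConstraint notEqual =
        PlacementConstraintParser.parseExpression("python!=2");
    System.out.println(multiValue + " / " + notEqual);
  }
}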
+ if (attributeKV.countTokens() > 1) { + opCode = getAttributeOpCode(currentTag); + attributeName = attributeKV.nextToken(); + currentTag = attributeKV.nextToken(); + } + constraintEntities.add(currentTag); + } + + if(attributeName.isEmpty()) { + throw new PlacementConstraintParseException( + "expecting valid expression like k=v or k!=v, but get " + + constraintEntities); + } + + PlacementConstraint.TargetExpression target = null; + if (!constraintEntities.isEmpty()) { + target = PlacementConstraints.PlacementTargets + .nodeAttribute(attributeName, + constraintEntities + .toArray(new String[constraintEntities.size()])); + } + + placementConstraints = PlacementConstraints + .targetNodeAttribute(scope, opCode, target); + return placementConstraints; + } + + private StringTokenizer getAttributeOpCodeTokenizer(String currentTag) { + StringTokenizer attributeKV = new StringTokenizer(currentTag, + KV_NE_DELIM); + + // Try with '!=' delim as well. + if (attributeKV.countTokens() < 2) { + attributeKV = new StringTokenizer(currentTag, + String.valueOf(KV_SPLIT_DELIM)); + } + return attributeKV; + } + + /** + * Below conditions are validated. + * java=8 : OpCode = EQUALS + * java!=8 : OpCode = NEQUALS + * @param currentTag tag + * @return Attribute op code. + */ + private NodeAttributeOpCode getAttributeOpCode(String currentTag) + throws PlacementConstraintParseException { + if (currentTag.contains(KV_NE_DELIM)) { + return NodeAttributeOpCode.NE; + } else if (currentTag.contains(String.valueOf(KV_SPLIT_DELIM))) { + return NodeAttributeOpCode.EQ; + } + throw new PlacementConstraintParseException( + "expecting valid expression like k=v or k!=v, but get " + + currentTag); + } + } + /** * Constraint parser used to parse a given target expression, such as * "NOTIN, NODE, foo, bar". 
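An illustrative call for the expression form this javadoc names (sketch only; the types are the same AbstractConstraint and PlacementConstraintParser used throughout this file):

// "notin,node,foo,bar" becomes an anti-affinity constraint, equivalent to
// targetNotIn("node", allocationTag("foo", "bar")).
AbstractConstraint antiAffinity =
    PlacementConstraintParser.parseExpression("notin,node,foo,bar");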
@@ -363,20 +451,23 @@ public TargetConstraintParser(String expression) { @Override public AbstractConstraint parse() throws PlacementConstraintParseException { - PlacementConstraint.AbstractConstraint placementConstraints; + PlacementConstraint.AbstractConstraint placementConstraints = null; String op = nextToken(); if (op.equalsIgnoreCase(IN) || op.equalsIgnoreCase(NOT_IN)) { String scope = nextToken(); scope = parseScope(scope); - Set allocationTags = new TreeSet<>(); + Set constraintEntities = new TreeSet<>(); while(hasMoreTokens()) { String tag = nextToken(); - allocationTags.add(tag); + constraintEntities.add(tag); + } + PlacementConstraint.TargetExpression target = null; + if(!constraintEntities.isEmpty()) { + target = PlacementConstraints.PlacementTargets.allocationTag( + constraintEntities + .toArray(new String[constraintEntities.size()])); } - PlacementConstraint.TargetExpression target = - PlacementConstraints.PlacementTargets.allocationTag( - allocationTags.toArray(new String[allocationTags.size()])); if (op.equalsIgnoreCase(IN)) { placementConstraints = PlacementConstraints .targetIn(scope, target); @@ -499,6 +590,14 @@ private SourceTags(String sourceTag, int number) { this.num = number; } + public static SourceTags emptySourceTags() { + return new SourceTags("", 0); + } + + public boolean isEmpty() { + return Strings.isNullOrEmpty(tag) && num == 0; + } + public String getTag() { return this.tag; } @@ -550,6 +649,11 @@ public static AbstractConstraint parseExpression(String constraintStr) new ConjunctionConstraintParser(constraintStr); constraintOptional = Optional.ofNullable(jp.tryParse()); } + if (!constraintOptional.isPresent()) { + NodeConstraintParser np = + new NodeConstraintParser(constraintStr); + constraintOptional = Optional.ofNullable(np.tryParse()); + } if (!constraintOptional.isPresent()) { throw new PlacementConstraintParseException( "Invalid constraint expression " + constraintStr); @@ -584,32 +688,60 @@ public static AbstractConstraint parseExpression(String constraintStr) */ public static Map parsePlacementSpec( String expression) throws PlacementConstraintParseException { + // Continue handling for application tag based constraint otherwise. // Respect insertion order. Map result = new LinkedHashMap<>(); PlacementConstraintParser.ConstraintTokenizer tokenizer = new PlacementConstraintParser.MultipleConstraintsTokenizer(expression); tokenizer.validate(); - while(tokenizer.hasMoreElements()) { + while (tokenizer.hasMoreElements()) { String specStr = tokenizer.nextElement(); // each spec starts with sourceAllocationTag=numOfContainers and // followed by a constraint expression. 
// foo=4,Pn String[] splitted = specStr.split( String.valueOf(EXPRESSION_VAL_DELIM), 2); - if (splitted.length != 2) { + final SourceTags st; + final String exprs; + if (splitted.length == 1) { + // source tags not specified + exprs = splitted[0]; + st = SourceTags.emptySourceTags(); + } else if (splitted.length == 2) { + exprs = splitted[1]; + String tagAlloc = splitted[0]; + st = SourceTags.parseFrom(tagAlloc); + } else { throw new PlacementConstraintParseException( "Unexpected placement constraint expression " + specStr); } - String tagAlloc = splitted[0]; - SourceTags st = SourceTags.parseFrom(tagAlloc); - String exprs = splitted[1]; AbstractConstraint constraint = PlacementConstraintParser.parseExpression(exprs); result.put(st, constraint.build()); } + // Validation + Set sourceTagSet = result.keySet(); + if (sourceTagSet.stream() + .filter(sourceTags -> sourceTags.isEmpty()) + .findAny() + .isPresent()) { + // Source tags, e.g foo=3, is optional for a node-attribute constraint, + // but when source tags is absent, the parser only accept single + // constraint expression to avoid ambiguous semantic. This is because + // DS AM is requesting number of containers per the number specified + // in the source tags, we do overwrite when there is no source tags + // with num_containers argument from commandline. If that is partially + // missed in the constraints, we don't know if it is ought to + // overwritten or not. + if (result.size() != 1) { + throw new PlacementConstraintParseException( + "Source allocation tags is required for a multi placement" + + " constraint expression."); + } + } return result; } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java index 3dbd609b534..c2d720147b9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -527,6 +528,12 @@ private static void addResourceTypeInformation(String prop, String value, String units = getUnits(value); Long resourceValue = Long.valueOf(value.substring(0, value.length() - units.length())); + String destUnit = getDefaultUnit(resourceType); + if(!units.equals(destUnit)) { + resourceValue = UnitsConversionUtil.convert( + units, destUnit, resourceValue); + units = destUnit; + } nodeResources.get(resourceType).setValue(resourceValue); nodeResources.get(resourceType).setUnits(units); if (LOG.isDebugEnabled()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto index 81adef19335..fdd4bc5aca8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto @@ -64,4 +64,7 @@ service ApplicationClientProtocolService { 
rpc getResourceProfiles(GetAllResourceProfilesRequestProto) returns (GetAllResourceProfilesResponseProto); rpc getResourceProfile(GetResourceProfileRequestProto) returns (GetResourceProfileResponseProto); rpc getResourceTypeInfo(GetAllResourceTypeInfoRequestProto) returns (GetAllResourceTypeInfoResponseProto); + rpc getClusterNodeAttributes (GetClusterNodeAttributesRequestProto) returns (GetClusterNodeAttributesResponseProto); + rpc getAttributesToNodes (GetAttributesToNodesRequestProto) returns (GetAttributesToNodesResponseProto); + rpc getNodesToAttributes (GetNodesToAttributesRequestProto) returns (GetNodesToAttributesResponseProto); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto index 113462305cd..032aa8e67b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto @@ -45,4 +45,5 @@ service ResourceManagerAdministrationProtocolService { rpc replaceLabelsOnNodes(ReplaceLabelsOnNodeRequestProto) returns (ReplaceLabelsOnNodeResponseProto); rpc checkForDecommissioningNodes(CheckForDecommissioningNodesRequestProto) returns (CheckForDecommissioningNodesResponseProto); rpc refreshClusterMaxPriority(RefreshClusterMaxPriorityRequestProto) returns (RefreshClusterMaxPriorityResponseProto); + rpc mapAttributesToNodes(NodesToAttributesMappingRequestProto) returns (NodesToAttributesMappingResponseProto); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto index e8c92d962f3..d37e36a1878 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto @@ -130,6 +130,22 @@ enum DecommissionTypeProto { GRACEFUL = 2; FORCEFUL = 3; } + + +enum AttributeMappingOperationTypeProto { + REPLACE = 1; + ADD = 2; + REMOVE = 3; +} + +message NodesToAttributesMappingRequestProto { + optional AttributeMappingOperationTypeProto operation = 1 [default = REPLACE]; + repeated NodeToAttributesProto nodeToAttributes = 2; + optional bool failOnUnknownNodes = 3; +} + +message NodesToAttributesMappingResponseProto { +} ////////////////////////////////////////////////////////////////// ///////////// RM Failover related records //////////////////////// ////////////////////////////////////////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index d6138e865ff..5fe2cc94550 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -355,6 +355,7 @@ message NodeReportProto { optional ResourceUtilizationProto node_utilization = 12; optional uint32 decommissioning_timeout = 13; optional NodeUpdateTypeProto node_update_type = 14; + repeated NodeAttributeProto node_attributes = 15; } 
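Since NodeReportProto now carries node_attributes, a client that already pulls node reports could surface them roughly as sketched below. NodeReport#getNodeAttributes is assumed to be the Java accessor that accompanies this proto field; it is not part of this hunk.

import java.util.List;
import java.util.Set;

import org.apache.hadoop.yarn.api.records.NodeAttribute;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class PrintNodeAttributesSketch {
  public static void main(String[] args) throws Exception {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(new YarnConfiguration());
    yarnClient.start();
    try {
      List<NodeReport> reports = yarnClient.getNodeReports(NodeState.RUNNING);
      for (NodeReport report : reports) {
        // Assumed accessor for the new repeated node_attributes field.
        Set<NodeAttribute> attributes = report.getNodeAttributes();
        System.out.println(report.getNodeId() + " -> " + attributes);
      }
    } finally {
      yarnClient.stop();
    }
  }
}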
message NodeIdToLabelsProto { @@ -372,6 +373,42 @@ message NodeLabelProto { optional bool isExclusive = 2 [default = true]; } +enum NodeAttributeTypeProto { + STRING = 1; +} + +message NodeAttributeKeyProto { + optional string attributePrefix = 1 [default="rm.yarn.io"]; + required string attributeName = 2; +} + +message NodeAttributeProto { + required NodeAttributeKeyProto attributeKey = 1; + optional NodeAttributeTypeProto attributeType = 2 [default = STRING]; + optional string attributeValue = 3 [default=""]; +} + + +message NodeAttributeInfoProto { + required NodeAttributeKeyProto attributeKey = 1; + required NodeAttributeTypeProto attributeType = 2; +} + +message NodeToAttributeValueProto { + required string hostname = 1; + required string attributeValue = 2; +} + +message AttributeToNodesProto { + required NodeAttributeKeyProto nodeAttribute = 1; + repeated NodeToAttributeValueProto nodeValueMap = 2; +} + +message NodeToAttributesProto { + optional string node = 1; + repeated NodeAttributeProto nodeAttributes = 2; +} + enum ContainerTypeProto { APPLICATION_MASTER = 1; TASK = 2; @@ -609,11 +646,18 @@ message PlacementConstraintProto { optional CompositePlacementConstraintProto compositeConstraint = 2; } +enum NodeAttributeOpCodeProto { + NO_OP = 1; + EQ = 2; + NE = 3; +} + message SimplePlacementConstraintProto { required string scope = 1; repeated PlacementConstraintTargetProto targetExpressions = 2; optional int32 minCardinality = 3; optional int32 maxCardinality = 4; + optional NodeAttributeOpCodeProto attributeOpCode = 5; } message PlacementConstraintTargetProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 92a65adbed7..248f775bdeb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -92,6 +92,7 @@ message AllocateRequestProto { optional float progress = 5; repeated UpdateContainerRequestProto update_requests = 7; repeated SchedulingRequestProto scheduling_requests = 10; + optional string tracking_url = 11; } message NMTokenProto { @@ -259,6 +260,29 @@ message GetClusterNodeLabelsResponseProto { repeated NodeLabelProto nodeLabels = 2; } +message GetClusterNodeAttributesRequestProto { +} + +message GetClusterNodeAttributesResponseProto { + repeated NodeAttributeInfoProto nodeAttributes = 1; +} + +message GetAttributesToNodesRequestProto { + repeated NodeAttributeKeyProto nodeAttributes = 1; +} + +message GetAttributesToNodesResponseProto { + repeated AttributeToNodesProto attributesToNodes = 1; +} + +message GetNodesToAttributesRequestProto { + repeated string hostnames = 1; +} + +message GetNodesToAttributesResponseProto { + repeated NodeToAttributesProto nodesToAttributes = 1; +} + message UpdateApplicationPriorityRequestProto { required ApplicationIdProto applicationId = 1; required PriorityProto applicationPriority = 2; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/resource/TestPlacementConstraintParser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/resource/TestPlacementConstraintParser.java index a69571c5c80..91e4fdb4e84 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/resource/TestPlacementConstraintParser.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/api/resource/TestPlacementConstraintParser.java @@ -22,6 +22,8 @@ import java.util.Iterator; import java.util.Map; import java.util.Set; + +import org.apache.hadoop.yarn.api.records.NodeAttributeOpCode; import org.apache.hadoop.yarn.api.resource.PlacementConstraint.AbstractConstraint; import org.apache.hadoop.yarn.api.resource.PlacementConstraint.And; import org.apache.hadoop.yarn.api.resource.PlacementConstraint.Or; @@ -38,8 +40,14 @@ import org.apache.hadoop.yarn.util.constraint.PlacementConstraintParser.SourceTagsTokenizer; import org.apache.hadoop.yarn.util.constraint.PlacementConstraintParser.ConstraintTokenizer; -import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.*; import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets.allocationTag; +import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.and; +import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.cardinality; +import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.or; +import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets; +import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetIn; +import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetNodeAttribute; +import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.targetNotIn; import org.junit.Assert; import org.junit.Test; @@ -443,4 +451,77 @@ private void verifyConstraintToString(String inputExpr, + constrainExpr + ", caused by: " + e.getMessage()); } } + + @Test + public void testParseNodeAttributeSpec() + throws PlacementConstraintParseException { + Map result; + PlacementConstraint.AbstractConstraint expectedPc1, expectedPc2; + PlacementConstraint actualPc1, actualPc2; + + // A single node attribute constraint + result = PlacementConstraintParser + .parsePlacementSpec("xyz=4,rm.yarn.io/foo=true"); + Assert.assertEquals(1, result.size()); + TargetExpression target = PlacementTargets + .nodeAttribute("rm.yarn.io/foo", "true"); + expectedPc1 = targetNodeAttribute("node", NodeAttributeOpCode.EQ, target); + + actualPc1 = result.values().iterator().next(); + Assert.assertEquals(expectedPc1, actualPc1.getConstraintExpr()); + + // A single node attribute constraint + result = PlacementConstraintParser + .parsePlacementSpec("xyz=3,rm.yarn.io/foo!=abc"); + Assert.assertEquals(1, result.size()); + target = PlacementTargets + .nodeAttribute("rm.yarn.io/foo", "abc"); + expectedPc1 = targetNodeAttribute("node", NodeAttributeOpCode.NE, target); + + actualPc1 = result.values().iterator().next(); + Assert.assertEquals(expectedPc1, actualPc1.getConstraintExpr()); + + actualPc1 = result.values().iterator().next(); + Assert.assertEquals(expectedPc1, actualPc1.getConstraintExpr()); + + // A single node attribute constraint + result = PlacementConstraintParser + .parsePlacementSpec( + "xyz=1,rm.yarn.io/foo!=abc:zxy=1,rm.yarn.io/bar=true"); + Assert.assertEquals(2, result.size()); + target = PlacementTargets + .nodeAttribute("rm.yarn.io/foo", "abc"); + expectedPc1 = targetNodeAttribute("node", NodeAttributeOpCode.NE, target); + target = PlacementTargets + .nodeAttribute("rm.yarn.io/bar", "true"); + expectedPc2 = targetNodeAttribute("node", NodeAttributeOpCode.EQ, target); + + Iterator valueIt = result.values().iterator(); + actualPc1 = valueIt.next(); + actualPc2 = valueIt.next(); + Assert.assertEquals(expectedPc1, 
actualPc1.getConstraintExpr()); + Assert.assertEquals(expectedPc2, actualPc2.getConstraintExpr()); + + // A single node attribute constraint w/o source tags + result = PlacementConstraintParser + .parsePlacementSpec("rm.yarn.io/foo=true"); + Assert.assertEquals(1, result.size()); + target = PlacementTargets.nodeAttribute("rm.yarn.io/foo", "true"); + expectedPc1 = targetNodeAttribute("node", NodeAttributeOpCode.EQ, target); + + SourceTags actualSourceTags = result.keySet().iterator().next(); + Assert.assertTrue(actualSourceTags.isEmpty()); + actualPc1 = result.values().iterator().next(); + Assert.assertEquals(expectedPc1, actualPc1.getConstraintExpr()); + + // If source tags is not specified for a node-attribute constraint, + // then this expression must be single constraint expression. + try { + PlacementConstraintParser + .parsePlacementSpec("rm.yarn.io/foo=true:xyz=1,notin,node,xyz"); + Assert.fail("Expected a failure!"); + } catch (Exception e) { + Assert.assertTrue(e instanceof PlacementConstraintParseException); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java index 66bf3204bf6..c342dbe40a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java @@ -43,7 +43,7 @@ public void testUnits() { Assert.assertEquals("Resource units incorrect", units, ri.getUnits()); units = "z"; try { - ResourceInformation.newInstance(name, units); + ResourceInformation.newInstance(name, units).setUnits(units); Assert.fail(units + "is not a valid unit"); } catch (IllegalArgumentException ie) { // do nothing diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java index b9ba543ee64..d63933cac18 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java @@ -68,6 +68,10 @@ public void initializeMemberVariables() { .YARN_SECURITY_SERVICE_AUTHORIZATION_RESOURCETRACKER_PROTOCOL); configurationPropsToSkipCompare.add(YarnConfiguration .YARN_SECURITY_SERVICE_AUTHORIZATION_COLLECTOR_NODEMANAGER_PROTOCOL); + configurationPropsToSkipCompare.add(YarnConfiguration + .YARN_SECURITY_SERVICE_AUTHORIZATION_DISTRIBUTEDSCHEDULING_PROTOCOL); + configurationPropsToSkipCompare.add(YarnConfiguration + .YARN_SECURITY_SERVICE_AUTHORIZATION_APPLICATIONMASTER_NODEMANAGER_PROTOCOL); configurationPropsToSkipCompare.add(YarnConfiguration.CURATOR_LEADER_ELECTOR); configurationPropsToSkipCompare .add(YarnConfiguration.RM_RESERVATION_SYSTEM_MAX_PERIODICITY); @@ -101,6 +105,8 @@ public void initializeMemberVariables() { .add(YarnConfiguration.DEFAULT_FEDERATION_POLICY_MANAGER); configurationPropsToSkipCompare .add(YarnConfiguration.DEFAULT_FEDERATION_POLICY_MANAGER_PARAMS); + configurationPropsToSkipCompare + .add(YarnConfiguration.FEDERATION_AMRMPROXY_SUBCLUSTER_TIMEOUT); // Federation StateStore ZK implementation configs to be 
ignored configurationPropsToSkipCompare.add( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 76fa38f922a..a04f57b2704 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -47,6 +47,7 @@ import java.util.Arrays; import java.util.concurrent.atomic.AtomicLong; +import com.google.common.base.Strings; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -523,9 +524,15 @@ public boolean init(String[] args) throws ParseException, IOException { if (cliParser.hasOption("placement_spec")) { String placementSpec = cliParser.getOptionValue("placement_spec"); - LOG.info("Placement Spec received [{}]", placementSpec); - parsePlacementSpecs(placementSpec); + String decodedSpec = getDecodedPlacementSpec(placementSpec); + LOG.info("Placement Spec received [{}]", decodedSpec); + + this.numTotalContainers = 0; + int globalNumOfContainers = Integer + .parseInt(cliParser.getOptionValue("num_containers", "0")); + parsePlacementSpecs(decodedSpec, globalNumOfContainers); LOG.info("Total num containers requested [{}]", numTotalContainers); + if (numTotalContainers == 0) { throw new IllegalArgumentException( "Cannot run distributed shell with no containers"); @@ -694,23 +701,33 @@ public boolean init(String[] args) throws ParseException, IOException { return true; } - private void parsePlacementSpecs(String placementSpecifications) { - // Client sends placement spec in encoded format - Base64.Decoder decoder = Base64.getDecoder(); - byte[] decodedBytes = decoder.decode( - placementSpecifications.getBytes(StandardCharsets.UTF_8)); - String decodedSpec = new String(decodedBytes, StandardCharsets.UTF_8); - LOG.info("Decode placement spec: " + decodedSpec); + private void parsePlacementSpecs(String decodedSpec, + int globalNumOfContainers) { Map pSpecs = PlacementSpec.parse(decodedSpec); this.placementSpecs = new HashMap<>(); - this.numTotalContainers = 0; for (PlacementSpec pSpec : pSpecs.values()) { - this.numTotalContainers += pSpec.numContainers; + // Use global num of containers when the spec doesn't specify + // source tags. This is allowed when using node-attribute constraints. 
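A condensed sketch of the fallback this comment describes: when a placement spec has no "tag=N" prefix (a plain node-attribute constraint), the container count comes from the -num_containers argument. parsePlacementSpec and SourceTags#isEmpty are the APIs added earlier in this patch; getNumOfAllocations is assumed to be the existing count accessor on SourceTags.

import java.util.Map;

import org.apache.hadoop.yarn.api.resource.PlacementConstraint;
import org.apache.hadoop.yarn.util.constraint.PlacementConstraintParser;
import org.apache.hadoop.yarn.util.constraint.PlacementConstraintParser.SourceTags;

public class GlobalContainerCountSketch {
  public static void main(String[] args) throws Exception {
    int globalNumOfContainers = 2;   // what -num_containers would supply
    Map<SourceTags, PlacementConstraint> specs =
        PlacementConstraintParser.parsePlacementSpec("rm.yarn.io/foo=true");
    SourceTags st = specs.keySet().iterator().next();
    int containers = st.isEmpty()
        ? globalNumOfContainers        // no source tag: fall back to the global count
        : st.getNumOfAllocations();    // tag=N form: use N from the spec (assumed accessor)
    System.out.println("containers to request: " + containers);
  }
}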
+ if (Strings.isNullOrEmpty(pSpec.sourceTag) + && pSpec.getNumContainers() == 0 + && globalNumOfContainers > 0) { + pSpec.setNumContainers(globalNumOfContainers); + } + this.numTotalContainers += pSpec.getNumContainers(); this.placementSpecs.put(pSpec.sourceTag, pSpec); } } + private String getDecodedPlacementSpec(String placementSpecifications) { + Base64.Decoder decoder = Base64.getDecoder(); + byte[] decodedBytes = decoder.decode( + placementSpecifications.getBytes(StandardCharsets.UTF_8)); + String decodedSpec = new String(decodedBytes, StandardCharsets.UTF_8); + LOG.info("Decode placement spec: " + decodedSpec); + return decodedSpec; + } + /** * Helper function to print usage * @@ -793,11 +810,13 @@ public void run() throws YarnException, IOException, InterruptedException { placementConstraintMap = new HashMap<>(); for (PlacementSpec spec : this.placementSpecs.values()) { if (spec.constraint != null) { - placementConstraintMap.put( - Collections.singleton(spec.sourceTag), spec.constraint); + Set allocationTags = Strings.isNullOrEmpty(spec.sourceTag) ? + Collections.emptySet() : Collections.singleton(spec.sourceTag); + placementConstraintMap.put(allocationTags, spec.constraint); } } } + RegisterApplicationMasterResponse response = amRMClient .registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl, placementConstraintMap); @@ -845,14 +864,18 @@ public void run() throws YarnException, IOException, InterruptedException { // Keep looping until all the containers are launched and shell script // executed on them ( regardless of success/failure). if (this.placementSpecs == null) { + LOG.info("placementSpecs null"); for (int i = 0; i < numTotalContainersToRequest; ++i) { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } } else { + LOG.info("placementSpecs to create req:" + placementSpecs); List schedReqs = new ArrayList<>(); for (PlacementSpec pSpec : this.placementSpecs.values()) { - for (int i = 0; i < pSpec.numContainers; i++) { + LOG.info("placementSpec :" + pSpec + ", container:" + pSpec + .getNumContainers()); + for (int i = 0; i < pSpec.getNumContainers(); i++) { SchedulingRequest sr = setupSchedulingRequest(pSpec); schedReqs.add(sr); } @@ -944,7 +967,7 @@ protected boolean finish() { // When the application completes, it should send a finish application // signal to the RM - LOG.info("Application completed. Signalling finish to RM"); + LOG.info("Application completed. Signalling finished to RM"); FinalApplicationStatus appStatus; boolean success = true; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index c8a71b320c0..e8b69fe1861 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -103,7 +103,7 @@ * the provided shell command on a set of containers.

 *
 * This client is meant to act as an example on how to write yarn-based applications.
- *
+ *
 *
    To submit an application, a client first needs to connect to the ResourceManager * aka ApplicationsManager or ASM via the {@link ApplicationClientProtocol}. The {@link ApplicationClientProtocol} * provides a way for the client to get access to cluster information and to request for a @@ -192,6 +192,8 @@ // Placement specification private String placementSpec = ""; + // Node Attribute specification + private String nodeAttributeSpec = ""; // log4j.properties file // if available, add to local resources and set into classpath private String log4jPropFile = ""; @@ -448,6 +450,7 @@ public boolean init(String[] args) throws ParseException { // Check if it is parsable PlacementSpec.parse(this.placementSpec); } + appName = cliParser.getOptionValue("appname", "DistributedShell"); amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0")); amQueue = cliParser.getOptionValue("queue", "default"); @@ -854,7 +857,9 @@ public boolean run() throws IOException, YarnException { // Set java executable command LOG.info("Setting up app master command"); - vargs.add(Environment.JAVA_HOME.$$() + "/bin/java"); + // Need extra quote here because JAVA_HOME might contain space on Windows, + // e.g. C:/Program Files/Java... + vargs.add("\"" + Environment.JAVA_HOME.$$() + "/bin/java\""); // Set Xmx based on am memory size vargs.add("-Xmx" + amMemory + "m"); // Set class name diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/PlacementSpec.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/PlacementSpec.java index 290925980a5..ceaa37d5879 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/PlacementSpec.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/PlacementSpec.java @@ -37,8 +37,8 @@ LoggerFactory.getLogger(PlacementSpec.class); public final String sourceTag; - public final int numContainers; public final PlacementConstraint constraint; + private int numContainers; public PlacementSpec(String sourceTag, int numContainers, PlacementConstraint constraint) { @@ -47,6 +47,22 @@ public PlacementSpec(String sourceTag, int numContainers, this.constraint = constraint; } + /** + * Get the number of container for this spec. + * @return container count + */ + public int getNumContainers() { + return numContainers; + } + + /** + * Set number of containers for this spec. + * @param numContainers number of containers. 
+ */ + public void setNumContainers(int numContainers) { + this.numContainers = numContainers; + } + // Placement specification should be of the form: // PlacementSpec => ""|KeyVal;PlacementSpec // KeyVal => SourceTag=Constraint @@ -71,6 +87,7 @@ public PlacementSpec(String sourceTag, int numContainers, public static Map parse(String specs) throws IllegalArgumentException { LOG.info("Parsing Placement Specs: [{}]", specs); + Map pSpecs = new HashMap<>(); Map parsed; try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java index 3a98a22ee40..49d8f3d9db0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java @@ -624,6 +624,7 @@ private File verifyEntityTypeFileExists(String basePath, String entityType, String entityfileName) { String outputDirPathForEntity = basePath + File.separator + entityType + File.separator; + LOG.info(outputDirPathForEntity); File outputDirForEntity = new File(outputDirPathForEntity); Assert.assertTrue(outputDirForEntity.isDirectory()); @@ -775,6 +776,12 @@ private boolean checkIPs(String hostname, String localIP, String appIP) } + private String getSleepCommand(int sec) { + // Windows doesn't have a sleep command, ping -n does the trick + return Shell.WINDOWS ? 
"ping -n " + (sec + 1) + " 127.0.0.1 >nul" + : "sleep " + sec; + } + @Test public void testDSRestartWithPreviousRunningContainers() throws Exception { String[] args = { @@ -783,7 +790,7 @@ public void testDSRestartWithPreviousRunningContainers() throws Exception { "--num_containers", "1", "--shell_command", - "sleep 8", + getSleepCommand(8), "--master_memory", "512", "--container_memory", @@ -818,7 +825,7 @@ public void testDSAttemptFailuresValidityIntervalSucess() throws Exception { "--num_containers", "1", "--shell_command", - "sleep 8", + getSleepCommand(8), "--master_memory", "512", "--container_memory", @@ -856,7 +863,7 @@ public void testDSAttemptFailuresValidityIntervalFailed() throws Exception { "--num_containers", "1", "--shell_command", - "sleep 8", + getSleepCommand(8), "--master_memory", "512", "--container_memory", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml index ab762187830..7386e4158e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml @@ -91,10 +91,18 @@ org.apache.hadoop hadoop-yarn-api + + org.apache.hadoop + hadoop-yarn-client + org.apache.hadoop hadoop-yarn-common + + org.apache.hadoop + hadoop-yarn-registry + org.apache.hadoop hadoop-yarn-server-common @@ -103,6 +111,14 @@ org.apache.hadoop hadoop-common + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + org.slf4j slf4j-api @@ -119,6 +135,42 @@ javax.ws.rs jsr311-api + + javax.servlet + javax.servlet-api + + + commons-codec + commons-codec + + + commons-io + commons-io + + + org.apache.commons + commons-lang3 + + + com.google.guava + guava + + + com.sun.jersey + jersey-client + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-servlet + org.mockito mockito-all @@ -155,6 +207,11 @@ curator-test test + + org.apache.hadoop + hadoop-minikdc + test + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/client/ApiServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/client/ApiServiceClient.java index 9232fc81f66..ca6cc508b27 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/client/ApiServiceClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/client/ApiServiceClient.java @@ -20,19 +20,28 @@ import java.io.File; import java.io.IOException; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.security.PrivilegedExceptionAction; import java.text.MessageFormat; import java.util.List; import java.util.Map; +import javax.ws.rs.core.HttpHeaders; import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.UriBuilder; import com.google.common.base.Preconditions; + +import org.apache.commons.codec.binary.Base64; +import com.google.common.base.Strings; import org.apache.commons.lang3.StringUtils; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authentication.client.AuthenticatedURL; +import org.apache.hadoop.security.authentication.client.AuthenticationException; +import org.apache.hadoop.security.authentication.util.KerberosUtil; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -48,11 +57,14 @@ import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.api.records.ServiceStatus; import org.apache.hadoop.yarn.service.conf.RestApiConstants; -import org.apache.hadoop.yarn.service.utils.JsonSerDeser; import org.apache.hadoop.yarn.service.utils.ServiceApiUtil; import org.apache.hadoop.yarn.util.RMHAUtils; -import org.codehaus.jackson.map.PropertyNamingStrategy; import org.eclipse.jetty.util.UrlEncoded; +import org.ietf.jgss.GSSContext; +import org.ietf.jgss.GSSException; +import org.ietf.jgss.GSSManager; +import org.ietf.jgss.GSSName; +import org.ietf.jgss.Oid; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,6 +83,7 @@ public class ApiServiceClient extends AppAdminClient { private static final Logger LOG = LoggerFactory.getLogger(ApiServiceClient.class); + private static final Base64 BASE_64_CODEC = new Base64(0); protected YarnClient yarnClient; @Override protected void serviceInit(Configuration configuration) @@ -80,6 +93,54 @@ super.serviceInit(configuration); } + /** + * Generate SPNEGO challenge request token. + * + * @param server - hostname to contact + * @throws IOException + * @throws InterruptedException + */ + String generateToken(String server) throws IOException, InterruptedException { + UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); + LOG.debug("The user credential is {}", currentUser); + String challenge = currentUser + .doAs(new PrivilegedExceptionAction() { + @Override + public String run() throws Exception { + try { + // This Oid for Kerberos GSS-API mechanism. + Oid mechOid = KerberosUtil.getOidInstance("GSS_KRB5_MECH_OID"); + GSSManager manager = GSSManager.getInstance(); + // GSS name for server + GSSName serverName = manager.createName("HTTP@" + server, + GSSName.NT_HOSTBASED_SERVICE); + // Create a GSSContext for authentication with the service. + // We're passing client credentials as null since we want them to + // be read from the Subject. + GSSContext gssContext = manager.createContext( + serverName.canonicalize(mechOid), mechOid, null, + GSSContext.DEFAULT_LIFETIME); + gssContext.requestMutualAuth(true); + gssContext.requestCredDeleg(true); + // Establish context + byte[] inToken = new byte[0]; + byte[] outToken = gssContext.initSecContext(inToken, 0, + inToken.length); + gssContext.dispose(); + // Base64 encoded and stringified token for server + LOG.debug("Got valid challenge for host {}", serverName); + return new String(BASE_64_CODEC.encode(outToken), + StandardCharsets.US_ASCII); + } catch (GSSException | IllegalAccessException + | NoSuchFieldException | ClassNotFoundException e) { + LOG.error("Error: {}", e); + throw new AuthenticationException(e); + } + } + }); + return challenge; + } + /** * Calculate Resource Manager address base on working REST API. 
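Condensed from this hunk, how the SPNEGO challenge produced above is consumed (the host name is a placeholder):

// Send the Base64 SPNEGO token as a Negotiate authorization header on the
// RM web request; "rm-host.example.com" stands in for the resolved RM host.
String challenge = generateToken("rm-host.example.com");
builder.header(HttpHeaders.AUTHORIZATION, "Negotiate " + challenge);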
*/ @@ -100,6 +161,7 @@ private String getRMWebAddress() { for (String host : rmServers) { try { Client client = Client.create(); + client.setFollowRedirects(false); StringBuilder sb = new StringBuilder(); sb.append(scheme); sb.append(host); @@ -116,8 +178,11 @@ private String getRMWebAddress() { WebResource webResource = client .resource(sb.toString()); if (useKerberos) { - AuthenticatedURL.Token token = new AuthenticatedURL.Token(); - webResource.header("WWW-Authenticate", token); + String[] server = host.split(":"); + String challenge = generateToken(server[0]); + webResource.header(HttpHeaders.AUTHORIZATION, "Negotiate " + + challenge); + LOG.debug("Authorization: Negotiate {}", challenge); } ClientResponse test = webResource.get(ClientResponse.class); if (test.getStatus() == 200) { @@ -125,7 +190,8 @@ private String getRMWebAddress() { break; } } catch (Exception e) { - LOG.debug("Fail to connect to: "+host, e); + LOG.info("Fail to connect to: "+host); + LOG.debug("Root cause: {}", e); } } return scheme+rmAddress; @@ -147,11 +213,7 @@ private String getServicePath(String appName) throws IOException { api.append("/"); api.append(appName); } - Configuration conf = getConfig(); - if (conf.get("hadoop.http.authentication.type").equalsIgnoreCase("simple")) { - api.append("?user.name=" + UrlEncoded - .encodeString(System.getProperty("user.name"))); - } + appendUserNameIfRequired(api); return api.toString(); } @@ -162,15 +224,27 @@ private String getInstancesPath(String appName) throws IOException { api.append(url); api.append("/app/v1/services/").append(appName).append("/") .append(RestApiConstants.COMP_INSTANCES); - Configuration conf = getConfig(); - if (conf.get("hadoop.http.authentication.type").equalsIgnoreCase( - "simple")) { - api.append("?user.name=" + UrlEncoded - .encodeString(System.getProperty("user.name"))); - } + appendUserNameIfRequired(api); return api.toString(); } + private String getInstancePath(String appName, List components, + String version, List containerStates) throws IOException { + UriBuilder builder = UriBuilder.fromUri(getInstancesPath(appName)); + if (components != null && !components.isEmpty()) { + components.forEach(compName -> + builder.queryParam(RestApiConstants.PARAM_COMP_NAME, compName)); + } + if (!Strings.isNullOrEmpty(version)){ + builder.queryParam(RestApiConstants.PARAM_VERSION, version); + } + if (containerStates != null && !containerStates.isEmpty()){ + containerStates.forEach(state -> + builder.queryParam(RestApiConstants.PARAM_CONTAINER_STATE, state)); + } + return builder.build().toString(); + } + private String getComponentsPath(String appName) throws IOException { Preconditions.checkNotNull(appName); String url = getRMWebAddress(); @@ -178,13 +252,17 @@ private String getComponentsPath(String appName) throws IOException { api.append(url); api.append("/app/v1/services/").append(appName).append("/") .append(RestApiConstants.COMPONENTS); + appendUserNameIfRequired(api); + return api.toString(); + } + + private void appendUserNameIfRequired(StringBuilder builder) { Configuration conf = getConfig(); if (conf.get("hadoop.http.authentication.type").equalsIgnoreCase( "simple")) { - api.append("?user.name=" + UrlEncoded + builder.append("?user.name=").append(UrlEncoded .encodeString(System.getProperty("user.name"))); } - return api.toString(); } private Builder getApiClient() throws IOException { @@ -206,8 +284,13 @@ private Builder getApiClient(String requestPath) Builder builder = client .resource(requestPath).type(MediaType.APPLICATION_JSON); if 
(conf.get("hadoop.http.authentication.type").equals("kerberos")) { - AuthenticatedURL.Token token = new AuthenticatedURL.Token(); - builder.header("WWW-Authenticate", token); + try { + URI url = new URI(requestPath); + String challenge = generateToken(url.getHost()); + builder.header(HttpHeaders.AUTHORIZATION, "Negotiate " + challenge); + } catch (Exception e) { + throw new IOException(e); + } } return builder .accept("application/json;charset=utf-8"); @@ -517,6 +600,26 @@ public String getStatusString(String appIdOrName) throws IOException, return output; } + @Override + public int actionUpgradeExpress(String appName, File path) + throws IOException, YarnException { + int result; + try { + Service service = + loadAppJsonFromLocalFS(path.getAbsolutePath(), appName, null, null); + service.setState(ServiceState.EXPRESS_UPGRADING); + String buffer = jsonSerDeser.toJson(service); + LOG.info("Upgrade in progress. Please wait.."); + ClientResponse response = getApiClient(getServicePath(appName)) + .put(ClientResponse.class, buffer); + result = processResponse(response); + } catch (Exception e) { + LOG.error("Failed to upgrade application: ", e); + result = EXIT_EXCEPTION_THROWN; + } + return result; + } + @Override public int initiateUpgrade(String appName, String fileName, boolean autoFinalize) throws IOException, YarnException { @@ -553,7 +656,7 @@ public int actionUpgradeInstances(String appName, List compInstances) container.setState(ContainerState.UPGRADING); toUpgrade[idx++] = container; } - String buffer = CONTAINER_JSON_SERDE.toJson(toUpgrade); + String buffer = ServiceApiUtil.CONTAINER_JSON_SERDE.toJson(toUpgrade); ClientResponse response = getApiClient(getInstancesPath(appName)) .put(ClientResponse.class, buffer); result = processResponse(response); @@ -577,7 +680,7 @@ public int actionUpgradeComponents(String appName, List components) component.setState(ComponentState.UPGRADING); toUpgrade[idx++] = component; } - String buffer = COMP_JSON_SERDE.toJson(toUpgrade); + String buffer = ServiceApiUtil.COMP_JSON_SERDE.toJson(toUpgrade); ClientResponse response = getApiClient(getComponentsPath(appName)) .put(ClientResponse.class, buffer); result = processResponse(response); @@ -599,11 +702,25 @@ public int actionCleanUp(String appName, String userName) throws return result; } - private static final JsonSerDeser CONTAINER_JSON_SERDE = - new JsonSerDeser<>(Container[].class, - PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); - - private static final JsonSerDeser COMP_JSON_SERDE = - new JsonSerDeser<>(Component[].class, - PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); + @Override + public String getInstances(String appName, List components, + String version, List containerStates) throws IOException, + YarnException { + try { + String uri = getInstancePath(appName, components, version, + containerStates); + ClientResponse response = getApiClient(uri).get(ClientResponse.class); + if (response.getStatus() != 200) { + StringBuilder sb = new StringBuilder(); + sb.append("Failed: HTTP error code: "); + sb.append(response.getStatus()); + sb.append(" ErrorMsg: ").append(response.getEntity(String.class)); + return sb.toString(); + } + return response.getEntity(String.class); + } catch (Exception e) { + LOG.error("Fail to get containers {}", e); + } + return null; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java index 82fadae8bc3..cd6f0d79e2d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/webapp/ApiServer.java @@ -44,14 +44,7 @@ import org.slf4j.LoggerFactory; import javax.servlet.http.HttpServletRequest; -import javax.ws.rs.Consumes; -import javax.ws.rs.DELETE; -import javax.ws.rs.GET; -import javax.ws.rs.POST; -import javax.ws.rs.PUT; -import javax.ws.rs.Path; -import javax.ws.rs.PathParam; -import javax.ws.rs.Produces; +import javax.ws.rs.*; import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -61,13 +54,7 @@ import java.io.IOException; import java.lang.reflect.UndeclaredThrowableException; import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import static org.apache.hadoop.yarn.service.api.records.ServiceState.ACCEPTED; @@ -453,7 +440,8 @@ public Response updateService(@Context HttpServletRequest request, if (updateServiceData.getState() != null && ( updateServiceData.getState() == ServiceState.UPGRADING || updateServiceData.getState() == - ServiceState.UPGRADING_AUTO_FINALIZE)) { + ServiceState.UPGRADING_AUTO_FINALIZE) || + updateServiceData.getState() == ServiceState.EXPRESS_UPGRADING) { return upgradeService(updateServiceData, ugi); } @@ -582,6 +570,40 @@ public Response updateComponentInstances(@Context HttpServletRequest request, return Response.status(Status.NO_CONTENT).build(); } + @GET + @Path(COMP_INSTANCES_PATH) + @Produces({RestApiConstants.MEDIA_TYPE_JSON_UTF8}) + public Response getComponentInstances(@Context HttpServletRequest request, + @PathParam(SERVICE_NAME) String serviceName, + @QueryParam(PARAM_COMP_NAME) List componentNames, + @QueryParam(PARAM_VERSION) String version, + @QueryParam(PARAM_CONTAINER_STATE) List containerStates) { + try { + UserGroupInformation ugi = getProxyUser(request); + LOG.info("GET: component instances for service = {}, compNames in {}, " + + "version = {}, containerStates in {}, user = {}", serviceName, + Objects.toString(componentNames, "[]"), Objects.toString(version, ""), + Objects.toString(containerStates, "[]"), ugi); + + List containerStatesDe = containerStates.stream().map( + ContainerState::valueOf).collect(Collectors.toList()); + + return Response.ok(getContainers(ugi, serviceName, componentNames, + version, containerStatesDe)).build(); + } catch (IllegalArgumentException iae) { + return formatResponse(Status.BAD_REQUEST, "valid container states are: " + + Arrays.toString(ContainerState.values())); + } catch (AccessControlException e) { + return formatResponse(Response.Status.FORBIDDEN, e.getMessage()); + } catch (IOException | InterruptedException e) { + return formatResponse(Response.Status.INTERNAL_SERVER_ERROR, + e.getMessage()); + } catch (UndeclaredThrowableException e) { + return formatResponse(Response.Status.INTERNAL_SERVER_ERROR, + e.getCause().getMessage()); + } + } + private Response 
flexService(Service service, UserGroupInformation ugi) throws IOException, InterruptedException { String appName = service.getName(); @@ -669,7 +691,11 @@ private Response upgradeService(Service service, ServiceClient sc = getServiceClient(); sc.init(YARN_CONFIG); sc.start(); - sc.initiateUpgrade(service); + if (service.getState().equals(ServiceState.EXPRESS_UPGRADING)) { + sc.actionUpgradeExpress(service); + } else { + sc.initiateUpgrade(service); + } sc.close(); return null; }); @@ -685,7 +711,8 @@ private Response processComponentsUpgrade(UserGroupInformation ugi, String serviceName, Set compNames) throws YarnException, IOException, InterruptedException { Service service = getServiceFromClient(ugi, serviceName); - if (service.getState() != ServiceState.UPGRADING) { + if (!service.getState().equals(ServiceState.UPGRADING) && + !service.getState().equals(ServiceState.UPGRADING_AUTO_FINALIZE)) { throw new YarnException( String.format("The upgrade of service %s has not been initiated.", service.getName())); @@ -752,6 +779,22 @@ private Service getServiceFromClient(UserGroupInformation ugi, }); } + private Container[] getContainers(UserGroupInformation ugi, + String serviceName, List componentNames, String version, + List containerStates) throws IOException, + InterruptedException { + return ugi.doAs((PrivilegedExceptionAction) () -> { + Container[] result; + ServiceClient sc = getServiceClient(); + sc.init(YARN_CONFIG); + sc.start(); + result = sc.getContainers(serviceName, componentNames, version, + containerStates); + sc.close(); + return result; + }); + } + /** * Used by negative test case. * diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/test/java/org/apache/hadoop/yarn/service/client/TestSecureApiServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/test/java/org/apache/hadoop/yarn/service/client/TestSecureApiServiceClient.java new file mode 100644 index 00000000000..4f3b46189fa --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/src/test/java/org/apache/hadoop/yarn/service/client/TestSecureApiServiceClient.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.service.client; + +import static org.junit.Assert.*; + +import java.io.File; + +import javax.security.sasl.Sasl; + +import java.util.Map; +import java.util.HashMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.minikdc.KerberosSecurityTestcase; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.SaslRpcServer.QualityOfProtection; +import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; +import org.junit.Before; +import org.junit.Test; + +/** + * Test Spnego Client Login. + */ +public class TestSecureApiServiceClient extends KerberosSecurityTestcase { + + private String clientPrincipal = "client"; + + private String server1Protocol = "HTTP"; + + private String server2Protocol = "server2"; + + private String host = "localhost"; + + private String server1Principal = server1Protocol + "/" + host; + + private String server2Principal = server2Protocol + "/" + host; + + private File keytabFile; + + private Configuration conf = new Configuration(); + + private Map props; + + @Before + public void setUp() throws Exception { + keytabFile = new File(getWorkDir(), "keytab"); + getKdc().createPrincipal(keytabFile, clientPrincipal, server1Principal, + server2Principal); + SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, conf); + UserGroupInformation.setConfiguration(conf); + UserGroupInformation.setShouldRenewImmediatelyForTests(true); + props = new HashMap(); + props.put(Sasl.QOP, QualityOfProtection.AUTHENTICATION.saslQop); + } + + @Test + public void testHttpSpnegoChallenge() throws Exception { + UserGroupInformation.loginUserFromKeytab(clientPrincipal, keytabFile + .getCanonicalPath()); + ApiServiceClient asc = new ApiServiceClient(); + String challenge = asc.generateToken("localhost"); + assertNotNull(challenge); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMProtocol.java index 45ff98ac57d..652a314abef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMProtocol.java @@ -23,6 +23,8 @@ import org.apache.hadoop.yarn.proto.ClientAMProtocol.CompInstancesUpgradeRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsResponseProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesRequestProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.RestartServiceRequestProto; @@ -55,4 +57,7 @@ RestartServiceResponseProto restart(RestartServiceRequestProto request) CompInstancesUpgradeResponseProto upgrade( CompInstancesUpgradeRequestProto request) throws IOException, 
YarnException; + + GetCompInstancesResponseProto getCompInstances( + GetCompInstancesRequestProto request) throws IOException, YarnException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java index e97c3d64ab6..2ef8f7ee7b7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ClientAMService.java @@ -35,6 +35,8 @@ import org.apache.hadoop.yarn.proto.ClientAMProtocol.ComponentCountProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsResponseProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesRequestProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.RestartServiceRequestProto; @@ -43,15 +45,18 @@ import org.apache.hadoop.yarn.proto.ClientAMProtocol.StopResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.UpgradeServiceRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.UpgradeServiceResponseProto; +import org.apache.hadoop.yarn.service.api.records.Container; import org.apache.hadoop.yarn.service.component.ComponentEvent; import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent; import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType; +import org.apache.hadoop.yarn.service.utils.FilterUtils; import org.apache.hadoop.yarn.service.utils.ServiceApiUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.InetSocketAddress; +import java.util.List; import static org.apache.hadoop.yarn.service.component.ComponentEventType.FLEX; @@ -161,7 +166,7 @@ public UpgradeServiceResponseProto upgrade( LOG.info("Upgrading service to version {} by {}", request.getVersion(), UserGroupInformation.getCurrentUser()); context.getServiceManager().processUpgradeRequest(request.getVersion(), - request.getAutoFinalize()); + request.getAutoFinalize(), request.getExpressUpgrade()); return UpgradeServiceResponseProto.newBuilder().build(); } catch (Exception ex) { return UpgradeServiceResponseProto.newBuilder().setError(ex.getMessage()) @@ -194,4 +199,13 @@ public CompInstancesUpgradeResponseProto upgrade( } return CompInstancesUpgradeResponseProto.newBuilder().build(); } + + @Override + public GetCompInstancesResponseProto getCompInstances( + GetCompInstancesRequestProto request) throws IOException { + List containers = FilterUtils.filterInstances(context, request); + return GetCompInstancesResponseProto.newBuilder().setCompInstances( + ServiceApiUtil.CONTAINER_JSON_SERDE.toJson(containers.toArray( + new Container[containers.size()]))).build(); + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceEvent.java index 0196be2a989..3a55472c0c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceEvent.java @@ -19,6 +19,9 @@ package org.apache.hadoop.yarn.service; import org.apache.hadoop.yarn.event.AbstractEvent; +import org.apache.hadoop.yarn.service.api.records.Component; + +import java.util.Queue; /** * Events are handled by {@link ServiceManager} to manage the service @@ -29,6 +32,8 @@ private final ServiceEventType type; private String version; private boolean autoFinalize; + private boolean expressUpgrade; + private Queue compsToUpgradeInOrder; public ServiceEvent(ServiceEventType serviceEventType) { super(serviceEventType); @@ -56,4 +61,24 @@ public ServiceEvent setAutoFinalize(boolean autoFinalize) { this.autoFinalize = autoFinalize; return this; } + + public boolean isExpressUpgrade() { + return expressUpgrade; + } + + public ServiceEvent setExpressUpgrade(boolean expressUpgrade) { + this.expressUpgrade = expressUpgrade; + return this; + } + + public Queue getCompsToUpgradeInOrder() { + return compsToUpgradeInOrder; + } + + public ServiceEvent setCompsToUpgradeInOrder( + Queue compsToUpgradeInOrder) { + this.compsToUpgradeInOrder = compsToUpgradeInOrder; + return this; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java index 05ecb3fc9be..04454b1d290 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceManager.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.service.api.records.ComponentState; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.component.Component; @@ -40,8 +41,11 @@ import java.io.IOException; import java.text.MessageFormat; import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Queue; import java.util.concurrent.locks.ReentrantReadWriteLock; import static org.apache.hadoop.yarn.service.utils.ServiceApiUtil.jsonSerDeser; @@ -67,6 +71,8 @@ private final SliderFileSystem fs; private String upgradeVersion; + private Queue compsToUpgradeInOrder; private static final StateMachineFactory STATE_MACHINE_FACTORY = @@ -141,14 
+147,20 @@ private State getState() { @Override public State transition(ServiceManager serviceManager, ServiceEvent event) { + serviceManager.upgradeVersion = event.getVersion(); try { - if (!event.isAutoFinalize()) { - serviceManager.serviceSpec.setState(ServiceState.UPGRADING); + if (event.isExpressUpgrade()) { + serviceManager.serviceSpec.setState(ServiceState.EXPRESS_UPGRADING); + serviceManager.compsToUpgradeInOrder = event + .getCompsToUpgradeInOrder(); + serviceManager.upgradeNextCompIfAny(); + } else if (event.isAutoFinalize()) { + serviceManager.serviceSpec.setState(ServiceState + .UPGRADING_AUTO_FINALIZE); } else { serviceManager.serviceSpec.setState( - ServiceState.UPGRADING_AUTO_FINALIZE); + ServiceState.UPGRADING); } - serviceManager.upgradeVersion = event.getVersion(); return State.UPGRADING; } catch (Throwable e) { LOG.error("[SERVICE]: Upgrade to version {} failed", event.getVersion(), @@ -169,8 +181,19 @@ public State transition(ServiceManager serviceManager, if (currState.equals(ServiceState.STABLE)) { return State.STABLE; } + if (currState.equals(ServiceState.EXPRESS_UPGRADING)) { + org.apache.hadoop.yarn.service.api.records.Component component = + serviceManager.compsToUpgradeInOrder.peek(); + if (!component.getState().equals(ComponentState.NEEDS_UPGRADE) && + !component.getState().equals(ComponentState.UPGRADING)) { + serviceManager.compsToUpgradeInOrder.remove(); + } + serviceManager.upgradeNextCompIfAny(); + } if (currState.equals(ServiceState.UPGRADING_AUTO_FINALIZE) || - event.getType().equals(ServiceEventType.START)) { + event.getType().equals(ServiceEventType.START) || + (currState.equals(ServiceState.EXPRESS_UPGRADING) && + serviceManager.compsToUpgradeInOrder.isEmpty())) { ServiceState targetState = checkIfStable(serviceManager.serviceSpec); if (targetState.equals(ServiceState.STABLE)) { if (serviceManager.finalizeUpgrade()) { @@ -184,6 +207,19 @@ public State transition(ServiceManager serviceManager, } } + private void upgradeNextCompIfAny() { + if (!compsToUpgradeInOrder.isEmpty()) { + org.apache.hadoop.yarn.service.api.records.Component component = + compsToUpgradeInOrder.peek(); + + ComponentEvent needUpgradeEvent = new ComponentEvent( + component.getName(), ComponentEventType.UPGRADE).setTargetSpec( + component).setUpgradeVersion(upgradeVersion).setExpressUpgrade(true); + context.scheduler.getDispatcher().getEventHandler().handle( + needUpgradeEvent); + } + } + /** * @return whether finalization of upgrade was successful. */ @@ -250,23 +286,18 @@ public void checkAndUpdateServiceState() { } void processUpgradeRequest(String upgradeVersion, - boolean autoFinalize) throws IOException { + boolean autoFinalize, boolean expressUpgrade) throws IOException { Service targetSpec = ServiceApiUtil.loadServiceUpgrade( context.fs, context.service.getName(), upgradeVersion); List - compsThatNeedUpgrade = componentsFinder. + compsNeedUpgradeList = componentsFinder. findTargetComponentSpecs(context.service, targetSpec); - ServiceEvent event = new ServiceEvent(ServiceEventType.UPGRADE) - .setVersion(upgradeVersion) - .setAutoFinalize(autoFinalize); - context.scheduler.getDispatcher().getEventHandler().handle(event); - if (compsThatNeedUpgrade != null && !compsThatNeedUpgrade.isEmpty()) { - if (autoFinalize) { - event.setAutoFinalize(true); - } - compsThatNeedUpgrade.forEach(component -> { + // remove all components from the need upgrade list if their restart policy + // doesn't allow upgrades.
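The express upgrade handling above advances through the components one at a time: ServiceManager keeps the components in a dependency-ordered queue, and on every check-stable tick it drops the head once it is no longer NEEDS_UPGRADE or UPGRADING and kicks off the next one. The standalone Java sketch below only illustrates that peek/remove/advance pattern; UpgradeDriver, CompStatus and the println calls are invented stand-ins, not the YARN service classes.

import java.util.ArrayDeque;
import java.util.Queue;

// Minimal, self-contained sketch of the "upgrade one component at a time" loop.
// CompStatus and UpgradeDriver are hypothetical; they only mirror the shape of
// the queue handling in ServiceManager, not its real API.
public class UpgradeDriver {

  enum CompStatus { NEEDS_UPGRADE, UPGRADING, STABLE }

  static final class Comp {
    final String name;
    CompStatus status = CompStatus.NEEDS_UPGRADE;
    Comp(String name) { this.name = name; }
  }

  private final Queue<Comp> inOrder = new ArrayDeque<>();

  UpgradeDriver(Queue<Comp> compsInDependencyOrder) {
    inOrder.addAll(compsInDependencyOrder);
    upgradeNextIfAny();               // start with the head of the queue
  }

  // Called whenever a "check stable" style event fires.
  void onCheckStable() {
    Comp head = inOrder.peek();
    if (head == null) {
      System.out.println("All components upgraded, service can finalize.");
      return;
    }
    // Only advance once the head is neither waiting for nor performing upgrade.
    if (head.status != CompStatus.NEEDS_UPGRADE
        && head.status != CompStatus.UPGRADING) {
      inOrder.remove();
      upgradeNextIfAny();
    }
  }

  private void upgradeNextIfAny() {
    Comp next = inOrder.peek();
    if (next != null) {
      next.status = CompStatus.UPGRADING;  // stand-in for dispatching an UPGRADE event
      System.out.println("Upgrading component " + next.name);
    }
  }

  public static void main(String[] args) {
    Queue<Comp> order = new ArrayDeque<>();
    order.add(new Comp("zookeeper"));
    order.add(new Comp("hbase-master"));
    UpgradeDriver driver = new UpgradeDriver(order);
    driver.inOrder.peek().status = CompStatus.STABLE;  // pretend the head finished
    driver.onCheckStable();                            // advances to hbase-master
  }
}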
+ if (compsNeedUpgradeList != null) { + compsNeedUpgradeList.removeIf(component -> { org.apache.hadoop.yarn.service.api.records.Component.RestartPolicyEnum restartPolicy = component.getRestartPolicy(); @@ -274,25 +305,65 @@ void processUpgradeRequest(String upgradeVersion, Component.getRestartPolicyHandler(restartPolicy); // Do not allow upgrades for components which have NEVER/ON_FAILURE // restart policy - if (restartPolicyHandler.allowUpgrades()) { + if (!restartPolicyHandler.allowUpgrades()) { + LOG.info("The component {} has a restart policy that doesnt " + + "allow upgrades {} ", component.getName(), + component.getRestartPolicy().toString()); + return true; + } + + return false; + }); + } + + ServiceEvent event = new ServiceEvent(ServiceEventType.UPGRADE) + .setVersion(upgradeVersion) + .setAutoFinalize(autoFinalize) + .setExpressUpgrade(expressUpgrade); + + if (expressUpgrade) { + // In case of express upgrade components need to be upgraded in order. + // Once the service manager gets notified that a component finished + // upgrading, it then issues event to upgrade the next component. + Map + compsNeedUpgradeByName = new HashMap<>(); + if (compsNeedUpgradeList != null) { + compsNeedUpgradeList.forEach(component -> + compsNeedUpgradeByName.put(component.getName(), component)); + } + List resolvedComps = ServiceApiUtil + .resolveCompsDependency(targetSpec); + + Queue + orderedCompUpgrade = new LinkedList<>(); + resolvedComps.forEach(compName -> { + org.apache.hadoop.yarn.service.api.records.Component component = + compsNeedUpgradeByName.get(compName); + if (component != null ) { + orderedCompUpgrade.add(component); + } + }); + event.setCompsToUpgradeInOrder(orderedCompUpgrade); + } + + context.scheduler.getDispatcher().getEventHandler().handle(event); + + if (compsNeedUpgradeList != null && !compsNeedUpgradeList.isEmpty()) { + if (!expressUpgrade) { + compsNeedUpgradeList.forEach(component -> { ComponentEvent needUpgradeEvent = new ComponentEvent( component.getName(), ComponentEventType.UPGRADE).setTargetSpec( component).setUpgradeVersion(event.getVersion()); context.scheduler.getDispatcher().getEventHandler().handle( needUpgradeEvent); - } else { - LOG.info("The component {} has a restart " - + "policy that doesnt allow upgrades {} ", component.getName(), - component.getRestartPolicy().toString()); - } - }); - } else { + + }); + } + } else if (autoFinalize) { // nothing to upgrade if upgrade auto finalize is requested, trigger a // state check. 
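Before building that queue, processUpgradeRequest above first drops every component whose restart policy forbids upgrades, then walks the dependency-resolved component names and keeps only the ones that still need upgrading. A simplified, self-contained sketch of that filter-then-order step, using plain JDK collections and invented CompSpec/buildUpgradeQueue names in place of the service records and ServiceApiUtil.resolveCompsDependency, could look like this:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;

// Sketch of "filter components that cannot be upgraded, then queue the rest
// in dependency order". CompSpec and buildUpgradeQueue are stand-ins.
public class UpgradeOrdering {

  static final class CompSpec {
    final String name;
    final boolean allowsUpgrade;   // e.g. false for NEVER/ON_FAILURE restart policies
    CompSpec(String name, boolean allowsUpgrade) {
      this.name = name;
      this.allowsUpgrade = allowsUpgrade;
    }
  }

  static Queue<CompSpec> buildUpgradeQueue(List<CompSpec> needUpgrade,
      List<String> dependencyOrder) {
    // 1. Drop components whose restart policy does not allow upgrades.
    needUpgrade.removeIf(c -> !c.allowsUpgrade);

    // 2. Index the remaining components by name.
    Map<String, CompSpec> byName = new HashMap<>();
    needUpgrade.forEach(c -> byName.put(c.name, c));

    // 3. Walk the dependency-resolved order and keep only components that
    //    actually need the upgrade, preserving that order.
    Queue<CompSpec> ordered = new LinkedList<>();
    for (String name : dependencyOrder) {
      CompSpec c = byName.get(name);
      if (c != null) {
        ordered.add(c);
      }
    }
    return ordered;
  }

  public static void main(String[] args) {
    List<CompSpec> needUpgrade = new ArrayList<>(Arrays.asList(
        new CompSpec("worker", true),
        new CompSpec("one-shot-job", false),   // restart policy forbids upgrade
        new CompSpec("master", true)));
    List<String> order = Arrays.asList("master", "worker", "one-shot-job");
    buildUpgradeQueue(needUpgrade, order)
        .forEach(c -> System.out.println("upgrade " + c.name));
    // Prints: upgrade master, then upgrade worker.
  }
}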
- if (autoFinalize) { - context.scheduler.getDispatcher().getEventHandler().handle( - new ServiceEvent(ServiceEventType.CHECK_STABLE)); - } + context.scheduler.getDispatcher().getEventHandler().handle( + new ServiceEvent(ServiceEventType.CHECK_STABLE)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java index d3e8e4f33a6..b49ef2ad923 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java @@ -59,6 +59,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.service.api.ServiceApiConstants; +import org.apache.hadoop.yarn.service.api.records.ContainerState; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.api.records.ConfigFile; @@ -80,6 +81,8 @@ import org.apache.hadoop.yarn.service.utils.ServiceRegistryUtils; import org.apache.hadoop.yarn.service.utils.ServiceUtils; import org.apache.hadoop.yarn.util.BoundedAppender; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -102,7 +105,8 @@ import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY; import static org.apache.hadoop.registry.client.api.RegistryConstants.*; -import static org.apache.hadoop.yarn.api.records.ContainerExitStatus.KILLED_AFTER_APP_COMPLETION; +import static org.apache.hadoop.yarn.api.records.ContainerExitStatus + .KILLED_AFTER_APP_COMPLETION; import static org.apache.hadoop.yarn.service.api.ServiceApiConstants.*; import static org.apache.hadoop.yarn.service.component.ComponentEventType.*; import static org.apache.hadoop.yarn.service.exceptions.LauncherExitCodes @@ -137,6 +141,8 @@ private ServiceTimelinePublisher serviceTimelinePublisher; + private boolean timelineServiceEnabled; + // Global diagnostics that will be reported to RM on eRxit. // The unit the number of characters. This will be limited to 64 * 1024 // characters. @@ -169,6 +175,8 @@ private volatile FinalApplicationStatus finalApplicationStatus = FinalApplicationStatus.ENDED; + private Clock systemClock; + // For unit test override since we don't want to terminate UT process. 
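Several ServiceScheduler changes in the hunks that follow are motivated by testability: the dispatcher and the ServiceManager are now created through protected factory methods that a test can override, and time is read from an injectable clock. A minimal sketch of that pattern, with invented names and deliberately kept outside the YARN classes, might look like:

import java.util.function.LongSupplier;

// Sketch of "protected factory methods + injectable clock" for unit testing.
// Scheduler and Dispatcher are hypothetical; they only illustrate the override
// points added to ServiceScheduler (createAsyncDispatcher and friends).
public class Scheduler {

  interface Dispatcher { void dispatch(String event); }

  private final LongSupplier clock;     // swappable time source
  private Dispatcher dispatcher;

  Scheduler(LongSupplier clock) {
    this.clock = clock;
  }

  void start() {
    dispatcher = createDispatcher();    // tests override this factory
    dispatcher.dispatch("STARTED at " + clock.getAsLong());
  }

  // Production implementation; a test subclass returns a recording stub instead.
  protected Dispatcher createDispatcher() {
    return event -> System.out.println("dispatching " + event);
  }

  public static void main(String[] args) {
    // "Unit test" style usage: fixed clock plus an overridden factory method.
    Scheduler testScheduler = new Scheduler(() -> 42L) {
      @Override protected Dispatcher createDispatcher() {
        return event -> System.out.println("recorded for assertions: " + event);
      }
    };
    testScheduler.start();   // prints: recorded for assertions: STARTED at 42
  }
}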
private ServiceUtils.ProcessTerminationHandler terminationHandler = new ServiceUtils.ProcessTerminationHandler(); @@ -176,6 +184,8 @@ public ServiceScheduler(ServiceContext context) { super(context.getService().getName()); this.context = context; + this.app = context.getService(); + this.systemClock = SystemClock.getInstance(); } public void buildInstance(ServiceContext context, Configuration configuration) @@ -219,7 +229,7 @@ public void buildInstance(ServiceContext context, Configuration configuration) nmClient.getClient().cleanupRunningContainersOnStop(false); addIfService(nmClient); - dispatcher = new AsyncDispatcher("Component dispatcher"); + dispatcher = createAsyncDispatcher(); dispatcher.register(ServiceEventType.class, new ServiceEventHandler()); dispatcher.register(ComponentEventType.class, new ComponentEventHandler()); @@ -253,6 +263,15 @@ public void buildInstance(ServiceContext context, Configuration configuration) YarnServiceConf.CONTAINER_RECOVERY_TIMEOUT_MS, YarnServiceConf.DEFAULT_CONTAINER_RECOVERY_TIMEOUT_MS, app.getConfiguration(), getConfig()); + + if (YarnConfiguration + .timelineServiceV2Enabled(getConfig())) { + timelineServiceEnabled = true; + } + + serviceManager = createServiceManager(); + context.setServiceManager(serviceManager); + } protected YarnRegistryViewForProviders createYarnRegistryOperations( @@ -262,6 +281,14 @@ protected YarnRegistryViewForProviders createYarnRegistryOperations( context.attemptId); } + protected ServiceManager createServiceManager() { + return new ServiceManager(context); + } + + protected AsyncDispatcher createAsyncDispatcher() { + return new AsyncDispatcher("Component dispatcher"); + } + protected NMClientAsync createNMClient() { return NMClientAsync.createNMClientAsync(new NMClientCallback()); } @@ -300,21 +327,38 @@ public void serviceStop() throws Exception { // only stop the entire service when a graceful stop has been initiated // (e.g. 
via client RPC, not through the AM receiving a SIGTERM) if (gracefulStop) { + if (YarnConfiguration.timelineServiceV2Enabled(getConfig())) { - // mark component-instances/containers as STOPPED - for (ContainerId containerId : getLiveInstances().keySet()) { - serviceTimelinePublisher.componentInstanceFinished(containerId, - KILLED_AFTER_APP_COMPLETION, diagnostics.toString()); + + // mark other component-instances/containers as STOPPED + final Map liveInst = + getLiveInstances(); + for (Map.Entry instance : liveInst + .entrySet()) { + if (!ComponentInstance.isFinalState( + instance.getValue().getContainerSpec().getState())) { + LOG.info("{} Component instance state changed from {} to {}", + instance.getValue().getCompInstanceName(), + instance.getValue().getContainerSpec().getState(), + ContainerState.STOPPED); + serviceTimelinePublisher.componentInstanceFinished( + instance.getKey(), KILLED_AFTER_APP_COMPLETION, + ContainerState.STOPPED, getDiagnostics().toString()); + } } + + LOG.info("Service state changed to {}", finalApplicationStatus); // mark attempt as unregistered - serviceTimelinePublisher - .serviceAttemptUnregistered(context, diagnostics.toString()); + serviceTimelinePublisher.serviceAttemptUnregistered(context, + finalApplicationStatus, diagnostics.toString()); } + // unregister AM - amRMClient.unregisterApplicationMaster(FinalApplicationStatus.ENDED, + amRMClient.unregisterApplicationMaster(finalApplicationStatus, diagnostics.toString(), ""); - LOG.info("Service {} unregistered with RM, with attemptId = {} " + - ", diagnostics = {} ", app.getName(), context.attemptId, diagnostics); + LOG.info("Service {} unregistered with RM, with attemptId = {} " + + ", diagnostics = {} ", app.getName(), context.attemptId, + diagnostics); } super.serviceStop(); } @@ -344,8 +388,6 @@ public void serviceStart() throws Exception { // Since AM has been started and registered, the service is in STARTED state app.setState(ServiceState.STARTED); - serviceManager = new ServiceManager(context); - context.setServiceManager(serviceManager); // recover components based on containers sent from RM recoverComponents(response); @@ -649,6 +691,7 @@ public void onContainersAllocated(List containers) { @Override public void onContainersReceivedFromPreviousAttempts( List containers) { + LOG.info("Containers recovered after AM registered: {}", containers); if (containers == null || containers.isEmpty()) { return; } @@ -687,7 +730,8 @@ public void onContainersCompleted(List statuses) { } ComponentEvent event = new ComponentEvent(instance.getCompName(), CONTAINER_COMPLETED) - .setStatus(status).setInstance(instance); + .setStatus(status).setInstance(instance) + .setContainerId(containerId); dispatcher.getEventHandler().handle(event); } } @@ -900,7 +944,7 @@ public boolean hasAtLeastOnePlacementConstraint() { * (which #failed-instances + #suceeded-instances = #total-n-containers) * The service will be terminated. */ - public synchronized void terminateServiceIfAllComponentsFinished() { + public void terminateServiceIfAllComponentsFinished() { boolean shouldTerminate = true; // Succeeded comps and failed comps, for logging purposes. 
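The next hunk reworks terminateServiceIfAllComponentsFinished so that every component whose restart policy says it should terminate is marked SUCCEEDED or FAILED, and the service's final state and exit code are derived from whether any component failed. The following sketch captures only that aggregation logic; Outcome, CompResult and the exit-code constants are illustrative stand-ins rather than the actual YARN types.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Sketch of deriving the service's final status and exit code from
// per-component outcomes.
public class TerminationCheck {

  enum Outcome { SUCCEEDED, FAILED, STILL_RUNNING }

  static final class CompResult {
    final String name;
    final Outcome outcome;
    CompResult(String name, Outcome outcome) {
      this.name = name;
      this.outcome = outcome;
    }
  }

  static final int EXIT_SUCCESS = 0;
  static final int EXIT_FALSE = -1;

  /** Returns the exit code, or null if the service should keep running. */
  static Integer terminateIfAllFinished(List<CompResult> comps) {
    List<String> failed = new ArrayList<>();
    for (CompResult comp : comps) {
      if (comp.outcome == Outcome.STILL_RUNNING) {
        return null;              // at least one component keeps the service alive
      }
      if (comp.outcome == Outcome.FAILED) {
        failed.add(comp.name);
      }
    }
    // Every component reached a terminal state: pick the aggregate result.
    return failed.isEmpty() ? EXIT_SUCCESS : EXIT_FALSE;
  }

  public static void main(String[] args) {
    List<CompResult> comps = Arrays.asList(
        new CompResult("loader", Outcome.SUCCEEDED),
        new CompResult("indexer", Outcome.FAILED));
    System.out.println("exit code: " + terminateIfAllFinished(comps));  // -1
  }
}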
@@ -909,7 +953,30 @@ public synchronized void terminateServiceIfAllComponentsFinished() { for (Component comp : getAllComponents().values()) { ComponentRestartPolicy restartPolicy = comp.getRestartPolicyHandler(); - if (!restartPolicy.shouldTerminate(comp)) { + + if (restartPolicy.shouldTerminate(comp)) { + if (restartPolicy.hasCompletedSuccessfully(comp)) { + comp.getComponentSpec().setState(org.apache.hadoop + .yarn.service.api.records.ComponentState.SUCCEEDED); + LOG.info("{} Component state changed from {} to {}", + comp.getName(), comp.getComponentSpec().getState(), + org.apache.hadoop + .yarn.service.api.records.ComponentState.SUCCEEDED); + } else { + comp.getComponentSpec().setState(org.apache.hadoop + .yarn.service.api.records.ComponentState.FAILED); + LOG.info("{} Component state changed from {} to {}", + comp.getName(), comp.getComponentSpec().getState(), + org.apache.hadoop + .yarn.service.api.records.ComponentState.FAILED); + } + + if (isTimelineServiceEnabled()) { + // record in ATS + serviceTimelinePublisher.componentFinished(comp.getComponentSpec(), + comp.getComponentSpec().getState(), systemClock.getTime()); + } + } else { shouldTerminate = false; break; } @@ -918,7 +985,7 @@ public synchronized void terminateServiceIfAllComponentsFinished() { if (nFailed > 0) { failedComponents.add(comp.getName()); - } else{ + } else { succeededComponents.add(comp.getName()); } } @@ -933,16 +1000,28 @@ public synchronized void terminateServiceIfAllComponentsFinished() { LOG.info("Failed components: [" + org.apache.commons.lang3.StringUtils .join(failedComponents, ",") + "]"); + int exitStatus = EXIT_SUCCESS; if (failedComponents.isEmpty()) { setGracefulStop(FinalApplicationStatus.SUCCEEDED); - getTerminationHandler().terminate(EXIT_SUCCESS); - } else{ + app.setState(ServiceState.SUCCEEDED); + } else { setGracefulStop(FinalApplicationStatus.FAILED); - getTerminationHandler().terminate(EXIT_FALSE); + app.setState(ServiceState.FAILED); + exitStatus = EXIT_FALSE; } + + getTerminationHandler().terminate(exitStatus); } } + public Clock getSystemClock() { + return systemClock; + } + + public boolean isTimelineServiceEnabled() { + return timelineServiceEnabled; + } + public ServiceUtils.ProcessTerminationHandler getTerminationHandler() { return terminationHandler; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Artifact.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Artifact.java index ce062cc8384..bba579183f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Artifact.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Artifact.java @@ -38,7 +38,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "Artifact of an service component") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") @JsonInclude(JsonInclude.Include.NON_NULL) public class Artifact implements Serializable { private static final long serialVersionUID = 3608929500111099035L; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java index 0481123c197..9f64b290561 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Component.java @@ -49,7 +49,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "One or more components of the service. If the service is HBase say, then the component can be a simple role like master or regionserver. If the service is a complex business webapp then a component can be other services say Kafka or Storm. Thereby it opens up the support for complex and nested services.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) @JsonInclude(JsonInclude.Include.NON_NULL) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ComponentState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ComponentState.java index 3e7ed11a257..472f3749f70 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ComponentState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ComponentState.java @@ -26,5 +26,5 @@ @InterfaceStability.Unstable @ApiModel(description = "The current state of a component.") public enum ComponentState { - FLEXING, STABLE, NEEDS_UPGRADE, UPGRADING; + FLEXING, STABLE, NEEDS_UPGRADE, UPGRADING, SUCCEEDED, FAILED; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ConfigFile.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ConfigFile.java index 623feedb11f..cd9dc8460bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ConfigFile.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ConfigFile.java @@ -42,7 +42,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "A config file that needs to be created and made available as a volume in an service component container.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") 
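ComponentState above (and ContainerState and ServiceState further below) gains terminal SUCCEEDED and FAILED values so that run-to-completion components can be reported accurately. A typical consumer-side check, written here against a local copy of the enum because no such helper necessarily exists in the YARN service API, might be:

import java.util.EnumSet;

// Illustrative terminal-state check; isTerminal is a hypothetical helper and
// the enum is a local copy of the values added by this patch.
public class ComponentStates {

  enum ComponentState {
    FLEXING, STABLE, NEEDS_UPGRADE, UPGRADING, SUCCEEDED, FAILED
  }

  private static final EnumSet<ComponentState> TERMINAL =
      EnumSet.of(ComponentState.SUCCEEDED, ComponentState.FAILED);

  static boolean isTerminal(ComponentState state) {
    return TERMINAL.contains(state);
  }

  public static void main(String[] args) {
    System.out.println(isTerminal(ComponentState.UPGRADING));  // false
    System.out.println(isTerminal(ComponentState.SUCCEEDED));  // true
  }
}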
@XmlRootElement @JsonInclude(JsonInclude.Include.NON_NULL) public class ConfigFile implements Serializable { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Configuration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Configuration.java index 3ff020f24be..dedfb6db81f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Configuration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Configuration.java @@ -42,7 +42,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "Set of configuration properties that can be injected into the service components via envs, files and custom pluggable helper docker containers. Files of several standard formats like xml, properties, json, yaml and templates will be supported.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") @JsonInclude(JsonInclude.Include.NON_NULL) public class Configuration implements Serializable { private static final long serialVersionUID = -4330788704981074466L; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Container.java index 1ffd85f1739..2f9af7c1d14 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Container.java @@ -37,7 +37,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "An instance of a running service container") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") @XmlRootElement @JsonInclude(JsonInclude.Include.NON_NULL) public class Container extends BaseResource { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ContainerState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ContainerState.java index 6e390737e70..cac527a5482 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ContainerState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ContainerState.java @@ -26,5 +26,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable public 
enum ContainerState { - RUNNING_BUT_UNREADY, READY, STOPPED, NEEDS_UPGRADE, UPGRADING; + RUNNING_BUT_UNREADY, READY, STOPPED, NEEDS_UPGRADE, UPGRADING, SUCCEEDED, + FAILED; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Error.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Error.java index c64b1b579ce..e6754ebf9c2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Error.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Error.java @@ -27,7 +27,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") public class Error { private Integer code = null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/KerberosPrincipal.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/KerberosPrincipal.java index e38fdb5d325..4bae74d311d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/KerberosPrincipal.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/KerberosPrincipal.java @@ -34,8 +34,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "The kerberos principal of the service.") -@javax.annotation.Generated(value = "io.swagger.codegen.languages" + - ".JavaClientCodegen", date = "2017-11-20T11:29:11.785-08:00") @JsonInclude(JsonInclude.Include.NON_NULL) public class KerberosPrincipal implements Serializable { private static final long serialVersionUID = -6431667195287650037L; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementConstraint.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementConstraint.java index 5eaf5e80ccb..1c84c02e2e6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementConstraint.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementConstraint.java @@ -40,9 +40,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "Placement constraint details.") -@javax.annotation.Generated( - value = "class io.swagger.codegen.languages.JavaClientCodegen", - date = "2018-02-16T10:20:12.927-07:00") public class PlacementConstraint implements 
Serializable { private static final long serialVersionUID = 1518017165676511762L; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementPolicy.java index a9824bfcec9..799e6eb86b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementPolicy.java @@ -37,9 +37,6 @@ @InterfaceStability.Unstable @ApiModel(description = "Advanced placement policy of the components of a " + "service.") -@javax.annotation.Generated( - value = "class io.swagger.codegen.languages.JavaClientCodegen", - date = "2018-02-16T10:20:12.927-07:00") public class PlacementPolicy implements Serializable { private static final long serialVersionUID = 4341110649551172231L; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementScope.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementScope.java index 0da19b77a8f..01b1d5dedf3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementScope.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementScope.java @@ -32,9 +32,6 @@ @InterfaceStability.Unstable @ApiModel(description = "The scope of placement for the containers of a " + "component.") -@javax.annotation.Generated( - value = "class io.swagger.codegen.languages.JavaClientCodegen", - date = "2018-02-16T10:20:12.927-07:00") public enum PlacementScope { NODE(PlacementConstraints.NODE), RACK(PlacementConstraints.RACK); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementType.java index 1b155ab9a11..6dfe935fc53 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/PlacementType.java @@ -30,9 +30,6 @@ @ApiModel(description = "The type of placement - affinity/anti-affinity/" + "affinity-with-cardinality with containers of another component or " + "containers of the same component (self).") -@javax.annotation.Generated( - value = "class io.swagger.codegen.languages.JavaClientCodegen", - date = 
"2018-02-16T10:20:12.927-07:00") public enum PlacementType { AFFINITY, ANTI_AFFINITY, AFFINITY_WITH_CARDINALITY; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java index 0665cb53fad..1143a8f2e0e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ReadinessCheck.java @@ -47,7 +47,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "A custom command or a pluggable helper container to determine the readiness of a container of a component. Readiness for every service is different. Hence the need for a simple interface, with scope to support advanced usecases.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) @JsonInclude(JsonInclude.Include.NON_NULL) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Resource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Resource.java index f1c0852cecc..67f55fc61a1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Resource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Resource.java @@ -40,7 +40,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "Resource determines the amount of resources (vcores, memory, network, etc.) usable by a container. This field determines the resource to be applied for all the containers of a component or service. The resource specified at the service (or global) level can be overriden at the component level. Only one of profile OR cpu & memory are expected. 
It raises a validation exception otherwise.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") public class Resource extends BaseResource implements Cloneable { private static final long serialVersionUID = -6431667797380250037L; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ResourceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ResourceInformation.java index f39b11adc38..103fffb6f75 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ResourceInformation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ResourceInformation.java @@ -28,8 +28,6 @@ * ResourceInformation determines unit/name/value of resource types in addition to memory and vcores. It will be part of Resource object */ @ApiModel(description = "ResourceInformation determines unit/value of resource types in addition to memory and vcores. It will be part of Resource object") -@javax.annotation.Generated(value = "io.swagger.codegen.languages.JavaClientCodegen", - date = "2017-11-22T15:15:49.495-08:00") public class ResourceInformation { @SerializedName("value") private Long value = null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Service.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Service.java index 22beff44db8..57d1a1aa4ab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Service.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/Service.java @@ -43,7 +43,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "An Service resource has the following attributes.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) @JsonInclude(JsonInclude.Include.NON_NULL) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceState.java index b6ae38bdeee..49c19853671 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceState.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceState.java @@ -27,8 +27,7 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "The current state of an service.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") public enum ServiceState { ACCEPTED, STARTED, STABLE, STOPPED, FAILED, FLEX, UPGRADING, - UPGRADING_AUTO_FINALIZE; + UPGRADING_AUTO_FINALIZE, EXPRESS_UPGRADING, SUCCEEDED; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceStatus.java index f9c81909634..c1bb73a3f4e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/api/records/ServiceStatus.java @@ -36,7 +36,6 @@ @InterfaceAudience.Public @InterfaceStability.Unstable @ApiModel(description = "The current status of a submitted service, returned as a response to the GET API.") -@javax.annotation.Generated(value = "class io.swagger.codegen.languages.JavaClientCodegen", date = "2016-06-02T08:15:05.615-07:00") @XmlRootElement @JsonInclude(JsonInclude.Include.NON_NULL) public class ServiceStatus extends BaseResource { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java index 699a4e508cb..a27ed87aa63 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.service.client; import com.google.common.annotations.VisibleForTesting; + import org.apache.commons.lang3.StringUtils; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; @@ -57,6 +58,8 @@ import org.apache.hadoop.yarn.proto.ClientAMProtocol.CompInstancesUpgradeRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.ComponentCountProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsRequestProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesRequestProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.RestartServiceRequestProto; @@ -66,6 
+69,7 @@ import org.apache.hadoop.yarn.service.ClientAMProtocol; import org.apache.hadoop.yarn.service.ServiceMaster; import org.apache.hadoop.yarn.service.api.records.Container; +import org.apache.hadoop.yarn.service.api.records.ContainerState; import org.apache.hadoop.yarn.service.api.records.Component; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.ServiceState; @@ -100,6 +104,7 @@ import java.text.MessageFormat; import java.util.*; import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; import static org.apache.hadoop.yarn.api.records.YarnApplicationState.*; import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.*; @@ -211,49 +216,32 @@ public int actionBuild(Service service) return EXIT_SUCCESS; } - @Override - public int initiateUpgrade(String appName, String fileName, - boolean autoFinalize) - throws IOException, YarnException { - Service upgradeService = loadAppJsonFromLocalFS(fileName, appName, - null, null); - if (autoFinalize) { - upgradeService.setState(ServiceState.UPGRADING_AUTO_FINALIZE); - } else { - upgradeService.setState(ServiceState.UPGRADING); - } - return initiateUpgrade(upgradeService); - } - - public int initiateUpgrade(Service service) throws YarnException, - IOException { + private ApplicationReport upgradePrecheck(Service service) + throws YarnException, IOException { boolean upgradeEnabled = getConfig().getBoolean( - YARN_SERVICE_UPGRADE_ENABLED, - YARN_SERVICE_UPGRADE_ENABLED_DEFAULT); + YARN_SERVICE_UPGRADE_ENABLED, YARN_SERVICE_UPGRADE_ENABLED_DEFAULT); if (!upgradeEnabled) { throw new YarnException(ErrorStrings.SERVICE_UPGRADE_DISABLED); } - Service persistedService = - ServiceApiUtil.loadService(fs, service.getName()); + Service persistedService = ServiceApiUtil.loadService(fs, + service.getName()); if (!StringUtils.isEmpty(persistedService.getId())) { - cachedAppInfo.put(persistedService.getName(), new AppInfo( - ApplicationId.fromString(persistedService.getId()), - persistedService.getKerberosPrincipal().getPrincipalName())); + cachedAppInfo.put(persistedService.getName(), + new AppInfo(ApplicationId.fromString(persistedService.getId()), + persistedService.getKerberosPrincipal().getPrincipalName())); } if (persistedService.getVersion().equals(service.getVersion())) { - String message = - service.getName() + " is already at version " + service.getVersion() - + ". There is nothing to upgrade."; + String message = service.getName() + " is already at version " + + service.getVersion() + ". 
There is nothing to upgrade."; LOG.error(message); throw new YarnException(message); } Service liveService = getStatus(service.getName()); if (!liveService.getState().equals(ServiceState.STABLE)) { - String message = service.getName() + " is at " + - liveService.getState() - + " state, upgrade can not be invoked when service is STABLE."; + String message = service.getName() + " is at " + liveService.getState() + + " state and upgrade can only be initiated when service is STABLE."; LOG.error(message); throw new YarnException(message); } @@ -262,11 +250,67 @@ public int initiateUpgrade(Service service) throws YarnException, ServiceApiUtil.validateAndResolveService(service, fs, getConfig()); ServiceApiUtil.createDirAndPersistApp(fs, serviceUpgradeDir, service); - ApplicationReport appReport = - yarnClient.getApplicationReport(getAppId(service.getName())); + ApplicationReport appReport = yarnClient + .getApplicationReport(getAppId(service.getName())); if (StringUtils.isEmpty(appReport.getHost())) { throw new YarnException(service.getName() + " AM hostname is empty"); } + return appReport; + } + + @Override + public int actionUpgradeExpress(String appName, File path) + throws IOException, YarnException { + Service service = + loadAppJsonFromLocalFS(path.getAbsolutePath(), appName, null, null); + service.setState(ServiceState.UPGRADING_AUTO_FINALIZE); + actionUpgradeExpress(service); + return EXIT_SUCCESS; + } + + public int actionUpgradeExpress(Service service) throws YarnException, + IOException { + ApplicationReport appReport = upgradePrecheck(service); + ClientAMProtocol proxy = createAMProxy(service.getName(), appReport); + UpgradeServiceRequestProto.Builder requestBuilder = + UpgradeServiceRequestProto.newBuilder(); + requestBuilder.setVersion(service.getVersion()); + if (service.getState().equals(ServiceState.UPGRADING_AUTO_FINALIZE)) { + requestBuilder.setAutoFinalize(true); + } + if (service.getState().equals(ServiceState.EXPRESS_UPGRADING)) { + requestBuilder.setExpressUpgrade(true); + requestBuilder.setAutoFinalize(true); + } + UpgradeServiceResponseProto responseProto = proxy.upgrade( + requestBuilder.build()); + if (responseProto.hasError()) { + LOG.error("Service {} express upgrade to version {} failed because {}", + service.getName(), service.getVersion(), responseProto.getError()); + throw new YarnException("Failed to express upgrade service " + + service.getName() + " to version " + service.getVersion() + + " because " + responseProto.getError()); + } + return EXIT_SUCCESS; + } + + @Override + public int initiateUpgrade(String appName, String fileName, + boolean autoFinalize) + throws IOException, YarnException { + Service upgradeService = loadAppJsonFromLocalFS(fileName, appName, + null, null); + if (autoFinalize) { + upgradeService.setState(ServiceState.UPGRADING_AUTO_FINALIZE); + } else { + upgradeService.setState(ServiceState.UPGRADING); + } + return initiateUpgrade(upgradeService); + } + + public int initiateUpgrade(Service service) throws YarnException, + IOException { + ApplicationReport appReport = upgradePrecheck(service); ClientAMProtocol proxy = createAMProxy(service.getName(), appReport); UpgradeServiceRequestProto.Builder requestBuilder = @@ -318,6 +362,49 @@ public int actionCleanUp(String appName, String userName) throws } } + @Override + public String getInstances(String appName, + List components, String version, List containerStates) + throws IOException, YarnException { + GetCompInstancesResponseProto result = filterContainers(appName, components, + version, 
containerStates); + return result.getCompInstances(); + } + + public Container[] getContainers(String appName, List components, + String version, List containerStates) + throws IOException, YarnException { + GetCompInstancesResponseProto result = filterContainers(appName, components, + version, containerStates != null ? containerStates.stream() + .map(Enum::toString).collect(Collectors.toList()) : null); + + return ServiceApiUtil.CONTAINER_JSON_SERDE.fromJson( + result.getCompInstances()); + } + + private GetCompInstancesResponseProto filterContainers(String appName, + List components, String version, + List containerStates) throws IOException, YarnException { + ApplicationReport appReport = yarnClient.getApplicationReport(getAppId( + appName)); + if (StringUtils.isEmpty(appReport.getHost())) { + throw new YarnException(appName + " AM hostname is empty."); + } + ClientAMProtocol proxy = createAMProxy(appName, appReport); + GetCompInstancesRequestProto.Builder req = GetCompInstancesRequestProto + .newBuilder(); + if (components != null && !components.isEmpty()) { + req.addAllComponentNames(components); + } + if (version != null) { + req.setVersion(version); + } + if (containerStates != null && !containerStates.isEmpty()){ + req.addAllContainerStates(containerStates); + } + return proxy.getCompInstances(req.build()); + } + public int actionUpgrade(Service service, List compInstances) throws IOException, YarnException { ApplicationReport appReport = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java index 704ab14d0ec..505120d8c25 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java @@ -79,4 +79,9 @@ public static AlwaysRestartPolicy getInstance() { @Override public boolean shouldTerminate(Component component) { return false; } + + @Override public boolean allowContainerRetriesForInstance( + ComponentInstance componentInstance) { + return true; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java index a1ee7964b83..acf3404fe93 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/Component.java @@ -19,7 +19,9 @@ package org.apache.hadoop.yarn.service.component; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import org.apache.hadoop.yarn.api.records.Container; +import 
org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ExecutionType; import static org.apache.hadoop.yarn.service.api.records.Component @@ -42,6 +44,7 @@ import org.apache.hadoop.yarn.service.api.records.ContainerState; import org.apache.hadoop.yarn.service.api.records.ResourceInformation; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType; import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceId; import org.apache.hadoop.yarn.service.ContainerFailureTracker; import org.apache.hadoop.yarn.service.ServiceContext; @@ -518,10 +521,10 @@ private static synchronized void checkAndUpdateComponentState( private static class ContainerCompletedTransition extends BaseTransition { @Override public void transition(Component component, ComponentEvent event) { - + Preconditions.checkNotNull(event.getContainerId()); component.updateMetrics(event.getStatus()); component.dispatcher.getEventHandler().handle( - new ComponentInstanceEvent(event.getStatus().getContainerId(), STOP) + new ComponentInstanceEvent(event.getContainerId(), STOP) .setStatus(event.getStatus())); ComponentRestartPolicy restartPolicy = @@ -545,13 +548,21 @@ public void transition(Component component, ComponentEvent event) { @Override public void transition(Component component, ComponentEvent event) { component.upgradeInProgress.set(true); + component.upgradeEvent = event; component.componentSpec.setState(org.apache.hadoop.yarn.service.api. records.ComponentState.NEEDS_UPGRADE); component.numContainersThatNeedUpgrade.set( component.componentSpec.getNumberOfContainers()); - component.componentSpec.getContainers().forEach(container -> - container.setState(ContainerState.NEEDS_UPGRADE)); - component.upgradeEvent = event; + component.componentSpec.getContainers().forEach(container -> { + container.setState(ContainerState.NEEDS_UPGRADE); + if (event.isExpressUpgrade()) { + ComponentInstanceEvent upgradeEvent = new ComponentInstanceEvent( + ContainerId.fromString(container.getId()), + ComponentInstanceEventType.UPGRADE); + LOG.info("Upgrade container {}", container.getId()); + component.dispatcher.getEventHandler().handle(upgradeEvent); + } + }); } } @@ -634,7 +645,8 @@ private void assignContainerToCompInstance(Container container) { version); launchContext.setArtifact(compSpec.getArtifact()) .setConfiguration(compSpec.getConfiguration()) - .setLaunchCommand(compSpec.getLaunchCommand()); + .setLaunchCommand(compSpec.getLaunchCommand()) + .setRunPrivilegedContainer(compSpec.getRunPrivilegedContainer()); return launchContext; } @@ -784,28 +796,33 @@ private void setDesiredContainers(int n) { } private void updateMetrics(ContainerStatus status) { - switch (status.getExitStatus()) { - case SUCCESS: - componentMetrics.containersSucceeded.incr(); - scheduler.getServiceMetrics().containersSucceeded.incr(); - return; - case PREEMPTED: - componentMetrics.containersPreempted.incr(); - scheduler.getServiceMetrics().containersPreempted.incr(); - break; - case DISKS_FAILED: - componentMetrics.containersDiskFailure.incr(); - scheduler.getServiceMetrics().containersDiskFailure.incr(); - break; - default: - break; + //when a container preparation fails while building launch context, then + //the container status may not exist. 
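    // Descriptive note (editorial, not a patch line): the null ContainerStatus
    // case comes from the ContainerLaunchService change later in this patch,
    // which dispatches a CONTAINER_COMPLETED ComponentEvent carrying only the
    // ContainerId (no status) when building the launch context throws; every
    // exit-status dependent branch here is therefore guarded by the null check.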
+ if (status != null) { + switch (status.getExitStatus()) { + case SUCCESS: + componentMetrics.containersSucceeded.incr(); + scheduler.getServiceMetrics().containersSucceeded.incr(); + return; + case PREEMPTED: + componentMetrics.containersPreempted.incr(); + scheduler.getServiceMetrics().containersPreempted.incr(); + break; + case DISKS_FAILED: + componentMetrics.containersDiskFailure.incr(); + scheduler.getServiceMetrics().containersDiskFailure.incr(); + break; + default: + break; + } } // containersFailed include preempted, disks_failed etc. componentMetrics.containersFailed.incr(); scheduler.getServiceMetrics().containersFailed.incr(); - if (Apps.shouldCountTowardsNodeBlacklisting(status.getExitStatus())) { + if (status != null && Apps.shouldCountTowardsNodeBlacklisting( + status.getExitStatus())) { String host = scheduler.getLiveInstances().get(status.getContainerId()) .getNodeId().getHost(); failureTracker.incNodeFailure(host); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentEvent.java index 84caa77b205..643961d505a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentEvent.java @@ -35,6 +35,7 @@ private ContainerId containerId; private org.apache.hadoop.yarn.service.api.records.Component targetSpec; private String upgradeVersion; + private boolean expressUpgrade; public ContainerId getContainerId() { return containerId; @@ -113,4 +114,13 @@ public ComponentEvent setUpgradeVersion(String upgradeVersion) { this.upgradeVersion = upgradeVersion; return this; } + + public boolean isExpressUpgrade() { + return expressUpgrade; + } + + public ComponentEvent setExpressUpgrade(boolean expressUpgrade) { + this.expressUpgrade = expressUpgrade; + return this; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java index 23b0fb9e2c8..c5adffebcc8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java @@ -42,4 +42,6 @@ boolean shouldRelaunchInstance(ComponentInstance componentInstance, boolean shouldTerminate(Component component); + boolean allowContainerRetriesForInstance(ComponentInstance componentInstance); + } \ No newline at end of file diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java index ace1f8940e7..cd44a585680 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java @@ -79,4 +79,9 @@ public static NeverRestartPolicy getInstance() { } return true; } + + @Override public boolean allowContainerRetriesForInstance( + ComponentInstance componentInstance) { + return false; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java index 39fba2afd01..b939ba0428f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java @@ -84,4 +84,9 @@ public static OnFailureRestartPolicy getInstance() { } return true; } + + @Override public boolean allowContainerRetriesForInstance( + ComponentInstance componentInstance) { + return true; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java index 529596d989e..afd8c671fa4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java @@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.client.api.NMClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -41,7 +42,9 @@ import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.service.ServiceScheduler; import org.apache.hadoop.yarn.service.api.records.Artifact; +import org.apache.hadoop.yarn.service.api.records.ComponentState; import 
org.apache.hadoop.yarn.service.api.records.ContainerState; +import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.component.Component; import org.apache.hadoop.yarn.service.component.ComponentEvent; import org.apache.hadoop.yarn.service.component.ComponentEventType; @@ -68,6 +71,9 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; import static org.apache.hadoop.registry.client.types.yarn.YarnRegistryAttributes.*; + +import static org.apache.hadoop.yarn.api.records.ContainerExitStatus + .KILLED_AFTER_APP_COMPLETION; import static org.apache.hadoop.yarn.api.records.ContainerExitStatus.KILLED_BY_APPMASTER; import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType.*; import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceState.*; @@ -76,6 +82,8 @@ Comparable { private static final Logger LOG = LoggerFactory.getLogger(ComponentInstance.class); + private static final String FAILED_BEFORE_LAUNCH_DIAG = + "failed before launch"; private StateMachine stateMachine; @@ -97,6 +105,7 @@ private long containerStartedTime = 0; // This container object is used for rest API query private org.apache.hadoop.yarn.service.api.records.Container containerSpec; + private String serviceVersion; private static final StateMachineFactory comp.maxContainerFailurePerComp) { + && comp.currentContainerFailure.get() + > comp.maxContainerFailurePerComp) { String exitDiag = MessageFormat.format( - "[COMPONENT {0}]: Failed {1} times, exceeded the limit - {2}. Shutting down now... " - + System.lineSeparator(), - comp.getName(), comp.currentContainerFailure.get(), comp.maxContainerFailurePerComp); + "[COMPONENT {0}]: Failed {1} times, exceeded the limit - {2}. " + + "Shutting down now... " + + System.lineSeparator(), comp.getName(), + comp.currentContainerFailure.get(), + comp.maxContainerFailurePerComp); compInstance.diagnostics.append(exitDiag); // append to global diagnostics that will be reported to RM. scheduler.getDiagnostics().append(containerDiag); scheduler.getDiagnostics().append(exitDiag); LOG.warn(exitDiag); + + compInstance.getContainerSpec().setState(ContainerState.FAILED); + comp.getComponentSpec().setState(ComponentState.FAILED); + comp.getScheduler().getApp().setState(ServiceState.FAILED); + + if (compInstance.timelineServiceEnabled) { + // record in ATS + compInstance.scheduler.getServiceTimelinePublisher() + .componentInstanceFinished(compInstance.getContainer().getId(), + failedBeforeLaunching ? 
+ -1 : + event.getStatus().getExitStatus(), ContainerState.FAILED, + containerDiag); + + // mark other component-instances/containers as STOPPED + for (ContainerId containerId : scheduler.getLiveInstances() + .keySet()) { + if (!compInstance.container.getId().equals(containerId) + && !isFinalState(compInstance.getContainerSpec().getState())) { + compInstance.getContainerSpec().setState(ContainerState.STOPPED); + compInstance.scheduler.getServiceTimelinePublisher() + .componentInstanceFinished(containerId, + KILLED_AFTER_APP_COMPLETION, ContainerState.STOPPED, + scheduler.getDiagnostics().toString()); + } + } + + compInstance.scheduler.getServiceTimelinePublisher() + .componentFinished(comp.getComponentSpec(), ComponentState.FAILED, + scheduler.getSystemClock().getTime()); + + compInstance.scheduler.getServiceTimelinePublisher() + .serviceAttemptUnregistered(comp.getContext(), + FinalApplicationStatus.FAILED, + scheduler.getDiagnostics().toString()); + } + shouldFailService = true; } if (!failedBeforeLaunching) { // clean up registry - // If the container failed before launching, no need to cleanup registry, + // If the container failed before launching, no need to cleanup + // registry, // because it was not registered before. - // hdfs dir content will be overwritten when a new container gets started, + // hdfs dir content will be overwritten when a new container gets + // started, // so no need remove. - compInstance.scheduler.executorService - .submit(() -> compInstance.cleanupRegistry(event.getContainerId())); - - if (compInstance.timelineServiceEnabled) { - // record in ATS - compInstance.serviceTimelinePublisher - .componentInstanceFinished(event.getContainerId(), - event.getStatus().getExitStatus(), containerDiag); - } - compInstance.containerSpec.setState(ContainerState.STOPPED); + compInstance.scheduler.executorService.submit( + () -> compInstance.cleanupRegistry(event.getContainerId())); } // remove the failed ContainerId -> CompInstance mapping @@ -347,7 +435,8 @@ public void transition(ComponentInstance compInstance, // According to component restart policy, handle container restart // or finish the service (if all components finished) - handleComponentInstanceRelaunch(compInstance, event); + handleComponentInstanceRelaunch(compInstance, event, + failedBeforeLaunching, containerDiag); if (shouldFailService) { scheduler.getTerminationHandler().terminate(-1); @@ -355,11 +444,21 @@ public void transition(ComponentInstance compInstance, } } + public static boolean isFinalState(ContainerState state) { + return ContainerState.FAILED.equals(state) || ContainerState.STOPPED + .equals(state) || ContainerState.SUCCEEDED.equals(state); + } + private static class ContainerUpgradeTransition extends BaseTransition { @Override public void transition(ComponentInstance compInstance, ComponentInstanceEvent event) { + if (!compInstance.containerSpec.getState().equals( + ContainerState.NEEDS_UPGRADE)) { + //nothing to upgrade. this may happen with express upgrade. + return; + } compInstance.containerSpec.setState(ContainerState.UPGRADING); compInstance.component.decContainersReady(false); ComponentEvent upgradeEvent = compInstance.component.getUpgradeEvent(); @@ -382,6 +481,30 @@ public ComponentInstanceState getState() { } } + /** + * Returns the version of service at which the instance is at. 
+ */ + public String getServiceVersion() { + this.readLock.lock(); + try { + return this.serviceVersion; + } finally { + this.readLock.unlock(); + } + } + + /** + * Returns the state of the container in the container spec. + */ + public ContainerState getContainerState() { + this.readLock.lock(); + try { + return this.containerSpec.getState(); + } finally { + this.readLock.unlock(); + } + } + @Override public void handle(ComponentInstanceEvent event) { try { @@ -465,7 +588,7 @@ public NodeId getNodeId() { return this.container.getNodeId(); } - public org.apache.hadoop.yarn.service.api.records.Component getCompSpec() { + private org.apache.hadoop.yarn.service.api.records.Component getCompSpec() { return component.getComponentSpec(); } @@ -537,7 +660,7 @@ public void destroy() { if (timelineServiceEnabled) { serviceTimelinePublisher.componentInstanceFinished(containerId, - KILLED_BY_APPMASTER, diagnostics.toString()); + KILLED_BY_APPMASTER, ContainerState.STOPPED, diagnostics.toString()); } cancelContainerStatusRetriever(); scheduler.executorService.submit(() -> @@ -667,8 +790,16 @@ public int compareTo(ComponentInstance to) { return result; } - @VisibleForTesting public org.apache.hadoop.yarn.service.api.records + /** + * Returns container spec. + */ + public org.apache.hadoop.yarn.service.api.records .Container getContainerSpec() { - return containerSpec; + readLock.lock(); + try { + return containerSpec; + } finally { + readLock.unlock(); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java index 707b0349655..889da6e15ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstanceEvent.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.service.component.instance; +import com.google.common.base.Preconditions; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.event.AbstractEvent; @@ -32,6 +33,7 @@ public ComponentInstanceEvent(ContainerId containerId, ComponentInstanceEventType componentInstanceEventType) { super(componentInstanceEventType); + Preconditions.checkNotNull(containerId); this.id = containerId; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/RestApiConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/RestApiConstants.java index 2d7db32b23e..45ad7e4adbb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/RestApiConstants.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/RestApiConstants.java @@ -37,11 +37,14 @@ String COMPONENTS = "components"; String COMPONENTS_PATH = SERVICE_PATH + "/" + COMPONENTS; - // Query param String SERVICE_NAME = "service_name"; String COMPONENT_NAME = "component_name"; String COMP_INSTANCE_NAME = "component_instance_name"; + String PARAM_COMP_NAME = "componentName"; + String PARAM_VERSION = "version"; + String PARAM_CONTAINER_STATE = "containerState"; + String MEDIA_TYPE_JSON_UTF8 = MediaType.APPLICATION_JSON + ";charset=utf-8"; Long DEFAULT_UNLIMITED_LIFETIME = -1l; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java index da5a8d6dbbd..c34cc77dcbb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.service.containerlaunch; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; @@ -254,4 +255,8 @@ public void setRunPrivilegedContainer(boolean runPrivilegedContainer) { this.runPrivilegedContainer = runPrivilegedContainer; } + @VisibleForTesting + public String getDockerImage() { + return dockerImage; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java index 084c721ebf6..3c856ec1907 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/ContainerLaunchService.java @@ -22,8 +22,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.service.ServiceContext; import org.apache.hadoop.yarn.service.api.records.Artifact; +import org.apache.hadoop.yarn.service.component.ComponentEvent; +import org.apache.hadoop.yarn.service.component.ComponentEventType; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; import org.apache.hadoop.yarn.service.provider.ProviderService; import org.apache.hadoop.yarn.service.provider.ProviderFactory; @@ 
-116,9 +119,12 @@ public void reInitCompInstance(Service service, launcher.completeContainerLaunch(), true); } } catch (Exception e) { - LOG.error(instance.getCompInstanceId() - + ": Failed to launch container. ", e); - + LOG.error("{}: Failed to launch container.", + instance.getCompInstanceId(), e); + ComponentEvent event = new ComponentEvent(instance.getCompName(), + ComponentEventType.CONTAINER_COMPLETED) + .setInstance(instance).setContainerId(container.getId()); + context.scheduler.getDispatcher().getEventHandler().handle(event); } } } @@ -133,6 +139,7 @@ public void reInitCompInstance(Service service, private org.apache.hadoop.yarn.service.api.records.Configuration configuration; private String launchCommand; + private boolean runPrivilegedContainer; public ComponentLaunchContext(String name, String serviceVersion) { this.name = Preconditions.checkNotNull(name); @@ -160,6 +167,10 @@ public String getLaunchCommand() { return launchCommand; } + public boolean isRunPrivilegedContainer() { + return runPrivilegedContainer; + } + public ComponentLaunchContext setArtifact(Artifact artifact) { this.artifact = artifact; return this; @@ -175,5 +186,11 @@ public ComponentLaunchContext setLaunchCommand(String launchCommand) { this.launchCommand = launchCommand; return this; } + + public ComponentLaunchContext setRunPrivilegedContainer( + boolean runPrivilegedContainer) { + this.runPrivilegedContainer = runPrivilegedContainer; + return this; + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/exceptions/RestApiErrorMessages.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/exceptions/RestApiErrorMessages.java index 5b3c72cae4a..8f831ee5a70 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/exceptions/RestApiErrorMessages.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/exceptions/RestApiErrorMessages.java @@ -50,6 +50,10 @@ "Artifact id (like docker image name) is either empty or not provided"; String ERROR_ARTIFACT_ID_FOR_COMP_INVALID = ERROR_ARTIFACT_ID_INVALID + ERROR_SUFFIX_FOR_COMPONENT; + String ERROR_ARTIFACT_PATH_FOR_COMP_INVALID = "For component %s with %s " + + "artifact, path does not exist: %s"; + String ERROR_CONFIGFILE_DEST_FILE_FOR_COMP_NOT_ABSOLUTE = "For component %s " + + "with %s artifact, dest_file must be a relative path: %s"; String ERROR_RESOURCE_INVALID = "Resource is not provided"; String ERROR_RESOURCE_FOR_COMP_INVALID = @@ -89,7 +93,7 @@ String ERROR_ABSENT_NUM_OF_INSTANCE = "Num of instances should appear either globally or per component"; String ERROR_ABSENT_LAUNCH_COMMAND = - "Launch_command is required when type is not DOCKER"; + "launch_command is required when type is not DOCKER"; String ERROR_QUICKLINKS_FOR_COMP_INVALID = "Quicklinks specified at" + " component level, needs corresponding values set at service level"; @@ -121,4 +125,8 @@ String ERROR_COMP_DOES_NOT_NEED_UPGRADE = "The component (%s) does not need" + " an upgrade."; + String ERROR_KERBEROS_PRINCIPAL_NAME_FORMAT = "Kerberos principal (%s) does " + + " not contain a hostname."; + String ERROR_KERBEROS_PRINCIPAL_MISSING = "Kerberos principal or keytab is" + + " 
missing."; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/client/ClientAMProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/client/ClientAMProtocolPBClientImpl.java index e82181eb703..49ecd2e425f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/client/ClientAMProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/client/ClientAMProtocolPBClientImpl.java @@ -34,6 +34,8 @@ import org.apache.hadoop.yarn.proto.ClientAMProtocol.CompInstancesUpgradeRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsResponseProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesRequestProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusResponseProto; import org.apache.hadoop.yarn.service.impl.pb.service.ClientAMProtocolPB; @@ -128,4 +130,15 @@ public CompInstancesUpgradeResponseProto upgrade( } return null; } + + @Override + public GetCompInstancesResponseProto getCompInstances( + GetCompInstancesRequestProto request) throws IOException, YarnException { + try { + return proxy.getCompInstances(null, request); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + } + return null; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/service/ClientAMProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/service/ClientAMProtocolPBServiceImpl.java index 50a678b393d..eab3f9fb959 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/service/ClientAMProtocolPBServiceImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/impl/pb/service/ClientAMProtocolPBServiceImpl.java @@ -25,6 +25,8 @@ import org.apache.hadoop.yarn.proto.ClientAMProtocol.CompInstancesUpgradeResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.FlexComponentsResponseProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesRequestProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetStatusResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.RestartServiceRequestProto; @@ -103,4 +105,15 @@ public CompInstancesUpgradeResponseProto upgrade(RpcController controller, 
throw new ServiceException(e); } } + + @Override + public GetCompInstancesResponseProto getCompInstances( + RpcController controller, GetCompInstancesRequestProto request) + throws ServiceException { + try { + return real.getCompInstances(request); + } catch (IOException | YarnException e) { + throw new ServiceException(e); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractClientProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractClientProvider.java index 672c4352433..ae796196d25 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractClientProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractClientProvider.java @@ -68,18 +68,18 @@ public AbstractClientProvider() { * Validate the artifact. * @param artifact */ - public abstract void validateArtifact(Artifact artifact, FileSystem - fileSystem) throws IOException; + public abstract void validateArtifact(Artifact artifact, String compName, + FileSystem fileSystem) throws IOException; - protected abstract void validateConfigFile(ConfigFile configFile, FileSystem - fileSystem) throws IOException; + protected abstract void validateConfigFile(ConfigFile configFile, + String compName, FileSystem fileSystem) throws IOException; /** * Validate the config files. * @param configFiles config file list * @param fs file system */ - public void validateConfigFiles(List configFiles, + public void validateConfigFiles(List configFiles, String compName, FileSystem fs) throws IOException { Set destFileSet = new HashSet<>(); @@ -128,7 +128,7 @@ public void validateConfigFiles(List configFiles, } if (StringUtils.isEmpty(file.getDestFile())) { - throw new IllegalArgumentException("Dest_file is empty."); + throw new IllegalArgumentException("dest_file is empty."); } if (destFileSet.contains(file.getDestFile())) { @@ -144,7 +144,7 @@ public void validateConfigFiles(List configFiles, } // provider-specific validation - validateConfigFile(file, fs); + validateConfigFile(file, compName, fs); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java index 9c71e66823d..4394e62922a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java @@ -22,6 +22,7 @@ import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.component.ComponentRestartPolicy; 
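/*
 * Descriptive note (editorial, not a patch line): buildContainerRetry in this
 * class now consults the component's ComponentRestartPolicy before wiring
 * NM-side container retries. AlwaysRestartPolicy and OnFailureRestartPolicy
 * return true from allowContainerRetriesForInstance, NeverRestartPolicy
 * returns false, so never-restart components get no retry context at all.
 * A minimal sketch of the gate, using only calls introduced in this patch
 * (retryMax, retryInterval, validityInterval are placeholders):
 *
 *   if (instance.getComponent().getRestartPolicyHandler()
 *       .allowContainerRetriesForInstance(instance)) {
 *     launcher.setRetryContext(retryMax, retryInterval, validityInterval);
 *   }
 */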
import org.apache.hadoop.yarn.service.conf.YarnServiceConf; import org.apache.hadoop.yarn.service.conf.YarnServiceConstants; import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService; @@ -55,7 +56,8 @@ public abstract void processArtifact(AbstractLauncher launcher, ComponentInstance compInstance, SliderFileSystem fileSystem, - Service service) + Service service, + ContainerLaunchService.ComponentLaunchContext compLaunchCtx) throws IOException; public Map buildContainerTokens(ComponentInstance instance, @@ -116,18 +118,22 @@ public void buildContainerLaunchCommand(AbstractLauncher launcher, public void buildContainerRetry(AbstractLauncher launcher, Configuration yarnConf, - ContainerLaunchService.ComponentLaunchContext compLaunchContext) { + ContainerLaunchService.ComponentLaunchContext compLaunchContext, + ComponentInstance instance) { // By default retry forever every 30 seconds - launcher.setRetryContext( - YarnServiceConf.getInt(CONTAINER_RETRY_MAX, - DEFAULT_CONTAINER_RETRY_MAX, - compLaunchContext.getConfiguration(), yarnConf), - YarnServiceConf.getInt(CONTAINER_RETRY_INTERVAL, - DEFAULT_CONTAINER_RETRY_INTERVAL, - compLaunchContext.getConfiguration(), yarnConf), - YarnServiceConf.getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL, - DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL, - compLaunchContext.getConfiguration(), yarnConf)); + + ComponentRestartPolicy restartPolicy = instance.getComponent() + .getRestartPolicyHandler(); + if (restartPolicy.allowContainerRetriesForInstance(instance)) { + launcher.setRetryContext(YarnServiceConf + .getInt(CONTAINER_RETRY_MAX, DEFAULT_CONTAINER_RETRY_MAX, + compLaunchContext.getConfiguration(), yarnConf), YarnServiceConf + .getInt(CONTAINER_RETRY_INTERVAL, DEFAULT_CONTAINER_RETRY_INTERVAL, + compLaunchContext.getConfiguration(), yarnConf), YarnServiceConf + .getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL, + DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL, + compLaunchContext.getConfiguration(), yarnConf)); + } } public void buildContainerLaunchContext(AbstractLauncher launcher, @@ -135,7 +141,7 @@ public void buildContainerLaunchContext(AbstractLauncher launcher, SliderFileSystem fileSystem, Configuration yarnConf, Container container, ContainerLaunchService.ComponentLaunchContext compLaunchContext) throws IOException, SliderException { - processArtifact(launcher, instance, fileSystem, service); + processArtifact(launcher, instance, fileSystem, service, compLaunchContext); ServiceContext context = instance.getComponent().getScheduler().getContext(); @@ -161,6 +167,6 @@ public void buildContainerLaunchContext(AbstractLauncher launcher, yarnConf, container, compLaunchContext, tokensForSubstitution); // Setup container retry settings - buildContainerRetry(launcher, yarnConf, compLaunchContext); + buildContainerRetry(launcher, yarnConf, compLaunchContext, instance); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultClientProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultClientProvider.java index 0920a9ce114..999a8dc9cb2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultClientProvider.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultClientProvider.java @@ -17,13 +17,16 @@ */ package org.apache.hadoop.yarn.service.provider.defaultImpl; +import java.io.IOException; +import java.nio.file.Paths; + import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.yarn.service.provider.AbstractClientProvider; import org.apache.hadoop.yarn.service.api.records.Artifact; import org.apache.hadoop.yarn.service.api.records.ConfigFile; +import org.apache.hadoop.yarn.service.exceptions.RestApiErrorMessages; +import org.apache.hadoop.yarn.service.provider.AbstractClientProvider; -import java.io.IOException; -import java.nio.file.Paths; +import com.google.common.annotations.VisibleForTesting; public class DefaultClientProvider extends AbstractClientProvider { @@ -31,16 +34,19 @@ public DefaultClientProvider() { } @Override - public void validateArtifact(Artifact artifact, FileSystem fileSystem) { + public void validateArtifact(Artifact artifact, String compName, + FileSystem fileSystem) { } @Override - protected void validateConfigFile(ConfigFile configFile, FileSystem - fileSystem) throws IOException { + @VisibleForTesting + public void validateConfigFile(ConfigFile configFile, String compName, + FileSystem fileSystem) throws IOException { // validate dest_file is not absolute if (Paths.get(configFile.getDestFile()).isAbsolute()) { - throw new IllegalArgumentException( - "Dest_file must not be absolute path: " + configFile.getDestFile()); + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_CONFIGFILE_DEST_FILE_FOR_COMP_NOT_ABSOLUTE, + compName, "no", configFile.getDestFile())); } } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultProviderService.java index a3a0c1f96ee..790fe20c5c9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/defaultImpl/DefaultProviderService.java @@ -19,6 +19,7 @@ import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService; import org.apache.hadoop.yarn.service.provider.AbstractProviderService; import org.apache.hadoop.yarn.service.utils.SliderFileSystem; import org.apache.hadoop.yarn.service.containerlaunch.AbstractLauncher; @@ -30,7 +31,8 @@ @Override public void processArtifact(AbstractLauncher launcher, ComponentInstance compInstance, SliderFileSystem fileSystem, - Service service) + Service service, + ContainerLaunchService.ComponentLaunchContext compLaunchCtx) throws IOException { } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerClientProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerClientProvider.java index f91742edb36..901d779b3b1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerClientProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerClientProvider.java @@ -35,19 +35,20 @@ public DockerClientProvider() { } @Override - public void validateArtifact(Artifact artifact, FileSystem fileSystem) { + public void validateArtifact(Artifact artifact, String compName, + FileSystem fileSystem) { if (artifact == null) { - throw new IllegalArgumentException( - RestApiErrorMessages.ERROR_ARTIFACT_INVALID); + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_ARTIFACT_FOR_COMP_INVALID, compName)); } if (StringUtils.isEmpty(artifact.getId())) { - throw new IllegalArgumentException( - RestApiErrorMessages.ERROR_ARTIFACT_ID_INVALID); + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_ARTIFACT_ID_FOR_COMP_INVALID, compName)); } } @Override - protected void validateConfigFile(ConfigFile configFile, FileSystem - fileSystem) throws IOException { + protected void validateConfigFile(ConfigFile configFile, String compName, + FileSystem fileSystem) throws IOException { } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java index 7844621b4ae..6027a66abca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java @@ -23,7 +23,6 @@ import org.apache.hadoop.yarn.service.provider.AbstractProviderService; import org.apache.hadoop.yarn.service.provider.ProviderUtils; import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.service.api.records.Component; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.utils.SliderFileSystem; import org.apache.hadoop.yarn.service.containerlaunch.AbstractLauncher; @@ -38,34 +37,38 @@ public class DockerProviderService extends AbstractProviderService implements DockerKeys { + @Override public void processArtifact(AbstractLauncher launcher, ComponentInstance compInstance, SliderFileSystem fileSystem, - Service service) throws IOException{ + Service service, ContainerLaunchService.ComponentLaunchContext + compLaunchCtx) throws IOException{ launcher.setYarnDockerMode(true); - 
launcher.setDockerImage(compInstance.getCompSpec().getArtifact().getId()); - launcher.setDockerNetwork(compInstance.getCompSpec().getConfiguration() + launcher.setDockerImage(compLaunchCtx.getArtifact().getId()); + launcher.setDockerNetwork(compLaunchCtx.getConfiguration() .getProperty(DOCKER_NETWORK)); launcher.setDockerHostname(compInstance.getHostname()); launcher.setRunPrivilegedContainer( - compInstance.getCompSpec().getRunPrivilegedContainer()); + compLaunchCtx.isRunPrivilegedContainer()); } /** * Check if system is default to disable docker override or * user requested a Docker container with ENTRY_POINT support. * - * @param component - YARN Service component + * @param compLaunchContext - launch context for the component. * @return true if Docker launch command override is disabled */ - private boolean checkUseEntryPoint(Component component) { + private boolean checkUseEntryPoint( + ContainerLaunchService.ComponentLaunchContext compLaunchContext) { boolean overrideDisable = false; String overrideDisableKey = Environment. YARN_CONTAINER_RUNTIME_DOCKER_RUN_OVERRIDE_DISABLE. name(); - String overrideDisableValue = (component - .getConfiguration().getEnv(overrideDisableKey) != null) ? - component.getConfiguration().getEnv(overrideDisableKey) : - System.getenv(overrideDisableKey); + String overrideDisableValue = ( + compLaunchContext.getConfiguration().getEnv(overrideDisableKey) + != null) ? + compLaunchContext.getConfiguration().getEnv( + overrideDisableKey) : System.getenv(overrideDisableKey); overrideDisable = Boolean.parseBoolean(overrideDisableValue); return overrideDisable; } @@ -77,10 +80,9 @@ public void buildContainerLaunchCommand(AbstractLauncher launcher, ContainerLaunchService.ComponentLaunchContext compLaunchContext, Map tokensForSubstitution) throws IOException, SliderException { - Component component = instance.getComponent().getComponentSpec(); - boolean useEntryPoint = checkUseEntryPoint(component); + boolean useEntryPoint = checkUseEntryPoint(compLaunchContext); if (useEntryPoint) { - String launchCommand = component.getLaunchCommand(); + String launchCommand = compLaunchContext.getLaunchCommand(); if (!StringUtils.isEmpty(launchCommand)) { launcher.addCommand(launchCommand); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballClientProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballClientProvider.java index 3b890fd85a6..b801e0caa31 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballClientProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballClientProvider.java @@ -36,30 +36,33 @@ public TarballClientProvider() { } @Override - public void validateArtifact(Artifact artifact, FileSystem fs) - throws IOException { + public void validateArtifact(Artifact artifact, String compName, + FileSystem fs) throws IOException { if (artifact == null) { - throw new IllegalArgumentException( - RestApiErrorMessages.ERROR_ARTIFACT_INVALID); + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_ARTIFACT_FOR_COMP_INVALID, 
compName)); } if (StringUtils.isEmpty(artifact.getId())) { - throw new IllegalArgumentException( - RestApiErrorMessages.ERROR_ARTIFACT_ID_INVALID); + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_ARTIFACT_ID_FOR_COMP_INVALID, compName)); } Path p = new Path(artifact.getId()); if (!fs.exists(p)) { - throw new IllegalArgumentException( "Artifact tarball does not exist " - + artifact.getId()); + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_ARTIFACT_PATH_FOR_COMP_INVALID, compName, + Artifact.TypeEnum.TARBALL.name(), artifact.getId())); } } @Override - protected void validateConfigFile(ConfigFile configFile, FileSystem - fileSystem) throws IOException { + protected void validateConfigFile(ConfigFile configFile, String compName, + FileSystem fileSystem) throws IOException { // validate dest_file is not absolute if (Paths.get(configFile.getDestFile()).isAbsolute()) { - throw new IllegalArgumentException( - "Dest_file must not be absolute path: " + configFile.getDestFile()); + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_CONFIGFILE_DEST_FILE_FOR_COMP_NOT_ABSOLUTE, + compName, Artifact.TypeEnum.TARBALL.name(), + configFile.getDestFile())); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballProviderService.java index 7f29d1f5c1b..87406f79228 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/tarball/TarballProviderService.java @@ -22,6 +22,7 @@ import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService; import org.apache.hadoop.yarn.service.provider.AbstractProviderService; import org.apache.hadoop.yarn.service.utils.SliderFileSystem; import org.apache.hadoop.yarn.service.containerlaunch.AbstractLauncher; @@ -33,9 +34,9 @@ @Override public void processArtifact(AbstractLauncher launcher, ComponentInstance instance, SliderFileSystem fileSystem, - Service service) - throws IOException { - Path artifact = new Path(instance.getCompSpec().getArtifact().getId()); + Service service, ContainerLaunchService.ComponentLaunchContext + compLaunchCtx) throws IOException { + Path artifact = new Path(compLaunchCtx.getArtifact().getId()); if (!fileSystem.isFile(artifact)) { throw new IOException( "Package doesn't exist as a resource: " + artifact); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelineEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelineEvent.java index 
6c3428a748d..832dad729ca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelineEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelineEvent.java @@ -32,5 +32,8 @@ COMPONENT_INSTANCE_IP_HOST_UPDATE, - COMPONENT_INSTANCE_BECOME_READY + COMPONENT_INSTANCE_BECOME_READY, + + COMPONENT_FINISHED + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelinePublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelinePublisher.java index 6c73ebb8d67..79f37c00d27 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelinePublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceTimelinePublisher.java @@ -28,6 +28,8 @@ import org.apache.hadoop.yarn.client.api.TimelineV2Client; import org.apache.hadoop.yarn.service.ServiceContext; import org.apache.hadoop.yarn.service.api.records.*; +import org.apache.hadoop.yarn.service.api.records.Component; +import org.apache.hadoop.yarn.service.api.records.ComponentState; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; import org.apache.hadoop.yarn.util.timeline.TimelineUtils; import org.slf4j.Logger; @@ -42,7 +44,6 @@ import java.util.Set; import static org.apache.hadoop.yarn.service.api.records.ContainerState.READY; -import static org.apache.hadoop.yarn.service.api.records.ContainerState.STOPPED; import static org.apache.hadoop.yarn.service.timelineservice.ServiceTimelineMetricsConstants.DIAGNOSTICS_INFO; /** @@ -130,12 +131,11 @@ public void serviceAttemptUpdated(Service service) { } public void serviceAttemptUnregistered(ServiceContext context, - String diagnostics) { + FinalApplicationStatus status, String diagnostics) { TimelineEntity entity = createServiceAttemptEntity( context.attemptId.getApplicationId().toString()); Map entityInfos = new HashMap(); - entityInfos.put(ServiceTimelineMetricsConstants.STATE, - FinalApplicationStatus.ENDED); + entityInfos.put(ServiceTimelineMetricsConstants.STATE, status); entityInfos.put(DIAGNOSTICS_INFO, diagnostics); entity.addInfo(entityInfos); @@ -180,7 +180,7 @@ public void componentInstanceStarted(Container container, } public void componentInstanceFinished(ContainerId containerId, - int exitCode, String diagnostics) { + int exitCode, ContainerState state, String diagnostics) { TimelineEntity entity = createComponentInstanceEntity( containerId.toString()); @@ -189,7 +189,7 @@ public void componentInstanceFinished(ContainerId containerId, entityInfos.put(ServiceTimelineMetricsConstants.EXIT_STATUS_CODE, exitCode); entityInfos.put(DIAGNOSTICS_INFO, diagnostics); - entityInfos.put(ServiceTimelineMetricsConstants.STATE, STOPPED); + entityInfos.put(ServiceTimelineMetricsConstants.STATE, state); entity.addInfo(entityInfos); // add an event @@ -375,4 +375,25 @@ private void 
putEntity(TimelineEntity entity) { log.error("Error when publishing entity " + entity, e); } } + + public void componentFinished( + Component comp, + ComponentState state, long finishTime) { + createComponentEntity(comp.getName()); + TimelineEntity entity = createComponentEntity(comp.getName()); + + // create info keys + Map entityInfos = new HashMap(); + entityInfos.put(ServiceTimelineMetricsConstants.STATE, state); + entity.addInfo(entityInfos); + + // add an event + TimelineEvent startEvent = new TimelineEvent(); + startEvent + .setId(ServiceTimelineEvent.COMPONENT_FINISHED.toString()); + startEvent.setTimestamp(finishTime); + entity.addEvent(startEvent); + + putEntity(entity); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/FilterUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/FilterUtils.java new file mode 100644 index 00000000000..10f9fea6e14 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/FilterUtils.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.service.utils; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.proto.ClientAMProtocol; +import org.apache.hadoop.yarn.service.ServiceContext; +import org.apache.hadoop.yarn.service.api.records.Container; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class FilterUtils { + + /** + * Returns containers filtered by requested fields. 
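+ * A live component instance is included only when it matches every filter
+ * that is set on the request: component names, service version and
+ * container states. Unset or empty filters are ignored.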
+ * + * @param context service context + * @param filterReq filter request + */ + public static List filterInstances(ServiceContext context, + ClientAMProtocol.GetCompInstancesRequestProto filterReq) { + List results = new ArrayList<>(); + Map instances = + context.scheduler.getLiveInstances(); + + instances.forEach(((containerId, instance) -> { + boolean include = true; + if (filterReq.getComponentNamesList() != null && + !filterReq.getComponentNamesList().isEmpty()) { + // filter by component name + if (!filterReq.getComponentNamesList().contains( + instance.getComponent().getName())) { + include = false; + } + } + + if (filterReq.getVersion() != null && !filterReq.getVersion().isEmpty()) { + // filter by version + String instanceServiceVersion = instance.getServiceVersion(); + if (instanceServiceVersion == null || !instanceServiceVersion.equals( + filterReq.getVersion())) { + include = false; + } + } + + if (filterReq.getContainerStatesList() != null && + !filterReq.getContainerStatesList().isEmpty()) { + // filter by state + if (!filterReq.getContainerStatesList().contains( + instance.getContainerState().toString())) { + include = false; + } + } + + if (include) { + results.add(instance.getContainerSpec()); + } + })); + + return results; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceApiUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceApiUtil.java index 705e04065c0..b588e88ae7f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceApiUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceApiUtil.java @@ -72,6 +72,15 @@ public static JsonSerDeser jsonSerDeser = new JsonSerDeser<>(Service.class, PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); + + public static final JsonSerDeser CONTAINER_JSON_SERDE = + new JsonSerDeser<>(Container[].class, + PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); + + public static final JsonSerDeser COMP_JSON_SERDE = + new JsonSerDeser<>(Component[].class, + PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); + private static final PatternValidator namePattern = new PatternValidator("[a-z][a-z0-9-]*"); @@ -234,6 +243,16 @@ public static void validateAndResolveService(Service service, public static void validateKerberosPrincipal( KerberosPrincipal kerberosPrincipal) throws IOException { + try { + if (!kerberosPrincipal.getPrincipalName().contains("/")) { + throw new IllegalArgumentException(String.format( + RestApiErrorMessages.ERROR_KERBEROS_PRINCIPAL_NAME_FORMAT, + kerberosPrincipal.getPrincipalName())); + } + } catch (NullPointerException e) { + throw new IllegalArgumentException( + RestApiErrorMessages.ERROR_KERBEROS_PRINCIPAL_MISSING); + } if (!StringUtils.isEmpty(kerberosPrincipal.getKeytab())) { try { // validate URI format @@ -273,7 +292,7 @@ private static void validateComponent(Component comp, FileSystem fs, AbstractClientProvider compClientProvider = ProviderFactory .getClientProvider(comp.getArtifact()); - compClientProvider.validateArtifact(comp.getArtifact(), fs); + 
compClientProvider.validateArtifact(comp.getArtifact(), comp.getName(), fs); if (comp.getLaunchCommand() == null && (comp.getArtifact() == null || comp .getArtifact().getType() != Artifact.TypeEnum.DOCKER)) { @@ -290,7 +309,7 @@ private static void validateComponent(Component comp, FileSystem fs, + ": " + comp.getNumberOfContainers(), comp.getName())); } compClientProvider.validateConfigFiles(comp.getConfiguration() - .getFiles(), fs); + .getFiles(), comp.getName(), fs); MonitorUtils.getProbe(comp.getReadinessCheck()); } @@ -619,6 +638,32 @@ public static void validateInstancesUpgrade(List return containerNeedUpgrade; } + /** + * Validates the components that are requested are stable for upgrade. + * It returns the instances of the components which are in ready state. + */ + public static List validateAndResolveCompsStable( + Service liveService, Collection compNames) throws YarnException { + Preconditions.checkNotNull(compNames); + HashSet requestedComps = Sets.newHashSet(compNames); + List containerNeedUpgrade = new ArrayList<>(); + for (Component liveComp : liveService.getComponents()) { + if (requestedComps.contains(liveComp.getName())) { + if (!liveComp.getState().equals(ComponentState.STABLE)) { + // Nothing to upgrade + throw new YarnException(String.format( + ERROR_COMP_DOES_NOT_NEED_UPGRADE, liveComp.getName())); + } + liveComp.getContainers().forEach(liveContainer -> { + if (liveContainer.getState().equals(ContainerState.READY)) { + containerNeedUpgrade.add(liveContainer); + } + }); + } + } + return containerNeedUpgrade; + } + private static String parseComponentName(String componentInstanceName) throws YarnException { int idx = componentInstanceName.lastIndexOf('-'); @@ -632,4 +677,22 @@ private static String parseComponentName(String componentInstanceName) public static String $(String s) { return "${" + s +"}"; } + + public static List resolveCompsDependency(Service service) { + List components = new ArrayList(); + for (Component component : service.getComponents()) { + int depSize = component.getDependencies().size(); + if (!components.contains(component.getName())) { + components.add(component.getName()); + } + if (depSize != 0) { + for (String depComp : component.getDependencies()) { + if (!components.contains(depComp)) { + components.add(0, depComp); + } + } + } + } + return components; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/proto/ClientAMProtocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/proto/ClientAMProtocol.proto index 91721b0d900..169f765b8a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/proto/ClientAMProtocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/proto/ClientAMProtocol.proto @@ -32,6 +32,8 @@ service ClientAMProtocolService { returns (RestartServiceResponseProto); rpc upgrade(CompInstancesUpgradeRequestProto) returns (CompInstancesUpgradeResponseProto); + rpc getCompInstances(GetCompInstancesRequestProto) returns + (GetCompInstancesResponseProto); } message FlexComponentsRequestProto { @@ -64,6 +66,7 @@ message StopResponseProto { message UpgradeServiceRequestProto { optional string version = 1; optional bool autoFinalize = 2; + optional bool expressUpgrade = 3; } message UpgradeServiceResponseProto { @@ -81,4 +84,14 @@ message 
CompInstancesUpgradeRequestProto { } message CompInstancesUpgradeResponseProto { +} + +message GetCompInstancesRequestProto { + repeated string componentNames = 1; + optional string version = 2; + repeated string containerStates = 3; +} + +message GetCompInstancesResponseProto { + optional string compInstances = 1; } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockRunningServiceContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockRunningServiceContext.java new file mode 100644 index 00000000000..321b2cda3e9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockRunningServiceContext.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.service; + +import org.apache.hadoop.registry.client.api.RegistryOperations; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.client.api.NMClient; +import org.apache.hadoop.yarn.client.api.async.NMClientAsync; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.component.Component; +import org.apache.hadoop.yarn.service.component.ComponentEvent; +import org.apache.hadoop.yarn.service.component.ComponentEventType; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType; +import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService; +import org.apache.hadoop.yarn.service.registry.YarnRegistryViewForProviders; +import org.apache.hadoop.yarn.service.utils.ServiceUtils; +import org.mockito.stubbing.Answer; + +import java.io.IOException; +import java.util.Map; + +import static org.mockito.Matchers.anyObject; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Mocked service context for a running service. 
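+ * The scheduler built here mocks the YARN registry, the NM client and the
+ * container launch service, and walks every component instance through the
+ * allocate, start and become-ready transitions, so tests begin against a
+ * service whose components have already been checked for stability.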
+ */ +public class MockRunningServiceContext extends ServiceContext { + + public MockRunningServiceContext(ServiceTestUtils.ServiceFSWatcher fsWatcher, + Service serviceDef) throws Exception { + super(); + this.service = serviceDef; + this.fs = fsWatcher.getFs(); + + ContainerLaunchService mockLaunchService = mock( + ContainerLaunchService.class); + + this.scheduler = new ServiceScheduler(this) { + @Override + protected YarnRegistryViewForProviders + createYarnRegistryOperations( + ServiceContext context, RegistryOperations registryClient) { + return mock(YarnRegistryViewForProviders.class); + } + + @Override + public NMClientAsync createNMClient() { + NMClientAsync nmClientAsync = super.createNMClient(); + NMClient nmClient = mock(NMClient.class); + try { + when(nmClient.getContainerStatus(anyObject(), anyObject())) + .thenAnswer( + (Answer) invocation -> ContainerStatus + .newInstance((ContainerId) invocation.getArguments()[0], + org.apache.hadoop.yarn.api.records.ContainerState + .RUNNING, + "", 0)); + } catch (YarnException | IOException e) { + throw new RuntimeException(e); + } + nmClientAsync.setClient(nmClient); + return nmClientAsync; + } + + @Override + public ContainerLaunchService getContainerLaunchService() { + return mockLaunchService; + } + + @Override public ServiceUtils.ProcessTerminationHandler + getTerminationHandler() { + return new + ServiceUtils.ProcessTerminationHandler() { + public void terminate(int exitCode) { + } + }; + } + }; + + + this.scheduler.init(fsWatcher.getConf()); + + ServiceTestUtils.createServiceManager(this); + + doNothing().when(mockLaunchService). + reInitCompInstance(anyObject(), anyObject(), anyObject(), anyObject()); + stabilizeComponents(this); + } + + private void stabilizeComponents(ServiceContext context) { + + ApplicationId appId = ApplicationId.fromString(context.service.getId()); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); + context.attemptId = attemptId; + Map + componentState = context.scheduler.getAllComponents(); + + int counter = 0; + for (org.apache.hadoop.yarn.service.api.records.Component componentSpec : + context.service.getComponents()) { + Component component = new org.apache.hadoop.yarn.service.component. 
+ Component(componentSpec, 1L, context); + componentState.put(component.getName(), component); + component.handle( + new ComponentEvent(component.getName(), ComponentEventType.FLEX) + .setDesired( + component.getComponentSpec().getNumberOfContainers())); + + for (int i = 0; i < componentSpec.getNumberOfContainers(); i++) { + counter++; + assignNewContainer(attemptId, counter, component); + } + + component.handle(new ComponentEvent(component.getName(), + ComponentEventType.CHECK_STABLE)); + } + } + + public void assignNewContainer(ApplicationAttemptId attemptId, + long containerNum, Component component) { + + Container container = org.apache.hadoop.yarn.api.records.Container + .newInstance(ContainerId.newContainerId(attemptId, containerNum), + NODE_ID, "localhost", null, null, + null); + component.handle(new ComponentEvent(component.getName(), + ComponentEventType.CONTAINER_ALLOCATED) + .setContainer(container).setContainerId(container.getId())); + ComponentInstance instance = this.scheduler.getLiveInstances().get( + container.getId()); + ComponentInstanceEvent startEvent = new ComponentInstanceEvent( + container.getId(), ComponentInstanceEventType.START); + instance.handle(startEvent); + + ComponentInstanceEvent readyEvent = new ComponentInstanceEvent( + container.getId(), ComponentInstanceEventType.BECOME_READY); + instance.handle(readyEvent); + } + + private static final NodeId NODE_ID = NodeId.fromString("localhost:0"); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java index 4a75aefe058..729287cfcb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/MockServiceAM.java @@ -68,6 +68,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeoutException; @@ -99,6 +100,8 @@ private Map containerStatuses = new ConcurrentHashMap<>(); + private Set releasedContainers = ConcurrentHashMap.newKeySet(); + private Credentials amCreds; public MockServiceAM(Service service) { @@ -223,6 +226,13 @@ public RegisterApplicationMasterResponse registerApplicationMaster( return response; } + @Override + public synchronized void releaseAssignedContainer( + ContainerId containerId) { + releasedContainers.add(containerId); + super.releaseAssignedContainer(containerId); + } + @Override public void unregisterApplicationMaster( FinalApplicationStatus appStatus, String appMessage, String appTrackingUrl) { @@ -288,7 +298,7 @@ public void feedRecoveredContainer(ContainerId containerId, String compName) { } /** - * + * Creates a mock container and container ID and feeds to the component. 
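+ * A ContainerId is generated from the given numeric id before delegating
+ * to the overload that accepts an explicit ContainerId.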
* @param service The service for the component * @param id The id for the container * @param compName The component to which the container is fed @@ -297,6 +307,18 @@ public void feedRecoveredContainer(ContainerId containerId, String compName) { public Container feedContainerToComp(Service service, int id, String compName) { ContainerId containerId = createContainerId(id); + return feedContainerToComp(service, containerId, compName); + } + + /** + * Feeds the container to the component. + * @param service The service for the component + * @param containerId container id + * @param compName The component to which the container is fed + * @return + */ + public Container feedContainerToComp(Service service, ContainerId containerId, + String compName) { Container container = createContainer(containerId, compName); synchronized (feedContainers) { feedContainers.add(container); @@ -423,4 +445,14 @@ protected ByteBuffer recordTokensForContainers() } return ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); } + + /** + * Waits for the container to get released + * @param containerId ContainerId + */ + public void waitForContainerToRelease(ContainerId containerId) + throws TimeoutException, InterruptedException { + GenericTestUtils.waitFor(() -> releasedContainers.contains(containerId), + 1000, 9990000); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java index 3d1412dfe71..6b49ab07c1a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java @@ -63,7 +63,6 @@ import java.io.OutputStream; import java.net.URL; import java.nio.file.Paths; -import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.TimeoutException; @@ -115,18 +114,14 @@ public static Service createTerminatingJobExample(String serviceName) { exampleApp.setName(serviceName); exampleApp.setVersion("v1"); exampleApp.addComponent( - createComponent("terminating-comp1", 2, "sleep " + "1000", + createComponent("terminating-comp1", 2, "sleep 1000", Component.RestartPolicyEnum.NEVER, null)); exampleApp.addComponent( createComponent("terminating-comp2", 2, "sleep 1000", - Component.RestartPolicyEnum.ON_FAILURE, new ArrayList() {{ - add("terminating-comp1"); - }})); + Component.RestartPolicyEnum.ON_FAILURE, null)); exampleApp.addComponent( createComponent("terminating-comp3", 2, "sleep 1000", - Component.RestartPolicyEnum.ON_FAILURE, new ArrayList() {{ - add("terminating-comp2"); - }})); + Component.RestartPolicyEnum.ON_FAILURE, null)); return exampleApp; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java index e9478f0b7e2..21e93fadcdd 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceAM.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.service; +import com.google.common.base.Supplier; import com.google.common.collect.ImmutableMap; import org.apache.commons.io.FileUtils; import org.apache.curator.test.TestingCluster; @@ -391,4 +392,38 @@ public void testIPChange() throws TimeoutException, .equals("newer.host"), 2000, 200000); am.stop(); } + + // Test to verify that the containers are released and the + // component instance is added to the pending queue when building the launch + // context fails. + @Test(timeout = 9990000) + public void testContainersReleasedWhenPreLaunchFails() + throws Exception { + ApplicationId applicationId = ApplicationId.newInstance( + System.currentTimeMillis(), 1); + Service exampleApp = new Service(); + exampleApp.setId(applicationId.toString()); + exampleApp.setVersion("v1"); + exampleApp.setName("testContainersReleasedWhenPreLaunchFails"); + + Component compA = createComponent("compa", 1, "pwd"); + Artifact artifact = new Artifact(); + artifact.setType(Artifact.TypeEnum.TARBALL); + compA.artifact(artifact); + exampleApp.addComponent(compA); + + MockServiceAM am = new MockServiceAM(exampleApp); + am.init(conf); + am.start(); + + ContainerId containerId = am.createContainerId(1); + + // allocate a container + am.feedContainerToComp(exampleApp, containerId, "compa"); + am.waitForContainerToRelease(containerId); + + Assert.assertEquals(1, + am.getComponent("compa").getPendingInstances().size()); + am.stop(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java index fc509f19420..a37cabe38c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceManager.java @@ -19,23 +19,26 @@ package org.apache.hadoop.yarn.service; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.registry.client.api.RegistryOperations; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.service.api.records.Artifact; import org.apache.hadoop.yarn.service.api.records.ComponentState; +import org.apache.hadoop.yarn.service.api.records.ContainerState; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.ServiceState; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType; import org.apache.hadoop.yarn.service.exceptions.SliderException; -import org.apache.hadoop.yarn.service.registry.YarnRegistryViewForProviders; 
import org.apache.hadoop.yarn.service.utils.ServiceApiUtil; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import java.io.IOException; -import java.util.Map; - -import static org.mockito.Mockito.mock; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.TimeoutException; /** * Tests for {@link ServiceManager}. @@ -46,117 +49,120 @@ public ServiceTestUtils.ServiceFSWatcher rule = new ServiceTestUtils.ServiceFSWatcher(); - @Test - public void testUpgrade() throws IOException, SliderException { - ServiceManager serviceManager = createTestServiceManager("testUpgrade"); - upgrade(serviceManager, "v2", false, false); + @Test (timeout = TIMEOUT) + public void testUpgrade() throws Exception { + ServiceContext context = createServiceContext("testUpgrade"); + initUpgrade(context, "v2", false, false, false); Assert.assertEquals("service not upgraded", ServiceState.UPGRADING, - serviceManager.getServiceSpec().getState()); + context.getServiceManager().getServiceSpec().getState()); } - @Test + @Test (timeout = TIMEOUT) public void testRestartNothingToUpgrade() - throws IOException, SliderException { - ServiceManager serviceManager = createTestServiceManager( + throws Exception { + ServiceContext context = createServiceContext( "testRestartNothingToUpgrade"); - upgrade(serviceManager, "v2", false, false); - - //make components stable - serviceManager.getServiceSpec().getComponents().forEach(comp -> { - comp.setState(ComponentState.STABLE); - }); - serviceManager.handle(new ServiceEvent(ServiceEventType.START)); + initUpgrade(context, "v2", false, false, false); + ServiceManager manager = context.getServiceManager(); + //make components stable by upgrading all instances + upgradeAllInstances(context); + + context.scheduler.getDispatcher().getEventHandler().handle( + new ServiceEvent(ServiceEventType.START)); + GenericTestUtils.waitFor(()-> + context.service.getState().equals(ServiceState.STABLE), + CHECK_EVERY_MILLIS, TIMEOUT); Assert.assertEquals("service not re-started", ServiceState.STABLE, - serviceManager.getServiceSpec().getState()); + manager.getServiceSpec().getState()); } - @Test - public void testAutoFinalizeNothingToUpgrade() throws IOException, - SliderException { - ServiceManager serviceManager = createTestServiceManager( + @Test(timeout = TIMEOUT) + public void testAutoFinalizeNothingToUpgrade() throws Exception { + ServiceContext context = createServiceContext( "testAutoFinalizeNothingToUpgrade"); - upgrade(serviceManager, "v2", false, true); - - //make components stable - serviceManager.getServiceSpec().getComponents().forEach(comp -> - comp.setState(ComponentState.STABLE)); - serviceManager.handle(new ServiceEvent(ServiceEventType.CHECK_STABLE)); + initUpgrade(context, "v2", false, true, false); + ServiceManager manager = context.getServiceManager(); + //make components stable by upgrading all instances + upgradeAllInstances(context); + + GenericTestUtils.waitFor(()-> + context.service.getState().equals(ServiceState.STABLE), + CHECK_EVERY_MILLIS, TIMEOUT); Assert.assertEquals("service stable", ServiceState.STABLE, - serviceManager.getServiceSpec().getState()); + manager.getServiceSpec().getState()); } - @Test + @Test(timeout = TIMEOUT) public void testRestartWithPendingUpgrade() - throws IOException, SliderException { - ServiceManager serviceManager = createTestServiceManager("testRestart"); - upgrade(serviceManager, "v2", true, false); - serviceManager.handle(new ServiceEvent(ServiceEventType.START)); + throws Exception { + 
ServiceContext context = createServiceContext("testRestart"); + initUpgrade(context, "v2", true, false, false); + ServiceManager manager = context.getServiceManager(); + + context.scheduler.getDispatcher().getEventHandler().handle( + new ServiceEvent(ServiceEventType.START)); + context.scheduler.getDispatcher().stop(); Assert.assertEquals("service should still be upgrading", - ServiceState.UPGRADING, serviceManager.getServiceSpec().getState()); + ServiceState.UPGRADING, manager.getServiceSpec().getState()); } - @Test - public void testCheckState() throws IOException, SliderException { - ServiceManager serviceManager = createTestServiceManager( - "testCheckState"); - upgrade(serviceManager, "v2", true, false); + @Test(timeout = TIMEOUT) + public void testFinalize() throws Exception { + ServiceContext context = createServiceContext("testCheckState"); + initUpgrade(context, "v2", true, false, false); + ServiceManager manager = context.getServiceManager(); Assert.assertEquals("service not upgrading", ServiceState.UPGRADING, - serviceManager.getServiceSpec().getState()); + manager.getServiceSpec().getState()); - // make components stable - serviceManager.getServiceSpec().getComponents().forEach(comp -> { - comp.setState(ComponentState.STABLE); - }); - ServiceEvent checkStable = new ServiceEvent(ServiceEventType.CHECK_STABLE); - serviceManager.handle(checkStable); - Assert.assertEquals("service should still be upgrading", - ServiceState.UPGRADING, serviceManager.getServiceSpec().getState()); + //make components stable by upgrading all instances + upgradeAllInstances(context); // finalize service - ServiceEvent restart = new ServiceEvent(ServiceEventType.START); - serviceManager.handle(restart); - Assert.assertEquals("service not stable", - ServiceState.STABLE, serviceManager.getServiceSpec().getState()); + context.scheduler.getDispatcher().getEventHandler().handle( + new ServiceEvent(ServiceEventType.START)); + GenericTestUtils.waitFor(()-> + context.service.getState().equals(ServiceState.STABLE), + CHECK_EVERY_MILLIS, TIMEOUT); + Assert.assertEquals("service not re-started", ServiceState.STABLE, + manager.getServiceSpec().getState()); - validateUpgradeFinalization(serviceManager.getName(), "v2"); + validateUpgradeFinalization(manager.getName(), "v2"); } - @Test - public void testCheckStateAutoFinalize() throws IOException, SliderException { - ServiceManager serviceManager = createTestServiceManager( - "testCheckState"); - serviceManager.getServiceSpec().setState( + @Test(timeout = TIMEOUT) + public void testAutoFinalize() throws Exception { + ServiceContext context = createServiceContext("testCheckStateAutoFinalize"); + ServiceManager manager = context.getServiceManager(); + manager.getServiceSpec().setState( ServiceState.UPGRADING_AUTO_FINALIZE); - upgrade(serviceManager, "v2", true, true); - Assert.assertEquals("service not upgrading", - ServiceState.UPGRADING_AUTO_FINALIZE, - serviceManager.getServiceSpec().getState()); + initUpgrade(context, "v2", true, true, false); // make components stable - serviceManager.getServiceSpec().getComponents().forEach(comp -> - comp.setState(ComponentState.STABLE)); - ServiceEvent checkStable = new ServiceEvent(ServiceEventType.CHECK_STABLE); - serviceManager.handle(checkStable); + upgradeAllInstances(context); + + GenericTestUtils.waitFor(() -> + context.service.getState().equals(ServiceState.STABLE), + CHECK_EVERY_MILLIS, TIMEOUT); Assert.assertEquals("service not stable", - ServiceState.STABLE, serviceManager.getServiceSpec().getState()); + 
ServiceState.STABLE, manager.getServiceSpec().getState()); - validateUpgradeFinalization(serviceManager.getName(), "v2"); + validateUpgradeFinalization(manager.getName(), "v2"); } @Test - public void testInvalidUpgrade() throws IOException, SliderException { - ServiceManager serviceManager = createTestServiceManager( - "testInvalidUpgrade"); - serviceManager.getServiceSpec().setState( + public void testInvalidUpgrade() throws Exception { + ServiceContext serviceContext = createServiceContext("testInvalidUpgrade"); + ServiceManager manager = serviceContext.getServiceManager(); + manager.getServiceSpec().setState( ServiceState.UPGRADING_AUTO_FINALIZE); Service upgradedDef = ServiceTestUtils.createExampleApplication(); - upgradedDef.setName(serviceManager.getName()); + upgradedDef.setName(manager.getName()); upgradedDef.setVersion("v2"); upgradedDef.setLifetime(2L); writeUpgradedDef(upgradedDef); try { - serviceManager.processUpgradeRequest("v2", true); + manager.processUpgradeRequest("v2", true, false); } catch (Exception ex) { Assert.assertTrue(ex instanceof UnsupportedOperationException); return; @@ -164,6 +170,32 @@ public void testInvalidUpgrade() throws IOException, SliderException { Assert.fail(); } + @Test(timeout = TIMEOUT) + public void testExpressUpgrade() throws Exception { + ServiceContext context = createServiceContext("testExpressUpgrade"); + ServiceManager manager = context.getServiceManager(); + manager.getServiceSpec().setState( + ServiceState.EXPRESS_UPGRADING); + initUpgrade(context, "v2", true, true, true); + + List comps = ServiceApiUtil.resolveCompsDependency(context.service); + // wait till instances of first component are in upgrade + String comp1 = comps.get(0); + upgradeInstancesOf(context, comp1); + + // wait till instances of second component are in upgrade + String comp2 = comps.get(1); + upgradeInstancesOf(context, comp2); + + GenericTestUtils.waitFor(() -> + context.service.getState().equals(ServiceState.STABLE), + CHECK_EVERY_MILLIS, TIMEOUT); + + Assert.assertEquals("service not stable", + ServiceState.STABLE, manager.getServiceSpec().getState()); + validateUpgradeFinalization(manager.getName(), "v2"); + } + private void validateUpgradeFinalization(String serviceName, String expectedVersion) throws IOException { Service savedSpec = ServiceApiUtil.loadService(rule.getFs(), serviceName); @@ -172,15 +204,16 @@ private void validateUpgradeFinalization(String serviceName, Assert.assertNotNull("app id not present", savedSpec.getId()); Assert.assertEquals("state not stable", ServiceState.STABLE, savedSpec.getState()); - savedSpec.getComponents().forEach(compSpec -> { - Assert.assertEquals("comp not stable", ComponentState.STABLE, - compSpec.getState()); - }); + savedSpec.getComponents().forEach(compSpec -> + Assert.assertEquals("comp not stable", ComponentState.STABLE, + compSpec.getState())); } - private void upgrade(ServiceManager serviceManager, String version, - boolean upgradeArtifact, boolean autoFinalize) - throws IOException, SliderException { + private void initUpgrade(ServiceContext context, String version, + boolean upgradeArtifact, boolean autoFinalize, boolean expressUpgrade) + throws IOException, SliderException, TimeoutException, + InterruptedException { + ServiceManager serviceManager = context.getServiceManager(); Service upgradedDef = ServiceTestUtils.createExampleApplication(); upgradedDef.setName(serviceManager.getName()); upgradedDef.setVersion(version); @@ -191,39 +224,81 @@ private void upgrade(ServiceManager serviceManager, String version, 
}); } writeUpgradedDef(upgradedDef); - serviceManager.processUpgradeRequest(version, autoFinalize); + serviceManager.processUpgradeRequest(version, autoFinalize, expressUpgrade); ServiceEvent upgradeEvent = new ServiceEvent(ServiceEventType.UPGRADE); - upgradeEvent.setVersion(version); - if (autoFinalize) { - upgradeEvent.setAutoFinalize(true); - } - serviceManager.handle(upgradeEvent); + upgradeEvent.setVersion(version).setExpressUpgrade(expressUpgrade) + .setAutoFinalize(autoFinalize); + + GenericTestUtils.waitFor(()-> { + ServiceState serviceState = context.service.getState(); + if (serviceState.equals(ServiceState.UPGRADING) || + serviceState.equals(ServiceState.UPGRADING_AUTO_FINALIZE) || + serviceState.equals(ServiceState.EXPRESS_UPGRADING)) { + return true; + } + return false; + }, CHECK_EVERY_MILLIS, TIMEOUT); + } + + private void upgradeAllInstances(ServiceContext context) throws + TimeoutException, InterruptedException { + // upgrade the instances + context.scheduler.getLiveInstances().forEach(((containerId, instance) -> { + ComponentInstanceEvent event = new ComponentInstanceEvent(containerId, + ComponentInstanceEventType.UPGRADE); + context.scheduler.getDispatcher().getEventHandler().handle(event); + })); + + // become ready + context.scheduler.getLiveInstances().forEach(((containerId, instance) -> { + ComponentInstanceEvent event = new ComponentInstanceEvent(containerId, + ComponentInstanceEventType.BECOME_READY); + + context.scheduler.getDispatcher().getEventHandler().handle(event); + })); + GenericTestUtils.waitFor(()-> { + for (ComponentInstance instance: + context.scheduler.getLiveInstances().values()) { + if (!instance.getContainerState().equals(ContainerState.READY)) { + return false; + } + } + return true; + }, CHECK_EVERY_MILLIS, TIMEOUT); } - private ServiceManager createTestServiceManager(String name) - throws IOException { - ServiceContext context = new ServiceContext(); - context.service = createBaseDef(name); - context.fs = rule.getFs(); - - context.scheduler = new ServiceScheduler(context) { - @Override - protected YarnRegistryViewForProviders createYarnRegistryOperations( - ServiceContext context, RegistryOperations registryClient) { - return mock(YarnRegistryViewForProviders.class); + private void upgradeInstancesOf(ServiceContext context, String compName) + throws TimeoutException, InterruptedException { + Collection compInstances = context.scheduler + .getAllComponents().get(compName).getAllComponentInstances(); + GenericTestUtils.waitFor(() -> { + for (ComponentInstance instance : compInstances) { + if (!instance.getContainerState().equals(ContainerState.UPGRADING)) { + return false; + } } - }; + return true; + }, CHECK_EVERY_MILLIS, TIMEOUT); - context.scheduler.init(rule.getConf()); + // instances of comp1 get upgraded and become ready event is triggered + // become ready + compInstances.forEach(instance -> { + ComponentInstanceEvent event = new ComponentInstanceEvent( + instance.getContainer().getId(), + ComponentInstanceEventType.BECOME_READY); - Map - componentState = context.scheduler.getAllComponents(); - context.service.getComponents().forEach(component -> { - componentState.put(component.getName(), - new org.apache.hadoop.yarn.service.component.Component(component, - 1L, context)); + context.scheduler.getDispatcher().getEventHandler().handle(event); }); - return new ServiceManager(context); + } + + private ServiceContext createServiceContext(String name) + throws Exception { + Service service = createBaseDef(name); + ServiceContext context = new 
MockRunningServiceContext(rule, + service); + context.scheduler.getDispatcher().setDrainEventsOnStop(); + context.scheduler.getDispatcher().start(); + return context; } public static Service createBaseDef(String name) { @@ -257,4 +332,6 @@ private void writeUpgradedDef(Service upgradedDef) upgradedDef); } + private static final int TIMEOUT = 200000; + private static final int CHECK_EVERY_MILLIS = 100; } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java index 8b13b2495b8..216d88fc4c3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java @@ -415,6 +415,41 @@ public void testUpgrade() throws Exception { client.actionDestroy(service.getName()); } + @Test(timeout = 200000) + public void testExpressUpgrade() throws Exception { + setupInternal(NUM_NMS); + getConf().setBoolean(YARN_SERVICE_UPGRADE_ENABLED, true); + ServiceClient client = createClient(getConf()); + + Service service = createExampleApplication(); + client.actionCreate(service); + waitForServiceToBeStable(client, service); + + // upgrade the service + Component component = service.getComponents().iterator().next(); + service.setState(ServiceState.EXPRESS_UPGRADING); + service.setVersion("v2"); + component.getConfiguration().getEnv().put("key1", "val1"); + Component component2 = service.getComponent("compb"); + component2.getConfiguration().getEnv().put("key2", "val2"); + client.actionUpgradeExpress(service); + + // wait for upgrade to complete + waitForServiceToBeStable(client, service); + Service active = client.getStatus(service.getName()); + Assert.assertEquals("component not stable", ComponentState.STABLE, + active.getComponent(component.getName()).getState()); + Assert.assertEquals("compa does not have new env", "val1", + active.getComponent(component.getName()).getConfiguration() + .getEnv("key1")); + Assert.assertEquals("compb does not have new env", "val2", + active.getComponent(component2.getName()).getConfiguration() + .getEnv("key2")); + LOG.info("Stop/destroy service {}", service); + client.actionStop(service.getName(), true); + client.actionDestroy(service.getName()); + } + // Test to verify ANTI_AFFINITY placement policy // 1. Start mini cluster with 3 NMs and scheduler placement-constraint handler // 2. 
Create an example service with 3 containers diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java index 78a81988188..0e047c20b25 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java @@ -121,12 +121,16 @@ public void setup() throws Throwable { basedir = new File("target", "apps"); basedirProp = YARN_SERVICE_BASE_PATH + "=" + basedir.getAbsolutePath(); conf.set(YARN_SERVICE_BASE_PATH, basedir.getAbsolutePath()); + fs = new SliderFileSystem(conf); dependencyTarGzBaseDir = tmpFolder.getRoot(); + fs.getFileSystem() + .setPermission(new Path(dependencyTarGzBaseDir.getAbsolutePath()), + new FsPermission("755")); dependencyTarGz = getDependencyTarGz(dependencyTarGzBaseDir); dependencyTarGzProp = DEPENDENCY_TARBALL_PATH + "=" + dependencyTarGz .toString(); conf.set(DEPENDENCY_TARBALL_PATH, dependencyTarGz.toString()); - fs = new SliderFileSystem(conf); + if (basedir.exists()) { FileUtils.deleteDirectory(basedir); } else { @@ -162,7 +166,7 @@ public void testFlexComponents() throws Throwable { checkApp(serviceName, "master", 1L, 1000L, "qname"); } - @Test (timeout = 180000) + @Test public void testInitiateServiceUpgrade() throws Exception { String[] args = {"app", "-upgrade", "app-1", "-initiate", ExampleAppJson.resourceName(ExampleAppJson.APP_JSON), @@ -181,7 +185,7 @@ public void testInitiateAutoFinalizeServiceUpgrade() throws Exception { Assert.assertEquals(result, 0); } - @Test (timeout = 180000) + @Test public void testUpgradeInstances() throws Exception { conf.set(YARN_APP_ADMIN_CLIENT_PREFIX + DUMMY_APP_TYPE, DummyServiceClient.class.getName()); @@ -193,7 +197,7 @@ public void testUpgradeInstances() throws Exception { Assert.assertEquals(result, 0); } - @Test (timeout = 180000) + @Test public void testUpgradeComponents() throws Exception { conf.set(YARN_APP_ADMIN_CLIENT_PREFIX + DUMMY_APP_TYPE, DummyServiceClient.class.getName()); @@ -205,6 +209,18 @@ public void testUpgradeComponents() throws Exception { Assert.assertEquals(result, 0); } + @Test + public void testGetInstances() throws Exception { + conf.set(YARN_APP_ADMIN_CLIENT_PREFIX + DUMMY_APP_TYPE, + DummyServiceClient.class.getName()); + cli.setConf(conf); + String[] args = {"container", "-list", "app-1", + "-components", "comp1,comp2", + "-appTypes", DUMMY_APP_TYPE}; + int result = cli.run(ApplicationCLI.preProcessArgs(args)); + Assert.assertEquals(result, 0); + } + @Test (timeout = 180000) public void testEnableFastLaunch() throws Exception { fs.getFileSystem().create(new Path(basedir.getAbsolutePath(), "test.jar")) @@ -309,5 +325,12 @@ public int actionUpgradeComponents(String appName, List components) throws IOException, YarnException { return 0; } + + @Override + public String getInstances(String appName, List components, + String version, List containerStates) + throws IOException, YarnException { + return ""; + } } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceClient.java index d3664ea1dc3..700655ce5de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceClient.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.service.client; +import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -32,8 +33,12 @@ import org.apache.hadoop.yarn.service.ClientAMProtocol; import org.apache.hadoop.yarn.proto.ClientAMProtocol.CompInstancesUpgradeRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.CompInstancesUpgradeResponseProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesRequestProto; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesResponseProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.UpgradeServiceRequestProto; import org.apache.hadoop.yarn.proto.ClientAMProtocol.UpgradeServiceResponseProto; +import org.apache.hadoop.yarn.service.MockRunningServiceContext; +import org.apache.hadoop.yarn.service.ServiceContext; import org.apache.hadoop.yarn.service.ServiceTestUtils; import org.apache.hadoop.yarn.service.api.records.Component; import org.apache.hadoop.yarn.service.api.records.Container; @@ -41,6 +46,7 @@ import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.conf.YarnServiceConf; import org.apache.hadoop.yarn.service.exceptions.ErrorStrings; +import org.apache.hadoop.yarn.service.utils.FilterUtils; import org.apache.hadoop.yarn.service.utils.ServiceApiUtil; import org.junit.Assert; import org.junit.Rule; @@ -52,6 +58,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.List; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -122,6 +129,26 @@ public void testActionCompInstanceUpgrade() throws Exception { client.stop(); } + @Test + public void testGetCompInstances() throws Exception { + Service service = createService(); + MockServiceClient client = MockServiceClient.create(rule, service, true); + + //upgrade the service + service.setVersion("v2"); + client.initiateUpgrade(service); + + //add containers to the component that needs to be upgraded. 
+ Component comp = service.getComponents().iterator().next(); + ContainerId containerId = ContainerId.newContainerId(client.attemptId, 1L); + comp.addContainer(new Container().id(containerId.toString())); + + Container[] containers = client.getContainers(service.getName(), + Lists.newArrayList("compa"), "v1", null); + Assert.assertEquals("num containers", 2, containers.length); + client.stop(); + } + private Service createService() throws IOException, YarnException { Service service = ServiceTestUtils.createExampleApplication(); @@ -137,6 +164,7 @@ private Service createService() throws IOException, private final ClientAMProtocol amProxy; private Object proxyResponse; private Service service; + private ServiceContext context; private MockServiceClient() { amProxy = mock(ClientAMProtocol.class); @@ -147,8 +175,12 @@ private MockServiceClient() { static MockServiceClient create(ServiceTestUtils.ServiceFSWatcher rule, Service service, boolean enableUpgrade) - throws IOException, YarnException { + throws Exception { MockServiceClient client = new MockServiceClient(); + ApplicationId applicationId = ApplicationId.newInstance( + System.currentTimeMillis(), 1); + service.setId(applicationId.toString()); + client.context = new MockRunningServiceContext(rule, service); YarnClient yarnClient = createMockYarnClient(); ApplicationReport appReport = mock(ApplicationReport.class); @@ -175,10 +207,28 @@ static MockServiceClient create(ServiceTestUtils.ServiceFSWatcher rule, CompInstancesUpgradeRequestProto.class))).thenAnswer( (Answer) invocation -> { CompInstancesUpgradeResponseProto response = - CompInstancesUpgradeResponseProto.newBuilder().build(); + CompInstancesUpgradeResponseProto.newBuilder().build(); client.proxyResponse = response; return response; }); + + when(client.amProxy.getCompInstances(Matchers.any( + GetCompInstancesRequestProto.class))).thenAnswer( + (Answer) invocation -> { + + GetCompInstancesRequestProto req = (GetCompInstancesRequestProto) + invocation.getArguments()[0]; + List containers = FilterUtils.filterInstances( + client.context, req); + GetCompInstancesResponseProto response = + GetCompInstancesResponseProto.newBuilder().setCompInstances( + ServiceApiUtil.CONTAINER_JSON_SERDE.toJson( + containers.toArray(new Container[containers.size()]))) + .build(); + client.proxyResponse = response; + return response; + }); + client.setFileSystem(rule.getFs()); client.setYarnClient(yarnClient); client.service = service; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java index d7c15ec731e..e1a4d9d7553 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponent.java @@ -18,43 +18,27 @@ package org.apache.hadoop.yarn.service.component; -import org.apache.hadoop.registry.client.api.RegistryOperations; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; 
import org.apache.hadoop.yarn.api.records.ContainerExitStatus; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; -import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.client.api.NMClient; -import org.apache.hadoop.yarn.client.api.async.NMClientAsync; -import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.service.ServiceContext; -import org.apache.hadoop.yarn.service.ServiceScheduler; import org.apache.hadoop.yarn.service.ServiceTestUtils; import org.apache.hadoop.yarn.service.TestServiceManager; import org.apache.hadoop.yarn.service.api.records.ComponentState; import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.api.records.ServiceState; import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEvent; import org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType; - -import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService; -import org.apache.hadoop.yarn.service.registry.YarnRegistryViewForProviders; +import org.apache.hadoop.yarn.service.MockRunningServiceContext; import org.apache.log4j.Logger; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; -import org.mockito.stubbing.Answer; -import java.io.IOException; import java.util.Iterator; -import java.util.Map; import static org.apache.hadoop.yarn.service.component.instance.ComponentInstanceEventType.STOP; - -import static org.mockito.Matchers.anyObject; -import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -63,7 +47,6 @@ */ public class TestComponent { - private static final int WAIT_MS_PER_LOOP = 1000; static final Logger LOG = Logger.getLogger(TestComponent.class); @Rule @@ -115,7 +98,7 @@ public void testCheckState() throws Exception { @Test public void testContainerCompletedWhenUpgrading() throws Exception { String serviceName = "testContainerComplete"; - ServiceContext context = createTestContext(rule, serviceName); + MockRunningServiceContext context = createTestContext(rule, serviceName); Component comp = context.scheduler.getAllComponents().entrySet().iterator() .next().getValue(); @@ -148,7 +131,7 @@ public void testContainerCompletedWhenUpgrading() throws Exception { ComponentState.FLEXING, comp.getComponentSpec().getState()); // new container get allocated - assignNewContainer(context.attemptId, 10, context, comp); + context.assignNewContainer(context.attemptId, 10, comp); // second instance finished upgrading ComponentInstance instance2 = instanceIter.next(); @@ -165,7 +148,8 @@ public void testContainerCompletedWhenUpgrading() throws Exception { } @Test - public void testComponentStateUpdatesWithTerminatingComponents() throws + public void testComponentStateReachesStableStateWithTerminatingComponents() + throws Exception { final String serviceName = "testComponentStateUpdatesWithTerminatingComponents"; @@ -174,7 +158,7 @@ public void testComponentStateUpdatesWithTerminatingComponents() throws serviceName); TestServiceManager.createDef(serviceName, testService); - ServiceContext context = createTestContext(rule, testService); + ServiceContext context = new MockRunningServiceContext(rule, testService); for (Component comp : context.scheduler.getAllComponents().values()) { @@ -196,7 +180,8 @@ public void 
testComponentStateUpdatesWithTerminatingComponents() throws org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE, "successful", 0); comp.handle(new ComponentEvent(comp.getName(), - ComponentEventType.CONTAINER_COMPLETED).setStatus(containerStatus)); + ComponentEventType.CONTAINER_COMPLETED).setStatus(containerStatus) + .setContainerId(instanceContainer.getId())); componentInstance.handle( new ComponentInstanceEvent(componentInstance.getContainer().getId(), ComponentInstanceEventType.STOP).setStatus(containerStatus)); @@ -215,6 +200,57 @@ public void testComponentStateUpdatesWithTerminatingComponents() throws } } + @Test + public void testComponentStateUpdatesWithTerminatingComponents() + throws + Exception { + final String serviceName = + "testComponentStateUpdatesWithTerminatingComponents"; + + Service testService = ServiceTestUtils.createTerminatingJobExample( + serviceName); + TestServiceManager.createDef(serviceName, testService); + + ServiceContext context = new MockRunningServiceContext(rule, testService); + + for (Component comp : context.scheduler.getAllComponents().values()) { + Iterator instanceIter = comp. + getAllComponentInstances().iterator(); + + while (instanceIter.hasNext()) { + + ComponentInstance componentInstance = instanceIter.next(); + Container instanceContainer = componentInstance.getContainer(); + + //stop 1 container + ContainerStatus containerStatus = ContainerStatus.newInstance( + instanceContainer.getId(), + org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE, + "successful", 0); + comp.handle(new ComponentEvent(comp.getName(), + ComponentEventType.CONTAINER_COMPLETED).setStatus(containerStatus) + .setContainerId(instanceContainer.getId())); + componentInstance.handle( + new ComponentInstanceEvent(componentInstance.getContainer().getId(), + ComponentInstanceEventType.STOP).setStatus(containerStatus)); + } + + ComponentState componentState = + comp.getComponentSpec().getState(); + Assert.assertEquals( + ComponentState.SUCCEEDED, + componentState); + } + + ServiceState serviceState = + testService.getState(); + Assert.assertEquals( + ServiceState.SUCCEEDED, + serviceState); + } + + + private static org.apache.hadoop.yarn.service.api.records.Component createSpecWithEnv(String serviceName, String compName, String key, String val) { @@ -225,114 +261,11 @@ public void testComponentStateUpdatesWithTerminatingComponents() throws return spec; } - public static ServiceContext createTestContext( + public static MockRunningServiceContext createTestContext( ServiceTestUtils.ServiceFSWatcher fsWatcher, String serviceName) throws Exception { - return createTestContext(fsWatcher, + return new MockRunningServiceContext(fsWatcher, TestServiceManager.createBaseDef(serviceName)); } - - public static ServiceContext createTestContext( - ServiceTestUtils.ServiceFSWatcher fsWatcher, Service serviceDef) - throws Exception { - ServiceContext context = new ServiceContext(); - context.service = serviceDef; - context.fs = fsWatcher.getFs(); - - ContainerLaunchService mockLaunchService = mock( - ContainerLaunchService.class); - - context.scheduler = new ServiceScheduler(context) { - @Override protected YarnRegistryViewForProviders - createYarnRegistryOperations( - ServiceContext context, RegistryOperations registryClient) { - return mock(YarnRegistryViewForProviders.class); - } - - @Override public NMClientAsync createNMClient() { - NMClientAsync nmClientAsync = super.createNMClient(); - NMClient nmClient = mock(NMClient.class); - try { - 
when(nmClient.getContainerStatus(anyObject(), anyObject())) - .thenAnswer( - (Answer) invocation -> ContainerStatus - .newInstance((ContainerId) invocation.getArguments()[0], - org.apache.hadoop.yarn.api.records.ContainerState - .RUNNING, - "", 0)); - } catch (YarnException | IOException e) { - throw new RuntimeException(e); - } - nmClientAsync.setClient(nmClient); - return nmClientAsync; - } - - @Override public ContainerLaunchService getContainerLaunchService() { - return mockLaunchService; - } - }; - context.scheduler.init(fsWatcher.getConf()); - - ServiceTestUtils.createServiceManager(context); - - doNothing().when(mockLaunchService). - reInitCompInstance(anyObject(), anyObject(), anyObject(), anyObject()); - stabilizeComponents(context); - - return context; - } - - private static void stabilizeComponents(ServiceContext context) { - - ApplicationId appId = ApplicationId.fromString(context.service.getId()); - ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); - context.attemptId = attemptId; - Map - componentState = context.scheduler.getAllComponents(); - - int counter = 0; - for (org.apache.hadoop.yarn.service.api.records.Component componentSpec : - context.service.getComponents()) { - Component component = new org.apache.hadoop.yarn.service.component. - Component(componentSpec, 1L, context); - componentState.put(component.getName(), component); - component.handle(new ComponentEvent(component.getName(), - ComponentEventType.FLEX)); - - for (int i = 0; i < componentSpec.getNumberOfContainers(); i++) { - counter++; - assignNewContainer(attemptId, counter, context, component); - } - - component.handle(new ComponentEvent(component.getName(), - ComponentEventType.CHECK_STABLE)); - } - } - - private static void assignNewContainer( - ApplicationAttemptId attemptId, long containerNum, - ServiceContext context, Component component) { - - - Container container = org.apache.hadoop.yarn.api.records.Container - .newInstance(ContainerId.newContainerId(attemptId, containerNum), - NODE_ID, "localhost", null, null, - null); - component.handle(new ComponentEvent(component.getName(), - ComponentEventType.CONTAINER_ALLOCATED) - .setContainer(container).setContainerId(container.getId())); - ComponentInstance instance = context.scheduler.getLiveInstances().get( - container.getId()); - ComponentInstanceEvent startEvent = new ComponentInstanceEvent( - container.getId(), ComponentInstanceEventType.START); - instance.handle(startEvent); - - ComponentInstanceEvent readyEvent = new ComponentInstanceEvent( - container.getId(), ComponentInstanceEventType.BECOME_READY); - instance.handle(readyEvent); - } - - private static final NodeId NODE_ID = NodeId.fromString("localhost:0"); - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java index 60f5c918f9a..03158cfc18e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/TestComponentRestartPolicy.java @@ -110,7 +110,6 @@ public void testOnFailureRestartPolicy() throws Exception { assertEquals(true, restartPolicy.isReadyForDownStream(component)); - when(component.getNumSucceededInstances()).thenReturn(new Long(2)); when(component.getNumFailedInstances()).thenReturn(new Long(1)); when(component.getNumDesiredInstances()).thenReturn(3); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java index 26e8c931258..e0399816db8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/component/instance/TestComponentInstance.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *
    + * + * http://www.apache.org/licenses/LICENSE-2.0 + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -60,19 +60,20 @@ */ public class TestComponentInstance { - @Rule public ServiceTestUtils.ServiceFSWatcher rule = + @Rule + public ServiceTestUtils.ServiceFSWatcher rule = new ServiceTestUtils.ServiceFSWatcher(); - @Test public void testContainerUpgrade() throws Exception { + @Test + public void testContainerUpgrade() throws Exception { ServiceContext context = TestComponent.createTestContext(rule, "testContainerUpgrade"); - Component component = - context.scheduler.getAllComponents().entrySet().iterator().next() - .getValue(); + Component component = context.scheduler.getAllComponents().entrySet() + .iterator().next().getValue(); upgradeComponent(component); - ComponentInstance instance = - component.getAllComponentInstances().iterator().next(); + ComponentInstance instance = component.getAllComponentInstances().iterator() + .next(); ComponentInstanceEvent instanceEvent = new ComponentInstanceEvent( instance.getContainer().getId(), ComponentInstanceEventType.UPGRADE); instance.handle(instanceEvent); @@ -82,16 +83,16 @@ containerSpec.getState()); } - @Test public void testContainerReadyAfterUpgrade() throws Exception { + @Test + public void testContainerReadyAfterUpgrade() throws Exception { ServiceContext context = TestComponent.createTestContext(rule, "testContainerStarted"); - Component component = - context.scheduler.getAllComponents().entrySet().iterator().next() - .getValue(); + Component component = context.scheduler.getAllComponents().entrySet() + .iterator().next().getValue(); upgradeComponent(component); - ComponentInstance instance = - component.getAllComponentInstances().iterator().next(); + ComponentInstance instance = component.getAllComponentInstances().iterator() + .next(); ComponentInstanceEvent instanceEvent = new ComponentInstanceEvent( instance.getContainer().getId(), ComponentInstanceEventType.UPGRADE); @@ -100,9 +101,8 @@ instance.handle(new ComponentInstanceEvent(instance.getContainer().getId(), ComponentInstanceEventType.BECOME_READY)); Assert.assertEquals("instance not ready", ContainerState.READY, - instance.getCompSpec() - .getContainer(instance.getContainer().getId().toString()) - .getState()); + component.getComponentSpec().getContainer(instance.getContainer() + .getId().toString()).getState()); } private void upgradeComponent(Component component) { @@ -113,9 +113,8 @@ private void upgradeComponent(Component component) { private Component createComponent(ServiceScheduler scheduler, org.apache.hadoop.yarn.service.api.records.Component.RestartPolicyEnum - restartPolicy, - int nSucceededInstances, int nFailedInstances, int totalAsk, - int componentId) { + restartPolicy, int nSucceededInstances, int nFailedInstances, + int totalAsk, int componentId) { assert (nSucceededInstances + nFailedInstances) <= totalAsk; @@ -205,6 +204,8 @@ private ComponentInstance createComponentInstance(Component component, when(componentInstance.getComponent()).thenReturn(component); when(componentInstance.getCompInstanceName()).thenReturn( "compInstance" + instanceId); + Container container = mock(Container.class); + when(componentInstance.getContainerSpec()).thenReturn(container); ServiceUtils.ProcessTerminationHandler terminationHandler = mock( ServiceUtils.ProcessTerminationHandler.class); @@ -214,7 +215,8 @@ 
private ComponentInstance createComponentInstance(Component component, return componentInstance; } - @Test public void testComponentRestartPolicy() { + @Test + public void testComponentRestartPolicy() { Map allComponents = new HashMap<>(); Service mockService = mock(Service.class); @@ -227,12 +229,15 @@ private ComponentInstance createComponentInstance(Component component, Mockito.doNothing().when(serviceScheduler).setGracefulStop( any(FinalApplicationStatus.class)); + final String containerDiag = "Container succeeded"; + ComponentInstanceEvent componentInstanceEvent = mock( ComponentInstanceEvent.class); ContainerId containerId = ContainerId.newContainerId(ApplicationAttemptId .newInstance(ApplicationId.newInstance(1234L, 1), 1), 1); ContainerStatus containerStatus = ContainerStatus.newInstance(containerId, - org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE, "hello", 0); + org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE, + containerDiag, 0); when(componentInstanceEvent.getStatus()).thenReturn(containerStatus); @@ -245,7 +250,7 @@ private ComponentInstance createComponentInstance(Component component, comp.getAllComponentInstances().iterator().next(); ComponentInstance.handleComponentInstanceRelaunch(componentInstance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); verify(comp, never()).markAsSucceeded(any(ComponentInstance.class)); verify(comp, never()).markAsFailed(any(ComponentInstance.class)); @@ -262,7 +267,7 @@ private ComponentInstance createComponentInstance(Component component, componentInstance = comp.getAllComponentInstances().iterator().next(); containerStatus.setExitStatus(1); ComponentInstance.handleComponentInstanceRelaunch(componentInstance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); verify(comp, never()).markAsSucceeded(any(ComponentInstance.class)); verify(comp, never()).markAsFailed(any(ComponentInstance.class)); verify(comp, times(1)).reInsertPendingInstance( @@ -286,7 +291,7 @@ private ComponentInstance createComponentInstance(Component component, when(comp.getNumSucceededInstances()).thenReturn(new Long(1)); ComponentInstance.handleComponentInstanceRelaunch(componentInstance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); verify(comp, times(1)).markAsSucceeded(any(ComponentInstance.class)); verify(comp, never()).markAsFailed(any(ComponentInstance.class)); verify(comp, times(0)).reInsertPendingInstance( @@ -304,8 +309,7 @@ private ComponentInstance createComponentInstance(Component component, when(comp.getNumFailedInstances()).thenReturn(new Long(1)); ComponentInstance.handleComponentInstanceRelaunch(componentInstance, - componentInstanceEvent); - + componentInstanceEvent, false, containerDiag); verify(comp, never()).markAsSucceeded(any(ComponentInstance.class)); verify(comp, times(1)).markAsFailed(any(ComponentInstance.class)); verify(comp, times(0)).reInsertPendingInstance( @@ -323,7 +327,7 @@ private ComponentInstance createComponentInstance(Component component, componentInstance = comp.getAllComponentInstances().iterator().next(); containerStatus.setExitStatus(1); ComponentInstance.handleComponentInstanceRelaunch(componentInstance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); verify(comp, never()).markAsSucceeded(any(ComponentInstance.class)); verify(comp, never()).markAsFailed(any(ComponentInstance.class)); verify(comp, times(1)).reInsertPendingInstance( @@ -340,7 +344,7 @@ private ComponentInstance 
createComponentInstance(Component component, componentInstance = comp.getAllComponentInstances().iterator().next(); containerStatus.setExitStatus(1); ComponentInstance.handleComponentInstanceRelaunch(componentInstance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); verify(comp, never()).markAsSucceeded(any(ComponentInstance.class)); verify(comp, times(1)).markAsFailed(any(ComponentInstance.class)); verify(comp, times(0)).reInsertPendingInstance( @@ -363,8 +367,7 @@ private ComponentInstance createComponentInstance(Component component, containerStatus.setExitStatus(1); ComponentInstance commponentInstance = iter.next(); ComponentInstance.handleComponentInstanceRelaunch(commponentInstance, - componentInstanceEvent); - + componentInstanceEvent, false, containerDiag); verify(comp, never()).markAsSucceeded(any(ComponentInstance.class)); verify(comp, never()).markAsFailed(any(ComponentInstance.class)); verify(comp, times(1)).reInsertPendingInstance( @@ -404,7 +407,7 @@ private ComponentInstance createComponentInstance(Component component, when(component2Instance.getComponent().getNumFailedInstances()) .thenReturn(new Long(failed2Instances.size())); ComponentInstance.handleComponentInstanceRelaunch(component2Instance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); } Map failed1Instances = new HashMap<>(); @@ -418,7 +421,7 @@ private ComponentInstance createComponentInstance(Component component, when(component1Instance.getComponent().getNumFailedInstances()) .thenReturn(new Long(failed1Instances.size())); ComponentInstance.handleComponentInstanceRelaunch(component1Instance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); } verify(comp, never()).markAsSucceeded(any(ComponentInstance.class)); @@ -458,7 +461,7 @@ private ComponentInstance createComponentInstance(Component component, when(component2Instance.getComponent().getNumSucceededInstances()) .thenReturn(new Long(succeeded2Instances.size())); ComponentInstance.handleComponentInstanceRelaunch(component2Instance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); } Map succeeded1Instances = new HashMap<>(); @@ -471,7 +474,7 @@ private ComponentInstance createComponentInstance(Component component, when(component1Instance.getComponent().getNumSucceededInstances()) .thenReturn(new Long(succeeded1Instances.size())); ComponentInstance.handleComponentInstanceRelaunch(component1Instance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); } verify(comp, times(2)).markAsSucceeded(any(ComponentInstance.class)); @@ -500,7 +503,7 @@ private ComponentInstance createComponentInstance(Component component, for (ComponentInstance component2Instance : component2Instances) { ComponentInstance.handleComponentInstanceRelaunch(component2Instance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); } succeeded1Instances = new HashMap<>(); @@ -511,7 +514,7 @@ private ComponentInstance createComponentInstance(Component component, when(component1Instance.getComponent().getSucceededInstances()) .thenReturn(succeeded1Instances.values()); ComponentInstance.handleComponentInstanceRelaunch(component1Instance, - componentInstanceEvent); + componentInstanceEvent, false, containerDiag); } verify(comp, times(2)).markAsSucceeded(any(ComponentInstance.class)); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/containerlaunch/TestAbstractLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/containerlaunch/TestAbstractLauncher.java index f4f1a50e439..108078ca7f0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/containerlaunch/TestAbstractLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/containerlaunch/TestAbstractLauncher.java @@ -19,13 +19,33 @@ package org.apache.hadoop.yarn.service.containerlaunch; import org.apache.hadoop.yarn.service.ServiceContext; +import org.apache.hadoop.yarn.service.api.records.Configuration; +import org.apache.hadoop.yarn.service.component.AlwaysRestartPolicy; +import org.apache.hadoop.yarn.service.component.Component; +import org.apache.hadoop.yarn.service.component.NeverRestartPolicy; +import org.apache.hadoop.yarn.service.component.OnFailureRestartPolicy; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.provider.defaultImpl + .DefaultProviderService; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import java.io.IOException; +import static org.apache.hadoop.fi.FiConfig.getConfig; +import static org.apache.hadoop.yarn.service.conf.YarnServiceConf + .DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL; +import static org.apache.hadoop.yarn.service.conf.YarnServiceConf + .DEFAULT_CONTAINER_RETRY_INTERVAL; +import static org.apache.hadoop.yarn.service.conf.YarnServiceConf + .DEFAULT_CONTAINER_RETRY_MAX; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.when; /** * Tests for {@link AbstractLauncher}. 
@@ -51,4 +71,50 @@ public void testDockerContainerMounts() throws IOException { Assert.assertEquals("s1:t1:ro,s2:t2:ro", dockerContainerMounts); } + + @Test + public void testContainerRetries() throws Exception { + + DefaultProviderService providerService = new DefaultProviderService(); + AbstractLauncher mockLauncher = mock(AbstractLauncher.class); + ContainerLaunchService.ComponentLaunchContext componentLaunchContext = + mock(ContainerLaunchService.ComponentLaunchContext.class); + + ComponentInstance componentInstance = mock(ComponentInstance.class); + + //Never Restart Policy + Component component = mock(Component.class); + when(componentInstance.getComponent()).thenReturn(component); + + when(component.getRestartPolicyHandler()).thenReturn(NeverRestartPolicy + .getInstance()); + + providerService.buildContainerRetry(mockLauncher, getConfig(), + componentLaunchContext, componentInstance); + verifyZeroInteractions(mockLauncher); + + + //OnFailure restart policy + when(component.getRestartPolicyHandler()).thenReturn(OnFailureRestartPolicy + .getInstance()); + when(componentLaunchContext.getConfiguration()).thenReturn(new + Configuration()); + providerService.buildContainerRetry(mockLauncher, getConfig(), + componentLaunchContext, componentInstance); + verify(mockLauncher).setRetryContext(DEFAULT_CONTAINER_RETRY_MAX, + DEFAULT_CONTAINER_RETRY_INTERVAL, + DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL); + + reset(mockLauncher); + + //Always restart policy + when(component.getRestartPolicyHandler()).thenReturn(AlwaysRestartPolicy + .getInstance()); + providerService.buildContainerRetry(mockLauncher, getConfig(), + componentLaunchContext, componentInstance); + + verify(mockLauncher).setRetryContext(DEFAULT_CONTAINER_RETRY_MAX, + DEFAULT_CONTAINER_RETRY_INTERVAL, + DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/provider/TestAbstractProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/provider/TestAbstractProviderService.java new file mode 100644 index 00000000000..81ccc7fed47 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/provider/TestAbstractProviderService.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.service.provider; + +import com.google.common.collect.Lists; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.service.MockRunningServiceContext; +import org.apache.hadoop.yarn.service.ServiceContext; +import org.apache.hadoop.yarn.service.ServiceTestUtils; +import org.apache.hadoop.yarn.service.TestServiceManager; +import org.apache.hadoop.yarn.service.api.records.Artifact; +import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.component.Component; +import org.apache.hadoop.yarn.service.component.instance.ComponentInstance; +import org.apache.hadoop.yarn.service.containerlaunch.AbstractLauncher; +import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService; +import org.apache.hadoop.yarn.service.provider.docker.DockerProviderService; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Tests for {@link AbstractProviderService} + */ +public class TestAbstractProviderService { + + private ServiceContext serviceContext; + private Service testService; + private AbstractLauncher launcher; + + @Rule + public ServiceTestUtils.ServiceFSWatcher rule = + new ServiceTestUtils.ServiceFSWatcher(); + + @Before + public void setup() throws Exception { + testService = TestServiceManager.createBaseDef("testService"); + serviceContext = new MockRunningServiceContext(rule, testService); + launcher = new AbstractLauncher(serviceContext); + rule.getFs().setAppDir(new Path("target/testAbstractProviderService")); + } + + @After + public void teardown() throws Exception { + FileUtils.deleteQuietly( + new File(rule.getFs().getAppDir().toUri().getPath())); + } + + @Test + public void testBuildContainerLaunchCommand() throws Exception { + AbstractProviderService providerService = new DockerProviderService(); + Component component = serviceContext.scheduler.getAllComponents().entrySet() + .iterator().next().getValue(); + ContainerLaunchService.ComponentLaunchContext clc = + createEntryPointCLCFor(testService, component); + + ComponentInstance instance = component.getAllComponentInstances().iterator() + .next(); + Container container = mock(Container.class); + providerService.buildContainerLaunchCommand(launcher, testService, instance, + rule.getFs(), serviceContext.scheduler.getConfig(), container, clc, + null); + + Assert.assertEquals("commands", Lists.newArrayList(clc.getLaunchCommand()), + launcher.getCommands()); + } + + @Test + public void testBuildContainerLaunchContext() throws Exception { + AbstractProviderService providerService = new DockerProviderService(); + Component component = serviceContext.scheduler.getAllComponents().entrySet() + .iterator().next().getValue(); + ContainerLaunchService.ComponentLaunchContext clc = + createEntryPointCLCFor(testService, component); + + ComponentInstance instance = component.getAllComponentInstances().iterator() + .next(); + Container container = mock(Container.class); + ContainerId containerId = ContainerId.newContainerId( + 
ApplicationAttemptId.newInstance(ApplicationId.newInstance( + System.currentTimeMillis(), 1), 1), 1L); + when(container.getId()).thenReturn(containerId); + providerService.buildContainerLaunchContext(launcher, testService, instance, + rule.getFs(), serviceContext.scheduler.getConfig(), container, clc); + + Assert.assertEquals("artifact", clc.getArtifact().getId(), + launcher.getDockerImage()); + } + + private static ContainerLaunchService.ComponentLaunchContext + createEntryPointCLCFor(Service service, Component component) { + String launchCmd = "sleep,9000"; + Artifact artifact = new Artifact(); + artifact.setType(Artifact.TypeEnum.DOCKER); + artifact.setId("example"); + Map env = new HashMap<>(); + env.put("YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL", "true"); + env.put("YARN_CONTAINER_RUNTIME_DOCKER_RUN_OVERRIDE_DISABLE", "true"); + component.getComponentSpec().getConfiguration().setEnv(env); + + return new ContainerLaunchService.ComponentLaunchContext( + component.getName(), + service.getVersion()) + .setArtifact(artifact) + .setConfiguration(component.getComponentSpec().getConfiguration()) + .setLaunchCommand(launchCmd); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/providers/TestAbstractClientProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/providers/TestAbstractClientProvider.java index 1d6be910c55..ae626086f98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/providers/TestAbstractClientProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/providers/TestAbstractClientProvider.java @@ -45,12 +45,12 @@ private static class ClientProvider extends AbstractClientProvider { @Override - public void validateArtifact(Artifact artifact, FileSystem fileSystem) - throws IOException { + public void validateArtifact(Artifact artifact, String compName, + FileSystem fileSystem) throws IOException { } @Override - protected void validateConfigFile(ConfigFile configFile, + protected void validateConfigFile(ConfigFile configFile, String compName, FileSystem fileSystem) throws IOException { } } @@ -62,33 +62,34 @@ public void testConfigFiles() throws IOException { FileStatus mockFileStatus = mock(FileStatus.class); when(mockFs.exists(anyObject())).thenReturn(true); + String compName = "sleeper"; ConfigFile configFile = new ConfigFile(); List configFiles = new ArrayList<>(); configFiles.add(configFile); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "null file type"); } catch (IllegalArgumentException e) { } configFile.setType(ConfigFile.TypeEnum.TEMPLATE); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "empty src_file for type template"); } catch (IllegalArgumentException e) { } configFile.setSrcFile("srcfile"); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "empty dest file"); } catch 
(IllegalArgumentException e) { } configFile.setDestFile("destfile"); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); } catch (IllegalArgumentException e) { Assert.fail(NO_EXCEPTION_PREFIX + e.getMessage()); } @@ -99,21 +100,21 @@ public void testConfigFiles() throws IOException { configFile.setDestFile("path/destfile2"); configFiles.add(configFile); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "dest file with multiple path elements"); } catch (IllegalArgumentException e) { } configFile.setDestFile("/path/destfile2"); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); } catch (IllegalArgumentException e) { Assert.fail(NO_EXCEPTION_PREFIX + e.getMessage()); } configFile.setDestFile("destfile"); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "duplicate dest file"); } catch (IllegalArgumentException e) { } @@ -125,14 +126,14 @@ public void testConfigFiles() throws IOException { configFile.setDestFile("path/destfile3"); configFiles.add(configFile); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "dest file with multiple path elements"); } catch (IllegalArgumentException e) { } configFile.setDestFile("/path/destfile3"); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "src file should be specified"); } catch (IllegalArgumentException e) { } @@ -140,7 +141,7 @@ public void testConfigFiles() throws IOException { //should succeed configFile.setSrcFile("srcFile"); configFile.setDestFile("destfile3"); - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); when(mockFileStatus.isDirectory()).thenReturn(true); when(mockFs.getFileStatus(new Path("srcFile"))) @@ -154,7 +155,7 @@ public void testConfigFiles() throws IOException { configFiles.add(configFile); try { - clientProvider.validateConfigFiles(configFiles, mockFs); + clientProvider.validateConfigFiles(configFiles, compName, mockFs); Assert.fail(EXCEPTION_PREFIX + "src file is a directory"); } catch (IllegalArgumentException e) { } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/providers/TestDefaultClientProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/providers/TestDefaultClientProvider.java new file mode 100644 index 00000000000..366515c8b42 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/providers/TestDefaultClientProvider.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.service.providers; + +import static org.mockito.Matchers.anyObject; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.yarn.service.api.records.ConfigFile; +import org.apache.hadoop.yarn.service.exceptions.RestApiErrorMessages; +import org.apache.hadoop.yarn.service.provider.defaultImpl.DefaultClientProvider; +import org.junit.Assert; +import org.junit.Test; + +public class TestDefaultClientProvider { + private static final String EXCEPTION_PREFIX = "Should have thrown " + + "exception: "; + private static final String NO_EXCEPTION_PREFIX = "Should not have thrown " + + "exception: "; + + @Test + public void testConfigFile() throws IOException { + DefaultClientProvider defaultClientProvider = new DefaultClientProvider(); + FileSystem mockFs = mock(FileSystem.class); + when(mockFs.exists(anyObject())).thenReturn(true); + + String compName = "sleeper"; + ConfigFile configFile = new ConfigFile(); + configFile.setDestFile("/var/tmp/a.txt"); + + try { + defaultClientProvider.validateConfigFile(configFile, compName, mockFs); + Assert.fail(EXCEPTION_PREFIX + " dest_file must be relative"); + } catch (IllegalArgumentException e) { + String actualMsg = String.format( + RestApiErrorMessages.ERROR_CONFIGFILE_DEST_FILE_FOR_COMP_NOT_ABSOLUTE, + compName, "no", configFile.getDestFile()); + Assert.assertEquals(actualMsg, e.getLocalizedMessage()); + } + + configFile.setDestFile("../a.txt"); + try { + defaultClientProvider.validateConfigFile(configFile, compName, mockFs); + } catch (IllegalArgumentException e) { + Assert.fail(NO_EXCEPTION_PREFIX + e.getLocalizedMessage()); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/timelineservice/TestServiceTimelinePublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/timelineservice/TestServiceTimelinePublisher.java index cff7229db34..a77e6c8d317 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/timelineservice/TestServiceTimelinePublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/timelineservice/TestServiceTimelinePublisher.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity; import 
org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity.Identifier; import org.apache.hadoop.yarn.client.api.TimelineV2Client; @@ -122,7 +123,8 @@ public void testServiceAttemptEntity() { context.attemptId = ApplicationAttemptId .newInstance(ApplicationId.fromString(service.getId()), 1); String exitDiags = "service killed"; - serviceTimelinePublisher.serviceAttemptUnregistered(context, exitDiags); + serviceTimelinePublisher.serviceAttemptUnregistered(context, + FinalApplicationStatus.ENDED, exitDiags); lastPublishedEntities = ((DummyTimelineClient) timelineClient).getLastPublishedEntities(); for (TimelineEntity timelineEntity : lastPublishedEntities) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/utils/TestFilterUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/utils/TestFilterUtils.java new file mode 100644 index 00000000000..065c37ad17a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/utils/TestFilterUtils.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.service.utils; + +import com.google.common.collect.Lists; +import org.apache.hadoop.yarn.proto.ClientAMProtocol.GetCompInstancesRequestProto; +import org.apache.hadoop.yarn.service.ServiceContext; +import org.apache.hadoop.yarn.service.ServiceTestUtils; +import org.apache.hadoop.yarn.service.TestServiceManager; +import org.apache.hadoop.yarn.service.api.records.Container; +import org.apache.hadoop.yarn.service.MockRunningServiceContext; +import org.apache.hadoop.yarn.service.api.records.ContainerState; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +import java.util.List; + +public class TestFilterUtils { + + @Rule + public ServiceTestUtils.ServiceFSWatcher rule = + new ServiceTestUtils.ServiceFSWatcher(); + + @Test + public void testNoFilter() throws Exception { + GetCompInstancesRequestProto req = GetCompInstancesRequestProto.newBuilder() + .build(); + List containers = FilterUtils.filterInstances( + new MockRunningServiceContext(rule, + TestServiceManager.createBaseDef("service")), req); + Assert.assertEquals("num containers", 4, containers.size()); + } + + @Test + public void testFilterWithComp() throws Exception { + GetCompInstancesRequestProto req = GetCompInstancesRequestProto.newBuilder() + .addAllComponentNames(Lists.newArrayList("compa")).build(); + List containers = FilterUtils.filterInstances( + new MockRunningServiceContext(rule, + TestServiceManager.createBaseDef("service")), req); + Assert.assertEquals("num containers", 2, containers.size()); + } + + @Test + public void testFilterWithVersion() throws Exception { + ServiceContext sc = new MockRunningServiceContext(rule, + TestServiceManager.createBaseDef("service")); + GetCompInstancesRequestProto.Builder reqBuilder = + GetCompInstancesRequestProto.newBuilder(); + + reqBuilder.setVersion("v2"); + Assert.assertEquals("num containers", 0, + FilterUtils.filterInstances(sc, reqBuilder.build()).size()); + + reqBuilder.addAllComponentNames(Lists.newArrayList("compa")) + .setVersion("v1").build(); + + Assert.assertEquals("num containers", 2, + FilterUtils.filterInstances(sc, reqBuilder.build()).size()); + + reqBuilder.setVersion("v2").build(); + Assert.assertEquals("num containers", 0, + FilterUtils.filterInstances(sc, reqBuilder.build()).size()); + } + + @Test + public void testFilterWithState() throws Exception { + ServiceContext sc = new MockRunningServiceContext(rule, + TestServiceManager.createBaseDef("service")); + GetCompInstancesRequestProto.Builder reqBuilder = + GetCompInstancesRequestProto.newBuilder(); + + reqBuilder.addAllContainerStates(Lists.newArrayList( + ContainerState.READY.toString())); + Assert.assertEquals("num containers", 4, + FilterUtils.filterInstances(sc, reqBuilder.build()).size()); + + reqBuilder.clearContainerStates(); + reqBuilder.addAllContainerStates(Lists.newArrayList( + ContainerState.STOPPED.toString())); + Assert.assertEquals("num containers", 0, + FilterUtils.filterInstances(sc, reqBuilder.build()).size()); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceApiUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/utils/TestServiceApiUtil.java similarity index 83% rename from 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceApiUtil.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/utils/TestServiceApiUtil.java index ae031d4aad9..98e7474237f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestServiceApiUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/utils/TestServiceApiUtil.java @@ -15,11 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.yarn.service; +package org.apache.hadoop.yarn.service.utils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.registry.client.api.RegistryConstants; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.service.ServiceTestUtils; import org.apache.hadoop.yarn.service.api.records.Artifact; import org.apache.hadoop.yarn.service.api.records.Component; import org.apache.hadoop.yarn.service.api.records.KerberosPrincipal; @@ -30,8 +31,6 @@ import org.apache.hadoop.yarn.service.api.records.Resource; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.exceptions.RestApiErrorMessages; -import org.apache.hadoop.yarn.service.utils.ServiceApiUtil; -import org.apache.hadoop.yarn.service.utils.SliderFileSystem; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -39,6 +38,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -53,7 +53,7 @@ /** * Test for ServiceApiUtil helper methods. 
*/ -public class TestServiceApiUtil { +public class TestServiceApiUtil extends ServiceTestUtils { private static final Logger LOG = LoggerFactory .getLogger(TestServiceApiUtil.class); private static final String EXCEPTION_PREFIX = "Should have thrown " + @@ -227,14 +227,16 @@ public void testArtifacts() throws IOException { // no artifact id fails with default type Artifact artifact = new Artifact(); app.setArtifact(artifact); - Component comp = ServiceTestUtils.createComponent("comp1"); + String compName = "comp1"; + Component comp = ServiceTestUtils.createComponent(compName); app.setComponents(Collections.singletonList(comp)); try { ServiceApiUtil.validateAndResolveService(app, sfs, CONF_DNS_ENABLED); Assert.fail(EXCEPTION_PREFIX + "service with no artifact id"); } catch (IllegalArgumentException e) { - assertEquals(ERROR_ARTIFACT_ID_INVALID, e.getMessage()); + assertEquals(String.format(ERROR_ARTIFACT_ID_FOR_COMP_INVALID, compName), + e.getMessage()); } // no artifact id fails with SERVICE type @@ -252,7 +254,8 @@ public void testArtifacts() throws IOException { ServiceApiUtil.validateAndResolveService(app, sfs, CONF_DNS_ENABLED); Assert.fail(EXCEPTION_PREFIX + "service with no artifact id"); } catch (IllegalArgumentException e) { - assertEquals(ERROR_ARTIFACT_ID_INVALID, e.getMessage()); + assertEquals(String.format(ERROR_ARTIFACT_ID_FOR_COMP_INVALID, compName), + e.getMessage()); } // everything valid here @@ -622,4 +625,119 @@ public void testKerberosPrincipal() throws IOException { Assert.fail(NO_EXCEPTION_PREFIX + e.getMessage()); } } + + @Test + public void testKerberosPrincipalNameFormat() throws IOException { + Service app = createValidApplication("comp-a"); + KerberosPrincipal kp = new KerberosPrincipal(); + kp.setPrincipalName("user@domain.com"); + app.setKerberosPrincipal(kp); + + try { + ServiceApiUtil.validateKerberosPrincipal(app.getKerberosPrincipal()); + Assert.fail(EXCEPTION_PREFIX + "service with invalid principal name " + + "format."); + } catch (IllegalArgumentException e) { + assertEquals( + String.format( + RestApiErrorMessages.ERROR_KERBEROS_PRINCIPAL_NAME_FORMAT, + kp.getPrincipalName()), + e.getMessage()); + } + + kp.setPrincipalName("user/_HOST@domain.com"); + try { + ServiceApiUtil.validateKerberosPrincipal(app.getKerberosPrincipal()); + } catch (IllegalArgumentException e) { + Assert.fail(NO_EXCEPTION_PREFIX + e.getMessage()); + } + } + + @Test + public void testResolveCompsDependency() { + Service service = createExampleApplication(); + List dependencies = new ArrayList(); + dependencies.add("compb"); + Component compa = createComponent("compa"); + compa.setDependencies(dependencies); + Component compb = createComponent("compb"); + service.addComponent(compa); + service.addComponent(compb); + List order = ServiceApiUtil.resolveCompsDependency(service); + List expected = new ArrayList(); + expected.add("compb"); + expected.add("compa"); + for (int i = 0; i < expected.size(); i++) { + Assert.assertEquals("Components are not equal.", expected.get(i), + order.get(i)); + } + } + + @Test + public void testResolveCompsDependencyReversed() { + Service service = createExampleApplication(); + List dependencies = new ArrayList(); + dependencies.add("compa"); + Component compa = createComponent("compa"); + Component compb = createComponent("compb"); + compb.setDependencies(dependencies); + service.addComponent(compa); + service.addComponent(compb); + List order = ServiceApiUtil.resolveCompsDependency(service); + List expected = new ArrayList(); + expected.add("compa"); 
+ expected.add("compb"); + for (int i = 0; i < expected.size(); i++) { + Assert.assertEquals("Components are not equal.", expected.get(i), + order.get(i)); + } + } + + @Test + public void testResolveCompsCircularDependency() { + Service service = createExampleApplication(); + List dependencies = new ArrayList(); + List dependencies2 = new ArrayList(); + dependencies.add("compb"); + dependencies2.add("compa"); + Component compa = createComponent("compa"); + compa.setDependencies(dependencies); + Component compb = createComponent("compb"); + compa.setDependencies(dependencies2); + service.addComponent(compa); + service.addComponent(compb); + List order = ServiceApiUtil.resolveCompsDependency(service); + List expected = new ArrayList(); + expected.add("compa"); + expected.add("compb"); + for (int i = 0; i < expected.size(); i++) { + Assert.assertEquals("Components are not equal.", expected.get(i), + order.get(i)); + } + } + + @Test + public void testResolveNoCompsDependency() { + Service service = createExampleApplication(); + Component compa = createComponent("compa"); + Component compb = createComponent("compb"); + service.addComponent(compa); + service.addComponent(compb); + List order = ServiceApiUtil.resolveCompsDependency(service); + List expected = new ArrayList(); + expected.add("compa"); + expected.add("compb"); + for (int i = 0; i < expected.size(); i++) { + Assert.assertEquals("Components are not equal.", expected.get(i), + order.get(i)); + } + } + + public static Service createExampleApplication() { + + Service exampleApp = new Service(); + exampleApp.setName("example-app"); + exampleApp.setVersion("v1"); + return exampleApp; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/README.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/README.md new file mode 100644 index 00000000000..3e04730421a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/README.md @@ -0,0 +1,53 @@ + + +# Overview + +```$xslt + _ _ + | | (_) + ___ _ _ | |__ _ __ ___ __ _ _ __ _ _ __ ___ + / __|| | | || '_ \ | '_ ` _ \ / _` || '__|| || '_ \ / _ \ + \__ \| |_| || |_) || | | | | || (_| || | | || | | || __/ + |___/ \__,_||_.__/ |_| |_| |_| \__,_||_| |_||_| |_| \___| + + ? + ~~~~~~~~~~~~~~~~~~~~~~~~~~~|^"~~~~~~~~~~~~~~~~~~~~~~~~~o~~~~~~~~~~~ + o | o __o + o | o |X__> + ___o | __o + (X___>-- __|__ |X__> o + | \ __o + | \ |X__> + _______________________|_______\________________ + < \____________ _ + \ \ (_) + \ O O O >=) + \__________________________________________________________/ (_) +``` + +Submarine is a project which allows infra engineer / data scientist to run *unmodified* Tensorflow programs on YARN. + +Goals of Submarine: +- It allows jobs easy access data/models in HDFS and other storages. +- Can launch services to serve Tensorflow/MXNet models. +- Support run distributed Tensorflow jobs with simple configs. +- Support run user-specified Docker images. +- Support specify GPU and other resources. +- Support launch tensorboard for training jobs if user specified. +- Support customized DNS name for roles (like tensorboard.$user.$domain:6006) + +Please jump to [QuickStart](src/site/QuickStart.md) guide to quickly understand how to use this framework. + +If you're a developer, please find [Developer](src/site/DeveloperGuide.md) guide for more details. 
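+## Example: invoking the CLI (illustrative)
+
+A minimal sketch of driving the `Cli` entry point for a `job run` command.
+The flag names mirror `CliConstants`; the job name, Docker image and launch
+command are made-up placeholders, and the exact flag parsing happens in
+`RunJobCli`, so consult the QuickStart guide for the authoritative syntax.
+
+```java
+// Sketch only: dispatch a "job run" through the submarine CLI entry point.
+// All option values below are hypothetical placeholders.
+public class SubmarineJobRunExample {
+  public static void main(String[] ignored) throws Exception {
+    String[] args = {
+        "job", "run",                             // object / action handled by Cli.main
+        "--name", "tf-job-001",                   // CliConstants.NAME
+        "--docker_image", "tf-example:latest",    // CliConstants.DOCKER_IMAGE (hypothetical image)
+        "--num_workers", "2",                     // CliConstants.N_WORKERS
+        "--worker_launch_cmd", "python train.py"  // CliConstants.WORKER_LAUNCH_CMD
+    };
+    org.apache.hadoop.yarn.submarine.client.cli.Cli.main(args);
+  }
+}
+```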
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/pom.xml new file mode 100644 index 00000000000..90a1a6ce006 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/pom.xml @@ -0,0 +1,213 @@ + + + + + hadoop-yarn-applications + org.apache.hadoop + 3.2.0-SNAPSHOT + + 4.0.0 + hadoop-yarn-submarine + 3.2.0-SNAPSHOT + Yet Another Learning Platform + + + + ${project.parent.parent.basedir} + + + + + + org.apache.hadoop + hadoop-common + provided + + + + junit + junit + test + + + + log4j + log4j + + + com.google.guava + guava + + + commons-logging + commons-logging + + + commons-cli + commons-cli + + + commons-io + commons-io + + + org.apache.hadoop + hadoop-yarn-server-applicationhistoryservice + + + org.apache.hadoop + hadoop-yarn-server-timelineservice + test-jar + test + + + + org.apache.hadoop + hadoop-annotations + + + + org.apache.hadoop + hadoop-common + test-jar + test + + + + org.apache.hadoop + hadoop-yarn-api + + + + org.apache.hadoop + hadoop-yarn-common + + + + org.apache.hadoop + hadoop-yarn-client + + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + test + + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + + + + org.apache.hadoop + hadoop-yarn-server-tests + test-jar + test + + + org.mockito + mockito-all + test + + + org.apache.hadoop + hadoop-yarn-server-timeline-pluginstorage + + + org.apache.hadoop + hadoop-yarn-server-timeline-pluginstorage + test-jar + test + + + org.apache.hadoop + hadoop-yarn-common + test-jar + test + + + org.apache.hadoop + hadoop-hdfs + test + + + org.apache.hadoop + hadoop-hdfs-client + test + + + org.apache.hadoop + hadoop-hdfs + test + test-jar + + + org.apache.hadoop + hadoop-fs2img + 3.2.0-SNAPSHOT + + + org.apache.hadoop + hadoop-yarn-services-api + 3.2.0-SNAPSHOT + + + org.apache.hadoop + hadoop-yarn-services-core + 3.2.0-SNAPSHOT + + + + + + + maven-jar-plugin + + + + jar + + + test-compile + + + + + + org.apache.hadoop.yarn.submarine.client.cli.Cli + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${java.home} + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/AbstractCli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/AbstractCli.java new file mode 100644 index 00000000000..f6a92147c16 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/AbstractCli.java @@ -0,0 +1,47 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.client.cli; + +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException; + +import java.io.IOException; + +public abstract class AbstractCli implements Tool { + protected ClientContext clientContext; + + public AbstractCli(ClientContext cliContext) { + this.clientContext = cliContext; + } + + @Override + public abstract int run(String[] args) + throws ParseException, IOException, YarnException, InterruptedException, + SubmarineException; + + @Override + public void setConf(Configuration conf) { + clientContext.setSubmarineConfig(conf); + } + + @Override + public Configuration getConf() { + return clientContext.getSubmarineConfig(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/Cli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/Cli.java new file mode 100644 index 00000000000..b4c5e4c3df5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/Cli.java @@ -0,0 +1,106 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.client.cli; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.fs.DefaultRemoteDirectoryManager; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; + +public class Cli { + private static final Logger LOG = + LoggerFactory.getLogger(Cli.class); + + private static void printHelp() { + StringBuilder helpMsg = new StringBuilder(); + helpMsg.append("\n\nUsage: [] []\n"); + helpMsg.append(" Below are all objects / actions:\n"); + helpMsg.append(" job \n"); + helpMsg.append(" run : run a job, please see 'job run --help' for usage \n"); + helpMsg.append(" show : get status of job, please see 'job show --help' for usage \n"); + + System.out.println(helpMsg.toString()); + } + + private static ClientContext getClientContext() { + Configuration conf = new YarnConfiguration(); + ClientContext clientContext = new ClientContext(); + clientContext.setConfiguration(conf); + clientContext.setRemoteDirectoryManager( + new DefaultRemoteDirectoryManager(clientContext)); + RuntimeFactory runtimeFactory = RuntimeFactory.getRuntimeFactory( + clientContext); + clientContext.setRuntimeFactory(runtimeFactory); + return clientContext; + } + + public static void main(String[] args) throws Exception { + System.out.println(" _ _ \n" + + " | | (_) \n" + + " ___ _ _ | |__ _ __ ___ __ _ _ __ _ _ __ ___ \n" + + " / __|| | | || '_ \\ | '_ ` _ \\ / _` || '__|| || '_ \\ / _ \\\n" + + " \\__ \\| |_| || |_) || | | | | || (_| || | | || | | || __/\n" + + " |___/ \\__,_||_.__/ |_| |_| |_| \\__,_||_| |_||_| |_| \\___|\n" + + " \n" + + " ?\n" + + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~|^\"~~~~~~~~~~~~~~~~~~~~~~~~~o~~~~~~~~~~~\n" + + " o | o __o\n" + + " o | o |X__>\n" + + " ___o | __o\n" + + " (X___>-- __|__ |X__> o\n" + + " | \\ __o\n" + + " | \\ |X__>\n" + + " _______________________|_______\\________________\n" + + " < \\____________ _\n" + + " \\ \\ (_)\n" + + " \\ O O O >=)\n" + + " \\__________________________________________________________/ (_)\n" + + "\n"); + + if (CliUtils.argsForHelp(args)) { + printHelp(); + System.exit(0); + } + + if (args.length < 2) { + LOG.error("Bad parameters specified."); + printHelp(); + System.exit(-1); + } + + String[] moduleArgs = Arrays.copyOfRange(args, 2, args.length); + ClientContext clientContext = getClientContext(); + + if (args[0].equals("job")) { + String subCmd = args[1]; + if (subCmd.equals(CliConstants.RUN)) { + new RunJobCli(clientContext).run(moduleArgs); + } else if (subCmd.equals(CliConstants.SHOW)) { + new ShowJobCli(clientContext).run(moduleArgs); + } else { + printHelp(); + throw new IllegalArgumentException("Unknown option for job"); + } + } else { + printHelp(); + throw new IllegalArgumentException("Bad parameters "); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java new file mode 100644 index 00000000000..d0958a8fe93 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliConstants.java @@ 
-0,0 +1,48 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.client.cli; + +/* + * NOTE: use lowercase + "_" for the option name + */ +public class CliConstants { + public static final String RUN = "run"; + public static final String SERVE = "serve"; + public static final String LIST = "list"; + public static final String SHOW = "show"; + public static final String NAME = "name"; + public static final String INPUT_PATH = "input_path"; + public static final String CHECKPOINT_PATH = "checkpoint_path"; + public static final String SAVED_MODEL_PATH = "saved_model_path"; + public static final String N_WORKERS = "num_workers"; + public static final String N_SERVING_TASKS = "num_serving_tasks"; + public static final String N_PS = "num_ps"; + public static final String WORKER_RES = "worker_resources"; + public static final String SERVING_RES = "serving_resources"; + public static final String PS_RES = "ps_resources"; + public static final String DOCKER_IMAGE = "docker_image"; + public static final String QUEUE = "queue"; + public static final String TENSORBOARD = "tensorboard"; + public static final String WORKER_LAUNCH_CMD = "worker_launch_cmd"; + public static final String SERVING_LAUNCH_CMD = "serving_launch_cmd"; + public static final String PS_LAUNCH_CMD = "ps_launch_cmd"; + public static final String ENV = "env"; + public static final String VERBOSE = "verbose"; + public static final String SERVING_FRAMEWORK = "serving_framework"; + public static final String STOP = "stop"; + public static final String WAIT_JOB_FINISH = "wait_job_finish"; + public static final String PS_DOCKER_IMAGE = "ps_docker_image"; + public static final String WORKER_DOCKER_IMAGE = "worker_docker_image"; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliUtils.java new file mode 100644 index 00000000000..6dd3e4d8afe --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/CliUtils.java @@ -0,0 +1,174 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.client.cli; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters; +import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CliUtils { + private final static String RES_PATTERN = "^[^=]+=\\d+\\s?\\w*$"; + /** + * Replace patterns inside cli + * + * @return launch command after pattern replace + */ + public static String replacePatternsInLaunchCommand(String specifiedCli, + RunJobParameters jobRunParameters, + RemoteDirectoryManager directoryManager) throws IOException { + String jobDir = jobRunParameters.getCheckpointPath(); + if (null == jobDir) { + jobDir = directoryManager.getJobCheckpointDir(jobRunParameters.getName(), + true).toString(); + } + + String input = jobRunParameters.getInputPath(); + String savedModelDir = jobRunParameters.getSavedModelPath(); + if (null == savedModelDir) { + savedModelDir = jobDir; + } + + Map replacePattern = new HashMap<>(); + if (jobDir != null) { + replacePattern.put("%" + CliConstants.CHECKPOINT_PATH + "%", jobDir); + } + if (input != null) { + replacePattern.put("%" + CliConstants.INPUT_PATH + "%", input); + } + if (savedModelDir != null) { + replacePattern.put("%" + CliConstants.SAVED_MODEL_PATH + "%", + savedModelDir); + } + + String newCli = specifiedCli; + for (Map.Entry replace : replacePattern.entrySet()) { + newCli = newCli.replace(replace.getKey(), replace.getValue()); + } + + return newCli; + } + + // TODO, this duplicated to Client of distributed shell, should cleanup + private static Map parseResourcesString(String resourcesStr) { + Map resources = new HashMap<>(); + + // Ignore the grouping "[]" + if (resourcesStr.startsWith("[")) { + resourcesStr = resourcesStr.substring(1); + } + if (resourcesStr.endsWith("]")) { + resourcesStr = resourcesStr.substring(0, resourcesStr.length()); + } + + for (String resource : resourcesStr.trim().split(",")) { + resource = resource.trim(); + if (!resource.matches(RES_PATTERN)) { + throw new IllegalArgumentException("\"" + resource + "\" is not a " + + "valid resource type/amount pair. " + + "Please provide key=amount pairs separated by commas."); + } + String[] splits = resource.split("="); + String key = splits[0], value = splits[1]; + String units = ResourceUtils.getUnits(value); + + String valueWithoutUnit = value.substring(0, value.length() - units.length()).trim(); + Long resourceValue = Long.valueOf(valueWithoutUnit); + + // Convert commandline unit to standard YARN unit. 
+ if (units.equals("M") || units.equals("m")) { + units = "Mi"; + } else if (units.equals("G") || units.equals("g")) { + units = "Gi"; + } else if (units.isEmpty()) { + // do nothing; + } else{ + throw new IllegalArgumentException("Acceptable units are M/G or empty"); + } + + // special handle memory-mb and memory + if (key.equals(ResourceInformation.MEMORY_URI)) { + if (!units.isEmpty()) { + resourceValue = UnitsConversionUtil.convert(units, "Mi", + resourceValue); + } + } + + if (key.equals("memory")) { + key = ResourceInformation.MEMORY_URI; + resourceValue = UnitsConversionUtil.convert(units, "Mi", resourceValue); + } + + // special handle gpu + if (key.equals("gpu")) { + key = ResourceInformation.GPU_URI; + } + + // special handle fpga + if (key.equals("fpga")) { + key = ResourceInformation.FPGA_URI; + } + + resources.put(key, resourceValue); + } + return resources; + } + + private static void validateResourceTypes(Iterable resourceNames, + List resourceTypes) throws IOException, YarnException { + for (String resourceName : resourceNames) { + if (!resourceTypes.stream().anyMatch( + e -> e.getName().equals(resourceName))) { + throw new ResourceNotFoundException( + "Unknown resource: " + resourceName); + } + } + } + + public static Resource createResourceFromString(String resourceStr, + List resourceTypes) throws IOException, YarnException { + Map typeToValue = parseResourcesString(resourceStr); + validateResourceTypes(typeToValue.keySet(), resourceTypes); + Resource resource = Resource.newInstance(0, 0); + for (Map.Entry entry : typeToValue.entrySet()) { + resource.setResourceValue(entry.getKey(), entry.getValue()); + } + return resource; + } + + // Is it for help? + public static boolean argsForHelp(String[] args) { + if (args == null || args.length == 0) + return true; + + if (args.length == 1) { + if (args[0].equals("-h") || args[0].equals("--help")) { + return true; + } + } + + return false; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java new file mode 100644 index 00000000000..d7dfc0dab4f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/RunJobCli.java @@ -0,0 +1,204 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.client.cli; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter; +import org.apache.hadoop.yarn.submarine.runtimes.common.StorageKeyConstants; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class RunJobCli extends AbstractCli { + private static final Logger LOG = + LoggerFactory.getLogger(RunJobCli.class); + + private Options options; + private RunJobParameters parameters = new RunJobParameters(); + + private JobSubmitter jobSubmitter; + private JobMonitor jobMonitor; + + public RunJobCli(ClientContext cliContext) { + this(cliContext, cliContext.getRuntimeFactory().getJobSubmitterInstance(), + cliContext.getRuntimeFactory().getJobMonitorInstance()); + } + + @VisibleForTesting + public RunJobCli(ClientContext cliContext, JobSubmitter jobSubmitter, + JobMonitor jobMonitor) { + super(cliContext); + options = generateOptions(); + this.jobSubmitter = jobSubmitter; + this.jobMonitor = jobMonitor; + } + + public void printUsages() { + new HelpFormatter().printHelp("job run", options); + } + + private Options generateOptions() { + Options options = new Options(); + options.addOption(CliConstants.NAME, true, "Name of the job"); + options.addOption(CliConstants.INPUT_PATH, true, + "Input of the job, could be local or other FS directory"); + options.addOption(CliConstants.CHECKPOINT_PATH, true, + "Training output directory of the job, " + + "could be local or other FS directory. This typically includes " + + "checkpoint files and exported model "); + options.addOption(CliConstants.SAVED_MODEL_PATH, true, + "Model exported path (savedmodel) of the job, which is needed when " + + "exported model is not placed under ${checkpoint_path}" + + "could be local or other FS directory. 
This will be used to serve."); + options.addOption(CliConstants.N_WORKERS, true, + "Numnber of worker tasks of the job, by default it's 1"); + options.addOption(CliConstants.N_PS, true, + "Number of PS tasks of the job, by default it's 0"); + options.addOption(CliConstants.WORKER_RES, true, + "Resource of each worker, for example " + + "memory-mb=2048,vcores=2,yarn.io/gpu=2"); + options.addOption(CliConstants.PS_RES, true, + "Resource of each PS, for example " + + "memory-mb=2048,vcores=2,yarn.io/gpu=2"); + options.addOption(CliConstants.DOCKER_IMAGE, true, "Docker image name/tag"); + options.addOption(CliConstants.QUEUE, true, + "Name of queue to run the job, by default it uses default queue"); + options.addOption(CliConstants.TENSORBOARD, true, + "Should we run TensorBoard" + " for this job? By default it's true"); + options.addOption(CliConstants.WORKER_LAUNCH_CMD, true, + "Commandline of worker, arguments will be " + + "directly used to launch the worker"); + options.addOption(CliConstants.PS_LAUNCH_CMD, true, + "Commandline of worker, arguments will be " + + "directly used to launch the PS"); + options.addOption(CliConstants.ENV, true, + "Common environment variable of worker/ps"); + options.addOption(CliConstants.VERBOSE, false, + "Print verbose log for troubleshooting"); + options.addOption(CliConstants.WAIT_JOB_FINISH, false, + "Specified when user want to wait the job finish"); + options.addOption(CliConstants.PS_DOCKER_IMAGE, true, + "Specify docker image for PS, when this is not specified, PS uses --" + + CliConstants.DOCKER_IMAGE + " as default."); + options.addOption(CliConstants.WORKER_DOCKER_IMAGE, true, + "Specify docker image for WORKER, when this is not specified, WORKER " + + "uses --" + CliConstants.DOCKER_IMAGE + " as default."); + options.addOption("h", "help", false, "Print help"); + return options; + } + + private void replacePatternsInParameters() throws IOException { + if (parameters.getPSLaunchCmd() != null && !parameters.getPSLaunchCmd() + .isEmpty()) { + String afterReplace = CliUtils.replacePatternsInLaunchCommand( + parameters.getPSLaunchCmd(), parameters, + clientContext.getRemoteDirectoryManager()); + parameters.setPSLaunchCmd(afterReplace); + } + + if (parameters.getWorkerLaunchCmd() != null && !parameters + .getWorkerLaunchCmd().isEmpty()) { + String afterReplace = CliUtils.replacePatternsInLaunchCommand( + parameters.getWorkerLaunchCmd(), parameters, + clientContext.getRemoteDirectoryManager()); + parameters.setWorkerLaunchCmd(afterReplace); + } + } + + private void parseCommandLineAndGetRunJobParameters(String[] args) + throws ParseException, IOException, YarnException { + try { + // Do parsing + GnuParser parser = new GnuParser(); + CommandLine cli = parser.parse(options, args); + parameters.updateParametersByParsedCommandline(cli, options, clientContext); + } catch (ParseException e) { + LOG.error("Exception in parse:", e.getMessage()); + printUsages(); + throw e; + } + + // replace patterns + replacePatternsInParameters(); + } + + private void storeJobInformation(String jobName, ApplicationId applicationId, + String[] args) throws IOException { + Map jobInfo = new HashMap<>(); + jobInfo.put(StorageKeyConstants.JOB_NAME, jobName); + jobInfo.put(StorageKeyConstants.APPLICATION_ID, applicationId.toString()); + + if (parameters.getCheckpointPath() != null) { + jobInfo.put(StorageKeyConstants.CHECKPOINT_PATH, + parameters.getCheckpointPath()); + } + if (parameters.getInputPath() != null) { + jobInfo.put(StorageKeyConstants.INPUT_PATH, + 
parameters.getInputPath()); + } + if (parameters.getSavedModelPath() != null) { + jobInfo.put(StorageKeyConstants.SAVED_MODEL_PATH, + parameters.getSavedModelPath()); + } + + String joinedArgs = String.join(" ", args); + jobInfo.put(StorageKeyConstants.JOB_RUN_ARGS, joinedArgs); + clientContext.getRuntimeFactory().getSubmarineStorage().addNewJob(jobName, + jobInfo); + } + + @Override + public int run(String[] args) + throws ParseException, IOException, YarnException, InterruptedException, + SubmarineException { + if (CliUtils.argsForHelp(args)) { + printUsages(); + return 0; + } + + parseCommandLineAndGetRunJobParameters(args); + ApplicationId applicationId = this.jobSubmitter.submitJob(parameters); + storeJobInformation(parameters.getName(), applicationId, args); + if (parameters.isWaitJobFinish()) { + this.jobMonitor.waitTrainingFinal(parameters.getName()); + } + + return 0; + } + + @VisibleForTesting + public JobSubmitter getJobSubmitter() { + return jobSubmitter; + } + + @VisibleForTesting + RunJobParameters getRunJobParameters() { + return parameters; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/ShowJobCli.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/ShowJobCli.java new file mode 100644 index 00000000000..6b76192af11 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/ShowJobCli.java @@ -0,0 +1,125 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.client.cli; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.param.ShowJobParameters; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException; +import org.apache.hadoop.yarn.submarine.runtimes.common.StorageKeyConstants; +import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Map; + +public class ShowJobCli extends AbstractCli { + private static final Logger LOG = LoggerFactory.getLogger(ShowJobCli.class); + + private Options options; + private ShowJobParameters parameters = new ShowJobParameters(); + + public ShowJobCli(ClientContext cliContext) { + super(cliContext); + options = generateOptions(); + } + + public void printUsages() { + new HelpFormatter().printHelp("job show", options); + } + + private Options generateOptions() { + Options options = new Options(); + options.addOption(CliConstants.NAME, true, "Name of the job"); + options.addOption("h", "help", false, "Print help"); + return options; + } + + private void parseCommandLineAndGetShowJobParameters(String[] args) + throws IOException, YarnException { + // Do parsing + GnuParser parser = new GnuParser(); + CommandLine cli; + try { + cli = parser.parse(options, args); + parameters.updateParametersByParsedCommandline(cli, options, + clientContext); + } catch (ParseException e) { + printUsages(); + } + } + + private void printIfNotNull(String keyForPrint, String keyInStorage, + Map jobInfo) { + if (jobInfo.containsKey(keyInStorage)) { + System.out.println("\t" + keyForPrint + ": " + jobInfo.get(keyInStorage)); + } + } + + private void printJobInfo(Map jobInfo) { + System.out.println("Job Meta Info:"); + printIfNotNull("Application Id", StorageKeyConstants.APPLICATION_ID, + jobInfo); + printIfNotNull("Input Path", StorageKeyConstants.INPUT_PATH, jobInfo); + printIfNotNull("Saved Model Path", StorageKeyConstants.SAVED_MODEL_PATH, + jobInfo); + printIfNotNull("Checkpoint Path", StorageKeyConstants.CHECKPOINT_PATH, + jobInfo); + printIfNotNull("Run Parameters", StorageKeyConstants.JOB_RUN_ARGS, + jobInfo); + } + + @VisibleForTesting + protected void getAndPrintJobInfo() throws IOException { + SubmarineStorage storage = + clientContext.getRuntimeFactory().getSubmarineStorage(); + + Map jobInfo = null; + try { + jobInfo = storage.getJobInfoByName(parameters.getName()); + } catch (IOException e) { + LOG.error("Failed to retrieve job info", e); + throw e; + } + + printJobInfo(jobInfo); + } + + @VisibleForTesting + public ShowJobParameters getParameters() { + return parameters; + } + + @Override + public int run(String[] args) + throws ParseException, IOException, YarnException, InterruptedException, 
+ SubmarineException { + if (CliUtils.argsForHelp(args)) { + printUsages(); + return 0; + } + + parseCommandLineAndGetShowJobParameters(args); + getAndPrintJobInfo(); + return 0; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/BaseParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/BaseParameters.java new file mode 100644 index 00000000000..609e8683088 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/BaseParameters.java @@ -0,0 +1,56 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.client.cli.param; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.CliConstants; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; + +import java.io.IOException; + +/** + * Base class of all parameters. + */ +public abstract class BaseParameters { + private String name; + + public void updateParametersByParsedCommandline(CommandLine parsedCommandLine, + Options options, ClientContext clientContext) + throws ParseException, IOException, YarnException { + String name = parsedCommandLine.getOptionValue(CliConstants.NAME); + if (name == null) { + throw new ParseException("--name is absent"); + } + + if (parsedCommandLine.hasOption(CliConstants.VERBOSE)) { + SubmarineLogs.verboseOn(); + } + + this.setName(name); + } + + public String getName() { + return name; + } + + public BaseParameters setName(String name) { + this.name = name; + return this; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java new file mode 100644 index 00000000000..6cab9e32355 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunJobParameters.java @@ -0,0 +1,222 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
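ShowJobCli above resolves --name against SubmarineStorage and prints the stored job metadata. A hedged sketch of driving it programmatically, using the same context wiring that Cli.getClientContext() performs (the job name is made up, and the lookup assumes the job was previously submitted through the same runtime's storage):

```java
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.submarine.client.cli.ShowJobCli;
import org.apache.hadoop.yarn.submarine.common.ClientContext;
import org.apache.hadoop.yarn.submarine.common.fs.DefaultRemoteDirectoryManager;
import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory;

public class ShowJobExample {
  public static void main(String[] args) throws Exception {
    // Same wiring Cli.getClientContext() does before dispatching a sub-command.
    ClientContext ctx = new ClientContext();
    ctx.setConfiguration(new YarnConfiguration());
    ctx.setRemoteDirectoryManager(new DefaultRemoteDirectoryManager(ctx));
    ctx.setRuntimeFactory(RuntimeFactory.getRuntimeFactory(ctx));

    // Equivalent to the command line "job show --name tf-example".
    new ShowJobCli(ctx).run(new String[] {"--name", "tf-example"});
  }
}
```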
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.client.cli.param; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.CliConstants; +import org.apache.hadoop.yarn.submarine.client.cli.CliUtils; +import org.apache.hadoop.yarn.submarine.common.ClientContext; + +import java.io.IOException; + +/** + * Parameters used to run a job + */ +public class RunJobParameters extends RunParameters { + private String input; + private String checkpointPath; + + private int numWorkers; + private int numPS; + private Resource workerResource; + private Resource psResource; + private boolean tensorboardEnabled; + private String workerLaunchCmd; + private String psLaunchCmd; + + private String psDockerImage = null; + private String workerDockerImage = null; + + private boolean waitJobFinish = false; + private boolean distributed = false; + + @Override + public void updateParametersByParsedCommandline(CommandLine parsedCommandLine, + Options options, ClientContext clientContext) + throws ParseException, IOException, YarnException { + + String input = parsedCommandLine.getOptionValue(CliConstants.INPUT_PATH); + String jobDir = parsedCommandLine.getOptionValue(CliConstants.CHECKPOINT_PATH); + int nWorkers = 1; + if (parsedCommandLine.getOptionValue(CliConstants.N_WORKERS) != null) { + nWorkers = Integer.parseInt( + parsedCommandLine.getOptionValue(CliConstants.N_WORKERS)); + } + + int nPS = 0; + if (parsedCommandLine.getOptionValue(CliConstants.N_PS) != null) { + nPS = Integer.parseInt( + parsedCommandLine.getOptionValue(CliConstants.N_PS)); + } + + // Check #workers and #ps. 
+ // When distributed training is required + if (nWorkers >= 2 && nPS > 0) { + distributed = true; + } else if (nWorkers == 1 && nPS > 0) { + throw new ParseException("Only specified one worker but non-zero PS, " + + "please double check."); + } + + String workerResourceStr = parsedCommandLine.getOptionValue( + CliConstants.WORKER_RES); + if (workerResourceStr == null) { + throw new ParseException("--" + CliConstants.WORKER_RES + " is absent."); + } + Resource workerResource = CliUtils.createResourceFromString( + workerResourceStr, + clientContext.getOrCreateYarnClient().getResourceTypeInfo()); + + Resource psResource = null; + if (nPS > 0) { + String psResourceStr = parsedCommandLine.getOptionValue(CliConstants.PS_RES); + if (psResourceStr == null) { + throw new ParseException("--" + CliConstants.PS_RES + " is absent."); + } + psResource = CliUtils.createResourceFromString(psResourceStr, + clientContext.getOrCreateYarnClient().getResourceTypeInfo()); + } + + boolean tensorboard = false; + if (parsedCommandLine.getOptionValue(CliConstants.TENSORBOARD) != null) { + tensorboard = Boolean.parseBoolean( + parsedCommandLine.getOptionValue(CliConstants.TENSORBOARD)); + } + + if (parsedCommandLine.hasOption(CliConstants.WAIT_JOB_FINISH)) { + this.waitJobFinish = true; + } + + psDockerImage = parsedCommandLine.getOptionValue( + CliConstants.PS_DOCKER_IMAGE); + workerDockerImage = parsedCommandLine.getOptionValue( + CliConstants.WORKER_DOCKER_IMAGE); + + String workerLaunchCmd = parsedCommandLine.getOptionValue( + CliConstants.WORKER_LAUNCH_CMD); + String psLaunchCommand = parsedCommandLine.getOptionValue( + CliConstants.PS_LAUNCH_CMD); + + this.setInputPath(input).setCheckpointPath(jobDir).setNumPS(nPS).setNumWorkers(nWorkers) + .setPSLaunchCmd(psLaunchCommand).setWorkerLaunchCmd(workerLaunchCmd) + .setPsResource(psResource).setWorkerResource(workerResource) + .setTensorboardEnabled(tensorboard); + + super.updateParametersByParsedCommandline(parsedCommandLine, + options, clientContext); + } + + public String getInputPath() { + return input; + } + + public RunJobParameters setInputPath(String input) { + this.input = input; + return this; + } + + public String getCheckpointPath() { + return checkpointPath; + } + + public RunJobParameters setCheckpointPath(String checkpointPath) { + this.checkpointPath = checkpointPath; + return this; + } + + public int getNumWorkers() { + return numWorkers; + } + + public RunJobParameters setNumWorkers(int numWorkers) { + this.numWorkers = numWorkers; + return this; + } + + public int getNumPS() { + return numPS; + } + + public RunJobParameters setNumPS(int numPS) { + this.numPS = numPS; + return this; + } + + public Resource getWorkerResource() { + return workerResource; + } + + public RunJobParameters setWorkerResource(Resource workerResource) { + this.workerResource = workerResource; + return this; + } + + public Resource getPsResource() { + return psResource; + } + + public RunJobParameters setPsResource(Resource psResource) { + this.psResource = psResource; + return this; + } + + public boolean isTensorboardEnabled() { + return tensorboardEnabled; + } + + public RunJobParameters setTensorboardEnabled(boolean tensorboardEnabled) { + this.tensorboardEnabled = tensorboardEnabled; + return this; + } + + public String getWorkerLaunchCmd() { + return workerLaunchCmd; + } + + public RunJobParameters setWorkerLaunchCmd(String workerLaunchCmd) { + this.workerLaunchCmd = workerLaunchCmd; + return this; + } + + public String getPSLaunchCmd() { + return psLaunchCmd; + } + 
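The --worker_resources / --ps_resources strings above go through CliUtils.createResourceFromString(), which normalizes M/G to Mi/Gi, maps memory to memory-mb and gpu/fpga to their yarn.io URIs, and rejects resource types the cluster does not register; RunJobParameters also marks the job distributed only when num_workers >= 2 and num_ps > 0. A hedged worked example (the values are illustrative and it assumes a reachable ResourceManager that registers yarn.io/gpu):

```java
import java.util.List;

import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.submarine.client.cli.CliUtils;
import org.apache.hadoop.yarn.submarine.common.ClientContext;

public class ResourceStringExample {
  public static void main(String[] args) throws Exception {
    ClientContext ctx = new ClientContext();
    ctx.setConfiguration(new YarnConfiguration());

    // Resource types as registered on the cluster (needs a running RM).
    List<ResourceTypeInfo> types =
        ctx.getOrCreateYarnClient().getResourceTypeInfo();

    // "4G" is normalized to Gi and then converted to Mi for memory-mb, so the
    // parsed Resource carries memory-mb=4096, vcores=4, yarn.io/gpu=2.
    Resource workerRes = CliUtils.createResourceFromString(
        "memory-mb=4G,vcores=4,yarn.io/gpu=2", types);
    System.out.println(workerRes);
  }
}
```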
+ public RunJobParameters setPSLaunchCmd(String psLaunchCmd) { + this.psLaunchCmd = psLaunchCmd; + return this; + } + + public boolean isWaitJobFinish() { + return waitJobFinish; + } + + + public String getPsDockerImage() { + return psDockerImage; + } + + public String getWorkerDockerImage() { + return workerDockerImage; + } + + public boolean isDistributed() { + return distributed; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunParameters.java new file mode 100644 index 00000000000..28884d8d081 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/RunParameters.java @@ -0,0 +1,103 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.client.cli.param; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.CliConstants; +import org.apache.hadoop.yarn.submarine.common.ClientContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Parameters required to run anything on cluster. 
Such as run job / serve model + */ +public abstract class RunParameters extends BaseParameters { + private String savedModelPath; + private String dockerImageName; + private List envars = new ArrayList<>(); + private String queue; + + @Override + public void updateParametersByParsedCommandline(CommandLine parsedCommandLine, + Options options, ClientContext clientContext) throws ParseException, + IOException, YarnException { + String savedModelPath = parsedCommandLine.getOptionValue( + CliConstants.SAVED_MODEL_PATH); + this.setSavedModelPath(savedModelPath); + + // Envars + List envarsList = new ArrayList<>(); + String[] envars = parsedCommandLine.getOptionValues(CliConstants.ENV); + if (envars != null) { + for (String envar : envars) { + envarsList.add(envar); + } + } + this.setEnvars(envarsList); + + String queue = parsedCommandLine.getOptionValue( + CliConstants.QUEUE); + this.setQueue(queue); + + String dockerImage = parsedCommandLine.getOptionValue( + CliConstants.DOCKER_IMAGE); + this.setDockerImageName(dockerImage); + + super.updateParametersByParsedCommandline(parsedCommandLine, + options, clientContext); + } + + public String getQueue() { + return queue; + } + + public RunParameters setQueue(String queue) { + this.queue = queue; + return this; + } + + public String getDockerImageName() { + return dockerImageName; + } + + public RunParameters setDockerImageName(String dockerImageName) { + this.dockerImageName = dockerImageName; + return this; + } + + + public List getEnvars() { + return envars; + } + + public RunParameters setEnvars(List envars) { + this.envars = envars; + return this; + } + + public String getSavedModelPath() { + return savedModelPath; + } + + public RunParameters setSavedModelPath(String savedModelPath) { + this.savedModelPath = savedModelPath; + return this; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/ShowJobParameters.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/ShowJobParameters.java new file mode 100644 index 00000000000..e5f19d663f3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/client/cli/param/ShowJobParameters.java @@ -0,0 +1,18 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
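RunParameters above gathers the options shared by every action: repeated --env values are collected via CommandLine.getOptionValues(), alongside --queue, --docker_image and --saved_model_path. A small hedged sketch of that accumulation behaviour using the same commons-cli calls (the environment values are made up):

```java
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.hadoop.yarn.submarine.client.cli.CliConstants;

public class EnvOptionExample {
  public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(CliConstants.ENV, true,
        "Common environment variable of worker/ps");

    CommandLine cli = new GnuParser().parse(options, new String[] {
        "--env", "DOCKER_JAVA_HOME=/usr/lib/jvm/default",
        "--env", "HADOOP_CONF_DIR=/etc/hadoop/conf"});

    // getOptionValues returns every occurrence; RunParameters copies them
    // into its envars list in order.
    for (String envar : cli.getOptionValues(CliConstants.ENV)) {
      System.out.println(envar);
    }
  }
}
```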
+ */ + +package org.apache.hadoop.yarn.submarine.client.cli.param; + +public class ShowJobParameters extends BaseParameters { +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/ClientContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/ClientContext.java new file mode 100644 index 00000000000..31a8b1b3268 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/ClientContext.java @@ -0,0 +1,77 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.submarine.common.conf.SubmarineConfiguration; +import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager; +import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory; + +public class ClientContext { + private Configuration yarnConf = new YarnConfiguration(); + + private RemoteDirectoryManager remoteDirectoryManager; + private YarnClient yarnClient; + private Configuration submarineConfig; + private RuntimeFactory runtimeFactory; + + public ClientContext() { + submarineConfig = new SubmarineConfiguration(); + } + + public synchronized YarnClient getOrCreateYarnClient() { + if (yarnClient == null) { + yarnClient = YarnClient.createYarnClient(); + yarnClient.init(yarnConf); + yarnClient.start(); + } + return yarnClient; + } + + public Configuration getYarnConfig() { + return yarnConf; + } + + public void setConfiguration(Configuration conf) { + this.yarnConf = conf; + } + + public RemoteDirectoryManager getRemoteDirectoryManager() { + return remoteDirectoryManager; + } + + public void setRemoteDirectoryManager( + RemoteDirectoryManager remoteDirectoryManager) { + this.remoteDirectoryManager = remoteDirectoryManager; + } + + public Configuration getSubmarineConfig() { + return submarineConfig; + } + + public void setSubmarineConfig(Configuration submarineConfig) { + this.submarineConfig = submarineConfig; + } + + public RuntimeFactory getRuntimeFactory() { + return runtimeFactory; + } + + public void setRuntimeFactory(RuntimeFactory runtimeFactory) { + this.runtimeFactory = runtimeFactory; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/Envs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/Envs.java new file mode 100644 index 00000000000..a1d80dbc294 --- /dev/null +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/Envs.java @@ -0,0 +1,27 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common; + +public class Envs { + public static final String TASK_TYPE_ENV = "_TASK_TYPE"; + public static final String TASK_INDEX_ENV = "_TASK_INDEX"; + + /* + * HDFS/HADOOP-related configs + */ + public static final String HADOOP_HDFS_HOME = "HADOOP_HDFS_HOME"; + public static final String JAVA_HOME = "JAVA_HOME"; + public static final String HADOOP_CONF_DIR = "HADOOP_CONF_DIR"; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobComponentStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobComponentStatus.java new file mode 100644 index 00000000000..22468c250f1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobComponentStatus.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
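Envs above only names the environment variables; their values are read back inside the launched task container. A tiny hedged sketch of that container-side lookup (the runtime is expected to populate them, e.g. _TASK_TYPE=worker and _TASK_INDEX=0; outside a container they are simply null):

```java
import org.apache.hadoop.yarn.submarine.common.Envs;

public class TaskEnvExample {
  public static void main(String[] args) {
    // Populated by the runtime inside a task container; null elsewhere.
    String taskType = System.getenv(Envs.TASK_TYPE_ENV);
    String taskIndex = System.getenv(Envs.TASK_INDEX_ENV);
    System.out.println("task type=" + taskType + ", index=" + taskIndex);
  }
}
```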
+ */ + +package org.apache.hadoop.yarn.submarine.common.api; + +import org.apache.hadoop.yarn.service.api.records.Component; +import org.apache.hadoop.yarn.service.api.records.Container; +import org.apache.hadoop.yarn.service.api.records.ContainerState; + +/** + * Status of component of training job + */ +public class JobComponentStatus { + private String compName; + private long numReadyContainers = 0; + private long numRunningButUnreadyContainers = 0; + private long totalAskedContainers; + + public JobComponentStatus(String compName, long nReadyContainers, + long nRunningButUnreadyContainers, long totalAskedContainers) { + this.compName = compName; + this.numReadyContainers = nReadyContainers; + this.numRunningButUnreadyContainers = nRunningButUnreadyContainers; + this.totalAskedContainers = totalAskedContainers; + } + + public String getCompName() { + return compName; + } + + public void setCompName(String compName) { + this.compName = compName; + } + + public long getNumReadyContainers() { + return numReadyContainers; + } + + public void setNumReadyContainers(long numReadyContainers) { + this.numReadyContainers = numReadyContainers; + } + + public long getNumRunningButUnreadyContainers() { + return numRunningButUnreadyContainers; + } + + public void setNumRunningButUnreadyContainers( + long numRunningButUnreadyContainers) { + this.numRunningButUnreadyContainers = numRunningButUnreadyContainers; + } + + public long getTotalAskedContainers() { + return totalAskedContainers; + } + + public void setTotalAskedContainers(long totalAskedContainers) { + this.totalAskedContainers = totalAskedContainers; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobState.java new file mode 100644 index 00000000000..eef273a5607 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobState.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
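JobComponentStatus above is the per-component readiness counter behind the status printout. A quick hedged illustration of what one entry for a worker component might hold (the numbers are made up):

```java
import org.apache.hadoop.yarn.submarine.common.api.JobComponentStatus;

public class ComponentStatusExample {
  public static void main(String[] args) {
    // 3 of 4 requested worker containers are READY, 1 is running but not ready yet.
    JobComponentStatus worker = new JobComponentStatus("worker", 3, 1, 4);
    System.out.println("[" + worker.getCompName() + "] Ready="
        + worker.getNumReadyContainers()
        + " Running-But-Non-Ready=" + worker.getNumRunningButUnreadyContainers()
        + " | Asked=" + worker.getTotalAskedContainers());
  }
}
```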
+ */ + +package org.apache.hadoop.yarn.submarine.common.api; + +/** + * State of training job + */ +public enum JobState { + /** + * Job accepted by scheduler and start running + */ + RUNNING, + + /** + * Job killed by user + */ + KILLED, + + /** + * Job failed + */ + FAILED, + + /** + * Job succeeded + */ + SUCCEEDED, + + /** + * Job paused by user + */ + PAUSED; + + public static boolean isFinal(JobState state) { + return state == KILLED || state == SUCCEEDED || state == FAILED; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobStatus.java new file mode 100644 index 00000000000..6e390f3dcc8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/JobStatus.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.submarine.common.api; + +import java.io.PrintStream; +import java.time.Instant; +import java.util.List; + +/** + * Status of training job. 
+ */ +public class JobStatus { + + protected String jobName; + protected JobState state; + protected String tensorboardLink = "N/A"; + protected List componentStatus; + + public void nicePrint(PrintStream out) { + out.println( + "Job Name=" + this.jobName + ", status=" + state.name() + " time=" + + Instant.now()); + if (JobState.isFinal(this.state)) { + return; + } + + if (tensorboardLink.startsWith("http")) { + out.println(" Tensorboard link: " + tensorboardLink); + } + + out.println(" Components:"); + for (JobComponentStatus comp : componentStatus) { + out.println(" [" + comp.getCompName() + "] Ready=" + comp + .getNumReadyContainers() + " + Running-But-Non-Ready=" + comp + .getNumRunningButUnreadyContainers() + " | Asked=" + comp + .getTotalAskedContainers()); + } + out.println("------------------"); + } + + public JobState getState() { + return state; + } + + public String getTensorboardLink() { + return tensorboardLink; + } + + public List getComponentStatus() { + return componentStatus; + } + + public String getJobName() { + return jobName; + } + + public void setJobName(String jobName) { + this.jobName = jobName; + } + + public void setState(JobState state) { + this.state = state; + } + + public void setTensorboardLink(String tensorboardLink) { + this.tensorboardLink = tensorboardLink; + } + + public void setComponentStatus(List componentStatus) { + this.componentStatus = componentStatus; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/TaskType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/TaskType.java new file mode 100644 index 00000000000..535d994b55a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/TaskType.java @@ -0,0 +1,32 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
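JobState.isFinal() and JobStatus.nicePrint() are what a wait-for-completion loop needs: keep printing component readiness while the job is RUNNING or PAUSED, and stop once it reaches KILLED, FAILED or SUCCEEDED. A hedged sketch of such a loop; the StatusSource interface is a stand-in for however the concrete JobMonitor fetches status, not an API from this patch:

```java
import org.apache.hadoop.yarn.submarine.common.api.JobState;
import org.apache.hadoop.yarn.submarine.common.api.JobStatus;

public class WaitForFinalState {

  /** Stand-in for the runtime's status lookup; not part of the patch. */
  interface StatusSource {
    JobStatus getJobStatus(String jobName) throws Exception;
  }

  static JobStatus waitUntilFinal(StatusSource source, String jobName)
      throws Exception {
    JobStatus status = source.getJobStatus(jobName);
    while (!JobState.isFinal(status.getState())) {
      status.nicePrint(System.out); // per-component Ready / Running-But-Non-Ready counts
      Thread.sleep(30_000L);        // poll every 30 seconds
      status = source.getJobStatus(jobName);
    }
    return status;
  }
}
```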
+ */ + +package org.apache.hadoop.yarn.submarine.common.api; + +public enum TaskType { + PRIMARY_WORKER("master"), + WORKER("worker"), + PS("ps"), + TENSORBOARD("tensorboard"); + + private String compName; + + TaskType(String compName) { + this.compName = compName; + } + + public String getComponentName() { + return compName; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobComponentStatusBuilder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobComponentStatusBuilder.java new file mode 100644 index 00000000000..fbefe6b31ab --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobComponentStatusBuilder.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.submarine.common.api.builder; + +import org.apache.hadoop.yarn.service.api.records.Component; +import org.apache.hadoop.yarn.service.api.records.Container; +import org.apache.hadoop.yarn.service.api.records.ContainerState; +import org.apache.hadoop.yarn.submarine.common.api.JobComponentStatus; + +public class JobComponentStatusBuilder { + public static JobComponentStatus fromServiceComponent(Component component) { + long totalAskedContainers = component.getNumberOfContainers(); + int numReadyContainers = 0; + int numRunningButUnreadyContainers = 0; + String compName = component.getName(); + + for (Container c : component.getContainers()) { + if (c.getState() == ContainerState.READY) { + numReadyContainers++; + } else if (c.getState() == ContainerState.RUNNING_BUT_UNREADY) { + numRunningButUnreadyContainers++; + } + } + + return new JobComponentStatus(compName, numReadyContainers, + numRunningButUnreadyContainers, totalAskedContainers); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobStatusBuilder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobStatusBuilder.java new file mode 100644 index 00000000000..2f7971e09b4 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/api/builder/JobStatusBuilder.java @@ -0,0 +1,64 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
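TaskType above fixes the YARN service component name used for each role, which is the name the rest of the runtime keys on. A quick illustration of the mapping:

```java
import org.apache.hadoop.yarn.submarine.common.api.TaskType;

public class TaskTypeExample {
  public static void main(String[] args) {
    // PRIMARY_WORKER -> master, WORKER -> worker, PS -> ps, TENSORBOARD -> tensorboard
    for (TaskType t : TaskType.values()) {
      System.out.println(t.name() + " -> " + t.getComponentName());
    }
  }
}
```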
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common.api.builder; + +import org.apache.hadoop.yarn.service.api.records.Component; +import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.api.records.ServiceState; +import org.apache.hadoop.yarn.submarine.common.api.JobComponentStatus; +import org.apache.hadoop.yarn.submarine.common.api.JobState; +import org.apache.hadoop.yarn.submarine.common.api.JobStatus; + +import java.util.ArrayList; +import java.util.List; + +public class JobStatusBuilder { + public static JobStatus fromServiceSpec(Service serviceSpec) { + JobStatus status = new JobStatus(); + status.setState(fromServiceState(serviceSpec.getState())); + + // If it is a final state, return. + if (JobState.isFinal(status.getState())) { + return status; + } + + List componentStatusList = new ArrayList<>(); + + for (Component component : serviceSpec.getComponents()) { + componentStatusList.add( + JobComponentStatusBuilder.fromServiceComponent(component)); + } + status.setComponentStatus(componentStatusList); + + // TODO, handle tensorboard differently. + // status.setTensorboardLink(getTensorboardLink(serviceSpec, clientContext)); + + status.setJobName(serviceSpec.getName()); + + return status; + } + + private static JobState fromServiceState(ServiceState serviceState) { + switch (serviceState) { + case STOPPED: + // TODO, once YARN-8488 gets committed, we need to update this. + return JobState.SUCCEEDED; + case FAILED: + return JobState.FAILED; + } + + return JobState.RUNNING; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineConfiguration.java new file mode 100644 index 00000000000..c9e6b7bf6e6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineConfiguration.java @@ -0,0 +1,51 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.common.conf; + +import org.apache.hadoop.conf.Configuration; + +public class SubmarineConfiguration extends Configuration { + private static final String SUBMARINE_CONFIGURATION_FILE = "submarine.xml"; + + public SubmarineConfiguration() { + this(new Configuration(false), true); + } + + public SubmarineConfiguration(Configuration configuration) { + this(configuration, false); + } + + public SubmarineConfiguration(Configuration configuration, + boolean loadLocalConfig) { + super(configuration); + if (loadLocalConfig) { + addResource(SUBMARINE_CONFIGURATION_FILE); + } + } + + /* + * Runtime of submarine + */ + + private static final String PREFIX = "submarine."; + + public static final String RUNTIME_CLASS = PREFIX + "runtime.class"; + public static final String DEFAULT_RUNTIME_CLASS = + "org.apache.hadoop.yarn.submarine.runtimes.yarnservice.YarnServiceRuntimeFactory"; + + public void setSubmarineRuntimeClass(String runtimeClass) { + set(RUNTIME_CLASS, runtimeClass); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineLogs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineLogs.java new file mode 100644 index 00000000000..6bb3248bb5c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/conf/SubmarineLogs.java @@ -0,0 +1,31 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common.conf; + +public class SubmarineLogs { + private static volatile boolean verbose = false; + + public static boolean isVerbose() { + return SubmarineLogs.verbose; + } + + public static void verboseOn() { + SubmarineLogs.verbose = true; + } + + public static void verboseOff() { + SubmarineLogs.verbose = false; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/exception/SubmarineException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/exception/SubmarineException.java new file mode 100644 index 00000000000..b6a39b9fa8e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/exception/SubmarineException.java @@ -0,0 +1,21 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common.exception; + +public class SubmarineException extends Exception { + public SubmarineException(String msg) { + super(msg); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/exception/SubmarineRuntimeException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/exception/SubmarineRuntimeException.java new file mode 100644 index 00000000000..4fb74fd55ac --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/exception/SubmarineRuntimeException.java @@ -0,0 +1,25 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common.exception; + +public class SubmarineRuntimeException extends RuntimeException { + public SubmarineRuntimeException(String s) { + super(s); + } + + public SubmarineRuntimeException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/fs/DefaultRemoteDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/fs/DefaultRemoteDirectoryManager.java new file mode 100644 index 00000000000..fe8956a362c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/fs/DefaultRemoteDirectoryManager.java @@ -0,0 +1,84 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.common.fs; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.submarine.client.cli.CliConstants; +import org.apache.hadoop.yarn.submarine.common.ClientContext; + +import java.io.IOException; + +/** + * Manages remote directories for staging, log, etc. + * TODO, need to properly handle permission / name validation, etc. 
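+ * Layout (as implemented below, not a stable contract): relative paths are
+ * resolved against the file system's working directory, e.g.
+ * submarine/jobs/{jobName}/staging for job staging data and
+ * submarine/models/{modelName} for saved models.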
+ */ +public class DefaultRemoteDirectoryManager implements RemoteDirectoryManager { + FileSystem fs; + + public DefaultRemoteDirectoryManager(ClientContext context) { + try { + this.fs = FileSystem.get(context.getYarnConfig()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public Path getJobStagingArea(String jobName, boolean create) throws IOException { + Path staging = new Path(getJobRootFolder(jobName), "staging"); + if (create) { + createFolderIfNotExist(staging); + } + return staging; + } + + @Override + public Path getJobCheckpointDir(String jobName, boolean create) + throws IOException { + Path jobDir = new Path(getJobStagingArea(jobName, create), + CliConstants.CHECKPOINT_PATH); + if (create) { + createFolderIfNotExist(jobDir); + } + return jobDir; + } + + @Override + public Path getModelDir(String modelName, boolean create) throws IOException { + Path modelDir = new Path(new Path("submarine", "models"), modelName); + if (create) { + createFolderIfNotExist(modelDir); + } + return modelDir; + } + + @Override + public FileSystem getFileSystem() { + return fs; + } + + private Path getJobRootFolder(String jobName) throws IOException { + return new Path(new Path("submarine", "jobs"), jobName); + } + + private void createFolderIfNotExist(Path path) throws IOException { + if (!fs.exists(path)) { + if (!fs.mkdirs(path)) { + throw new IOException("Failed to create folder=" + path); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/fs/RemoteDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/fs/RemoteDirectoryManager.java new file mode 100644 index 00000000000..132b314699a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/common/fs/RemoteDirectoryManager.java @@ -0,0 +1,30 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.common.fs; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; + +public interface RemoteDirectoryManager { + Path getJobStagingArea(String jobName, boolean create) throws IOException; + + Path getJobCheckpointDir(String jobName, boolean create) throws IOException; + + Path getModelDir(String modelName, boolean create) throws IOException; + + FileSystem getFileSystem() throws IOException; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/RuntimeFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/RuntimeFactory.java new file mode 100644 index 00000000000..9c164c68c6c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/RuntimeFactory.java @@ -0,0 +1,106 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.runtimes; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.conf.SubmarineConfiguration; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineRuntimeException; +import org.apache.hadoop.yarn.submarine.runtimes.common.FSBasedSubmarineStorageImpl; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter; +import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage; +import org.apache.hadoop.yarn.submarine.runtimes.yarnservice.YarnServiceJobMonitor; +import org.apache.hadoop.yarn.submarine.runtimes.yarnservice.YarnServiceJobSubmitter; + +import java.lang.reflect.InvocationTargetException; + +public abstract class RuntimeFactory { + protected ClientContext clientContext; + private JobSubmitter jobSubmitter; + private JobMonitor jobMonitor; + private SubmarineStorage submarineStorage; + + public RuntimeFactory(ClientContext clientContext) { + this.clientContext = clientContext; + } + + public static RuntimeFactory getRuntimeFactory( + ClientContext clientContext) { + Configuration submarineConfiguration = + clientContext.getSubmarineConfig(); + String runtimeClass = submarineConfiguration.get( + SubmarineConfiguration.RUNTIME_CLASS, + SubmarineConfiguration.DEFAULT_RUNTIME_CLASS); + + try { + Class runtimeClazz = Class.forName(runtimeClass); + if (RuntimeFactory.class.isAssignableFrom(runtimeClazz)) { + return (RuntimeFactory) runtimeClazz.getConstructor(ClientContext.class).newInstance(clientContext); + } else { + throw new SubmarineRuntimeException("Class: " + runtimeClass + + " not instance of " + 
RuntimeFactory.class.getCanonicalName()); + } + } catch (ClassNotFoundException | IllegalAccessException | + InstantiationException | NoSuchMethodException | + InvocationTargetException e) { + throw new SubmarineRuntimeException( + "Could not instantiate RuntimeFactory: " + runtimeClass, e); + } + } + + protected abstract JobSubmitter internalCreateJobSubmitter(); + + protected abstract JobMonitor internalCreateJobMonitor(); + + protected abstract SubmarineStorage internalCreateSubmarineStorage(); + + public synchronized JobSubmitter getJobSubmitterInstance() { + if (jobSubmitter == null) { + jobSubmitter = internalCreateJobSubmitter(); + } + return jobSubmitter; + } + + public synchronized JobMonitor getJobMonitorInstance() { + if (jobMonitor == null) { + jobMonitor = internalCreateJobMonitor(); + } + return jobMonitor; + } + + public synchronized SubmarineStorage getSubmarineStorage() { + if (submarineStorage == null) { + submarineStorage = internalCreateSubmarineStorage(); + } + return submarineStorage; + } + + @VisibleForTesting + public synchronized void setJobSubmitterInstance(JobSubmitter jobSubmitter) { + this.jobSubmitter = jobSubmitter; + } + + @VisibleForTesting + public synchronized void setJobMonitorInstance(JobMonitor jobMonitor) { + this.jobMonitor = jobMonitor; + } + + @VisibleForTesting + public synchronized void setSubmarineStorage(SubmarineStorage storage) { + this.submarineStorage = storage; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/FSBasedSubmarineStorageImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/FSBasedSubmarineStorageImpl.java new file mode 100644 index 00000000000..ebf95814096 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/FSBasedSubmarineStorageImpl.java @@ -0,0 +1,106 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + + +package org.apache.hadoop.yarn.submarine.runtimes.common; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectInputStream; +import java.io.ObjectOutput; +import java.io.ObjectOutputStream; +import java.util.Map; + +/** + * A super naive FS-based storage. + */ +public class FSBasedSubmarineStorageImpl extends SubmarineStorage { + ClientContext clientContext; + RemoteDirectoryManager rdm; + + public FSBasedSubmarineStorageImpl(ClientContext clientContext) { + this.clientContext = clientContext; + rdm = clientContext.getRemoteDirectoryManager(); + } + + @Override + public void addNewJob(String jobName, Map jobInfo) + throws IOException { + Path jobInfoPath = getJobInfoPath(jobName, true); + FSDataOutputStream fos = rdm.getFileSystem().create(jobInfoPath); + serializeMap(fos, jobInfo); + } + + @Override + public Map getJobInfoByName(String jobName) + throws IOException { + Path jobInfoPath = getJobInfoPath(jobName, false); + FSDataInputStream fis = rdm.getFileSystem().open(jobInfoPath); + return deserializeMap(fis); + } + + @Override + public void addNewModel(String modelName, String version, + Map modelInfo) throws IOException { + Path modelInfoPath = getModelInfoPath(modelName, version, true); + FSDataOutputStream fos = rdm.getFileSystem().create(modelInfoPath); + serializeMap(fos, modelInfo); + } + + @Override + public Map getModelInfoByName(String modelName, + String version) throws IOException { + Path modelInfoPath = getModelInfoPath(modelName, version, false); + FSDataInputStream fis = rdm.getFileSystem().open(modelInfoPath); + return deserializeMap(fis); + } + + private Path getModelInfoPath(String modelName, String version, boolean create) + throws IOException { + Path modelDir = rdm.getModelDir(modelName, create); + Path modelInfo = new Path(modelDir, version + ".info"); + return modelInfo; + } + + private void serializeMap(FSDataOutputStream fos, Map map) + throws IOException { + ObjectOutput oo = new ObjectOutputStream(fos); + oo.writeObject(map); + oo.close(); + } + + private Map deserializeMap(FSDataInputStream fis) + throws IOException { + ObjectInput oi = new ObjectInputStream(fis); + Map newMap = null; + try { + newMap = (Map) oi.readObject(); + } catch (ClassNotFoundException e) { + throw new IOException(e); + } + return newMap; + } + + private Path getJobInfoPath(String jobName, boolean create) throws IOException { + Path path = rdm.getJobStagingArea(jobName, create); + Path fileName = new Path(path, "job.info"); + return fileName; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/JobMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/JobMonitor.java new file mode 100644 index 00000000000..c81393b08be 
--- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/JobMonitor.java @@ -0,0 +1,84 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.runtimes.common; + +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.api.JobState; +import org.apache.hadoop.yarn.submarine.common.api.JobStatus; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * Monitor status of job(s) + */ +public abstract class JobMonitor { + private static final Logger LOG = + LoggerFactory.getLogger(JobMonitor.class); + protected ClientContext clientContext; + + public JobMonitor(ClientContext clientContext) { + this.clientContext = clientContext; + } + + /** + * Returns status of training job. + * + * @param jobName name of job + * @return job status + * @throws IOException anything else happens + * @throws YarnException anything related to YARN happens + */ + public abstract JobStatus getTrainingJobStatus(String jobName) + throws IOException, YarnException; + + /** + * Continue wait and print status if job goes to ready or final state. + * @param jobName + * @throws IOException + * @throws YarnException + * @throws SubmarineException + */ + public void waitTrainingFinal(String jobName) + throws IOException, YarnException, SubmarineException { + // Wait 5 sec between each fetch. + int waitIntervalSec = 5; + JobStatus js; + while (true) { + js = getTrainingJobStatus(jobName); + JobState jobState = js.getState(); + js.nicePrint(System.err); + + if (JobState.isFinal(jobState)) { + if (jobState.equals(JobState.FAILED)) { + throw new SubmarineException("Job failed"); + } else if (jobState.equals(JobState.KILLED)) { + throw new SubmarineException("Job killed"); + } + LOG.info("Job exited with state=" + jobState); + break; + } + + try { + Thread.sleep(waitIntervalSec * 1000); + } catch (InterruptedException e) { + throw new IOException(e); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/JobSubmitter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/JobSubmitter.java new file mode 100644 index 00000000000..1749390071a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/JobSubmitter.java @@ -0,0 +1,36 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.runtimes.common; + +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters; + +import java.io.IOException; + +/** + * Submit job to cluster master + */ +public interface JobSubmitter { + /** + * Submit job to cluster + * @param parameters run job parameters + * @return applicatioId when successfully submitted + * @throws YarnException for issues while contacting YARN daemons + * @throws IOException for other issues. + */ + ApplicationId submitJob(RunJobParameters parameters) + throws IOException, YarnException; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/StorageKeyConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/StorageKeyConstants.java new file mode 100644 index 00000000000..1fbbe7ae465 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/StorageKeyConstants.java @@ -0,0 +1,24 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.runtimes.common; + +public class StorageKeyConstants { + public static final String JOB_NAME = "JOB_NAME"; + public static final String JOB_RUN_ARGS = "JOB_RUN_ARGS"; + public static final String APPLICATION_ID = "APPLICATION_ID"; + public static final String CHECKPOINT_PATH = "CHECKPOINT_PATH"; + public static final String INPUT_PATH = "INPUT_PATH"; + public static final String SAVED_MODEL_PATH = "SAVED_MODEL_PATH"; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/SubmarineStorage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/SubmarineStorage.java new file mode 100644 index 00000000000..9c2004fa969 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/common/SubmarineStorage.java @@ -0,0 +1,57 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.runtimes.common; + +import java.io.IOException; +import java.util.Map; + +/** + * Persistent job/model, etc. + */ +public abstract class SubmarineStorage { + /** + * Add a new job by name + * @param jobName name of job. + * @param jobInfo info of the job. + */ + public abstract void addNewJob(String jobName, Map jobInfo) + throws IOException; + + /** + * Get job info by job name. + * @param jobName name of job + * @return info of the job. + */ + public abstract Map getJobInfoByName(String jobName) + throws IOException; + + /** + * Add a new model + * @param modelName name of model + * @param version version of the model, when null is specified, it will be + * "default" + * @param modelInfo info of the model. + */ + public abstract void addNewModel(String modelName, String version, + Map modelInfo) throws IOException; + + /** + * Get model info by name and version. + * @param modelName name of model. + * @param version version of the model, when null is specifed, it will be + */ + public abstract Map getModelInfoByName(String modelName, String version) + throws IOException; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobMonitor.java new file mode 100644 index 00000000000..94d30b030f5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobMonitor.java @@ -0,0 +1,46 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.runtimes.yarnservice; + +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.client.ServiceClient; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.api.JobStatus; +import org.apache.hadoop.yarn.submarine.common.api.builder.JobStatusBuilder; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor; + +import java.io.IOException; + +public class YarnServiceJobMonitor extends JobMonitor { + private ServiceClient serviceClient = null; + + public YarnServiceJobMonitor(ClientContext clientContext) { + super(clientContext); + } + + @Override + public synchronized JobStatus getTrainingJobStatus(String jobName) + throws IOException, YarnException { + if (this.serviceClient == null) { + this.serviceClient = YarnServiceUtils.createServiceClient( + clientContext.getYarnConfig()); + } + + Service serviceSpec = this.serviceClient.getStatus(jobName); + JobStatus jobStatus = JobStatusBuilder.fromServiceSpec(serviceSpec); + return jobStatus; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobSubmitter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobSubmitter.java new file mode 100644 index 00000000000..a2a2067425f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceJobSubmitter.java @@ -0,0 +1,461 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.runtimes.yarnservice; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.service.api.ServiceApiConstants; +import org.apache.hadoop.yarn.service.api.records.Artifact; +import org.apache.hadoop.yarn.service.api.records.Component; +import org.apache.hadoop.yarn.service.api.records.ConfigFile; +import org.apache.hadoop.yarn.service.api.records.Resource; +import org.apache.hadoop.yarn.service.api.records.ResourceInformation; +import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.client.ServiceClient; +import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters; +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.Envs; +import org.apache.hadoop.yarn.submarine.common.api.TaskType; +import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; + +/** + * Submit a job to cluster + */ +public class YarnServiceJobSubmitter implements JobSubmitter { + private static final Logger LOG = + LoggerFactory.getLogger(YarnServiceJobSubmitter.class); + ClientContext clientContext; + Service serviceSpec; + private Set uploadedFiles = new HashSet<>(); + + public YarnServiceJobSubmitter(ClientContext clientContext) { + this.clientContext = clientContext; + } + + private Resource getServiceResourceFromYarnResource( + org.apache.hadoop.yarn.api.records.Resource yarnResource) { + Resource serviceResource = new Resource(); + serviceResource.setCpus(yarnResource.getVirtualCores()); + serviceResource.setMemory(String.valueOf(yarnResource.getMemorySize())); + + Map riMap = new HashMap<>(); + for (org.apache.hadoop.yarn.api.records.ResourceInformation ri : yarnResource + .getAllResourcesListCopy()) { + ResourceInformation serviceRi = + new ResourceInformation(); + serviceRi.setValue(ri.getValue()); + serviceRi.setUnit(ri.getUnits()); + riMap.put(ri.getName(), serviceRi); + } + serviceResource.setResourceInformations(riMap); + + return serviceResource; + } + + private String getValueOfEnvionment(String envar) { + // extract value from "key=value" form + if (envar == null || !envar.contains("=")) { + return ""; + } else { + return envar.substring(envar.indexOf("=") + 1); + } + } + + private void addHdfsClassPathIfNeeded(RunJobParameters parameters, + FileWriter fw, Component comp) throws IOException { + // Find envs to use HDFS + String hdfsHome = null; + String javaHome = null; + + boolean hadoopEnv = false; + + for (String envar : parameters.getEnvars()) { + if (envar.startsWith("DOCKER_HADOOP_HDFS_HOME=")) { + hdfsHome = getValueOfEnvionment(envar); + hadoopEnv = true; + } else if (envar.startsWith("DOCKER_JAVA_HOME=")) { + javaHome = getValueOfEnvionment(envar); + } + } + + boolean lackingEnvs = false; + + if 
((parameters.getInputPath() != null && parameters.getInputPath() + .contains("hdfs://")) || (parameters.getCheckpointPath() != null + && parameters.getCheckpointPath().contains("hdfs://")) || ( + parameters.getSavedModelPath() != null && parameters.getSavedModelPath() + .contains("hdfs://")) || hadoopEnv) { + // HDFS is asked either in input or output, set LD_LIBRARY_PATH + // and classpath + + if (hdfsHome != null) { + // Unset HADOOP_HOME/HADOOP_YARN_HOME to make sure host machine's envs + // won't pollute docker's env. + fw.append("export HADOOP_HOME=\n"); + fw.append("export HADOOP_YARN_HOME=\n"); + fw.append("export HADOOP_HDFS_HOME=" + hdfsHome + "\n"); + fw.append("export HADOOP_COMMON_HOME=" + hdfsHome + "\n"); + } else{ + lackingEnvs = true; + } + + // hadoop confs will be uploaded to HDFS and localized to container's + // local folder, so here set $HADOOP_CONF_DIR to $WORK_DIR. + fw.append("export HADOOP_CONF_DIR=$WORK_DIR\n"); + if (javaHome != null) { + fw.append("export JAVA_HOME=" + javaHome + "\n"); + fw.append("export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:" + + "$JAVA_HOME/lib/amd64/server\n"); + } else { + lackingEnvs = true; + } + fw.append("export CLASSPATH=`$HADOOP_HDFS_HOME/bin/hadoop classpath --glob`\n"); + } + + if (lackingEnvs) { + LOG.error("When hdfs is being used to read/write models/data. Following" + + "envs are required: 1) DOCKER_HADOOP_HDFS_HOME= 2) DOCKER_JAVA_HOME=. You can use --env to pass these envars."); + throw new IOException("Failed to detect HDFS-related environments."); + } + + // Trying to upload core-site.xml and hdfs-site.xml + Path stagingDir = + clientContext.getRemoteDirectoryManager().getJobStagingArea( + parameters.getName(), true); + File coreSite = findFileOnClassPath("core-site.xml"); + File hdfsSite = findFileOnClassPath("hdfs-site.xml"); + if (coreSite == null || hdfsSite == null) { + LOG.error("hdfs is being used, however we couldn't locate core-site.xml/" + + "hdfs-site.xml from classpath, please double check you classpath" + + "setting and make sure they're included."); + throw new IOException( + "Failed to locate core-site.xml / hdfs-site.xml from class path"); + } + uploadToRemoteFileAndLocalizeToContainerWorkDir(stagingDir, + coreSite.getAbsolutePath(), "core-site.xml", comp); + uploadToRemoteFileAndLocalizeToContainerWorkDir(stagingDir, + hdfsSite.getAbsolutePath(), "hdfs-site.xml", comp); + + // DEBUG + if (SubmarineLogs.isVerbose()) { + fw.append("echo \"CLASSPATH:$CLASSPATH\"\n"); + fw.append("echo \"HADOOP_CONF_DIR:$HADOOP_CONF_DIR\"\n"); + fw.append("echo \"HADOOP_TOKEN_FILE_LOCATION:$HADOOP_TOKEN_FILE_LOCATION\"\n"); + fw.append("echo \"JAVA_HOME:$JAVA_HOME\"\n"); + fw.append("echo \"LD_LIBRARY_PATH:$LD_LIBRARY_PATH\"\n"); + fw.append("echo \"HADOOP_HDFS_HOME:$HADOOP_HDFS_HOME\"\n"); + } + } + + private void addCommonEnvironments(Component component, TaskType taskType) { + Map envs = component.getConfiguration().getEnv(); + envs.put(Envs.TASK_INDEX_ENV, ServiceApiConstants.COMPONENT_ID); + envs.put(Envs.TASK_TYPE_ENV, taskType.name()); + } + + /* + * Generate a command launch script on local disk, returns patch to the script + */ + private String generateCommandLaunchScript(RunJobParameters parameters, + TaskType taskType, Component comp) throws IOException { + File file = File.createTempFile(taskType.name() + "-launch-script", ".sh"); + FileWriter fw = new FileWriter(file); + + fw.append("#!/bin/bash\n"); + + addHdfsClassPathIfNeeded(parameters, fw, comp); + + // For primary_worker + if (taskType == TaskType.PRIMARY_WORKER) 
{ + // Do we need tensorboard? + if (parameters.isTensorboardEnabled()) { + int tensorboardPort = 6006; + // Run tensorboard at the background + fw.append( + "tensorboard --port " + tensorboardPort + " --logdir " + parameters + .getCheckpointPath() + " &\n"); + } + } + + // When distributed training is required + if (parameters.isDistributed()) { + // Generated TF_CONFIG + String tfConfigEnv = YarnServiceUtils.getTFConfigEnv( + taskType.getComponentName(), parameters.getNumWorkers(), + parameters.getNumPS(), parameters.getName(), + System.getProperty("user.name"), + clientContext.getYarnConfig().get("hadoop.registry.dns.domain-name")); + fw.append("export TF_CONFIG=\"" + tfConfigEnv + "\"\n"); + } + + // Print launch command + if (taskType.equals(TaskType.WORKER) || taskType.equals( + TaskType.PRIMARY_WORKER)) { + fw.append(parameters.getWorkerLaunchCmd() + '\n'); + + if (SubmarineLogs.isVerbose()) { + LOG.info("Worker command =[" + parameters.getWorkerLaunchCmd() + "]"); + } + } else if (taskType.equals(TaskType.PS)) { + fw.append(parameters.getPSLaunchCmd() + '\n'); + + if (SubmarineLogs.isVerbose()) { + LOG.info("PS command =[" + parameters.getPSLaunchCmd() + "]"); + } + } + + fw.close(); + return file.getAbsolutePath(); + } + + private String getScriptFileName(TaskType taskType) { + return "run-" + taskType.name() + ".sh"; + } + + private File findFileOnClassPath(final String fileName) { + final String classpath = System.getProperty("java.class.path"); + final String pathSeparator = System.getProperty("path.separator"); + final StringTokenizer tokenizer = new StringTokenizer(classpath, + pathSeparator); + + while (tokenizer.hasMoreTokens()) { + final String pathElement = tokenizer.nextToken(); + final File directoryOrJar = new File(pathElement); + final File absoluteDirectoryOrJar = directoryOrJar.getAbsoluteFile(); + if (absoluteDirectoryOrJar.isFile()) { + final File target = new File(absoluteDirectoryOrJar.getParent(), + fileName); + if (target.exists()) { + return target; + } + } else{ + final File target = new File(directoryOrJar, fileName); + if (target.exists()) { + return target; + } + } + } + + return null; + } + + private void uploadToRemoteFileAndLocalizeToContainerWorkDir(Path stagingDir, + String fileToUpload, String destFilename, Component comp) + throws IOException { + FileSystem fs = FileSystem.get(clientContext.getYarnConfig()); + + // Upload to remote FS under staging area + File localFile = new File(fileToUpload); + if (!localFile.exists()) { + throw new FileNotFoundException( + "Trying to upload file=" + localFile.getAbsolutePath() + + " to remote, but couldn't find local file."); + } + String filename = new File(fileToUpload).getName(); + + Path uploadedFilePath = new Path(stagingDir, filename); + if (!uploadedFiles.contains(uploadedFilePath)) { + if (SubmarineLogs.isVerbose()) { + LOG.info("Copying local file=" + fileToUpload + " to remote=" + + uploadedFilePath); + } + fs.copyFromLocalFile(new Path(fileToUpload), uploadedFilePath); + uploadedFiles.add(uploadedFilePath); + } + + FileStatus fileStatus = fs.getFileStatus(uploadedFilePath); + LOG.info("Uploaded file path = " + fileStatus.getPath()); + + // Set it to component's files list + comp.getConfiguration().getFiles().add(new ConfigFile().srcFile( + fileStatus.getPath().toUri().toString()).destFile(destFilename) + .type(ConfigFile.TypeEnum.STATIC)); + } + + private void handleLaunchCommand(RunJobParameters parameters, + TaskType taskType, Component component) throws IOException { + // Get staging area 
directory + Path stagingDir = + clientContext.getRemoteDirectoryManager().getJobStagingArea( + parameters.getName(), true); + + // Generate script file in the local disk + String localScriptFile = generateCommandLaunchScript(parameters, taskType, + component); + String destScriptFileName = getScriptFileName(taskType); + uploadToRemoteFileAndLocalizeToContainerWorkDir(stagingDir, localScriptFile, + destScriptFileName, component); + + component.setLaunchCommand("./" + destScriptFileName); + } + + private void addWorkerComponent(Service service, + RunJobParameters parameters, TaskType taskType) throws IOException { + Component workerComponent = new Component(); + addCommonEnvironments(workerComponent, taskType); + + workerComponent.setName(taskType.getComponentName()); + + if (taskType.equals(TaskType.PRIMARY_WORKER)) { + workerComponent.setNumberOfContainers(1L); + } else{ + workerComponent.setNumberOfContainers( + (long) parameters.getNumWorkers() - 1); + } + + if (parameters.getWorkerDockerImage() != null) { + workerComponent.setArtifact( + getDockerArtifact(parameters.getWorkerDockerImage())); + } + + workerComponent.setResource( + getServiceResourceFromYarnResource(parameters.getWorkerResource())); + handleLaunchCommand(parameters, taskType, workerComponent); + workerComponent.setRestartPolicy(Component.RestartPolicyEnum.NEVER); + service.addComponent(workerComponent); + } + + // Handle worker and primary_worker. + private void addWorkerComponents(Service service, RunJobParameters parameters) + throws IOException { + addWorkerComponent(service, parameters, TaskType.PRIMARY_WORKER); + + if (parameters.getNumWorkers() > 1) { + addWorkerComponent(service, parameters, TaskType.WORKER); + } + } + + private void appendToEnv(Service service, String key, String value, + String delim) { + Map env = service.getConfiguration().getEnv(); + if (!env.containsKey(key)) { + env.put(key, value); + } else { + if (!value.isEmpty()) { + String existingValue = env.get(key); + if (!existingValue.endsWith(delim)) { + env.put(key, existingValue + delim + value); + } else { + env.put(key, existingValue + value); + } + } + } + } + + private void handleServiceEnvs(Service service, RunJobParameters parameters) { + if (parameters.getEnvars() != null) { + for (String envarPair : parameters.getEnvars()) { + String key, value; + if (envarPair.contains("=")) { + int idx = envarPair.indexOf('='); + key = envarPair.substring(0, idx); + value = envarPair.substring(idx + 1); + } else{ + // No "=" found so use the whole key + key = envarPair; + value = ""; + } + appendToEnv(service, key, value, ":"); + } + } + + // Append other configs like /etc/passwd, /etc/krb5.conf + appendToEnv(service, "YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS", + "/etc/passwd:/etc/passwd:ro", ","); + + String authenication = clientContext.getYarnConfig().get( + HADOOP_SECURITY_AUTHENTICATION); + if (authenication != null && authenication.equals("kerberos")) { + appendToEnv(service, "YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS", + "/etc/krb5.conf:/etc/krb5.conf:ro", ","); + } + } + + private Artifact getDockerArtifact(String dockerImageName) { + return new Artifact().type(Artifact.TypeEnum.DOCKER).id(dockerImageName); + } + + private Service createServiceByParameters(RunJobParameters parameters) + throws IOException { + Service service = new Service(); + service.setName(parameters.getName()); + service.setVersion(String.valueOf(System.currentTimeMillis())); + service.setArtifact(getDockerArtifact(parameters.getDockerImageName())); + + handleServiceEnvs(service, 
parameters); + + addWorkerComponents(service, parameters); + + if (parameters.getNumPS() > 0) { + Component psComponent = new Component(); + psComponent.setName(TaskType.PS.getComponentName()); + addCommonEnvironments(psComponent, TaskType.PS); + psComponent.setNumberOfContainers((long) parameters.getNumPS()); + psComponent.setRestartPolicy(Component.RestartPolicyEnum.NEVER); + psComponent.setResource( + getServiceResourceFromYarnResource(parameters.getPsResource())); + + // Override global docker image if needed. + if (parameters.getPsDockerImage() != null) { + psComponent.setArtifact( + getDockerArtifact(parameters.getPsDockerImage())); + } + handleLaunchCommand(parameters, TaskType.PS, psComponent); + service.addComponent(psComponent); + } + return service; + } + + /** + * {@inheritDoc} + */ + @Override + public ApplicationId submitJob(RunJobParameters parameters) + throws IOException, YarnException { + Service service = createServiceByParameters(parameters); + ServiceClient serviceClient = YarnServiceUtils.createServiceClient( + clientContext.getYarnConfig()); + ApplicationId appid = serviceClient.actionCreate(service); + serviceClient.stop(); + this.serviceSpec = service; + return appid; + } + + @VisibleForTesting + public Service getServiceSpec() { + return serviceSpec; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceRuntimeFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceRuntimeFactory.java new file mode 100644 index 00000000000..3489e49233f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceRuntimeFactory.java @@ -0,0 +1,44 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.runtimes.yarnservice; + +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory; +import org.apache.hadoop.yarn.submarine.runtimes.common.FSBasedSubmarineStorageImpl; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter; +import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage; + +public class YarnServiceRuntimeFactory extends RuntimeFactory { + + public YarnServiceRuntimeFactory(ClientContext clientContext) { + super(clientContext); + } + + @Override + protected JobSubmitter internalCreateJobSubmitter() { + return new YarnServiceJobSubmitter(super.clientContext); + } + + @Override + protected JobMonitor internalCreateJobMonitor() { + return new YarnServiceJobMonitor(super.clientContext); + } + + @Override + protected SubmarineStorage internalCreateSubmarineStorage() { + return new FSBasedSubmarineStorageImpl(super.clientContext); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceUtils.java new file mode 100644 index 00000000000..f7ecc97c941 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/main/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/YarnServiceUtils.java @@ -0,0 +1,78 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.runtimes.yarnservice; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.service.client.ServiceClient; +import org.apache.hadoop.yarn.submarine.common.Envs; + +public class YarnServiceUtils { + // This will be true only in UT. + private static ServiceClient stubServiceClient = null; + + public static ServiceClient createServiceClient( + Configuration yarnConfiguration) { + if (stubServiceClient != null) { + return stubServiceClient; + } + + ServiceClient serviceClient = new ServiceClient(); + serviceClient.init(yarnConfiguration); + serviceClient.start(); + return serviceClient; + } + + @VisibleForTesting + public static void setStubServiceClient(ServiceClient stubServiceClient) { + YarnServiceUtils.stubServiceClient = stubServiceClient; + } + + public static String getTFConfigEnv(String curCommponentName, int nWorkers, + int nPs, String serviceName, String userName, String domain) { + String commonEndpointSuffix = + "." + serviceName + "." + userName + "." 
+ domain + ":8000"; + + String json = "{\\\"cluster\\\":{"; + + String master = getComponentArrayJson("master", 1, commonEndpointSuffix) + + ","; + String worker = getComponentArrayJson("worker", nWorkers - 1, + commonEndpointSuffix) + ","; + String ps = getComponentArrayJson("ps", nPs, commonEndpointSuffix) + "},"; + + String task = + "\\\"task\\\":{" + " \\\"type\\\":\\\"" + curCommponentName + "\\\"," + + " \\\"index\\\":" + '$' + Envs.TASK_INDEX_ENV + "},"; + String environment = "\\\"environment\\\":\\\"cloud\\\"}"; + + return json + master + worker + ps + task + environment; + } + + private static String getComponentArrayJson(String componentName, int count, + String endpointSuffix) { + String component = "\\\"" + componentName + "\\\":"; + String array = "["; + for (int i = 0; i < count; i++) { + array = array + "\\\"" + componentName + "-" + i + + endpointSuffix + "\\\""; + if (i != count - 1) { + array = array + ","; + } + } + array = array + "]"; + return component + array; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/site/DeveloperGuide.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/site/DeveloperGuide.md new file mode 100644 index 00000000000..ce26ea7b126 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/site/DeveloperGuide.md @@ -0,0 +1,26 @@ + + +# Developper Guide + +(Need add more details) + +By default, submarine uses YARN service framework as runtime. If you want to add your own implementation. You can add a new `RuntimeFactory` implementation and configure following option to `submarine.xml` (which should be placed under same `$HADOOP_CONF_DIR`) + +``` + + submarine.runtime.class + ... full qualified class name for your runtime factory ... + +``` diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/site/QuickStart.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/site/QuickStart.md new file mode 100644 index 00000000000..b720b5a5cae --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/site/QuickStart.md @@ -0,0 +1,134 @@ + + +# Quick Start Guide + +## Prerequisite + +Must: +- Apache Hadoop 3.1.0, YARN service enabled. + +Optional: +- Enable YARN DNS. (When distributed training required.) +- Enable GPU on YARN support. (When GPU-based training required.) + +## Run jobs + +### Commandline options + +```$xslt +usage: job run + -checkpoint_path Training output directory of the job, could + be local or other FS directory. This + typically includes checkpoint files and + exported model + -docker_image Docker image name/tag + -env Common environment variable of worker/ps + -input_path Input of the job, could be local or other FS + directory + -name Name of the job + -num_ps Number of PS tasks of the job, by default + it's 0 + -num_workers Numnber of worker tasks of the job, by + default it's 1 + -ps_docker_image Specify docker image for PS, when this is + not specified, PS uses --docker_image as + default. 
+ -ps_launch_cmd Commandline of worker, arguments will be + directly used to launch the PS + -ps_resources Resource of each PS, for example + memory-mb=2048,vcores=2,yarn.io/gpu=2 + -queue Name of queue to run the job, by default it + uses default queue + -saved_model_path Model exported path (savedmodel) of the job, + which is needed when exported model is not + placed under ${checkpoint_path}could be + local or other FS directory. This will be + used to serve. + -tensorboard Should we run TensorBoard for this job? By + default it's true + -verbose Print verbose log for troubleshooting + -wait_job_finish Specified when user want to wait the job + finish + -worker_docker_image Specify docker image for WORKER, when this + is not specified, WORKER uses --docker_image + as default. + -worker_launch_cmd Commandline of worker, arguments will be + directly used to launch the worker + -worker_resources Resource of each worker, for example + memory-mb=2048,vcores=2,yarn.io/gpu=2 +``` + +### Launch Standalone Tensorflow Application: + +#### Commandline +``` +yarn jar path-to/hadoop-yarn-applications-submarine-3.2.0-SNAPSHOT.jar job run \ + --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre/ \ + --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 --name tf-job-001 \ + --docker_image \ + --input_path hdfs://default/dataset/cifar-10-data \ + --checkpoint_path hdfs://default/tmp/cifar-10-jobdir \ + --worker_resources memory=4G,vcores=2,gpu=2 \ + --worker_launch_cmd "python ... (Your training application cmd)" +``` + +#### Notes: + +1) `DOCKER_JAVA_HOME` points to JAVA_HOME inside Docker image. +2) `DOCKER_HADOOP_HDFS_HOME` points to HADOOP_HDFS_HOME inside Docker image. +3) `--worker_resources` can include gpu when you need GPU to train your task. + +### Launch Distributed Tensorflow Application: + +#### Commandline + +``` +yarn jar hadoop-yarn-applications-submarine-.jar job run \ + --name tf-job-001 --docker_image \ + --input_path hdfs://default/dataset/cifar-10-data \ + --checkpoint_path hdfs://default/tmp/cifar-10-jobdir \ + --env DOCKER_JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre/ \ + --env DOCKER_HADOOP_HDFS_HOME=/hadoop-3.1.0 \ + --num_workers 2 \ + --worker_resources memory=8G,vcores=2,gpu=1 --worker_launch_cmd "cmd for worker ..." \ + --num_ps 2 \ + --ps_resources memory=4G,vcores=2,gpu=0 --ps_launch_cmd "cmd for ps" \ +``` + +#### Notes: + +1) Very similar to standalone TF application, but you need to specify #worker/#ps +2) Different resources can be specified for worker and PS. +3) `TF_CONFIG` environment will be auto generated and set before executing user's launch command. + +## Run jobs + +### Get Job Status + +``` +yarn jar hadoop-yarn-applications-submarine-3.2.0-SNAPSHOT.jar job show --name tf-job-001 +``` + +Output looks like: +``` +Job Meta Info: + Application Id: application_1532131617202_0005 + Input Path: hdfs://default/dataset/cifar-10-data + Checkpoint Path: hdfs://default/tmp/cifar-10-jobdir + Run Parameters: --name tf-job-001 --docker_image wtan/tf-1.8.0-gpu:0.0.3 + (... all your commandline before run the job) +``` + +After that, you can run ```tensorboard --logdir=``` to view Tensorboard of the job. 
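The developer guide added earlier in this patch notes that additional runtimes can be plugged in through the `submarine.runtime.class` property. As an illustration only (the package and class names below are hypothetical placeholders, not part of this patch; only the `RuntimeFactory`, `JobSubmitter`, `JobMonitor`, and `SubmarineStorage` contracts come from the sources above), a custom runtime would extend `RuntimeFactory` and expose a `(ClientContext)` constructor, since `RuntimeFactory.getRuntimeFactory()` instantiates the configured class reflectively:

```java
// Hypothetical sketch -- not part of this patch. Names are placeholders.
package org.example.submarine;

import org.apache.hadoop.yarn.submarine.common.ClientContext;
import org.apache.hadoop.yarn.submarine.common.api.JobStatus;
import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory;
import org.apache.hadoop.yarn.submarine.runtimes.common.FSBasedSubmarineStorageImpl;
import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor;
import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter;
import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage;

public class MyRuntimeFactory extends RuntimeFactory {

  // Required signature: RuntimeFactory.getRuntimeFactory() looks up a
  // (ClientContext) constructor reflectively.
  public MyRuntimeFactory(ClientContext clientContext) {
    super(clientContext);
  }

  @Override
  protected JobSubmitter internalCreateJobSubmitter() {
    // JobSubmitter has a single submitJob(RunJobParameters) method, so a
    // lambda is enough for a stub; replace with real submission logic.
    return parameters -> {
      throw new UnsupportedOperationException("submit to custom runtime");
    };
  }

  @Override
  protected JobMonitor internalCreateJobMonitor() {
    return new JobMonitor(clientContext) {
      @Override
      public JobStatus getTrainingJobStatus(String jobName) {
        // Replace with a real status lookup against the custom runtime.
        throw new UnsupportedOperationException("status from custom runtime");
      }
    };
  }

  @Override
  protected SubmarineStorage internalCreateSubmarineStorage() {
    // The FS-based storage added in this patch can typically be reused.
    return new FSBasedSubmarineStorageImpl(clientContext);
  }
}
```

Wiring it in is then just a matter of pointing `submarine.runtime.class` in `submarine.xml` at the fully qualified class name, e.g. `org.example.submarine.MyRuntimeFactory`.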
\ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/TestRunJobCliParsing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/TestRunJobCliParsing.java new file mode 100644 index 00000000000..295d6a87c9c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/TestRunJobCliParsing.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.hadoop.yarn.submarine.client.cli; + +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.param.RunJobParameters; +import org.apache.hadoop.yarn.submarine.common.MockClientContext; +import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; +import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobMonitor; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter; +import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestRunJobCliParsing { + @Before + public void before() { + SubmarineLogs.verboseOff(); + } + + @Test + public void testPrintHelp() { + MockClientContext mockClientContext = new MockClientContext(); + JobSubmitter mockJobSubmitter = mock(JobSubmitter.class); + JobMonitor mockJobMonitor = mock(JobMonitor.class); + RunJobCli runJobCli = new RunJobCli(mockClientContext, mockJobSubmitter, + mockJobMonitor); + runJobCli.printUsages(); + } + + private MockClientContext getMockClientContext() + throws IOException, YarnException { + MockClientContext mockClientContext = new MockClientContext(); + JobSubmitter mockJobSubmitter = mock(JobSubmitter.class); + when(mockJobSubmitter.submitJob(any(RunJobParameters.class))).thenReturn( + ApplicationId.newInstance(1234L, 1)); + JobMonitor mockJobMonitor = mock(JobMonitor.class); + SubmarineStorage storage = mock(SubmarineStorage.class); + RuntimeFactory rtFactory = mock(RuntimeFactory.class); + + when(rtFactory.getJobSubmitterInstance()).thenReturn(mockJobSubmitter); + when(rtFactory.getJobMonitorInstance()).thenReturn(mockJobMonitor); + when(rtFactory.getSubmarineStorage()).thenReturn(storage); + + mockClientContext.setRuntimeFactory(rtFactory); + return mockClientContext; + } + + @Test + public void testBasicRunJobForDistributedTraining() throws Exception { + RunJobCli runJobCli = new RunJobCli(getMockClientContext()); + + Assert.assertFalse(SubmarineLogs.isVerbose()); + + runJobCli.run( + new String[] { "--name", "my-job", "--docker_image", "tf-docker:1.1.0", + "--input_path", "hdfs://input", "--checkpoint_path", "hdfs://output", + "--num_workers", "3", "--num_ps", "2", "--worker_launch_cmd", + "python run-job.py", "--worker_resources", "memory=2048M,vcores=2", + "--ps_resources", "memory=4G,vcores=4", "--tensorboard", "true", + "--ps_launch_cmd", "python run-ps.py", "--verbose" }); + + RunJobParameters jobRunParameters = runJobCli.getRunJobParameters(); + + 
Assert.assertEquals(jobRunParameters.getInputPath(), "hdfs://input"); + Assert.assertEquals(jobRunParameters.getCheckpointPath(), "hdfs://output"); + Assert.assertEquals(jobRunParameters.getNumPS(), 2); + Assert.assertEquals(jobRunParameters.getPSLaunchCmd(), "python run-ps.py"); + Assert.assertEquals(Resources.createResource(4096, 4), + jobRunParameters.getPsResource()); + Assert.assertEquals(jobRunParameters.getWorkerLaunchCmd(), + "python run-job.py"); + Assert.assertEquals(Resources.createResource(2048, 2), + jobRunParameters.getWorkerResource()); + Assert.assertEquals(jobRunParameters.getDockerImageName(), + "tf-docker:1.1.0"); + Assert.assertTrue(SubmarineLogs.isVerbose()); + } + + @Test + public void testBasicRunJobForSingleNodeTraining() throws Exception { + RunJobCli runJobCli = new RunJobCli(getMockClientContext()); + Assert.assertFalse(SubmarineLogs.isVerbose()); + + runJobCli.run( + new String[] { "--name", "my-job", "--docker_image", "tf-docker:1.1.0", + "--input_path", "hdfs://input", "--checkpoint_path", "hdfs://output", + "--num_workers", "1", "--worker_launch_cmd", "python run-job.py", + "--worker_resources", "memory=4g,vcores=2", "--tensorboard", + "true", "--verbose", "--wait_job_finish" }); + + RunJobParameters jobRunParameters = runJobCli.getRunJobParameters(); + + Assert.assertEquals(jobRunParameters.getInputPath(), "hdfs://input"); + Assert.assertEquals(jobRunParameters.getCheckpointPath(), "hdfs://output"); + Assert.assertEquals(jobRunParameters.getNumWorkers(), 1); + Assert.assertEquals(jobRunParameters.getWorkerLaunchCmd(), + "python run-job.py"); + Assert.assertEquals(Resources.createResource(4096, 2), + jobRunParameters.getWorkerResource()); + Assert.assertTrue(SubmarineLogs.isVerbose()); + Assert.assertTrue(jobRunParameters.isWaitJobFinish()); + } + + @Test + public void testLaunchCommandPatternReplace() throws Exception { + RunJobCli runJobCli = new RunJobCli(getMockClientContext()); + Assert.assertFalse(SubmarineLogs.isVerbose()); + + runJobCli.run( + new String[] { "--name", "my-job", "--docker_image", "tf-docker:1.1.0", + "--input_path", "hdfs://input", "--checkpoint_path", "hdfs://output", + "--num_workers", "3", "--num_ps", "2", "--worker_launch_cmd", + "python run-job.py --input=%input_path% --model_dir=%checkpoint_path% --export_dir=%saved_model_path%/savedmodel", + "--worker_resources", "memory=2048,vcores=2", "--ps_resources", + "memory=4096,vcores=4", "--tensorboard", "true", "--ps_launch_cmd", + "python run-ps.py --input=%input_path% --model_dir=%checkpoint_path%/model", + "--verbose" }); + + Assert.assertEquals( + "python run-job.py --input=hdfs://input --model_dir=hdfs://output " + + "--export_dir=hdfs://output/savedmodel", + runJobCli.getRunJobParameters().getWorkerLaunchCmd()); + Assert.assertEquals( + "python run-ps.py --input=hdfs://input --model_dir=hdfs://output/model", + runJobCli.getRunJobParameters().getPSLaunchCmd()); + } + + @Test + public void testResourceUnitParsing() throws Exception { + Resource res = CliUtils.createResourceFromString("memory=20g,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(20 * 1024, 3), res); + + res = CliUtils.createResourceFromString("memory=20G,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(20 * 1024, 3), res); + + res = CliUtils.createResourceFromString("memory=20M,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(20, 3), res); + + res = 
CliUtils.createResourceFromString("memory=20m,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(20, 3), res); + + res = CliUtils.createResourceFromString("memory-mb=20,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(20, 3), res); + + res = CliUtils.createResourceFromString("memory-mb=20m,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(20, 3), res); + + res = CliUtils.createResourceFromString("memory-mb=20G,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(20 * 1024, 3), res); + + // W/o unit for memory means bits, and 20 bits will be rounded to 0 + res = CliUtils.createResourceFromString("memory=20,vcores=3", + ResourceUtils.getResourcesTypeInfo()); + Assert.assertEquals(Resources.createResource(0, 3), res); + + // Test multiple resources + List resTypes = new ArrayList<>( + ResourceUtils.getResourcesTypeInfo()); + resTypes.add(ResourceTypeInfo.newInstance(ResourceInformation.GPU_URI, "")); + ResourceUtils.reinitializeResources(resTypes); + res = CliUtils.createResourceFromString("memory=2G,vcores=3,gpu=0", + resTypes); + Assert.assertEquals(2 * 1024, res.getMemorySize()); + Assert.assertEquals(0, res.getResourceValue(ResourceInformation.GPU_URI)); + + res = CliUtils.createResourceFromString("memory=2G,vcores=3,gpu=3", + resTypes); + Assert.assertEquals(2 * 1024, res.getMemorySize()); + Assert.assertEquals(3, res.getResourceValue(ResourceInformation.GPU_URI)); + + res = CliUtils.createResourceFromString("memory=2G,vcores=3", + resTypes); + Assert.assertEquals(2 * 1024, res.getMemorySize()); + Assert.assertEquals(0, res.getResourceValue(ResourceInformation.GPU_URI)); + + res = CliUtils.createResourceFromString("memory=2G,vcores=3,yarn.io/gpu=0", + resTypes); + Assert.assertEquals(2 * 1024, res.getMemorySize()); + Assert.assertEquals(0, res.getResourceValue(ResourceInformation.GPU_URI)); + + res = CliUtils.createResourceFromString("memory=2G,vcores=3,yarn.io/gpu=3", + resTypes); + Assert.assertEquals(2 * 1024, res.getMemorySize()); + Assert.assertEquals(3, res.getResourceValue(ResourceInformation.GPU_URI)); + + // TODO, add more negative tests. + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/TestShowJobCliParsing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/TestShowJobCliParsing.java new file mode 100644 index 00000000000..9c0d872d623 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/TestShowJobCliParsing.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.hadoop.yarn.submarine.client.cli; + +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.submarine.client.cli.param.ShowJobParameters; +import org.apache.hadoop.yarn.submarine.common.MockClientContext; +import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; +import org.apache.hadoop.yarn.submarine.common.exception.SubmarineException; +import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory; +import org.apache.hadoop.yarn.submarine.runtimes.common.MemorySubmarineStorage; +import org.apache.hadoop.yarn.submarine.runtimes.common.StorageKeyConstants; +import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestShowJobCliParsing { + @Before + public void before() { + SubmarineLogs.verboseOff(); + } + + @Test + public void testPrintHelp() { + MockClientContext mockClientContext = new MockClientContext(); + ShowJobCli showJobCli = new ShowJobCli(mockClientContext); + showJobCli.printUsages(); + } + + @Test + public void testShowJob() + throws InterruptedException, SubmarineException, YarnException, + ParseException, IOException { + MockClientContext mockClientContext = new MockClientContext(); + ShowJobCli showJobCli = new ShowJobCli(mockClientContext) { + @Override + protected void getAndPrintJobInfo() { + // do nothing + } + }; + showJobCli.run(new String[] { "--name", "my-job" }); + ShowJobParameters parameters = showJobCli.getParameters(); + Assert.assertEquals(parameters.getName(), "my-job"); + } + + private Map getMockJobInfo(String jobName) { + Map map = new HashMap<>(); + map.put(StorageKeyConstants.APPLICATION_ID, + ApplicationId.newInstance(1234L, 1).toString()); + map.put(StorageKeyConstants.JOB_RUN_ARGS, "job run 123456"); + map.put(StorageKeyConstants.INPUT_PATH, "hdfs://" + jobName); + return map; + } + + @Test + public void testSimpleShowJob() + throws InterruptedException, SubmarineException, YarnException, + ParseException, IOException { + SubmarineStorage storage = new MemorySubmarineStorage(); + MockClientContext mockClientContext = new MockClientContext(); + RuntimeFactory runtimeFactory = mock(RuntimeFactory.class); + when(runtimeFactory.getSubmarineStorage()).thenReturn(storage); + mockClientContext.setRuntimeFactory(runtimeFactory); + + ShowJobCli showJobCli = new ShowJobCli(mockClientContext); + + try { + showJobCli.run(new String[] { "--name", "my-job" }); + } catch (IOException e) { + // expected + } + + + storage.addNewJob("my-job", getMockJobInfo("my-job")); + showJobCli.run(new String[] { "--name", "my-job" }); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/yarnservice/TestYarnServiceRunJobCli.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/yarnservice/TestYarnServiceRunJobCli.java new file mode 100644 index 00000000000..e1756b86bd9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/yarnservice/TestYarnServiceRunJobCli.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.submarine.client.cli.yarnservice; + +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.service.api.records.Component; +import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.client.ServiceClient; +import org.apache.hadoop.yarn.submarine.client.cli.RunJobCli; +import org.apache.hadoop.yarn.submarine.common.MockClientContext; +import org.apache.hadoop.yarn.submarine.common.api.TaskType; +import org.apache.hadoop.yarn.submarine.common.conf.SubmarineLogs; +import org.apache.hadoop.yarn.submarine.runtimes.common.JobSubmitter; +import org.apache.hadoop.yarn.submarine.runtimes.common.StorageKeyConstants; +import org.apache.hadoop.yarn.submarine.runtimes.common.SubmarineStorage; +import org.apache.hadoop.yarn.submarine.runtimes.yarnservice.YarnServiceJobSubmitter; +import org.apache.hadoop.yarn.submarine.runtimes.yarnservice.YarnServiceUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.Map; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestYarnServiceRunJobCli { + @Before + public void before() throws IOException, YarnException { + SubmarineLogs.verboseOff(); + ServiceClient serviceClient = mock(ServiceClient.class); + when(serviceClient.actionCreate(any(Service.class))).thenReturn( + ApplicationId.newInstance(1234L, 1)); + YarnServiceUtils.setStubServiceClient(serviceClient); + } + + @Test + public void testPrintHelp() { + MockClientContext mockClientContext = + YarnServiceCliTestUtils.getMockClientContext(); + RunJobCli runJobCli = new RunJobCli(mockClientContext); + runJobCli.printUsages(); + } + + private Service getServiceSpecFromJobSubmitter(JobSubmitter jobSubmitter) { + return ((YarnServiceJobSubmitter) jobSubmitter).getServiceSpec(); + } + + @Test + public void testBasicRunJobForDistributedTraining() throws Exception { + MockClientContext mockClientContext = + YarnServiceCliTestUtils.getMockClientContext(); + RunJobCli runJobCli = new RunJobCli(mockClientContext); + Assert.assertFalse(SubmarineLogs.isVerbose()); + + runJobCli.run( + new String[] { "--name", "my-job", "--docker_image", "tf-docker:1.1.0", + "--input_path", "s3://input", "--checkpoint_path", + "s3://output", "--num_workers", "3", "--num_ps", "2", + "--worker_launch_cmd", "python run-job.py", "--worker_resources", + "memory=2048M,vcores=2", "--ps_resources", "memory=4096M,vcores=4", + "--tensorboard", "true", "--ps_docker_image", "ps.image", + "--worker_docker_image", "worker.image", + "--ps_launch_cmd", "python run-ps.py", "--verbose" }); + Service serviceSpec = getServiceSpecFromJobSubmitter( + runJobCli.getJobSubmitter()); + Assert.assertEquals(3, serviceSpec.getComponents().size()); + Assert.assertTrue( + serviceSpec.getComponent(TaskType.WORKER.getComponentName()) != null); + Assert.assertTrue( + serviceSpec.getComponent(TaskType.PRIMARY_WORKER.getComponentName()) + != null); + Assert.assertTrue( + 
serviceSpec.getComponent(TaskType.PS.getComponentName()) != null); + Component primaryWorkerComp = serviceSpec.getComponent( + TaskType.PRIMARY_WORKER.getComponentName()); + Assert.assertEquals(2048, primaryWorkerComp.getResource().calcMemoryMB()); + Assert.assertEquals(2, + primaryWorkerComp.getResource().getCpus().intValue()); + + Component workerComp = serviceSpec.getComponent( + TaskType.WORKER.getComponentName()); + Assert.assertEquals(2048, workerComp.getResource().calcMemoryMB()); + Assert.assertEquals(2, workerComp.getResource().getCpus().intValue()); + + Component psComp = serviceSpec.getComponent(TaskType.PS.getComponentName()); + Assert.assertEquals(4096, psComp.getResource().calcMemoryMB()); + Assert.assertEquals(4, psComp.getResource().getCpus().intValue()); + + Assert.assertEquals("worker.image", workerComp.getArtifact().getId()); + Assert.assertEquals("ps.image", psComp.getArtifact().getId()); + + Assert.assertTrue(SubmarineLogs.isVerbose()); + + // TODO, ADD TEST TO USE SERVICE CLIENT TO VALIDATE THE JSON SPEC + } + + @Test + public void testBasicRunJobForSingleNodeTraining() throws Exception { + MockClientContext mockClientContext = + YarnServiceCliTestUtils.getMockClientContext(); + RunJobCli runJobCli = new RunJobCli(mockClientContext); + Assert.assertFalse(SubmarineLogs.isVerbose()); + + runJobCli.run( + new String[] { "--name", "my-job", "--docker_image", "tf-docker:1.1.0", + "--input_path", "s3://input", "--checkpoint_path", + "s3://output", "--num_workers", "1", "--worker_launch_cmd", + "python run-job.py", "--worker_resources", "memory=2G,vcores=2", + "--tensorboard", "true", "--verbose" }); + Service serviceSpec = getServiceSpecFromJobSubmitter( + runJobCli.getJobSubmitter()); + Assert.assertEquals(1, serviceSpec.getComponents().size()); + Assert.assertTrue( + serviceSpec.getComponent(TaskType.PRIMARY_WORKER.getComponentName()) + != null); + Component primaryWorkerComp = serviceSpec.getComponent( + TaskType.PRIMARY_WORKER.getComponentName()); + Assert.assertEquals(2048, primaryWorkerComp.getResource().calcMemoryMB()); + Assert.assertEquals(2, + primaryWorkerComp.getResource().getCpus().intValue()); + + Assert.assertTrue(SubmarineLogs.isVerbose()); + + // TODO, ADD TEST TO USE SERVICE CLIENT TO VALIDATE THE JSON SPEC + } + + @Test + public void testParameterStorageForTrainingJob() throws Exception { + MockClientContext mockClientContext = + YarnServiceCliTestUtils.getMockClientContext(); + RunJobCli runJobCli = new RunJobCli(mockClientContext); + Assert.assertFalse(SubmarineLogs.isVerbose()); + + runJobCli.run( + new String[] { "--name", "my-job", "--docker_image", "tf-docker:1.1.0", + "--input_path", "s3://input", "--checkpoint_path", + "s3://output", "--num_workers", "1", "--worker_launch_cmd", + "python run-job.py", "--worker_resources", "memory=2G,vcores=2", + "--tensorboard", "true", "--verbose" }); + SubmarineStorage storage = + mockClientContext.getRuntimeFactory().getSubmarineStorage(); + Map jobInfo = storage.getJobInfoByName("my-job"); + Assert.assertTrue(jobInfo.size() > 0); + Assert.assertEquals(jobInfo.get(StorageKeyConstants.INPUT_PATH), + "s3://input"); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/yarnservice/YarnServiceCliTestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/yarnservice/YarnServiceCliTestUtils.java new file mode 100644 index 
00000000000..e4825eaced5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/client/cli/yarnservice/YarnServiceCliTestUtils.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.submarine.client.cli.yarnservice; + +import org.apache.hadoop.yarn.submarine.common.MockClientContext; +import org.apache.hadoop.yarn.submarine.runtimes.RuntimeFactory; +import org.apache.hadoop.yarn.submarine.runtimes.common.MemorySubmarineStorage; +import org.apache.hadoop.yarn.submarine.runtimes.yarnservice.YarnServiceRuntimeFactory; + +public class YarnServiceCliTestUtils { + public static MockClientContext getMockClientContext() { + MockClientContext mockClientContext = new MockClientContext(); + RuntimeFactory runtimeFactory = new YarnServiceRuntimeFactory( + mockClientContext); + mockClientContext.setRuntimeFactory(runtimeFactory); + runtimeFactory.setSubmarineStorage(new MemorySubmarineStorage()); + return mockClientContext; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/common/MockClientContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/common/MockClientContext.java new file mode 100644 index 00000000000..5c06ddc7560 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/common/MockClientContext.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.submarine.common; + +import org.apache.hadoop.yarn.submarine.common.fs.MockRemoteDirectoryManager; +import org.apache.hadoop.yarn.submarine.common.fs.RemoteDirectoryManager; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.service.client.ServiceClient; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; + +import java.io.IOException; + +import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class MockClientContext extends ClientContext { + private MockRemoteDirectoryManager remoteDirectoryMgr = + new MockRemoteDirectoryManager(); + + @Override + public RemoteDirectoryManager getRemoteDirectoryManager() { + return remoteDirectoryMgr; + } + + @Override + public synchronized YarnClient getOrCreateYarnClient() { + YarnClient client = mock(YarnClient.class); + try { + when(client.getResourceTypeInfo()).thenReturn( + ResourceUtils.getResourcesTypeInfo()); + } catch (YarnException e) { + fail(e.getMessage()); + } catch (IOException e) { + fail(e.getMessage()); + } + return client; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/common/fs/MockRemoteDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/common/fs/MockRemoteDirectoryManager.java new file mode 100644 index 00000000000..a195b599f9b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/common/fs/MockRemoteDirectoryManager.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.submarine.common.fs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.File; +import java.io.IOException; + +public class MockRemoteDirectoryManager implements RemoteDirectoryManager { + private File jobsParentDir = null; + private File modelParentDir = null; + + @Override + public Path getJobStagingArea(String jobName, boolean create) + throws IOException { + if (jobsParentDir == null && create) { + jobsParentDir = new File( + "target/_staging_area_" + System.currentTimeMillis()); + if (!jobsParentDir.mkdirs()) { + throw new IOException( + "Failed to mkdirs for" + jobsParentDir.getAbsolutePath()); + } + } + + File jobDir = new File(jobsParentDir.getAbsolutePath(), jobName); + if (create && !jobDir.exists()) { + if (!jobDir.mkdirs()) { + throw new IOException("Failed to mkdirs for " + jobDir.getAbsolutePath()); + } + } + return new Path(jobDir.getAbsolutePath()); + } + + @Override + public Path getJobCheckpointDir(String jobName, boolean create) + throws IOException { + return null; + } + + @Override + public Path getModelDir(String modelName, boolean create) throws IOException { + if (modelParentDir == null && create) { + modelParentDir = new File( + "target/_models_" + System.currentTimeMillis()); + if (!modelParentDir.mkdirs()) { + throw new IOException( + "Failed to mkdirs for " + modelParentDir.getAbsolutePath()); + } + } + + File modelDir = new File(modelParentDir.getAbsolutePath(), modelName); + if (create) { + if (!modelDir.exists() && !modelDir.mkdirs()) { + throw new IOException("Failed to mkdirs for " + modelDir.getAbsolutePath()); + } + } + return new Path(modelDir.getAbsolutePath()); + } + + @Override + public FileSystem getFileSystem() throws IOException { + return FileSystem.getLocal(new Configuration()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/common/MemorySubmarineStorage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/common/MemorySubmarineStorage.java new file mode 100644 index 00000000000..013614e6406 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/common/MemorySubmarineStorage.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.submarine.runtimes.common; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class MemorySubmarineStorage extends SubmarineStorage { + private Map> jobsInfo = new HashMap<>(); + private Map>> modelsInfo = + new HashMap<>(); + + @Override + public synchronized void addNewJob(String jobName, Map jobInfo) + throws IOException { + jobsInfo.put(jobName, jobInfo); + } + + @Override + public synchronized Map getJobInfoByName(String jobName) + throws IOException { + Map info = jobsInfo.get(jobName); + if (info == null) { + throw new IOException("Failed to find job=" + jobName); + } + return info; + } + + @Override + public synchronized void addNewModel(String modelName, String version, + Map modelInfo) throws IOException { + if (!modelsInfo.containsKey(modelName)) { + modelsInfo.put(modelName, new HashMap<>()); + } + modelsInfo.get(modelName).put(version, modelInfo); + } + + @Override + public synchronized Map getModelInfoByName(String modelName, + String version) throws IOException { + + boolean notFound = false; + Map info = null; + try { + info = modelsInfo.get(modelName).get(version); + } catch (NullPointerException e) { + notFound = true; + } + + if (notFound || info == null) { + throw new IOException( + "Failed to find, model=" + modelName + " version=" + version); + } + + return info; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/common/TestFSBasedSubmarineStorage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/common/TestFSBasedSubmarineStorage.java new file mode 100644 index 00000000000..52a68b3f883 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/common/TestFSBasedSubmarineStorage.java @@ -0,0 +1,73 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. 
+ */ + +package org.apache.hadoop.yarn.submarine.runtimes.common; + +import org.apache.hadoop.yarn.submarine.common.ClientContext; +import org.apache.hadoop.yarn.submarine.common.fs.MockRemoteDirectoryManager; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestFSBasedSubmarineStorage { + private Map getMap(String prefix) { + Map map = new HashMap<>(); + map.put(prefix + "1", "1"); + map.put(prefix + "2", "2"); + map.put(prefix + "3", "3"); + map.put(prefix + "4", "4"); + return map; + } + + private void compareMap(Map map1, Map map2) { + Assert.assertEquals(map1.size(), map2.size()); + for (String k : map1.keySet()) { + Assert.assertEquals(map1.get(k), map2.get(k)); + } + } + + @Test + public void testStorageOps() throws IOException { + MockRemoteDirectoryManager remoteDirectoryManager = new MockRemoteDirectoryManager(); + ClientContext clientContext = mock(ClientContext.class); + when(clientContext.getRemoteDirectoryManager()).thenReturn(remoteDirectoryManager); + FSBasedSubmarineStorageImpl storage = new FSBasedSubmarineStorageImpl( + clientContext); + storage.addNewJob("job1", getMap("job1")); + storage.addNewJob("job2", getMap("job2")); + storage.addNewJob("job3", getMap("job3")); + storage.addNewJob("job4", new HashMap<>()); + storage.addNewModel("model1", "1.0", getMap("model1_1.0")); + storage.addNewModel("model1", "2.0.0", getMap("model1_2.0.0")); + storage.addNewModel("model2", null, getMap("model1_default")); + storage.addNewModel("model2", "1.0", getMap("model2_1.0")); + + // create a new storage and read it back. + storage = new FSBasedSubmarineStorageImpl( + clientContext); + compareMap(getMap("job1"), storage.getJobInfoByName("job1")); + compareMap(getMap("job2"), storage.getJobInfoByName("job2")); + compareMap(getMap("job3"), storage.getJobInfoByName("job3")); + compareMap(new HashMap<>(), storage.getJobInfoByName("job4")); + compareMap(getMap("model1_1.0"), storage.getModelInfoByName("model1", "1.0")); + compareMap(getMap("model1_2.0.0"), storage.getModelInfoByName("model1", "2.0.0")); + compareMap(getMap("model2_1.0"), storage.getModelInfoByName("model2", "1.0")); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/TestTFConfigGenerator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/TestTFConfigGenerator.java new file mode 100644 index 00000000000..d7dc8749440 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/java/org/apache/hadoop/yarn/submarine/runtimes/yarnservice/TestTFConfigGenerator.java @@ -0,0 +1,42 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ + +package org.apache.hadoop.yarn.submarine.runtimes.yarnservice; + +import org.codehaus.jettison.json.JSONException; +import org.junit.Assert; +import org.junit.Test; + +public class TestTFConfigGenerator { + @Test + public void testSimpleDistributedTFConfigGenerator() throws JSONException { + String json = YarnServiceUtils.getTFConfigEnv("worker", 5, 3, "wtan", + "tf-job-001", "example.com"); + String expected = + "{\\\"cluster\\\":{\\\"master\\\":[\\\"master-0.wtan.tf-job-001.example.com:8000\\\"],\\\"worker\\\":[\\\"worker-0.wtan.tf-job-001.example.com:8000\\\",\\\"worker-1.wtan.tf-job-001.example.com:8000\\\",\\\"worker-2.wtan.tf-job-001.example.com:8000\\\",\\\"worker-3.wtan.tf-job-001.example.com:8000\\\"],\\\"ps\\\":[\\\"ps-0.wtan.tf-job-001.example.com:8000\\\",\\\"ps-1.wtan.tf-job-001.example.com:8000\\\",\\\"ps-2.wtan.tf-job-001.example.com:8000\\\"]},\\\"task\\\":{ \\\"type\\\":\\\"worker\\\", \\\"index\\\":$_TASK_INDEX},\\\"environment\\\":\\\"cloud\\\"}"; + Assert.assertEquals(expected, json); + + json = YarnServiceUtils.getTFConfigEnv("ps", 5, 3, "wtan", "tf-job-001", + "example.com"); + expected = + "{\\\"cluster\\\":{\\\"master\\\":[\\\"master-0.wtan.tf-job-001.example.com:8000\\\"],\\\"worker\\\":[\\\"worker-0.wtan.tf-job-001.example.com:8000\\\",\\\"worker-1.wtan.tf-job-001.example.com:8000\\\",\\\"worker-2.wtan.tf-job-001.example.com:8000\\\",\\\"worker-3.wtan.tf-job-001.example.com:8000\\\"],\\\"ps\\\":[\\\"ps-0.wtan.tf-job-001.example.com:8000\\\",\\\"ps-1.wtan.tf-job-001.example.com:8000\\\",\\\"ps-2.wtan.tf-job-001.example.com:8000\\\"]},\\\"task\\\":{ \\\"type\\\":\\\"ps\\\", \\\"index\\\":$_TASK_INDEX},\\\"environment\\\":\\\"cloud\\\"}"; + Assert.assertEquals(expected, json); + + json = YarnServiceUtils.getTFConfigEnv("master", 2, 1, "wtan", "tf-job-001", + "example.com"); + expected = + "{\\\"cluster\\\":{\\\"master\\\":[\\\"master-0.wtan.tf-job-001.example.com:8000\\\"],\\\"worker\\\":[\\\"worker-0.wtan.tf-job-001.example.com:8000\\\"],\\\"ps\\\":[\\\"ps-0.wtan.tf-job-001.example.com:8000\\\"]},\\\"task\\\":{ \\\"type\\\":\\\"master\\\", \\\"index\\\":$_TASK_INDEX},\\\"environment\\\":\\\"cloud\\\"}"; + Assert.assertEquals(expected, json); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/resources/core-site.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/resources/core-site.xml new file mode 100644 index 00000000000..50ec1460bd6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/resources/core-site.xml @@ -0,0 +1,21 @@ + + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/resources/hdfs-site.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/resources/hdfs-site.xml new file mode 100644 index 00000000000..50ec1460bd6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-submarine/src/test/resources/hdfs-site.xml @@ -0,0 +1,21 @@ + + + + + + + + + diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml index 490e9ad5b93..4c03f3c84e4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml @@ -37,6 +37,7 @@ hadoop-yarn-applications-distributedshell hadoop-yarn-applications-unmanaged-am-launcher hadoop-yarn-services + hadoop-yarn-submarine diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java index 32aa21d52bd..59b33530b0b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java @@ -804,6 +804,17 @@ public TimelineV2Client getRegisteredTimelineV2Client() { return this.timelineV2Client; } + /** + * Update application's tracking url on next heartbeat. + * + * @param trackingUrl new tracking url for this application + */ + @Public + @InterfaceStability.Unstable + public void updateTrackingUrl(String trackingUrl) { + // Unimplemented. + } + /** * Wait for check to return true for each 1000 ms. * See also {@link #waitFor(java.util.function.Supplier, int)} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java index 26c99e31aa9..59fa6a8f2f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java @@ -33,7 +33,6 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationResponse; import org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteRequest; import org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteResponse; @@ -52,10 +51,14 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerReport; +import org.apache.hadoop.yarn.api.records.NodeAttribute; +import org.apache.hadoop.yarn.api.records.NodeAttributeKey; +import org.apache.hadoop.yarn.api.records.NodeAttributeInfo; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.NodeToAttributeValue; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; @@ -900,4 +903,59 @@ public abstract Resource getResourceProfile(String profile) @Unstable public abstract List getResourceTypeInfo() throws YarnException, IOException; + + /** + *

    + * The interface used by client to get node attributes in the cluster. + *

    + * + * @return cluster node attributes collection + * @throws YarnException when there is a failure in + * {@link ApplicationClientProtocol} + * @throws IOException when there is a failure in + * {@link ApplicationClientProtocol} + */ + @Public + @Unstable + public abstract Set getClusterAttributes() + throws YarnException, IOException; + + /** + *

    + * The interface used by client to get mapping of AttributeKey to associated + * NodeToAttributeValue list for specified node attributeKeys in the cluster. + *

    + * + * @param attributes AttributeKeys for which associated NodeToAttributeValue + * mapping value has to be retrieved. If empty or null is set then + * will return mapping for all attributeKeys in the cluster + * @return mapping of AttributeKey to List of associated + * NodeToAttributeValue's. + * @throws YarnException + * @throws IOException + */ + @Public + @Unstable + public abstract Map> getAttributesToNodes( + Set attributes) throws YarnException, IOException; + + /** + *

    + * The interface used by client to get all node to attribute mapping in + * existing cluster. + *

    + * + * @param hostNames HostNames for which host to attributes mapping has to + * be retrived.If empty or null is set then will return + * all nodes to attributes mapping in cluster. + * @return Node to attribute mappings + * @throws YarnException + * @throws IOException + */ + @Public + @Unstable + public abstract Map> getNodeToAttributes( + Set hostNames) throws YarnException, IOException; + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java index 0af687bd58b..3dd2f718ba6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java @@ -412,6 +412,17 @@ public TimelineV2Client getRegisteredTimelineV2Client() { public abstract void updateBlacklist(List blacklistAdditions, List blacklistRemovals); + /** + * Update application's tracking url on next heartbeat. + * + * @param trackingUrl new tracking url for this application + */ + @Public + @Unstable + public void updateTrackingUrl(String trackingUrl) { + // Unimplemented. + } + /** * Wait for check to return true for each 1000 ms. * See also {@link #waitFor(java.util.function.Supplier, int)} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/AMRMClientAsyncImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/AMRMClientAsyncImpl.java index 4f04b66e100..3cf2c3496ef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/AMRMClientAsyncImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/impl/AMRMClientAsyncImpl.java @@ -286,6 +286,11 @@ public void updateBlacklist(List blacklistAdditions, List blacklistRemovals) { client.updateBlacklist(blacklistAdditions, blacklistRemovals); } + + @Override + public void updateTrackingUrl(String trackingUrl) { + client.updateTrackingUrl(trackingUrl); + } private class HeartbeatThread extends Thread { public HeartbeatThread() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index 7265d24ac05..6dcecde09aa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -99,6 +99,7 @@ protected String appHostName; protected int appHostPort; protected String appTrackingUrl; + protected String newTrackingUrl; protected ApplicationMasterProtocol rmClient; protected Resource clusterAvailableResources; @@ -308,6 +309,11 @@ public AllocateResponse allocate(float progressIndicator) .releaseList(releaseList).updateRequests(updateList) .schedulingRequests(schedulingRequestList).build(); + if (this.newTrackingUrl != null) { + allocateRequest.setTrackingUrl(this.newTrackingUrl); + this.appTrackingUrl = 
this.newTrackingUrl; + this.newTrackingUrl = null; + } // clear blacklistAdditions and blacklistRemovals before // unsynchronized part blacklistAdditions.clear(); @@ -1008,6 +1014,11 @@ public synchronized void updateBlacklist(List blacklistAdditions, } } + @Override + public synchronized void updateTrackingUrl(String trackingUrl) { + this.newTrackingUrl = trackingUrl; + } + private void updateAMRMToken(Token token) throws IOException { org.apache.hadoop.security.token.Token amrmToken = new org.apache.hadoop.security.token.Token(token diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 1ceb46209b1..acfc3ff70be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -22,7 +22,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.EnumSet; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -52,8 +51,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeLabelsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; @@ -68,6 +69,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetNewReservationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; @@ -96,15 +98,18 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerReport; +import org.apache.hadoop.yarn.api.records.NodeAttribute; +import org.apache.hadoop.yarn.api.records.NodeAttributeKey; +import org.apache.hadoop.yarn.api.records.NodeAttributeInfo; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.NodeToAttributeValue; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; -import 
org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.api.records.Token; @@ -977,4 +982,28 @@ public Resource getResourceProfile(String profile) GetAllResourceTypeInfoRequest.newInstance(); return rmClient.getResourceTypeInfo(request).getResourceTypeInfo(); } + + @Override + public Set getClusterAttributes() + throws YarnException, IOException { + GetClusterNodeAttributesRequest request = + GetClusterNodeAttributesRequest.newInstance(); + return rmClient.getClusterNodeAttributes(request).getNodeAttributes(); + } + + @Override + public Map> getAttributesToNodes( + Set attributes) throws YarnException, IOException { + GetAttributesToNodesRequest request = + GetAttributesToNodesRequest.newInstance(attributes); + return rmClient.getAttributesToNodes(request).getAttributesToNodes(); + } + + @Override + public Map> getNodeToAttributes( + Set hostNames) throws YarnException, IOException { + GetNodesToAttributesRequest request = + GetNodesToAttributesRequest.newInstance(hostNames); + return rmClient.getNodesToAttributes(request).getNodeToAttributes(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java index 1d26a96bb11..a0e4e02b0a4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.client.cli; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; @@ -100,11 +101,14 @@ public static final String COMPONENT = "component"; public static final String ENABLE_FAST_LAUNCH = "enableFastLaunch"; public static final String UPGRADE_CMD = "upgrade"; + public static final String UPGRADE_EXPRESS = "express"; public static final String UPGRADE_INITIATE = "initiate"; public static final String UPGRADE_AUTO_FINALIZE = "autoFinalize"; public static final String UPGRADE_FINALIZE = "finalize"; public static final String COMPONENT_INSTS = "instances"; public static final String COMPONENTS = "components"; + public static final String VERSION = "version"; + public static final String STATES = "states"; private static String firstArg = null; @@ -245,6 +249,9 @@ public int run(String[] args) throws Exception { opts.addOption(UPGRADE_CMD, true, "Upgrades an application/long-" + "running service. It requires either -initiate, -instances, or " + "-finalize options."); + opts.addOption(UPGRADE_EXPRESS, true, "Works with -upgrade option to " + + "perform express upgrade. It requires the upgraded application " + + "specification file."); opts.addOption(UPGRADE_INITIATE, true, "Works with -upgrade option to " + "initiate the application upgrade. It requires the upgraded " + "application specification file."); @@ -294,10 +301,39 @@ public int run(String[] args) throws Exception { opts.addOption(STATUS_CMD, true, "Prints the status of the container."); opts.addOption(LIST_CMD, true, - "List containers for application attempt."); + "List containers for application attempt when application " + + "attempt ID is provided. 
When application name is provided, " + + "then it finds the instances of the application based on app's " + + "own implementation, and -appTypes option must be specified " + + "unless it is the default yarn-service type. With app name, it " + + "supports optional use of -version to filter instances based on " + + "app version, -components to filter instances based on component " + + "names, -states to filter instances based on instance state."); opts.addOption(HELP_CMD, false, "Displays help for all commands."); opts.getOption(STATUS_CMD).setArgName("Container ID"); - opts.getOption(LIST_CMD).setArgName("Application Attempt ID"); + opts.getOption(LIST_CMD).setArgName("Application Name or Attempt ID"); + opts.addOption(APP_TYPE_CMD, true, "Works with -list to " + + "specify the app type when application name is provided."); + opts.getOption(APP_TYPE_CMD).setValueSeparator(','); + opts.getOption(APP_TYPE_CMD).setArgs(Option.UNLIMITED_VALUES); + opts.getOption(APP_TYPE_CMD).setArgName("Types"); + + opts.addOption(VERSION, true, "Works with -list " + + "to filter instances based on input application version."); + opts.getOption(VERSION).setArgs(1); + + opts.addOption(COMPONENTS, true, "Works with -list to " + + "filter instances based on input comma-separated list of " + + "component names."); + opts.getOption(COMPONENTS).setValueSeparator(','); + opts.getOption(COMPONENTS).setArgs(Option.UNLIMITED_VALUES); + + opts.addOption(STATES, true, "Works with -list to " + + "filter instances based on input comma-separated list of " + + "instance states."); + opts.getOption(STATES).setValueSeparator(','); + opts.getOption(STATES).setArgs(Option.UNLIMITED_VALUES); + opts.addOption(SIGNAL_CMD, true, "Signal the container. The available signal commands are " + java.util.Arrays.asList(SignalContainerCommand.values()) + @@ -426,11 +462,40 @@ public int run(String[] args) throws Exception { } listApplicationAttempts(cliParser.getOptionValue(LIST_CMD)); } else if (title.equalsIgnoreCase(CONTAINER)) { - if (hasAnyOtherCLIOptions(cliParser, opts, LIST_CMD)) { + if (hasAnyOtherCLIOptions(cliParser, opts, LIST_CMD, APP_TYPE_CMD, + VERSION, COMPONENTS, STATES)) { printUsage(title, opts); return exitCode; } - listContainers(cliParser.getOptionValue(LIST_CMD)); + String appAttemptIdOrName = cliParser.getOptionValue(LIST_CMD); + try { + // try parsing attempt id, if it succeeds, it means it's appId + ApplicationAttemptId.fromString(appAttemptIdOrName); + listContainers(appAttemptIdOrName); + } catch (IllegalArgumentException e) { + // not appAttemptId format, it could be appName. If app-type is not + // provided, assume it is yarn-service type. + AppAdminClient client = AppAdminClient + .createAppAdminClient(getSingleAppTypeFromCLI(cliParser), + getConf()); + String version = cliParser.getOptionValue(VERSION); + String[] components = cliParser.getOptionValues(COMPONENTS); + String[] instanceStates = cliParser.getOptionValues(STATES); + try { + sysout.println(client.getInstances(appAttemptIdOrName, + components == null ? null : Arrays.asList(components), + version, instanceStates == null ? 
null : + Arrays.asList(instanceStates))); + return 0; + } catch (ApplicationNotFoundException exception) { + System.err.println("Application with name '" + appAttemptIdOrName + + "' doesn't exist in RM or Timeline Server."); + return -1; + } catch (Exception ex) { + System.err.println(ex.getMessage()); + return -1; + } + } } } else if (cliParser.hasOption(KILL_CMD)) { if (hasAnyOtherCLIOptions(cliParser, opts, KILL_CMD)) { @@ -579,9 +644,9 @@ public int run(String[] args) throws Exception { moveApplicationAcrossQueues(cliParser.getOptionValue(APP_ID), cliParser.getOptionValue(CHANGE_APPLICATION_QUEUE)); } else if (cliParser.hasOption(UPGRADE_CMD)) { - if (hasAnyOtherCLIOptions(cliParser, opts, UPGRADE_CMD, UPGRADE_INITIATE, - UPGRADE_AUTO_FINALIZE, UPGRADE_FINALIZE, COMPONENT_INSTS, COMPONENTS, - APP_TYPE_CMD)) { + if (hasAnyOtherCLIOptions(cliParser, opts, UPGRADE_CMD, UPGRADE_EXPRESS, + UPGRADE_INITIATE, UPGRADE_AUTO_FINALIZE, UPGRADE_FINALIZE, + COMPONENT_INSTS, COMPONENTS, APP_TYPE_CMD)) { printUsage(title, opts); return exitCode; } @@ -589,7 +654,14 @@ public int run(String[] args) throws Exception { AppAdminClient client = AppAdminClient.createAppAdminClient(appType, getConf()); String appName = cliParser.getOptionValue(UPGRADE_CMD); - if (cliParser.hasOption(UPGRADE_INITIATE)) { + if (cliParser.hasOption(UPGRADE_EXPRESS)) { + File file = new File(cliParser.getOptionValue(UPGRADE_EXPRESS)); + if (!file.exists()) { + System.err.println(file.getAbsolutePath() + " does not exist."); + return exitCode; + } + return client.actionUpgradeExpress(appName, file); + } else if (cliParser.hasOption(UPGRADE_INITIATE)) { if (hasAnyOtherCLIOptions(cliParser, opts, UPGRADE_CMD, UPGRADE_INITIATE, UPGRADE_AUTO_FINALIZE, APP_TYPE_CMD)) { printUsage(title, opts); @@ -821,6 +893,8 @@ private int printContainerReport(String containerId) throws YarnException, containerReportStr.println(containerReport.getFinishTime()); containerReportStr.print("\tState : "); containerReportStr.println(containerReport.getContainerState()); + containerReportStr.print("\tExecution-Type : "); + containerReportStr.println(containerReport.getExecutionType()); containerReportStr.print("\tLOG-URL : "); containerReportStr.println(containerReport.getLogUrl()); containerReportStr.print("\tHost : "); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ClusterCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ClusterCLI.java index a29b0db7362..4d939498453 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ClusterCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ClusterCLI.java @@ -36,6 +36,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.api.records.NodeAttributeInfo; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -52,6 +53,7 @@ public static final String LIST_LABELS_CMD = "list-node-labels"; public static final String DIRECTLY_ACCESS_NODE_LABEL_STORE = "directly-access-node-label-store"; + public static final String LIST_CLUSTER_ATTRIBUTES="list-node-attributes"; public static final String CMD = "cluster"; private boolean accessLocal = 
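[Not part of the patch: an illustrative express-upgrade invocation matching the new -express branch above, which checks that the specification file exists and then delegates to AppAdminClient.actionUpgradeExpress. The service name and the path to the upgraded specification file are placeholders.]

import org.apache.hadoop.yarn.client.cli.ApplicationCLI;

public class ExpressUpgradeSketch {
  public static void main(String[] args) throws Exception {
    // Shell equivalent: yarn app -upgrade my-sleeper -express /tmp/my-sleeper-v2.json
    ApplicationCLI.main(new String[] {
        "application", "-upgrade", "my-sleeper",
        "-express", "/tmp/my-sleeper-v2.json"});
  }
}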
false; static CommonNodeLabelsManager localNodeLabelsManager = null; @@ -71,6 +73,8 @@ public int run(String[] args) throws Exception { opts.addOption("lnl", LIST_LABELS_CMD, false, "List cluster node-label collection"); + opts.addOption("lna", LIST_CLUSTER_ATTRIBUTES, false, + "List cluster node-attribute collection"); opts.addOption("h", HELP_CMD, false, "Displays help for all commands."); opts.addOption("dnl", DIRECTLY_ACCESS_NODE_LABEL_STORE, false, "This is DEPRECATED, will be removed in future releases. Directly access node label store, " @@ -102,6 +106,8 @@ public int run(String[] args) throws Exception { if (parsedCli.hasOption(LIST_LABELS_CMD)) { printClusterNodeLabels(); + } else if(parsedCli.hasOption(LIST_CLUSTER_ATTRIBUTES)){ + printClusterNodeAttributes(); } else if (parsedCli.hasOption(HELP_CMD)) { printUsage(opts); return 0; @@ -112,6 +118,17 @@ public int run(String[] args) throws Exception { return 0; } + private void printClusterNodeAttributes() throws IOException, YarnException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintWriter pw = new PrintWriter( + new OutputStreamWriter(baos, Charset.forName("UTF-8"))); + for (NodeAttributeInfo attribute : client.getClusterAttributes()) { + pw.println(attribute.toString()); + } + pw.close(); + sysout.println(baos.toString("UTF-8")); + } + void printClusterNodeLabels() throws YarnException, IOException { List nodeLabels = null; if (accessLocal) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/NodeAttributesCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/NodeAttributesCLI.java new file mode 100644 index 00000000000..13d5e24c1c5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/NodeAttributesCLI.java @@ -0,0 +1,715 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
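[Not part of the patch: an illustrative call of the new node-attribute listing in ClusterCLI, equivalent to running yarn cluster -lna (long form --list-node-attributes) from the shell. It assumes ClusterCLI is driven through its own main(), as the yarn script does.]

import org.apache.hadoop.yarn.client.cli.ClusterCLI;

public class ListNodeAttributesSketch {
  public static void main(String[] args) throws Exception {
    // Prints one NodeAttributeInfo per line, as produced by
    // printClusterNodeAttributes() above.
    ClusterCLI.main(new String[] {"-lna"});
  }
}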
+ */ + +package org.apache.hadoop.yarn.client.cli; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.MissingArgumentException; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.UnrecognizedOptionException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAttributesToNodesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodeAttributesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToAttributesResponse; +import org.apache.hadoop.yarn.api.records.NodeAttribute; +import org.apache.hadoop.yarn.api.records.NodeAttributeInfo; +import org.apache.hadoop.yarn.api.records.NodeAttributeKey; +import org.apache.hadoop.yarn.api.records.NodeAttributeType; +import org.apache.hadoop.yarn.client.ClientRMProxy; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; +import org.apache.hadoop.yarn.server.api.protocolrecords.AttributeMappingOperationType; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeToAttributes; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodesToAttributesMappingRequest; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintStream; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * CLI to map attributes to Nodes. + */ +public class NodeAttributesCLI extends Configured implements Tool { + + protected static final String INVALID_MAPPING_ERR_MSG = + "Invalid Node to attribute mapping : "; + + protected static final String USAGE_YARN_NODE_ATTRIBUTES = + "Usage: yarn nodeattributes "; + + protected static final String MISSING_ARGUMENT = + "Missing argument for command"; + + protected static final String NO_MAPPING_ERR_MSG = + "No node-to-attributes mappings are specified"; + + private static final String DEFAULT_SEPARATOR = System.lineSeparator(); + public static final String INVALID_COMMAND_USAGE = "Invalid Command Usage : "; + /** + * Output stream for errors, for use in tests. 
+ */ + private PrintStream errOut = System.err; + + public NodeAttributesCLI() { + super(); + } + + protected void setErrOut(PrintStream errOut) { + this.errOut = errOut; + } + + protected AdminCommandHandler getAdminCommandHandler() { + return new AdminCommandHandler(); + } + + protected ClientCommandHandler getClientCommandHandler() { + return new ClientCommandHandler(); + } + + void printUsage(String cmd, boolean desc, CommandHandler... handlers) + throws UnsupportedEncodingException { + StringBuilder usageBuilder = new StringBuilder(); + usageBuilder.append(USAGE_YARN_NODE_ATTRIBUTES); + boolean satisfied = false; + for (CommandHandler cmdHandlers : handlers) { + satisfied |= cmdHandlers.getHelp(cmd, usageBuilder, desc); + } + if (!satisfied) { + printUsage(desc, handlers); + } else { + print(usageBuilder); + } + } + + private void printUsage(boolean desc, CommandHandler... handlers) + throws UnsupportedEncodingException { + StringBuilder usageBuilder = new StringBuilder(); + usageBuilder.append(USAGE_YARN_NODE_ATTRIBUTES); + for (CommandHandler cmdHandlers : handlers) { + cmdHandlers.getHelp(usageBuilder, desc); + } + + // append help with usage + usageBuilder.append(DEFAULT_SEPARATOR) + .append(" -help [cmd] List help of commands"); + print(usageBuilder); + } + + private void print(StringBuilder usageBuilder) + throws UnsupportedEncodingException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintWriter pw = + new PrintWriter(new OutputStreamWriter(baos, Charset.forName("UTF-8"))); + pw.write(usageBuilder.toString()); + pw.close(); + errOut.println(baos.toString("UTF-8")); + } + + private Options buildOptions(CommandHandler... handlers) { + Options opts = new Options(); + for (CommandHandler handler : handlers) { + Options handlerOpts = handler.getOptions(); + handlerOpts.getOptions().iterator() + .forEachRemaining(option -> opts.addOption((Option) option)); + } + return opts; + } + + public int run(String[] args) throws Exception { + + int exitCode = -1; + + AdminCommandHandler adminCmdHandler = getAdminCommandHandler(); + ClientCommandHandler clientCmdHandler = getClientCommandHandler(); + + // Build options + Options opts = buildOptions(adminCmdHandler, clientCmdHandler); + + if (args.length < 1) { + printUsage(false, adminCmdHandler, clientCmdHandler); + return -1; + } + + // Handle command separate + if (handleHelpCommand(args, adminCmdHandler, clientCmdHandler)) { + return 0; + } + + CommandLine cliParser; + CommandHandler handler = null; + try { + cliParser = new GnuParser().parse(opts, args); + handler = adminCmdHandler.canHandleCommand(cliParser) ? + adminCmdHandler : + clientCmdHandler.canHandleCommand(cliParser) ? 
+ clientCmdHandler : + null; + if (handler == null) { + errOut.println(INVALID_COMMAND_USAGE); + printUsage(false, adminCmdHandler, clientCmdHandler); + return exitCode; + } else { + return handler.handleCommand(cliParser); + } + } catch (UnrecognizedOptionException e) { + errOut.println(INVALID_COMMAND_USAGE); + printUsage(false, adminCmdHandler, clientCmdHandler); + return exitCode; + } catch (MissingArgumentException ex) { + errOut.println(MISSING_ARGUMENT); + printUsage(true, adminCmdHandler, clientCmdHandler); + return exitCode; + } catch (IllegalArgumentException arge) { + errOut.println(arge.getLocalizedMessage()); + // print admin command detail + printUsage(true, handler); + return exitCode; + } catch (Exception e) { + errOut.println(e.toString()); + printUsage(true, handler); + return exitCode; + } + } + + private boolean handleHelpCommand(String[] args, CommandHandler... handlers) + throws UnsupportedEncodingException { + if (args[0].equals("-help")) { + if (args.length == 2) { + printUsage(args[1], true, handlers); + } else { + printUsage(true, handlers); + } + return true; + } + return false; + } + + public static void main(String[] args) throws Exception { + int result = ToolRunner.run(new NodeAttributesCLI(), args); + System.exit(result); + } + + /** + * Abstract class for command handler. + */ + public static abstract class CommandHandler extends Configured { + + private Options options; + + private LinkedList order = new LinkedList<>(); + private String header; + + protected CommandHandler(String header) { + this(new YarnConfiguration()); + this.header = header; + } + + protected CommandHandler(Configuration conf) { + super(conf); + options = buildOptions(); + } + + public boolean canHandleCommand(CommandLine parse) { + ArrayList
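[Not part of the patch: an illustrative way to explore the new nodeattributes command. Running it with -help prints the combined usage assembled from the admin and client handlers above; note that main() exits the JVM with the tool's return code.]

import org.apache.hadoop.yarn.client.cli.NodeAttributesCLI;

public class NodeAttributesHelpSketch {
  public static void main(String[] args) throws Exception {
    // Roughly: yarn nodeattributes -help
    // Usage and per-command help are written to the error stream.
    NodeAttributesCLI.main(new String[] {"-help"});
  }
}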