commit b57e52fef7b94b49afbf3befccd23a33b798f611 Author: stack Date: Tue Dec 22 19:55:18 2015 -0800 HBASE-15021 hadoopqa doing false positives diff --git a/dev-support/zombie-detector.sh b/dev-support/zombie-detector.sh new file mode 100755 index 0000000..df4c197 --- /dev/null +++ b/dev-support/zombie-detector.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Looks for any running zombies left over from old build runs. +# Will report and try to do stack trace on stale processes so we can +# figure out how they are hung. Echoes state as the script runs +# on STDERR but prints final output on STDOUT formatted so it +# will fold into the test result formatting done by test-patch.sh. +# This script is called from test-patch.sh but also after tests +# have run up on builds.apache.org. + +# TODO: format output to suit context -- test-patch, jenkins or dev env + +#set -x +# printenv + +### Setup some variables. +bindir=$(dirname $0) + +# This key is set by our surefire configuration up in the main pom.xml +# This key needs to match the key we set up there. 
# Prefix that, joined with the BUILD_ID argument, forms the tag searched for
# in process command lines.
HBASE_BUILD_ID_KEY="hbase.build.id="
# Set to "true" by the --jenkins option.
JENKINS=

# External commands; each may be overridden from the environment or by the
# matching --*-cmd option. Note the jira default is read from $JIRA.
PS=${PS:-ps}
AWK=${AWK:-awk}
WGET=${WGET:-wget}
GREP=${GREP:-grep}
JIRACLI=${JIRA:-jira}

###############################################################################
# Print command-line help on STDOUT.
printUsage() {
  echo "Usage: $0 [options]" BUILD_ID
  echo
  echo "Where:"
  echo "  BUILD_ID is build id to look for in process listing"
  echo
  echo "Options:"
  echo "--ps-cmd=CMD     The 'ps' command to use (default 'ps')"
  echo "--awk-cmd=CMD    The 'awk' command to use (default 'awk')"
  echo "--grep-cmd=CMD   The 'grep' command to use (default 'grep')"
  echo
  echo "Jenkins-only options:"
  echo "--jenkins        Run by Jenkins (runs tests and posts results to JIRA)"
  echo "--wget-cmd=CMD   The 'wget' command to use (default 'wget')"
  echo "--jira-cmd=CMD   The 'jira' command to use (default 'jira')"
}

###############################################################################
# Parse the script's command line.
# Arguments: the script arguments; pass them as "$@" so values that contain
#            whitespace or glob characters arrive intact.
# Globals written: JENKINS, PS, AWK, WGET, GREP, JIRACLI, BUILD_ID
# Any argument that is not a recognized option is taken as BUILD_ID (the last
# such argument wins). Prints usage and exits 1 when BUILD_ID ends up empty.
parseArgs() {
  local i
  # "$@" rather than $*: keeps every argument as one word and prevents
  # pathname expansion of arguments such as '*'.
  for i in "$@"
  do
    case "$i" in
    --jenkins)
      JENKINS=true
      ;;
    --ps-cmd=*)
      PS=${i#*=}
      ;;
    --awk-cmd=*)
      AWK=${i#*=}
      ;;
    --wget-cmd=*)
      WGET=${i#*=}
      ;;
    --grep-cmd=*)
      GREP=${i#*=}
      ;;
    --jira-cmd=*)
      JIRACLI=${i#*=}
      ;;
    *)
      BUILD_ID=$i
      ;;
    esac
  done
  if [ -z "$BUILD_ID" ]; then
    printUsage
    exit 1
  fi
}

### Return list of the processes found with passed build id.
# Prints the `jps -v` lines that mention surefirebooter and contain
# ${HBASE_BUILD_TAG} (matched as a fixed string, not a regex).
# Honors the GREP override; falls back to plain grep.
find_processes () {
  jps -v \
    | "${GREP:-grep}" surefirebooter \
    | "${GREP:-grep}" -F -e "${HBASE_BUILD_TAG}"
}

### Look for zombies
# Globals read:    HBASE_BUILD_TAG, JIRA_COMMENT, PS, AWK, GREP
# Globals written: ZOMBIES, ZOMBIE_TESTS_COUNT, PIDS, PS_OUTPUT, PS_STACK,
#                  ZB_STACK (appended to)
# Output:  progress on STDERR; a JIRA-formatted verdict, prefixed with
#          $JIRA_COMMENT, on STDOUT.
# Status:  returns 0 when no zombie is confirmed.
#          NOTE: when zombies are confirmed this calls `exit 1`, which ends
#          the whole script rather than returning to the caller.
zombies () {
  local wait_secs=30
  local pid

  ZOMBIES=$(find_processes)
  if [[ -z "${ZOMBIES}" ]]; then
    ZOMBIE_TESTS_COUNT=0
  else
    # xargs strips the padding some wc implementations add.
    ZOMBIE_TESTS_COUNT=$(printf '%s\n' "${ZOMBIES}" | wc -l | xargs)
  fi

  if [[ "${ZOMBIE_TESTS_COUNT}" == 0 ]]; then
    echo "$(date) We're ok: there is no zombie test" >&2
    printf '%s\n\n    %s\n' "${JIRA_COMMENT}" \
      "{color:green}+1 zombies{color}. No zombie tests found running at the end of the build."
    return 0
  fi

  echo "$(date) Found ${ZOMBIE_TESTS_COUNT} suspicious java process(es) listed below; waiting ${wait_secs}s to see if just slow to stop" >&2
  # Quoted so the listing keeps one process per line.
  printf '%s\n' "${ZOMBIES}" >&2
  sleep "${wait_secs}"

  PIDS=$(printf '%s\n' "${ZOMBIES}" | "${AWK:-awk}" '{print $1}')
  ZOMBIE_TESTS_COUNT=0
  # Word splitting of PIDS is intended: one numeric pid per word.
  for pid in ${PIDS}
  do
    # Test our zombie still running (and that it still an hbase build item).
    # `-o args=` prints the full command line without a header, so the build
    # tag is visible and no `tail` is needed to drop a header line (the old
    # `tail +2` form is rejected by current GNU tail, and default `ps -p`
    # output on Linux shows only the executable name). `-ww` avoids truncation.
    PS_OUTPUT=$("${PS:-ps}" -ww -o args= -p "${pid}" \
      | "${GREP:-grep}" -F -e "${HBASE_BUILD_TAG}")
    if [[ -n "${PS_OUTPUT}" ]]; then
      echo "$(date) Zombie: ${PS_OUTPUT}" >&2
      ZOMBIE_TESTS_COUNT=$((ZOMBIE_TESTS_COUNT + 1))
      # First three stack frames that look like test source lines.
      PS_STACK=$(jstack "${pid}" \
        | "${GREP:-grep}" -e "\.Test" \
        | "${GREP:-grep}" -e "\.java" \
        | head -3)
      echo "${PS_STACK}" >&2
      ZB_STACK="${ZB_STACK}"$'\n'"PID=${pid} ${PS_STACK}"
    fi
  done

  if [[ "${ZOMBIE_TESTS_COUNT}" != 0 ]]; then
    echo "$(date) There are ${ZOMBIE_TESTS_COUNT} possible zombie test(s)." >&2
    # If JIRA_COMMENT in environment, append our findings to it.
    # Failure is reported as -1 and the color tag is closed with {color}.
    printf '%s\n\n    %s\n    %s\n' "${JIRA_COMMENT}" \
      "{color:red}-1 zombies{color}. There are ${ZOMBIE_TESTS_COUNT} possible zombie test(s)" \
      "${ZB_STACK}"
    # Exit with exit code of 1.
    exit 1
  fi

  echo "$(date) We're ok: there was a zombie candidate but it went away" >&2
  printf '%s\n\n    %s\n' "${JIRA_COMMENT}" \
    "{color:green}+1 zombies{color}. No zombie tests found running at the end of the build (There were candidates but they seem to have gone away)."
}

### Check if arguments to the script have been specified properly or not
parseArgs "$@"
HBASE_BUILD_TAG="${HBASE_BUILD_ID_KEY}${BUILD_ID}"
zombies
# Only reached when zombies returned normally; it exits the script itself
# with status 1 when zombies are confirmed.
RESULT=$?
# When run by Jenkins, a non-zero zombie-check result is turned into
# exit code 100.
if [[ "${JENKINS}" == "true" && "${RESULT}" != 0 ]]; then
  exit 100
fi
# RESULT is reassigned from the status of the conditional above.
RESULT=$?