Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (revision 1187647)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (working copy)
@@ -55,6 +55,10 @@
 
   static final String HADOOP_MEM_KEY = "HADOOP_HEAPSIZE";
   static final String HADOOP_OPTS_KEY = "HADOOP_OPTS";
+  static final String HADOOP_CLIENT_OPTS = "HADOOP_CLIENT_OPTS";
+  static final String HIVE_DEBUG_RECURSIVE = "HIVE_DEBUG_RECURSIVE";
+  static final String HIVE_MAIN_CLIENT_DEBUG_OPTS = "HIVE_MAIN_CLIENT_DEBUG_OPTS";
+  static final String HIVE_CHILD_CLIENT_DEBUG_OPTS = "HIVE_CHILD_CLIENT_DEBUG_OPTS";
   static final String[] HIVE_SYS_PROP = {"build.dir", "build.dir.hive"};
 
   private transient ContentSummary inputSummary = null;
@@ -247,6 +251,11 @@
       } else {
         variables.put(HADOOP_OPTS_KEY, hadoopOpts);
       }
+
+      if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
+        configureDebugVariablesForChildJVM(variables);
+      }
+
       env = new String[variables.size()];
       int pos = 0;
       for (Map.Entry<String, String> entry : variables.entrySet()) {
@@ -294,6 +303,57 @@
     }
   }
 
+  /**
+   * Rewrites the debug-related entries of the environment handed to a child JVM.
+   *
+   * When {@code HIVE_DEBUG_RECURSIVE} is "y", the main JVM's debug options inside
+   * {@code HADOOP_CLIENT_OPTS} are replaced with the child's options; otherwise they are
+   * stripped, and {@code HADOOP_CLIENT_OPTS} is removed if nothing else remains.
+   * {@code HIVE_DEBUG_RECURSIVE} itself is always removed from the map.
+   *
+   * @param environmentVariables environment of the child process; modified in place
+   */
+  private void configureDebugVariablesForChildJVM(Map<String, String> environmentVariables) {
+    // this method contains various asserts to warn if environment variables are in a buggy state
+    assert environmentVariables.containsKey(HADOOP_CLIENT_OPTS)
+        && environmentVariables.get(HADOOP_CLIENT_OPTS) != null : HADOOP_CLIENT_OPTS
+        + " environment variable must be set when JVM in debug mode";
+
+    String hadoopClientOpts = environmentVariables.get(HADOOP_CLIENT_OPTS);
+
+    assert environmentVariables.containsKey(HIVE_MAIN_CLIENT_DEBUG_OPTS)
+        && environmentVariables.get(HIVE_MAIN_CLIENT_DEBUG_OPTS) != null : HIVE_MAIN_CLIENT_DEBUG_OPTS
+        + " environment variable must be set when JVM in debug mode";
+
+    assert hadoopClientOpts.contains(environmentVariables.get(HIVE_MAIN_CLIENT_DEBUG_OPTS)) : HADOOP_CLIENT_OPTS
+        + " environment variable must contain debugging parameters, when JVM in debugging mode";
+
+    assert "y".equals(environmentVariables.get(HIVE_DEBUG_RECURSIVE))
+        || "n".equals(environmentVariables.get(HIVE_DEBUG_RECURSIVE)) : HIVE_DEBUG_RECURSIVE
+        + " environment variable must be set to \"y\" or \"n\" when debugging";
+
+    if ("y".equals(environmentVariables.get(HIVE_DEBUG_RECURSIVE))) {
+      // swap debug options in HADOOP_CLIENT_OPTS to those that the child JVM should have
+      assert environmentVariables.containsKey(HIVE_CHILD_CLIENT_DEBUG_OPTS)
+          && environmentVariables.get(HIVE_CHILD_CLIENT_DEBUG_OPTS) != null : HIVE_CHILD_CLIENT_DEBUG_OPTS
+          + " environment variable must be set when JVM in debug mode";
+      String newHadoopClientOpts = hadoopClientOpts.replace(
+          environmentVariables.get(HIVE_MAIN_CLIENT_DEBUG_OPTS),
+          environmentVariables.get(HIVE_CHILD_CLIENT_DEBUG_OPTS));
+      environmentVariables.put(HADOOP_CLIENT_OPTS, newHadoopClientOpts);
+    } else {
+      // remove from HADOOP_CLIENT_OPTS any debug related options
+      String newHadoopClientOpts = hadoopClientOpts.replace(
+          environmentVariables.get(HIVE_MAIN_CLIENT_DEBUG_OPTS), "").trim();
+      if (newHadoopClientOpts.isEmpty()) {
+        environmentVariables.remove(HADOOP_CLIENT_OPTS);
+      } else {
+        environmentVariables.put(HADOOP_CLIENT_OPTS, newHadoopClientOpts);
+      }
+    }
+    // child JVM won't need to change debug parameters when creating its own children
+    environmentVariables.remove(HIVE_DEBUG_RECURSIVE);
+  }
+
   @Override
   public boolean mapStarted() {
     boolean b = super.mapStarted();
Index: bin/hive
===================================================================
--- bin/hive (revision 1187647)
+++ bin/hive (working copy)
@@ -42,6 +42,10 @@
       HELP=_help
       shift
       ;;
+    --debug*)
+      DEBUG=$1
+      shift
+      ;;
     *)
       break
       ;;
@@ -197,6 +201,16 @@
   . $i
 done
 
+if [ "$DEBUG" ]; then
+  if [ "$HELP" ]; then
+    debug_help
+    exit 0
+  else
+    get_debug_params "$DEBUG"
+    export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS $HIVE_MAIN_CLIENT_DEBUG_OPTS"
+  fi
+fi
+
 TORUN=""
 for j in $SERVICE_LIST ; do
   if [ "$j" = "$SERVICE" ] ; then
Index: bin/ext/debug.sh
===================================================================
--- bin/ext/debug.sh (revision 0)
+++ bin/ext/debug.sh (revision 0)
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+does_jvm_support_ti(){  # fails (1) only when `java -version` reports a version sorting before 1.5
+  version=$( java -version 2>&1 | awk -F '"' '/version/ {print $2}')
+  if [[ "$version" < "1.5" ]]; then
+    return 1
+  else
+    return 0
+  fi
+}
+
+set_debug_param(){  # applies one key=value pair from the --debug list; unknown keys are ignored
+  OIFS=$IFS
+  IFS='='
+  pair=($1)
+  case "${pair[0]}" in
+    recursive)
+      export HIVE_DEBUG_RECURSIVE="${pair[1]}"
+      ;;
+    port)
+      port="address=${pair[1]}"
+      ;;
+    mainSuspend)
+      main_suspend="suspend=${pair[1]}"
+      ;;
+    childSuspend)
+      child_suspend="suspend=${pair[1]}"
+      ;;
+    *)
+      ;;
+  esac
+  IFS=$OIFS;
+}
+
+parse_debug(){  # splits "--debug:k1=v1,k2=v2" and hands each pair to set_debug_param
+  # IFS is kept function-local so the caller's word splitting is left untouched on return
+  local IFS=':'
+  read -ra params <<< "$1"
+  IFS=','
+  for param in ${params[1]}; do
+    set_debug_param "$param"
+  done
+}
+
+set_debug_defaults(){  # values used for every parameter not overridden on the command line
+  export HIVE_DEBUG_RECURSIVE="y"
+  port="address=8000"
+  main_suspend="suspend=y"
+  child_suspend="suspend=n"
+}
+
+get_debug_params(){  # exports HIVE_MAIN/CHILD_CLIENT_DEBUG_OPTS for the --debug argument in $1
+  set_debug_defaults
+  parse_debug "$1"
+
+  # For Debug -XX:+UseParallelGC is needed, as it is a (unfortunately not perfect)
+  # workaround for JVM 6862295 bug, that affects some JVMs still in use
+  if does_jvm_support_ti; then
+    export HIVE_MAIN_CLIENT_DEBUG_OPTS=" -XX:+UseParallelGC -agentlib:jdwp=transport=dt_socket,server=y,$port,$main_suspend"
+    export HIVE_CHILD_CLIENT_DEBUG_OPTS=" -XX:+UseParallelGC -agentlib:jdwp=transport=dt_socket,server=y,$child_suspend"
+  else
+    export HIVE_MAIN_CLIENT_DEBUG_OPTS=" -XX:+UseParallelGC -Xdebug -Xrunjdwp:transport=dt_socket,server=y,$port,$main_suspend"
+    export HIVE_CHILD_CLIENT_DEBUG_OPTS=" -XX:+UseParallelGC -Xdebug -Xrunjdwp:transport=dt_socket,server=y,$child_suspend"
+  fi
+}
+
+debug_help(){  # prints usage for the --debug option
+  echo
+  echo "Allows to debug Hive by connecting to it via JDI API"
+  echo
+  echo "Usage: hive --debug[:comma-separated parameters list]"
+  echo
+  echo "Parameters:"
+  echo
+  echo "recursive=<y|n>             Should child JVMs also be started in debug mode. Default: y"
+  echo "port=<port_number>          Port on which main JVM listens for debug connection. Default: 8000"
+  echo "mainSuspend=<y|n>           Should main JVM wait with execution for the debugger to connect. Default: y"
+  echo "childSuspend=<y|n>          Should child JVMs wait with execution for the debugger to connect. Default: n"
+  echo
+}
Index: bin/ext/help.sh
===================================================================
--- bin/ext/help.sh (revision 1187647)
+++ bin/ext/help.sh (working copy)
@@ -28,6 +28,7 @@
   echo " HIVE_OPT : Hive options"
   echo "For help on a particular service:"
   echo " ./hive --service serviceName --help"
+  echo "Debug help: ./hive --debug --help"
 }
 
 help_help(){
Index: conf/hive-env.sh.template
===================================================================
--- conf/hive-env.sh.template (revision 1187647)
+++ conf/hive-env.sh.template (working copy)
@@ -12,7 +12,11 @@
 # reducing memory usage:
 #
 # if [ "$SERVICE" = "cli" ]; then
-#   export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
+#   if [ -z "$DEBUG" ]; then
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
+#   else
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
+#   fi
 # fi
 
 # The heap size of the jvm stared by hive shell script can be controlled via: