Index: conf/hive-env.sh.template
===================================================================
--- conf/hive-env.sh.template (revision 1183405)
+++ conf/hive-env.sh.template (working copy)
@@ -12,7 +12,11 @@
 # reducing memory usage:
 #
 # if [ "$SERVICE" = "cli" ]; then
-#   export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
+#   if [ -z "$DEBUG" ]; then
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
+#   else
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
+#   fi
 # fi
 
 # The heap size of the jvm stared by hive shell script can be controlled via:
Index: bin/ext/debug.sh
===================================================================
--- bin/ext/debug.sh (revision 0)
+++ bin/ext/debug.sh (revision 0)
@@ -0,0 +1,139 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# When doing any changes to this script, the
+# MapRedTask.configureDebugVariablesForChildJVM() possibly should
+# be updated too
+
+# Checks whether the "java" on PATH is new enough (1.5 or later) for the
+# -agentlib:jdwp=... option syntax used by get_debug_params.
+# Returns: 0 if the reported version is >= 1.5, 1 if it is older or could
+#          not be determined.
+does_jvm_support_ti(){
+  local version major minor
+  version=$(java -version 2>&1 | awk -F '"' '/version/ {print $2; exit}')
+
+  # Compare the components numerically: "<" inside [[ ]] compares strings
+  # using the current locale and can misorder versions such as "10.0.2".
+  IFS='.' read -r major minor _ <<< "$version"
+  major=${major%%[!0-9]*}
+  minor=${minor%%[!0-9]*}
+  if [[ -z "$major" ]]; then
+    return 1
+  fi
+  (( 10#$major > 1 || (10#$major == 1 && 10#${minor:-0} >= 5) ))
+}
+
+# Applies one "name=value" debug parameter.
+# Arguments: $1 - parameter, e.g. "port=8000"
+# Globals:   sets port and main_suspend; exports HADOOP_DEBUG_RECURSIVE and
+#            HADOOP_DEBUG_CHILD_JVM_SUSPEND.
+# Unknown names and parameters without a value are ignored, which leaves
+# the corresponding default in place.
+set_debug_param(){
+  local name value
+
+  # Split on the first "=" with parameter expansion; an unquoted array
+  # assignment would also glob-expand the argument.
+  case "$1" in
+    ?*=?*)
+      name=${1%%=*}
+      value=${1#*=}
+      ;;
+    *)
+      return 0
+      ;;
+  esac
+
+  case "$name" in
+    recursive)
+      export HADOOP_DEBUG_RECURSIVE="$value"
+      ;;
+    port)
+      port="address=$value"
+      ;;
+    mainSuspend)
+      main_suspend="suspend=$value"
+      ;;
+    childSuspend)
+      export HADOOP_DEBUG_CHILD_JVM_SUSPEND="suspend=$value"
+      ;;
+    *)
+      ;;
+  esac
+}
+
+# Parses the "--debug[:name=value,...]" command line argument and applies
+# every parameter found between the first and the second ":".
+# Arguments: $1 - the --debug argument exactly as given to bin/hive
+parse_debug(){
+  local list param
+  local -a params
+
+  case "$1" in
+    *:*) ;;
+    *) return 0 ;;
+  esac
+  list=${1#*:}
+  list=${list%%:*}
+
+  # IFS is changed for this one read only, so the caller's IFS is untouched
+  IFS=',' read -ra params <<< "$list"
+  for param in "${params[@]}"; do
+    set_debug_param "$param"
+  done
+}
+
+# Resets all debug settings to the defaults listed by debug_help.
+set_debug_defaults(){
+  export HADOOP_DEBUG_RECURSIVE="y"
+  port="address=8000"
+  main_suspend="suspend=y"
+  export HADOOP_DEBUG_CHILD_JVM_SUSPEND="suspend=n"
+}
+
+# Builds the JVM debug options for the main Hive JVM.
+# Arguments: $1 - the --debug argument exactly as given to bin/hive
+# Exports:   HADOOP_CLIENT_DEBUG_OPTS, HADOOP_DEBUG_RECURSIVE and
+#            HADOOP_DEBUG_CHILD_JVM_SUSPEND
+get_debug_params(){
+  set_debug_defaults
+  parse_debug "$1"
+
+  # For Debug -XX:+UseParallelGC is needed, as it is a (unfortunately not perfect)
+  # workaround for JVM 6862295 bug, that affects some JVMs still in use
+  if does_jvm_support_ti; then
+    export HADOOP_CLIENT_DEBUG_OPTS=" -XX:+UseParallelGC -agentlib:jdwp=transport=dt_socket,server=y,$port,$main_suspend"
+  else
+    export HADOOP_CLIENT_DEBUG_OPTS=" -XX:+UseParallelGC -Xdebug -Xrunjdwp:transport=dt_socket,server=y,$port,$main_suspend"
+  fi
+}
+
+# Prints the usage text for "hive --debug".
+debug_help(){
+  echo
+  echo "Allows to debug Hive by connecting to it via JDI API"
+  echo
+  echo "Usage: hive --debug[:comma-separated parameters list]"
+  echo
+  echo "Parameters:"
+  echo
+  echo "recursive=<y|n>             Should child JVMs also be started in debug mode. Default: y"
+  echo "port=<port_number>          Port on which main JVM listens for debug connection. Default: 8000"
+  echo "mainSuspend=<y|n>           Should main JVM wait with execution for the debugger to connect. Default: y"
+  echo "childSuspend=<y|n>          Should child JVMs wait with execution for the debugger to connect. Default: n"
+  echo
+}
Index: bin/ext/help.sh
===================================================================
--- bin/ext/help.sh (revision 1183405)
+++ bin/ext/help.sh (working copy)
@@ -28,6 +28,7 @@
   echo " HIVE_OPT : Hive options"
   echo "For help on a particular service:"
   echo " ./hive --service serviceName --help"
+  echo "Debug help: ./hive --debug --help"
 }
 
 help_help(){
Index: bin/hive
===================================================================
--- bin/hive (revision 1183405)
+++ bin/hive (working copy)
@@ -42,6 +42,10 @@
       HELP=_help
       shift
       ;;
+    --debug*)
+      DEBUG=$1
+      shift
+      ;;
     *)
       break
       ;;
@@ -197,6 +201,17 @@
   . $i
 done
 
+if [ "$DEBUG" ]; then
+  if [ "$HELP" ]; then
+    debug_help
+    exit 0
+  else
+    get_debug_params "$DEBUG"
+    export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS $HADOOP_CLIENT_DEBUG_OPTS"
+    unset HADOOP_CLIENT_DEBUG_OPTS
+  fi
+fi
+
 TORUN=""
 for j in $SERVICE_LIST ; do
   if [ "$j" = "$SERVICE" ] ; then
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (revision 1183405)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (working copy)
@@ -25,6 +25,8 @@
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.ContentSummary;
@@ -55,6 +57,9 @@
 
   static final String HADOOP_MEM_KEY = "HADOOP_HEAPSIZE";
   static final String HADOOP_OPTS_KEY = "HADOOP_OPTS";
+  static final String HADOOP_CLIENT_OPTS = "HADOOP_CLIENT_OPTS";
+  static final String HADOOP_DEBUG_RECURSIVE = "HADOOP_DEBUG_RECURSIVE";
+  static final String HADOOP_DEBUG_CHILD_JVM_SUSPEND = "HADOOP_DEBUG_CHILD_JVM_SUSPEND";
   static final String[] HIVE_SYS_PROP = {"build.dir", "build.dir.hive"};
 
   private transient ContentSummary inputSummary = null;
@@ -247,6 +252,11 @@
       } else {
         variables.put(HADOOP_OPTS_KEY, hadoopOpts);
       }
+
+      if (variables.containsKey(HADOOP_DEBUG_RECURSIVE)) {
+        configureDebugVariablesForChildJVM(variables);
+      }
+
       env = new String[variables.size()];
       int pos = 0;
       for (Map.Entry<String, String> entry : variables.entrySet()) {
@@ -294,6 +304,60 @@
     }
   }
 
+  /**
+   * Adjusts the debug related entries of the environment passed to the child JVM.
+   * Does nothing when HADOOP_CLIENT_OPTS is not set.
+   *
+   * If HADOOP_DEBUG_RECURSIVE is "y", the JDWP option found in HADOOP_CLIENT_OPTS
+   * loses its "address=" setting and gets its "suspend=" setting replaced by the
+   * value of HADOOP_DEBUG_CHILD_JVM_SUSPEND ("suspend=n" when that variable is
+   * missing). Otherwise the debug options are removed from HADOOP_CLIENT_OPTS.
+   *
+   * @param environmentVariables environment of the child JVM; modified in place
+   */
+  private void configureDebugVariablesForChildJVM(Map<String, String> environmentVariables) {
+    if (!environmentVariables.containsKey(HADOOP_CLIENT_OPTS)) {
+      return;
+    }
+    String hadoopClientOpts = environmentVariables.get(HADOOP_CLIENT_OPTS);
+
+    if ("y".equals(environmentVariables.get(HADOOP_DEBUG_RECURSIVE))) {
+      // swap some debug options in HADOOP_CLIENT_OPTS to those that the child JVM should have
+      Pattern p = Pattern.compile(" (-agentlib:jdwp=|-Xrunjdwp:)\\S*");
+      Matcher m = p.matcher(hadoopClientOpts);
+      if (!m.find()) {
+        // no JDWP option to adjust; m.group() would throw IllegalStateException
+        return;
+      }
+      String parentDebugOpt = m.group();
+
+      String childSuspend;
+      if (environmentVariables.containsKey(HADOOP_DEBUG_CHILD_JVM_SUSPEND)) {
+        childSuspend = environmentVariables.get(HADOOP_DEBUG_CHILD_JVM_SUSPEND);
+      } else {
+        childSuspend = "suspend=n";
+      }
+
+      String childDebugOpt = parentDebugOpt
+          .replaceAll("(,|)address=[0-9]*", "")
+          .replaceAll("suspend=.", Matcher.quoteReplacement(childSuspend));
+      // String.replace() is literal; the matched option must not be used as a regex
+      String newHadoopClientOpts = hadoopClientOpts.replace(parentDebugOpt, childDebugOpt);
+      environmentVariables.put(HADOOP_CLIENT_OPTS, newHadoopClientOpts);
+
+    } else {
+      // remove from HADOOP_CLIENT_OPTS any debug related options
+      String newHadoopClientOpts = hadoopClientOpts.replaceAll(
+          "-XX:\\+UseParallelGC (-Xdebug )?\\S*", "")
+          .trim();
+      if (newHadoopClientOpts.isEmpty()) {
+        environmentVariables.remove(HADOOP_CLIENT_OPTS);
+      } else {
+        environmentVariables.put(HADOOP_CLIENT_OPTS, newHadoopClientOpts);
+      }
+    }
+  }
+
   @Override
   public boolean mapStarted() {
     boolean b = super.mapStarted();