diff --git hcatalog/src/test/e2e/templeton/README.txt hcatalog/src/test/e2e/templeton/README.txt index d272985..64370eb 100644 --- hcatalog/src/test/e2e/templeton/README.txt +++ hcatalog/src/test/e2e/templeton/README.txt @@ -223,3 +223,14 @@ enough map slots (10?) (mapred.tasktracker.map.tasks.maximum), otherwise test pa Adding Tests ------------ ToDo: add some guidelines + +Running on Tez +1. Set up Tez as described in http://tez.apache.org/install.html +2. Set hive.execution.engine=tez in hive-site.xml (unverified: this may be unnecessary given step 3) +3. Add hive.execution.engine=tez to templeton.hive.properties in webhcat-site.xml +4. Add the following to mapred-env.sh/yarn-env.sh (as you defined these in step 1), adjusting TEZ_JARS to your local Tez client location +export TEZ_VERSION=0.5.3 +export TEZ_JARS=/Users/ekoifman/dev/apache-tez-client-${TEZ_VERSION} +export TEZ_CONF_DIR=${TEZ_JARS}/conf +export HADOOP_CLASSPATH=${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/*:${HADOOP_CLASSPATH} +(Without this you'll see an error like "java.lang.NoClassDefFoundError: org/apache/tez/dag/api/SessionNotRunning") diff --git hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh index 8a4621f..4bff935 100755 --- hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh +++ hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh @@ -48,5 +48,17 @@ ${HADOOP_HOME}/bin/hadoop fs -put ${PIG_PIGGYBANK_PATH} webhcate2e/ ${HADOOP_HOME}/bin/hadoop fs -put /Users/ekoifman/dev/sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz /apps/templeton/sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz ${HADOOP_HOME}/bin/hadoop fs -put /Users/ekoifman/dev/mysql-connector-java-5.1.30/mysql-connector-java-5.1.30-bin.jar /apps/templeton/jdbc/mysql-connector-java.jar + +#Tez set up (http://tez.apache.org/install.html) +#if not using Tez - ignore this +${HADOOP_HOME}/bin/hdfs dfs -put /Users/ekoifman/dev/apache-tez-${TEZ_VERSION}-src/tez-dist/target/tez-${TEZ_VERSION}.tar.gz /apps/tez-${TEZ_VERSION}.tar.gz 
+${HADOOP_HOME}/bin/hdfs dfs -mkdir /tmp/tezin +${HADOOP_HOME}/bin/hdfs dfs -mkdir /tmp/tezout +${HADOOP_HOME}/bin/hdfs dfs -put /Users/ekoifman/dev/hive/build.sh /tmp/tezin +#Above line is for Sanity Check: this is to run #6 in http://tez.apache.org/install.html +#$HADOOP_HOME/bin/hadoop jar tez-examples-0.5.3.jar orderedwordcount /tmp/tezin /tmp/tezout + + + #check what got deployed -${HADOOP_HOME}/bin/hdfs dfs -ls -R /apps/templeton webhcate2e /user/templeton /user/hive/warehouse +${HADOOP_HOME}/bin/hdfs dfs -ls -R /apps webhcate2e /user/templeton /user/hive/warehouse diff --git hcatalog/src/test/e2e/templeton/deployers/env.sh hcatalog/src/test/e2e/templeton/deployers/env.sh index 4a5f286..e2dd952 100755 --- hcatalog/src/test/e2e/templeton/deployers/env.sh +++ hcatalog/src/test/e2e/templeton/deployers/env.sh @@ -22,14 +22,29 @@ # define necessary env vars here and source it in other files -export HADOOP_VERSION=2.4.1-SNAPSHOT -#export HIVE_VERSION=0.14.0-SNAPSHOT -export PIG_VERSION=0.12.2-SNAPSHOT +echo ${HADOOP_VERSION}; +#Each default below is applied only when the variable is unset or empty; the expansions are quoted so values with spaces or glob characters do not break the test +if [ -z "${HADOOP_VERSION}" ]; then + export HADOOP_VERSION=2.4.1-SNAPSHOT +fi + +if [ -z "${HIVE_VERSION}" ]; then + export HIVE_VERSION=0.14.0-SNAPSHOT +fi + +if [ -z "${PIG_VERSION}" ]; then + export PIG_VERSION=0.12.2-SNAPSHOT +fi #Root of project source tree -export PROJ_HOME=/Users/${USER}/dev/hive +if [ -z "${PROJ_HOME}" ]; then + export PROJ_HOME=/Users/${USER}/dev/hive +fi export HIVE_HOME=${PROJ_HOME}/packaging/target/apache-hive-${HIVE_VERSION}-bin/apache-hive-${HIVE_VERSION}-bin -export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION} + +if [ -z "${HADOOP_HOME}" ]; then + export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION} +fi #Make sure Pig is built for the Hadoop version you are running export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build diff --git hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm 
hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm index 1c61bcc..053723a 100644 --- hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm +++ hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm @@ -878,7 +878,7 @@ sub compare if (defined($testCmd->{'check_job_percent_complete'})) { my $pcValue = $res_hash->{'percentComplete'}; my $expectedPercentComplete = $testCmd->{'check_job_percent_complete'}; - if ( (!defined $pcValue) || $pcValue ne $expectedPercentComplete ) { + if ( (!defined $pcValue) || $pcValue !~ m/$expectedPercentComplete/ ) { print $log "check_job_percent_complete failed. got percentComplete $pcValue, expected $expectedPercentComplete"; $result = 0; } diff --git hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf index f04ea31..d4c45d5 100644 --- hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf +++ hcatalog/src/test/e2e/templeton/tests/jobsubmission.conf @@ -405,7 +405,7 @@ $cfg = { #test select a,b - 'num' => 7, + 'num' => 7,#seems to be the same as test 6 except for percent_complete check 'method' => 'POST', 'url' => ':TEMPLETON_URL:/templeton/v1/hive', 'post_options' => ['user.name=:UNAME:','execute=select count(*) from mynums', ], @@ -414,7 +414,7 @@ $cfg = 'status_code' => 200, 'check_job_created' => 1, 'check_job_complete' => 'SUCCESS', - 'check_job_percent_complete' => 'map 100% reduce 100%', + 'check_job_percent_complete' => 'map 100% reduce 100%|100% complete', 'check_job_exit_value' => 0, 'check_call_back' => 1, diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java index fb4b8e7..a7c6137 100644 --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java @@ -42,6 +42,8 @@ 
import javax.ws.rs.core.UriBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -57,6 +59,8 @@ * General utility methods. */ public class TempletonUtils { + private static final Log LOG = LogFactory.getLog(TempletonUtils.class); + /** * Is the object non-empty? */ @@ -98,6 +102,24 @@ public static boolean isset(char ch) { public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$"); //looking for map = 100%, reduce = 100% public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$"); + /** + * Hive on Tez produces progress report that looks like this + * Map 1: -/- Reducer 2: 0/1 + * Map 1: -/- Reducer 2: 0(+1)/1 + * Map 1: -/- Reducer 2: 1/1 + * + * -/- means there are no tasks (yet) + * 0/1 means 1 total tasks, 0 completed + * 1(+2)/3 means 3 total, 1 completed and 2 running + * + * HIVE-8495, in particular https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png + * has more examples. + * To report progress, we'll assume all tasks are equal size and compute "completed" as percent of "total" + * "(Map|Reducer) (\\d+:) ((-/-)|(\\d+(\\(\\+\\d+\\))?/\\d+))" is the complete pattern but we'll drop "-/-" to exclude + * groups that don't add information such as "Map 1: -/-" + */ + public static final Pattern TEZ_COMPLETE = Pattern.compile("(Map|Reducer) (\\d+:) (\\d+(\\(\\+\\d+\\))?/\\d+)"); + public static final Pattern TEZ_COUNTERS = Pattern.compile("\\d+"); /** * Extract the percent complete line from Pig or Jar jobs. 
@@ -115,6 +137,31 @@ public static String extractPercentComplete(String line) { if(hive.find()) { return "map " + hive.group(1) + " reduce " + hive.group(2); } + Matcher tez = TEZ_COMPLETE.matcher(line); + if(tez.find()) { + int totalTasks = 0; + int completedTasks = 0; + do { + //here each group looks something like "Map 2: 2/4" "Reducer 3: 1(+2)/4" + //collect every integer in the group: items[0] is the number from "Map 2", items[1] is the completed count and the last item is the total; a 4th item is present only when the "(+N)" running count appears + Matcher counts = TEZ_COUNTERS.matcher(tez.group()); + List<String> items = new ArrayList<String>(4); + while(counts.find()) { + items.add(counts.group()); + } + completedTasks += Integer.parseInt(items.get(1)); + if(items.size() == 3) { + totalTasks += Integer.parseInt(items.get(2)); + } + else { + totalTasks += Integer.parseInt(items.get(3)); + } + } while(tez.find()); + if(totalTasks == 0) { + return "0% complete (0 total tasks)"; + } + return completedTasks * 100 / totalTasks + "% complete"; + } return null; }