From 88eae4e06ac8b72b09d01ad633fa717db20a06f3 Mon Sep 17 00:00:00 2001 From: asreekumar Date: Thu, 20 Nov 2014 11:15:49 -0800 Subject: [PATCH] Add E2E test for templeton.mapper.memory.mb --- hcatalog/src/test/e2e/templeton/README.txt | 13 +++ hcatalog/src/test/e2e/templeton/build.xml | 29 +++++ .../config/webhcat/webhcat-site.updateConfig.xml | 118 +++++++++++++++++++++ .../templeton/deployers/deploy_e2e_artifacts.sh | 3 + hcatalog/src/test/e2e/templeton/deployers/env.sh | 1 + .../templeton/deployers/modify_webhcat_config.sh | 40 +++++++ .../templeton/deployers/restore_webhcat_config.sh | 36 +++++++ .../e2e/templeton/tests/modifyConfiguration.conf | 67 ++++++++++++ 8 files changed, 307 insertions(+) create mode 100644 hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.updateConfig.xml create mode 100755 hcatalog/src/test/e2e/templeton/deployers/modify_webhcat_config.sh create mode 100755 hcatalog/src/test/e2e/templeton/deployers/restore_webhcat_config.sh create mode 100644 hcatalog/src/test/e2e/templeton/tests/modifyConfiguration.conf diff --git a/hcatalog/src/test/e2e/templeton/README.txt b/hcatalog/src/test/e2e/templeton/README.txt index d272985..ed6d2cc 100644 --- a/hcatalog/src/test/e2e/templeton/README.txt +++ b/hcatalog/src/test/e2e/templeton/README.txt @@ -205,6 +205,19 @@ Here is the schema of the table writen in MySQL: To prevent primary key violation and sqoop import directory conflict, make sure the "PERSON" table is empty and the folder hdfs://hostname:8020/sqoopoutputdir doesn't exist before running the test. +Running updateConfig tests +-------------------------- +ant test-updateConfig -Dinpdir.hdfs= -Dtest.user.name= \ + -Dsecure.mode= -Dharness.webhdfs.url= -Dharness.templeton.url= + +This test suite is trying to verify the use of property templeton.mapper.memory.mb in webhcat-site.xml. 
+For this, an attempt is made to load more than 100MB of data from one Hive table into another Hive table, +with templeton.mapper.memory.mb set to a very low value. This is a negative test case that expects the map job to fail +due to insufficient memory. + +To run this test suite, the templeton.mapper.memory.mb property must be set to 0.01 in webhcat-site.xml. This can be done by +running modify_webhcat_config.sh in deployers/. Once the test run finishes, the change can be reverted by running restore_webhcat_config.sh. + Notes ----- It's best to set HADOOP_HOME_WARN_SUPPRESS=true everywhere you can. diff --git a/hcatalog/src/test/e2e/templeton/build.xml b/hcatalog/src/test/e2e/templeton/build.xml index ee98a99..eefbd7a 100644 --- a/hcatalog/src/test/e2e/templeton/build.xml +++ b/hcatalog/src/test/e2e/templeton/build.xml @@ -197,6 +197,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.updateConfig.xml b/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.updateConfig.xml new file mode 100644 index 0000000..54040f7 --- /dev/null +++ b/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.updateConfig.xml @@ -0,0 +1,118 @@ + + + + + + + + + + + + + templeton.hcat + ${env.HCAT_PREFIX}/bin/hcat + The path to the hcatalog executable. + + + + templeton.libjars + ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.5.jar + Jars to add to the classpath. + + + + templeton.pig.archive + hdfs:///apps/templeton/pig-${env.PIG_VERSION}.tar.gz + The path to the Pig archive. + + + templeton.pig.path + pig-${env.PIG_VERSION}.tar.gz/pig-${env.PIG_VERSION}/bin/pig + The path to the Pig executable. + + + templeton.hive.archive + hdfs:///apps/templeton/apache-hive-${env.HIVE_VERSION}-bin.tar.gz + The path to the Hive archive. 
+ + + + templeton.hive.path + apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/bin/hive + The path to the Hive executable. + + + + templeton.hive.home + apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin + The path to the Hive home within the tar. This is needed if + Hive is not installed on all nodes in the cluster and needs to be + shipped to the target node in the cluster to execute Pig job which uses + HCat, Hive query, etc. + + + templeton.hcat.home + apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/hcatalog + The path to the HCat home within the tar. This is needed if + Hive is not installed on all nodes in the cluster and needs to be + shipped to the target node in the cluster to execute Pig job which uses + HCat, Hive query, etc. + + + templeton.sqoop.archive + hdfs:///apps/templeton/sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz + This should point to Sqoop tar that will be shipped to target node executing + the actual sqoop command. If not set, Sqoop is expected to be installed on every node of the + cluster. + + + templeton.sqoop.path + sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz/sqoop-1.4.5.bin__hadoop-2.0.4-alpha/bin/sqoop + The path to the Sqoop executable. + + + templeton.sqoop.home + sqoop-1.4.5.bin__hadoop-2.0.4-alpha.tar.gz/sqoop-1.4.5.bin__hadoop-2.0.4-alpha + The path to the Sqoop home in the exploded archive. + + + + templeton.controller.mr.child.opts + -Xmx64m -Djava.net.preferIPv4Stack=true + + + + + templeton.hive.properties + hive.metastore.uris=thrift://localhost:9933,hive.metastore.sasl.enabled=false + + + templeton.mapper.memory.mb + 0.01 + Templeton controller job's Launch mapper's memory limit in + megabytes. When submitting controller job, Templeton will overwrite + mapreduce.map.memory.mb with this value. 
If empty, Templeton will + not set mapreduce.map.memory.mb when submitting the controller job, + therefore the configuration in mapred-site.xml will be used. + + + diff --git a/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh b/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh index 8a4621f..2999f55 100755 --- a/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh +++ b/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh @@ -25,6 +25,9 @@ source ./env.sh echo "Deploying artifacts to HDFS..." +# MovieLens ratings data for the updateConfig test: -f fails on HTTP errors instead of saving an error page, -o avoids unzip's overwrite prompt on re-runs. +curl -f -O http://files.grouplens.org/datasets/movielens/ml-1m.zip && unzip -o ml-1m.zip +mv ml-1m/ratings.dat ${PROJ_HOME}/hcatalog/src/test/e2e/templeton/inpdir/ratings.txt ${HADOOP_HOME}/bin/hdfs dfs -put ${PROJ_HOME}/hcatalog/src/test/e2e/templeton/inpdir/ webhcate2e #For hadoop1 we copy the same file with 2 names #$HADOOP_HOME/bin/hadoop fs -put hadoop-examples-1.2.1.jar webhcate2e/hexamples.jar diff --git a/hcatalog/src/test/e2e/templeton/deployers/env.sh b/hcatalog/src/test/e2e/templeton/deployers/env.sh index 4a5f286..2656b0e 100755 --- a/hcatalog/src/test/e2e/templeton/deployers/env.sh +++ b/hcatalog/src/test/e2e/templeton/deployers/env.sh @@ -40,3 +40,4 @@ export WEBHCAT_LOG_DIR=/tmp/webhcat_e2e/logs export WEBHCAT_PID_DIR=${WEBHCAT_LOG_DIR} #config/hive/hive-site.xml should match this path - it doesn't understand env vars export METASTORE_DB=${WEBHCAT_LOG_DIR}/wehcat_e2e_metastore_db +export CONF_BACKUP=/tmp/webhcat_e2e/conf_backup # scratch dir for the webhcat-site.xml backup; restore_webhcat_config.sh cleans it up, so it must not be a shared directory diff --git a/hcatalog/src/test/e2e/templeton/deployers/modify_webhcat_config.sh b/hcatalog/src/test/e2e/templeton/deployers/modify_webhcat_config.sh new file mode 100755 index 0000000..d46d210 --- /dev/null +++ b/hcatalog/src/test/e2e/templeton/deployers/modify_webhcat_config.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +#This script stops webhcat server, copies pre-canned webhcat-site.xml file required for updateConfig test suite to the webhcat config directory, +#then starts the webhcat server + +source ./env.sh + +echo "Stopping WebHCat..."; +${HIVE_HOME}/hcatalog/sbin/webhcat_server.sh stop; + +# Back up the live config once: only if it exists (-f; a bare [ path ] is always true) and no earlier backup would be overwritten. +if [ -f "${HIVE_HOME}/hcatalog/etc/webhcat/webhcat-site.xml" ] && [ ! -f "${CONF_BACKUP}/webhcat-site.xml" ]; then + mkdir -p "${CONF_BACKUP}"; + cp "${HIVE_HOME}/hcatalog/etc/webhcat/webhcat-site.xml" "${CONF_BACKUP}/webhcat-site.xml"; +fi + +cp "${PROJ_HOME}/hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.updateConfig.xml" "${HIVE_HOME}/hcatalog/etc/webhcat/webhcat-site.xml" + +echo "Starting WebHCat..." +${HIVE_HOME}/hcatalog/sbin/webhcat_server.sh start + +jps; diff --git a/hcatalog/src/test/e2e/templeton/deployers/restore_webhcat_config.sh b/hcatalog/src/test/e2e/templeton/deployers/restore_webhcat_config.sh new file mode 100755 index 0000000..5536545 --- /dev/null +++ b/hcatalog/src/test/e2e/templeton/deployers/restore_webhcat_config.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +#This script stops webhcat server and restores the webhcat server configurations to the one before test run + +source ./env.sh + +echo "Stopping WebHCat..."; +${HIVE_HOME}/hcatalog/sbin/webhcat_server.sh stop; + +rm -f "${HIVE_HOME}/hcatalog/etc/webhcat/webhcat-site.xml" + +# Restore only if a backup file really exists (-f; a bare [ path ] is always true); drop the backup only after a successful copy. +if [ -f "${CONF_BACKUP}/webhcat-site.xml" ]; then + cp "${CONF_BACKUP}/webhcat-site.xml" "${HIVE_HOME}/hcatalog/etc/webhcat/webhcat-site.xml" && rm -f "${CONF_BACKUP}/webhcat-site.xml" +fi + +rmdir "${CONF_BACKUP}" 2>/dev/null; # rmdir instead of rm -rf: removes the backup dir only when empty, never unrelated files + diff --git a/hcatalog/src/test/e2e/templeton/tests/modifyConfiguration.conf b/hcatalog/src/test/e2e/templeton/tests/modifyConfiguration.conf new file mode 100644 index 0000000..586b3da --- /dev/null +++ b/hcatalog/src/test/e2e/templeton/tests/modifyConfiguration.conf @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################################### +# curl command tests for templeton: updateConfig suite (templeton.mapper.memory.mb) +# Precondition per README: WebHCat runs with templeton.mapper.memory.mb=0.01 (deployers/modify_webhcat_config.sh) +# NOTE(review): the 'ratings' table reads :INPDIR_HDFS:/ratings/ - confirm the deployer uploads the data to that path + +#use Yahoo::Miners::Test::PigSetup; + +#PigSetup::setup(); + +#my $me = `whoami`; +#chomp $me; +$cfg = +{ + 'driver' => 'Curl', + + 'groups' => + [ +##============================================================================================================= + { + 'name' => 'ModifyWebhcatConfig', + 'tests' => + [ + { + #test to verify templeton.mapper.memory.mb update: setup (re)creates tables 'ratings' and 'testtable', then a Hive INSERT is submitted + 'num' => 1, + 'setup' => [ + { + 'method' => 'POST', + 'url' => ':TEMPLETON_URL:/templeton/v1/ddl', + 'status_code' => 200, + 'post_options' => ['user.name=:UNAME:','exec=drop table if exists ratings;CREATE EXTERNAL TABLE ratings(userid INT, movieid INT, ratings FLOAT, unixtime STRING) + ROW FORMAT DELIMITED FIELDS TERMINATED BY \':\' location \':INPDIR_HDFS:/ratings/\';drop table if exists testtable;create table testtable + (userid INT, movieid INT, ratings FLOAT, unixtime STRING) location \':INPDIR_HDFS:/testtable/\';'], + 'json_field_substr_match' => {'stderr' => 'OK'} + } + ], + 'method' => 'POST', + 'url' => ':TEMPLETON_URL:/templeton/v1/hive', + 'post_options' => ['user.name=:UNAME:','execute=INSERT OVERWRITE TABLE testtable SELECT * from ratings','statusdir=:OUTDIR:/status'], + 'json_field_substr_match' => { 'id' => '\d+'}, + #results: the request is accepted (200) and a job is created, but the job is expected to finish as FAILURE (negative test) + 'status_code' => 200, + 'check_job_created' => 1, + 'check_job_complete' => 'FAILURE', + } + ] + } + ] +}, + ; \ No newline at end of file -- 1.8.5.2 (Apple Git-48)