Index: build-common.xml =================================================================== --- build-common.xml (revision 1325085) +++ build-common.xml (working copy) @@ -30,23 +30,10 @@ - - - - - - - - - - - - - - - + + - + Index: scripts/hcat_server_install.sh =================================================================== --- scripts/hcat_server_install.sh (revision 1325085) +++ scripts/hcat_server_install.sh (working copy) @@ -108,7 +108,7 @@ # Create the needed directories in root #for dir in var conf var/log bin lib ; do -for dir in var var/log bin etc libexec sbin share src; do +for dir in var var/log bin etc libexec sbin share ; do if [ ! -d $root/$dir ] ; then mkdir $root/$dir fi @@ -117,7 +117,7 @@ # Move files into the appropriate directories if [ "$alternate_root" == "y" ] ; then echo Installing into [$root] - for dir in bin etc libexec sbin share src ; do + for dir in bin etc libexec sbin share ; do for file in ./$dir/* ; do cp -R $file $root/$dir done @@ -136,11 +136,11 @@ #done # Move the proto-hive-site.xml to hive-site.xml -cp $root/etc/hcatalog/proto-hive-site.xml $root/etc/hcatalog/hive-site.xml +#cp $root/etc/hcatalog/proto-hive-site.xml $root/etc/hcatalog/hive-site.xml # Set permissions on hive-site.xml to 700, since it will contain the password to the # database -chmod 700 $root/etc/hcatalog/hive-site.xml +#chmod 700 $root/etc/hcatalog/hive-site.xml # Write out an environment file so that the start file can use it later cat > $root/etc/hcatalog/hcat-env.sh < - - Index: src/test/e2e/hcatalog/conf/existing_deployer.conf =================================================================== --- src/test/e2e/hcatalog/conf/existing_deployer.conf (revision 1325085) +++ src/test/e2e/hcatalog/conf/existing_deployer.conf (working copy) @@ -26,7 +26,7 @@ # hadoop values 'hadoopdir' => $ENV{'PH_CLUSTER'}, - 'hcat_data_dir' => ("$ENV{'PH_HDFS_BASE'}" || '/user/hcat').'/test/data', + 'hcat_data_dir' => ("$ENV{'PH_HDFS_BASE'}" || '/user/hcat').'/tests/data', # db values # 'dbuser' => 'pigtester', Index: src/test/e2e/hcatalog/conf/rpm.conf =================================================================== --- src/test/e2e/hcatalog/conf/rpm.conf (revision 0) +++ src/test/e2e/hcatalog/conf/rpm.conf (revision 0) @@ -0,0 +1,84 @@ +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +my $me = `whoami`; +chomp $me; + +# The contents of this file can be rewritten to fit your installation. 
+# Also, you can define the following environment variables and set things up as in the test setup +# PH_CLUSTER Root directory for cluster being used +# HCAT_ROOT Root directory for hcat version being used + +my $hdfsBase = $ENV{'PH_HDFS_BASE'} || "/user/hcat"; + +$ENV{'HCAT_HOME'} = "/usr/lib/hcatalog"; +$ENV{'HADOOP_HOME'} = "/usr/lib/hadoop"; +$ENV{'HIVE_HOME'} = "/usr/lib/hive"; +$ENV{'HBASE_HOME'} = "/usr/lib/hbase"; +$ENV{'PIG_HOME'} = "/usr/lib/pig"; + +$cfg = { + #HDFS + 'inpathbase' => "$hdfsBase/tests/data" + , 'outpathbase' => "$hdfsBase/out" + + #LOCAL + , 'localinpathbase' => "$ENV{HARNESS_ROOT}/in" + , 'localoutpathbase' => "$ENV{HARNESS_ROOT}/out/log" + , 'localxmlpathbase' => "$ENV{HARNESS_ROOT}/out/xml" + , 'localpathbase' => "$ENV{HARNESS_ROOT}/out/pigtest/$me" + + #TEST + , 'benchmarkPath' => "$ENV{HARNESS_ROOT}/benchmarks" + , 'scriptPath' => "$ENV{HARNESS_ROOT}/libexec" + , 'tmpPath' => "/tmp/pigtest" + , 'jythonjar' => "$ENV{PH_JYTHON_JAR}" + , 'propertiesFile' => "./conf/testpropertiesfile.conf" + , 'funcjarPath' => "$ENV{HARNESS_ROOT}/lib/java" + + #TESTDB + , 'dbuser' => "$ENV{'PH_DBUSER'}" || 'hcattest' + , 'dbhost' => "$ENV{'PH_DBHOST'}" || 'localhost' + , 'dbpasswd' => "$ENV{'PH_DBPASSWD'}" || 'hcattest' + , 'dbdb' => "$ENV{'PH_DBDB'}" || 'hcattestdb' + + #HCAT + , 'hcathome' => $ENV{'HCAT_HOME'} + , 'hcatshare' => "$ENV{'HCAT_HOME'}/share/hcatalog" + , 'hcatlib' => "$ENV{'HCAT_HOME'}/lib" + , 'hcatconf' => "$ENV{'HCAT_HOME'}/conf" + , 'hcatbin' => "$ENV{'HCAT_HOME'}/bin/hcat" + + #PIG + , 'pighome' => $ENV{'PIG_HOME'} + , 'pigbin' => "/usr/bin/pig" + + #HADOOP + , 'hadoopconfdir' => "$ENV{'HADOOP_HOME'}/conf" + , 'hadoopbin' => "/usr/bin/hadoop" + + #HIVE + , 'hivehome' => $ENV{'HIVE_HOME'} + , 'hivelib' => "$ENV{'HIVE_HOME'}/lib" + , 'hivebin' => "/usr/bin/hive" + , 'hiveconf' => "$ENV{'HIVE_HOME'}/conf" + + #HBASE + , 'hbaseconf' => "$ENV{'HBASE_HOME'}/conf" + , 'hbaselibdir' => "$ENV{'HBASE_HOME'}/" + , 'zklibdir' => "$ENV{'HBASE_HOME'}/lib" + +}; Index: src/test/e2e/hcatalog/conf/default.conf =================================================================== --- src/test/e2e/hcatalog/conf/default.conf (revision 1325085) +++ src/test/e2e/hcatalog/conf/default.conf (working copy) @@ -19,9 +19,6 @@ # The contents of this file can be rewritten to fit your installation. 
# Also, you can define the following environment variables and set things up as in the test setup -# PH_ROOT Root directory where test harness is installed -# PH_LOCAL Root directory for input and output for local mode tests -# PH_OUT Root directory where output data will be stored (on local disk, not HDFS) # PH_CLUSTER Root directory for cluster being used # HCAT_ROOT Root directory for hcat version being used @@ -39,10 +36,12 @@ , 'localpathbase' => "$ENV{PH_LOCAL}/out/pigtest/$me" #TEST - , 'benchmarkPath' => "$ENV{PH_OUT}/benchmarks" - , 'scriptPath' => "$ENV{PH_ROOT}/libexec" + , 'benchmarkPath' => "$ENV{HARNESS_ROOT}/benchmarks" + , 'scriptPath' => "$ENV{HARNESS_ROOT}/libexec" , 'tmpPath' => "/tmp/pigtest" , 'jythonjar' => "$ENV{PH_JYTHON_JAR}" + , 'propertiesFile' => "./conf/testpropertiesfile.conf" + , 'funcjarPath' => "$ENV{HARNESS_ROOT}/lib/java" #TESTDB , 'dbuser' => "$ENV{'PH_DBUSER'}" || 'hcattest' @@ -50,38 +49,30 @@ , 'dbpasswd' => "$ENV{'PH_DBPASSWD'}" || 'hcattest' , 'dbdb' => "$ENV{'PH_DBDB'}" || 'hcattestdb' - #COMMON - , 'metastore.principal' => "$ENV{METASTORE_PRINCIPAL}" - , 'metastore_thrift' => $ENV{'PH_METASTORE_THRIFT'} - , 'thriftserver' => "$ENV{HCAT_URL}" - #HCAT - , 'hcat_data_dir' => '/user/hcat/tests/data' - , 'hivehome' => $ENV{'PH_HIVE_HOME'} - , 'hcathome' => $ENV{'HCAT_INSTALL_DIR'} - , 'hcatalog.jar' => $ENV{'HADOOP_LIBJARS'} + , 'hcathome' => $ENV{'HCAT_HOME'} + , 'hcatshare' => "$ENV{'HCAT_HOME'}/share/hcatalog" + , 'hcatlib' => "$ENV{'HCAT_HOME'}/share/hcatalog/lib" + , 'hcatconf' => "$ENV{'HCAT_HOME'}/etc/hcatalog" + , 'hcatbin' => "$ENV{'HCAT_HOME'}/bin/hcat" + #PIG - , 'testconfigpath' => "$ENV{PH_CLUSTER}" - , 'hadoopbin' => "$ENV{PH_CLUSTER_BIN}" - , 'funcjarPath' => "$ENV{PH_ROOT}/lib/java" - , 'paramPath' => "$ENV{PH_ROOT}/paramfiles" - , 'pigpath' => "$ENV{PIG_HOME}" - , 'oldpigpath' => "$ENV{PH_OLDPIG}" - , 'additionaljars' => "$ENV{PIG_ADDITIONAL_JARS}" + , 'pighome' => $ENV{'PIG_HOME'} + , 'pigbin' => "$ENV{'PIG_HOME'}/bin/pig" #HADOOP - , 'UNUSEDhadoopHome' => "$ENV{HCAT_ROOT}/lib" - , 'userhomePath' => "$ENV{HOME}" - , 'local.bin' => '/usr/bin' - , 'logDir' => "$ENV{PH_OUT}/log" - , 'propertiesFile' => "./conf/testpropertiesfile.conf" - , 'harness.console.level' => 'ERROR' + , 'hadoopconfdir' => "$ENV{'HADOOP_HOME'}/conf" + , 'hadoopbin' => "$ENV{'HADOOP_HOME'}/bin/hadoop" #HIVE - , 'hive_bin_location' => "$ENV{HIVE_ROOT}/build/dist/bin" - , 'hbaseconfigpath' => "$ENV{HBASE_CONF_DIR}" - , 'hivehome' => "$ENV{HIVE_HOME}" - , 'hive.additionaljars' => "$ENV{HCAT_JARS}" - , 'hive.conf.dir' => "$ENV{HIVE_CONF_DIR}" || "$ENV{'HCAT_INSTALL_DIR'}/etc/hcatalog" + , 'hivehome' => $ENV{'HIVE_HOME'} + , 'hivelib' => "$ENV{'HIVE_HOME'}/lib" + , 'hivebin' => "$ENV{'HIVE_HOME'}/bin/hive" + , 'hiveconf' => "$ENV{'HIVE_HOME'}/conf" + #HBASE + , 'hbaseconf' => "$ENV{'HBASE_HOME'}/conf" + , 'hbaselibdir' => "$ENV{'HBASE_HOME'}/" + , 'zklibdir' => "$ENV{'HBASE_HOME'}/lib" + }; Index: src/test/e2e/hcatalog/conf/envbased.conf =================================================================== --- src/test/e2e/hcatalog/conf/envbased.conf (revision 0) +++ src/test/e2e/hcatalog/conf/envbased.conf (revision 0) @@ -0,0 +1,78 @@ +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +my $me = `whoami`; +chomp $me; + +# The contents of this file can be rewritten to fit your installation. +# Also, you can define the following environment variables and set things up as in the test setup +# PH_CLUSTER Root directory for cluster being used +# HCAT_ROOT Root directory for hcat version being used + +my $hdfsBase = $ENV{'PH_HDFS_BASE'} || "/user/hcat"; + +$cfg = { + #HDFS + 'inpathbase' => "$hdfsBase/tests/data" + , 'outpathbase' => "$hdfsBase/out" + + #LOCAL + , 'localinpathbase' => "$ENV{HARNESS_ROOT}/in" + , 'localoutpathbase' => "$ENV{HARNESS_ROOT}/out/log" + , 'localxmlpathbase' => "$ENV{HARNESS_ROOT}/out/xml" + , 'localpathbase' => "$ENV{HARNESS_ROOT}/out/pigtest/$me" + + #TEST + , 'benchmarkPath' => "$ENV{HARNESS_ROOT}/benchmarks" + , 'scriptPath' => "$ENV{HARNESS_ROOT}/libexec" + , 'tmpPath' => "/tmp/pigtest" + , 'jythonjar' => "$ENV{PH_JYTHON_JAR}" + , 'propertiesFile' => "./conf/testpropertiesfile.conf" + , 'funcjarPath' => "$ENV{HARNESS_ROOT}/lib/java" + + #TESTDB + , 'dbuser' => "$ENV{'PH_DBUSER'}" || 'hcattest' + , 'dbhost' => "$ENV{'PH_DBHOST'}" || 'localhost' + , 'dbpasswd' => "$ENV{'PH_DBPASSWD'}" || 'hcattest' + , 'dbdb' => "$ENV{'PH_DBDB'}" || 'hcattestdb' + + #HCAT + , 'hcathome' => $ENV{'HCAT_HOME'} + , 'hcatshare' => $ENV{'HCAT_SHARE_DIR'} + , 'hcatlib' => $ENV{'HCAT_LIB_DIR'} + , 'hcatconf' => $ENV{'HCAT_CONF_DIR'} + , 'hcatbin' => $ENV{'HCAT_CMD'} + + #PIG + , 'pighome' => $ENV{'PIG_HOME'} + , 'pigbin' => "$ENV{'PIG_HOME'}/bin/pig" + + #HADOOP + , 'hadoopconfdir' => $ENV{'HADOOP_CONF_DIR'} + , 'hadoopbin' => "$ENV{'HADOOP_HOME'}/bin/hadoop" + + #HIVE + , 'hivehome' => $ENV{'HIVE_HOME'} + , 'hivelib' => "$ENV{'HIVE_HOME'}/lib" + , 'hivebin' => $ENV{'HIVE_CMD'} + , 'hiveconf' => "$ENV{'HIVE_HOME'}/conf" + + #HBASE + , 'hbaseconf' => "$ENV{'HBASE_HOME'}/conf" + , 'hbaselibdir' => "$ENV{'HBASE_HOME'}/" + , 'zklibdir' => "$ENV{'HBASE_HOME'}/lib" + +}; Index: src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm =================================================================== --- src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm (revision 1325085) +++ src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm (working copy) @@ -70,6 +70,14 @@ print $log "You must set the environment variable HADOOP_HOME"; die "HADOOP_HOME not defined"; } + if (! defined $ENV{'HCAT_HOME'} || $ENV{'HCAT_HOME'} eq "") { + print $log "You must set the environment variable HCAT_HOME"; + die "HCAT_HOME not defined"; + } + if (! 
defined $ENV{'HIVE_HOME'} || $ENV{'HIVE_HOME'} eq "") { + print $log "You must set the environment variable HIVE_HOME"; + die "HIVE_HOME not defined"; + } # Run a quick and easy Hadoop command to make sure we can Util::runHadoopCmd($cfg, $log, "fs -ls /"); @@ -171,9 +179,9 @@ ); - if (defined($cfg->{'load_hive_only'}) && $cfg->{'load_hive_only'} == 1) { - return $self->hiveMetaOnly($cfg, $log, \@tables); - } +# if (defined($cfg->{'load_hive_only'}) && $cfg->{'load_hive_only'} == 1) { +# return $self->hiveMetaOnly($cfg, $log, \@tables); +# } # Create the HDFS directories Util::runHadoopCmd($cfg, $log, "fs -mkdir $cfg->{'hcat_data_dir'}"); Index: src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm =================================================================== --- src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm (revision 1325085) +++ src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm (working copy) @@ -52,52 +52,6 @@ return $self; } -sub replaceParameters -{ -##!!! Move this to Util.pm - - my ($self, $cmd, $outfile, $testCmd, $log) = @_; - - # $self - $cmd =~ s/:LATESTOUTPUTPATH:/$self->{'latestoutputpath'}/g; - - # $outfile - $cmd =~ s/:OUTPATH:/$outfile/g; - - # $ENV - $cmd =~ s/:PIGHARNESS:/$ENV{HARNESS_ROOT}/g; - - # $testCmd - $cmd =~ s/:INPATH:/$testCmd->{'inpathbase'}/g; - $cmd =~ s/:OUTPATH:/$outfile/g; - $cmd =~ s/:FUNCPATH:/$testCmd->{'funcjarPath'}/g; - $cmd =~ s/:PIGPATH:/$testCmd->{'pigpath'}/g; - $cmd =~ s/:RUNID:/$testCmd->{'UID'}/g; - $cmd =~ s/:USRHOMEPATH:/$testCmd->{'userhomePath'}/g; - $cmd =~ s/:MAPREDJARS:/$testCmd->{'mapredjars'}/g; - $cmd =~ s/:SCRIPTHOMEPATH:/$testCmd->{'scriptPath'}/g; - $cmd =~ s/:DBUSER:/$testCmd->{'dbuser'}/g; - $cmd =~ s/:DBNAME:/$testCmd->{'dbdb'}/g; -# $cmd =~ s/:LOCALINPATH:/$testCmd->{'localinpathbase'}/g; -# $cmd =~ s/:LOCALOUTPATH:/$testCmd->{'localoutpathbase'}/g; -# $cmd =~ s/:LOCALTESTPATH:/$testCmd->{'localpathbase'}/g; - $cmd =~ s/:BMPATH:/$testCmd->{'benchmarkPath'}/g; - $cmd =~ s/:TMP:/$testCmd->{'tmpPath'}/g; - $cmd =~ s/:HDFSTMP:/tmp\/$testCmd->{'runid'}/g; - - if ( $testCmd->{'hadoopSecurity'} eq "secure" ) { - $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteSecureCluster'}/g; - } else { - $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteNotSecureCluster'}/g; - } - - $cmd =~ s/:THRIFTSERVER:/$testCmd->{'thriftserver'}/g; - $cmd =~ s/:HADOOP_CLASSPATH:/$testCmd->{'hadoop_classpath'}/g; - $cmd =~ s/:HCAT_JAR:/$testCmd->{'hcatalog.jar'}/g; - - return $cmd; -} - sub globalSetup { my ($self, $globalHash, $log) = @_; @@ -127,11 +81,8 @@ $ENV{'PATH'} = $globalHash->{'scriptPath'}; } - my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'}); + Util::runHadoopCmd($globalHash, $log, "fs -mkdir $globalHash->{'outpath'}"); - print $log "Going to run " . join(" ", @cmd) . "\n"; - IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $? - $!\n"; - IPC::Run::run(['mkdir', '-p', $globalHash->{'localpath'}], \undef, $log, $log) or die "Cannot create localpath directory " . $globalHash->{'localpath'} . " " . "$ERRNO\n"; @@ -145,10 +96,7 @@ die "Cannot create temporary directory " . $globalHash->{'tmpPath'} . " " . "$ERRNO\n"; - # Create the HDFS temporary directory - @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', "tmp/$globalHash->{'runid'}"); - print $log "Going to run " . join(" ", @cmd) . "\n"; - IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $?
- $!\n"; + Util::runHadoopCmd($globalHash, $log, "fs -mkdir tmp/$globalHash->{'runid'}"); } sub globalCleanup @@ -199,8 +147,8 @@ my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; # Copy result file out of hadoop - my @baseCmd = $self->getPigCmd($testCmd, $log); - my $testOut = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log); + my @baseCmd = Util::getPigCmd($testCmd, $log); + my $testOut = $self->postProcessSingleOutputFile($outfile, $localdir, $testCmd, $log); $outputs[$i] = $testOut; $id++; } @@ -236,18 +184,16 @@ # Build the command - my @baseCmd = $self->getPigCmd($testCmd, $log); + my @baseCmd = Util::getPigCmd($testCmd, $log); my @cmd = @baseCmd; push(@cmd, $pigfile); # Run the command - print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n"; print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n"; - IPC::Run::run(\@cmd, \undef, $log, $log) or - die "Failed running $pigfile\n"; + IPC::Run::run(\@cmd, \undef, $log, $log) or die "Failed running $pigfile\n"; $result{'rc'} = $? >> 8; @@ -255,23 +201,17 @@ my $localoutfile; my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.out"; - $outfile = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log); + $outfile = $self->postProcessSingleOutputFile($outfile, $localdir, $testCmd, $log); return $outfile; } sub postProcessSingleOutputFile { - my ($self, $outfile, $localdir, $baseCmd, $testCmd, $log) = @_; + my ($self, $outfile, $localdir, $testCmd, $log) = @_; my $subName = (caller(0))[3]; - my @baseCmd = @{$baseCmd}; - my @copyCmd = @baseCmd; - push(@copyCmd, ('-e', 'copyToLocal', $outfile, $localdir)); - print $log "$0::$className::$subName INFO: Going to run pig command: @copyCmd\n"; + Util::runHadoopCmd($testCmd, $log, "fs -copyToLocal $outfile $localdir"); - IPC::Run::run(\@copyCmd, \undef, $log, $log) or die "Cannot copy results from HDFS $outfile to $localdir\n"; - - # Sort the result if necessary. Keep the original output in one large file. # Use system not IPC run so that the '*' gets interpolated by the shell. @@ -301,8 +241,6 @@ } sub runHadoop -# Being modified from runPig -# !!! Works, but need to add other arguments, like queue...??? { my ($self, $testCmd, $log) = @_; my $subName = (caller(0))[3]; @@ -313,7 +251,13 @@ my $hadoopfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".hadoop"; my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; - my $hadoopcmd = $self->replaceParameters( $testCmd->{'hadoop'}, $outfile, $testCmd, $log ); + # Get all of the additional jars we'll need. + my $additionalJars = Util::getHBaseLibs($testCmd, $log); #hbase before hive for precedence over bundled hbase + $additionalJars .= Util::getHiveLibs($testCmd, $log); + $additionalJars .= Util::getHCatLibs($testCmd, $log); + $testCmd->{'libjars'} = $additionalJars; + $testCmd->{'libjars'} =~ s/:/,/g; + my $hadoopcmd = Util::replaceParameters( $testCmd->{'hadoop'}, $outfile, $testCmd, $log ); # adjust for the leading and trailing new line often seen in the conf file's command directives $hadoopcmd =~ s/^\s*(.*?)\s*$/\1/s; @@ -338,18 +282,14 @@ my $cp = $testCmd->{'hcatalog.jar'}; $cp =~ s/,/:/g; # Add in the hcat config file - $cp .= ":" . $testCmd->{'hive.conf.dir'}; + $cp .= ":" . $testCmd->{'hiveconf'}; + $cp .= ":" .
$additionalJars; $ENV{'HADOOP_CLASSPATH'} = $cp; - if (defined($testCmd->{'hbaseconfigpath'})) { - $ENV{'HADOOP_CLASSPATH'} = "$ENV{'HADOOP_CLASSPATH'}:$testCmd->{'hbaseconfigpath'}"; + if (defined($testCmd->{'hbaseconf'})) { + $ENV{'HADOOP_CLASSPATH'} = "$ENV{'HADOOP_CLASSPATH'}:$testCmd->{'hbaseconf'}"; } - if (defined($testCmd->{'metastore.principal'}) && ($testCmd->{'metastore.principal'} =~ m/\S+/)) { - $ENV{'HADOOP_OPTS'} = "$ENV{'HADOOP_OPTS'} -Dhive.metastore.kerberos.principal=" . $testCmd->{'metastore.principal'}; - $ENV{'HADOOP_CLIENT_OPTS'} = "-Dhive.metastore.kerberos.principal=" . $testCmd->{'metastore.principal'}; - } - # Add su user if provided if (defined($testCmd->{'run_as'})) { my $cmd = '"' . join (" ", @cmd) . '"'; @@ -373,10 +313,10 @@ die "Failed running $script\n"; my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.out"; - my @baseCmd = $self->getPigCmd($testCmd, $log); + my @baseCmd = Util::getPigCmd($testCmd, $log); if ($self->countStores($testCmd)==1) { @outputs = (); - $outputs[0] = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log); + $outputs[0] = $self->postProcessSingleOutputFile($outfile, $localdir, $testCmd, $log); $result{'outputs'} = \@outputs; } @@ -535,7 +475,7 @@ my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".pig"; my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; - my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log ); + my $pigcmd = Util::replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log ); open(FH, "> $pigfile") or die "Unable to open file $pigfile to write pig script, $ERRNO\n"; print FH $pigcmd . "\n"; @@ -543,7 +483,8 @@ # Build the command - my @baseCmd = $self->getPigCmd($testCmd, $log); + #my @baseCmd = $self->getPigCmd($testCmd, $log); + my @baseCmd = Util::getPigCmd($testCmd, $log); my @cmd = @baseCmd; # Add option -l giving location for secondary logs @@ -580,7 +521,7 @@ # single query if ($stores == 1) { if ($copyResults) { - $result{'output'} = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log); + $result{'output'} = $self->postProcessSingleOutputFile($outfile, $localdir, $testCmd, $log); $result{'originalOutput'} = "$localdir/out_original"; # populated by postProcessSingleOutputFile } else { $result{'output'} = "NO_COPY"; @@ -596,7 +537,7 @@ # Copy result file out of hadoop my $testOut; if ($copyResults) { - $testOut = $self->postProcessSingleOutputFile($localoutfile, $localdir, \@baseCmd, $testCmd, $log); + $testOut = $self->postProcessSingleOutputFile($localoutfile, $localdir, $testCmd, $log); } else { $testOut = "NO_COPY"; } @@ -615,47 +556,6 @@ return \%result; } -sub getPigCmd($$$) -{ - my ($self, $testCmd, $log) = @_; - - my @pigCmd; - - # set the PIG_CLASSPATH environment variable - my $pcp .= $testCmd->{'jythonjar'} if (defined($testCmd->{'jythonjar'})); - $pcp .= ":" . $testCmd->{'classpath'} if (defined($testCmd->{'classpath'})); - $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'})); - # Only add testconfigpath to PIG_CLASSPATH if HADOOP_HOME isn't defined - $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'}); - $pcp .= ":" . 
$testCmd->{'hbaseconfigpath'} if ($testCmd->{'exectype'} ne "local" && defined($testCmd->{'hbaseconfigpath'} && $testCmd->{'hbaseconfigpath'} ne "")); - - # Set it in our current environment. It will get inherited by the IPC::Run - # command. - $ENV{'PIG_CLASSPATH'} = $pcp; - - @pigCmd = ("$testCmd->{'pigpath'}/bin/pig"); - - if (defined($testCmd->{'additionaljars'})) { - push(@pigCmd, '-Dpig.additional.jars='.$testCmd->{'additionaljars'}); - } - - if ($testCmd->{'exectype'} eq "local") { - push(@{$testCmd->{'java_params'}}, "-Xmx1024m"); - push(@pigCmd, ("-x", "local")); - } - - my $opts .= "-Dhive.metastore.uris=$testCmd->{'thriftserver'}"; - if (defined($testCmd->{'java_params'})) { - $opts = $opts . " " . join(" ", @{$testCmd->{'java_params'}}); - } - - $ENV{'PIG_OPTS'} = $opts; - - print $log "Returning Pig command " . join(" ", @pigCmd) . "\n"; - print $log "With PIG_CLASSPATH set to " . $ENV{'PIG_CLASSPATH'} . " and PIG_OPTS set to " . $ENV{'PIG_OPTS'} . "\n"; - return @pigCmd; -} - sub compareSingleOutput { my ($self, $testResult, $testOutput, $benchmarkOutput, $log) = @_; Index: src/test/e2e/hcatalog/drivers/TestDriverHive.pm =================================================================== --- src/test/e2e/hcatalog/drivers/TestDriverHive.pm (revision 1325085) +++ src/test/e2e/hcatalog/drivers/TestDriverHive.pm (working copy) @@ -53,35 +53,11 @@ return $self; } -sub replaceParameters -{ -##!!! Move this to Util.pm - - my ($self, $cmd, $outfile, $testCmd, $log) = @_; - - # $self - $cmd =~ s/:LATESTOUTPUTPATH:/$self->{'latestoutputpath'}/g; - - # $outfile - $cmd =~ s/:OUTPATH:/$outfile/g; - - # $ENV - $cmd =~ s/:HARNESS:/$ENV{HARNESS_ROOT}/g; - - # $testCmd - $cmd =~ s/:INPATH:/$testCmd->{'inpathbase'}/g; - - return $cmd; -} - sub globalSetup { my ($self, $globalHash, $log) = @_; my $subName = (caller(0))[3]; - # Set up values for the metastore - Util::setupHiveProperties($globalHash, $log); - # Setup the output path my $me = `whoami`; chomp $me; Index: src/test/e2e/hcatalog/drivers/TestDriverHCat.pm =================================================================== --- src/test/e2e/hcatalog/drivers/TestDriverHCat.pm (revision 1325085) +++ src/test/e2e/hcatalog/drivers/TestDriverHCat.pm (working copy) @@ -53,48 +53,8 @@ return $self; } -sub replaceParameters -{ -##!!! 
Move this to Util.pm - my ($self, $cmd, $outfile, $testCmd, $log) = @_; - # $self - $cmd =~ s/:LATESTOUTPUTPATH:/$self->{'latestoutputpath'}/g; - - # $outfile - $cmd =~ s/:OUTPATH:/$outfile/g; - - # $ENV - $cmd =~ s/:PIGHARNESS:/$ENV{HARNESS_ROOT}/g; - - # $testCmd - $cmd =~ s/:INPATH:/$testCmd->{'inpathbase'}/g; - $cmd =~ s/:OUTPATH:/$outfile/g; - $cmd =~ s/:FUNCPATH:/$testCmd->{'funcjarPath'}/g; - $cmd =~ s/:PIGPATH:/$testCmd->{'pigpath'}/g; - $cmd =~ s/:RUNID:/$testCmd->{'UID'}/g; - $cmd =~ s/:USRHOMEPATH:/$testCmd->{'userhomePath'}/g; - $cmd =~ s/:MAPREDJARS:/$testCmd->{'mapredjars'}/g; - $cmd =~ s/:SCRIPTHOMEPATH:/$testCmd->{'scriptPath'}/g; - $cmd =~ s/:DBUSER:/$testCmd->{'dbuser'}/g; - $cmd =~ s/:DBNAME:/$testCmd->{'dbdb'}/g; -# $cmd =~ s/:LOCALINPATH:/$testCmd->{'localinpathbase'}/g; -# $cmd =~ s/:LOCALOUTPATH:/$testCmd->{'localoutpathbase'}/g; -# $cmd =~ s/:LOCALTESTPATH:/$testCmd->{'localpathbase'}/g; - $cmd =~ s/:BMPATH:/$testCmd->{'benchmarkPath'}/g; - $cmd =~ s/:TMP:/$testCmd->{'tmpPath'}/g; - $cmd =~ s/:HDFSTMP:/tmp\/$testCmd->{'runid'}/g; - - if ( $testCmd->{'hadoopSecurity'} eq "secure" ) { - $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteSecureCluster'}/g; - } else { - $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteNotSecureCluster'}/g; - } - - return $cmd; -} - sub globalSetup { my ($self, $globalHash, $log) = @_; @@ -152,7 +112,7 @@ my $subName = (caller(0))[3]; my %result; my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; - my $hcatCmd = $self->replaceParameters( $testCmd->{'hcat'}, $outfile, $testCmd, $log); + my $hcatCmd = Util::replaceParameters( $testCmd->{'hcat'}, $outfile, $testCmd, $log); my $outdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; my ($stdoutfile, $stderrfile); Index: src/test/e2e/hcatalog/drivers/TestDriverPig.pm =================================================================== --- src/test/e2e/hcatalog/drivers/TestDriverPig.pm (revision 1325085) +++ src/test/e2e/hcatalog/drivers/TestDriverPig.pm (working copy) @@ -52,48 +52,6 @@ return $self; } -sub replaceParameters -{ -##!!! 
Move this to Util.pm - - my ($self, $cmd, $outfile, $testCmd, $log) = @_; - - # $self - $cmd =~ s/:LATESTOUTPUTPATH:/$self->{'latestoutputpath'}/g; - - # $outfile - $cmd =~ s/:OUTPATH:/$outfile/g; - - # $ENV - $cmd =~ s/:PIGHARNESS:/$ENV{HARNESS_ROOT}/g; - - # $testCmd - $cmd =~ s/:INPATH:/$testCmd->{'inpathbase'}/g; - $cmd =~ s/:OUTPATH:/$outfile/g; - $cmd =~ s/:FUNCPATH:/$testCmd->{'funcjarPath'}/g; - $cmd =~ s/:PIGPATH:/$testCmd->{'pigpath'}/g; - $cmd =~ s/:RUNID:/$testCmd->{'UID'}/g; - $cmd =~ s/:USRHOMEPATH:/$testCmd->{'userhomePath'}/g; - $cmd =~ s/:MAPREDJARS:/$testCmd->{'mapredjars'}/g; - $cmd =~ s/:SCRIPTHOMEPATH:/$testCmd->{'scriptPath'}/g; - $cmd =~ s/:DBUSER:/$testCmd->{'dbuser'}/g; - $cmd =~ s/:DBNAME:/$testCmd->{'dbdb'}/g; -# $cmd =~ s/:LOCALINPATH:/$testCmd->{'localinpathbase'}/g; -# $cmd =~ s/:LOCALOUTPATH:/$testCmd->{'localoutpathbase'}/g; -# $cmd =~ s/:LOCALTESTPATH:/$testCmd->{'localpathbase'}/g; - $cmd =~ s/:BMPATH:/$testCmd->{'benchmarkPath'}/g; - $cmd =~ s/:TMP:/$testCmd->{'tmpPath'}/g; - $cmd =~ s/:HDFSTMP:/tmp\/$testCmd->{'runid'}/g; - - if ( $testCmd->{'hadoopSecurity'} eq "secure" ) { - $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteSecureCluster'}/g; - } else { - $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteNotSecureCluster'}/g; - } - - return $cmd; -} - sub globalSetup { my ($self, $globalHash, $log) = @_; @@ -123,7 +81,7 @@ $ENV{'PATH'} = $globalHash->{'scriptPath'}; } - my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'}); + my @cmd = (Util::getPigCmd($globalHash, $log), '-e', 'mkdir', $globalHash->{'outpath'}); print $log "Going to run " . join(" ", @cmd) . "\n"; @@ -143,7 +101,7 @@ " " . "$ERRNO\n"; # Create the HDFS temporary directory - @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'mkdir', "tmp/$globalHash->{'runid'}"); + @cmd = (Util::getPigCmd($globalHash, $log), '-e', 'mkdir', "tmp/$globalHash->{'runid'}"); print $log "Going to run " . join(" ", @cmd) . "\n"; IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $? - $!\n"; } @@ -157,7 +115,7 @@ " " . "$ERRNO\n"; # Cleanup the HDFS temporary directory - my @cmd = ($self->getPigCmd($globalHash, $log), '-e', 'fs', '-rmr', "tmp/$globalHash->{'runid'}"); + my @cmd = (Util::getPigCmd($globalHash, $log), '-e', 'fs', '-rmr', "tmp/$globalHash->{'runid'}"); print $log "Going to run " . join(" ", @cmd) . "\n"; IPC::Run::run(\@cmd, \undef, $log, $log) or die "Cannot create HDFS directory " . $globalHash->{'outpath'} . ": $? - $!\n"; } @@ -221,7 +179,7 @@ my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; # Copy result file out of hadoop - my @baseCmd = $self->getPigCmd($testCmd, $log); + my @baseCmd = Util::getPigCmd($testCmd, $log); my $testOut = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log); $outputs[$i] = $testOut; $id++; @@ -264,22 +222,16 @@ } # Write the pig script to a file. - my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log ); + my $pigcmd = Util::replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log ); open(FH, "> $pigfile") or die "Unable to open file $pigfile to write pig script, $ERRNO\n"; print FH $pigcmd . "\n"; close(FH); # Build the command - my @baseCmd = $self->getPigCmd($testCmd, $log); + my @baseCmd = Util::getPigCmd($testCmd, $log); my @cmd = @baseCmd; - # Add option -l giving location for secondary logs - ##!!! Should that even be here? 
- my $locallog = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".log"; - push(@cmd, "-logfile"); - push(@cmd, $locallog); - # Add pig parameters if they're provided if (defined($testCmd->{'pig_params'})) { # Processing :PARAMPATH: in parameters @@ -333,7 +285,7 @@ } # Write the script to a file - my $cmd = $self->replaceParameters( $testCmd->{'script'}, $outfile, $testCmd, $log ); + my $cmd = Util::replaceParameters( $testCmd->{'script'}, $outfile, $testCmd, $log ); open(FH, ">$script") or die "Unable to open file $script to write script, $ERRNO\n"; print FH $cmd . "\n"; @@ -363,88 +315,6 @@ } -sub getPigCmd($$$) -{ - my ($self, $testCmd, $log) = @_; - - my @pigCmd; - - # set the PIG_CLASSPATH environment variable - my $pcp .= $testCmd->{'jythonjar'} if (defined($testCmd->{'jythonjar'})); - $pcp .= ":" . $testCmd->{'classpath'} if (defined($testCmd->{'classpath'})); - $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'})); - # Only add testconfigpath to PIG_CLASSPATH if HADOOP_HOME isn't defined - $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'}); - $pcp .= ":" . $testCmd->{'hbaseconfigpath'} if ($testCmd->{'exectype'} ne "local" && defined($testCmd->{'hbaseconfigpath'} && $testCmd->{'hbaseconfigpath'} ne "")); - - # Set it in our current environment. It will get inherited by the IPC::Run - # command. - $ENV{'PIG_CLASSPATH'} = $pcp; - - @pigCmd = ("$testCmd->{'pigpath'}/bin/pig"); - - if (defined($testCmd->{'additionaljars'})) { - push(@pigCmd, '-Dpig.additional.jars='.$testCmd->{'additionaljars'}); - } - - if ($testCmd->{'exectype'} eq "local") { - push(@{$testCmd->{'java_params'}}, "-Xmx1024m"); - push(@pigCmd, ("-x", "local")); - } - - my $opts .= "-Dhive.metastore.uris=$testCmd->{'thriftserver'}"; - if (defined($testCmd->{'java_params'})) { - $opts = $opts . " " . join(" ", @{$testCmd->{'java_params'}}); - } - - $ENV{'PIG_OPTS'} = $ENV{'PIG_OPTS'} . " " . $opts; - - print $log "Returning Pig command " . join(" ", @pigCmd) . "\n"; - print $log "With PIG_CLASSPATH set to " . $ENV{'PIG_CLASSPATH'} . " and PIG_OPTS set to " . $ENV{'PIG_OPTS'} . "\n"; - return @pigCmd; -} - -sub dumpPigTable -{ - my ($self, $testCmd, $table, $log, $id) = @_; - my $subName = (caller(0))[3]; - - my %result; - - # Write the pig script to a file. - my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . ".dump.pig"; - my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . "dump.out"; - - open(FH, "> $pigfile") or die "Unable to open file $pigfile to write pig script, $ERRNO\n"; - print FH "a = load '$table' using org.apache.hcatalog.pig.HCatLoader(); store a into '$outfile';\n"; - close(FH); - - - # Build the command - my @baseCmd = $self->getPigCmd($testCmd, $log); - my @cmd = @baseCmd; - - push(@cmd, $pigfile); - - - # Run the command - print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n"; - print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n"; - - IPC::Run::run(\@cmd, \undef, $log, $log) or - die "Failed running $pigfile\n"; - $result{'rc'} = $? >> 8; - - - # Get results from the command locally - my $localoutfile; - my $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . 
".dump.out"; - my $stores = $self->countStores($testCmd); - - $outfile = $self->postProcessSingleOutputFile($outfile, $localdir, \@baseCmd, $testCmd, $log); - return $outfile; -} - sub runPig { my ($self, $testCmd, $log, $copyResults, $noFailOnFail) = @_; @@ -456,7 +326,7 @@ my $pigfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".pig"; my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; - my $pigcmd = $self->replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log ); + my $pigcmd = Util::replaceParameters( $testCmd->{'pig'}, $outfile, $testCmd, $log ); open(FH, "> $pigfile") or die "Unable to open file $pigfile to write pig script, $ERRNO\n"; print FH $pigcmd . "\n"; @@ -464,7 +334,7 @@ # Build the command - my @baseCmd = $self->getPigCmd($testCmd, $log); + my @baseCmd = Util::getPigCmd($testCmd, $log); my @cmd = @baseCmd; # Add option -l giving location for secondary logs @@ -485,8 +355,9 @@ # Run the command - print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n"; print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n"; + print $log "With PIG_CLASSPATH set to $ENV{'PIG_CLASSPATH'}\n"; + print $log "and HADOOP_HOME set to $ENV{'HADOOP_HOME'}\n"; my $runrc = IPC::Run::run(\@cmd, \undef, $log, $log); Index: src/test/e2e/hcatalog/drivers/Util.pm =================================================================== --- src/test/e2e/hcatalog/drivers/Util.pm (revision 1325085) +++ src/test/e2e/hcatalog/drivers/Util.pm (working copy) @@ -27,12 +27,14 @@ package Util; use IPC::Run qw(run); +use strict; +use English; sub prepareHCat { my ($self, $testCmd, $log) = @_; my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out"; - my $hcatCmd = $self->replaceParameters( $testCmd->{'hcat_prep'}, $outfile, $testCmd, $log); + my $hcatCmd = replaceParameters( $testCmd->{'hcat_prep'}, $outfile, $testCmd, $log); my @hivefiles = (); my @outfiles = (); @@ -50,46 +52,6 @@ Util::runHCatCmdFromFile($testCmd, $log, $hivefiles[0]); } -############################################################################## -# Sub: setupHiveProperties -# -# Assure that necessary values are set in config in order to set Hive -# Java properties. -# -# Returns: -# Nothing -sub setupHiveProperties($$) -{ - my ($cfg, $log) = @_; - - # Set up values for the metastore - if (defined($cfg->{'metastore_thrift'}) && $cfg->{'metastore_thrift'} == 1) { - if (! defined $cfg->{'metastore_host'} || $cfg->{'metastore_host'} eq "") { - print $log "When using thrift, you must set the key " . - " 'metastore_host' to the machine your metastore is on\n"; - die "metastore_host is not set in existing.conf\n"; - } - - $cfg->{'metastore_connection'} = - "jdbc:$cfg->{'metastore_db'}://$cfg->{'metastore_host'}/hivemetastoredb?createDatabaseIfNotExist=true"; - - if (! defined $cfg->{'metastore_passwd'} || $cfg->{'metastore_passwd'} eq "") { - $cfg->{'metastore_passwd'} = 'hive'; - } - - if (! 
defined $cfg->{'metastore_port'} || $cfg->{'metastore_port'} eq "") { - $cfg->{'metastore_port'} = '9933'; - } - - $cfg->{'metastore_uri'} = - "thrift://$cfg->{'metastore_host'}:$cfg->{'metastore_port'}"; - } else { - $cfg->{'metastore_connection'} = - "jdbc:derby:;databaseName=metastore_db;create=true"; - $cfg->{'metastore_driver'} = "org.apache.derby.jdbc.EmbeddedDriver"; - } -} - sub getHadoopCmd { my ( $properties ) = @_; @@ -113,7 +75,8 @@ } push (@baseCmd, $cmd); - push (@baseCmd, '--config', $properties->{'testconfigpath'}) if defined($properties->{'testconfigpath'}); + push (@baseCmd, '--config', $properties->{'hadoopconfdir'}) + if defined($properties->{'hadoopconfdir'}); return @baseCmd; } @@ -148,31 +111,11 @@ $outfile = $log if (!defined($outfile)); $errfile = $log if (!defined($errfile)); - my @cmd = ("$cfg->{'hivehome'}/bin/hive"); - - # Add all of the modified properties we want to set -# push(@cmd, "--hiveconf", "hive.metastore.uris=$cfg->{'thriftserver'}"); -# push(@cmd, "--hiveconf", "hive.metastore.local=false"); - -# if( defined($cfg->{'metastore.principal'}) && ($cfg->{'metastore.principal'} =~ m/\S+/) -# && ($cfg->{'metastore.principal'} ne '${metastore.principal}')){ -# push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=true", "--hiveconf", "hive.metastore.kerberos.principal=$cfg->{'metastore.principal'}"); -# } else { -# push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=false"); -# } - - $ENV{'HIVE_CONF_DIR'} = "$cfg->{'hive.conf.dir'}"; - - if (defined($cfg->{'hive.additionaljars'})) { - $ENV{'HIVE_AUX_JARS_PATH'} = $cfg->{'hive.additionaljars'}; - } - -# if (defined($cfg->{'hiveconf'})) { -# foreach my $hc (@{$cfg->{'hiveconf'}}) { -# push(@cmd, "--hiveconf", $hc); -# } -# } - + my @cmd = ($cfg->{'hivebin'}); + + $ENV{'HIVE_CONF_DIR'} = $cfg->{'hiveconf'}; + $ENV{'HIVE_AUX_JARS_PATH'} = $cfg->{'hcatshare'}; + if (defined($cfg->{'hivecmdargs'})) { push(@cmd, @{$cfg->{'hivecmdargs'}}); } @@ -230,26 +173,28 @@ my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_; if (!defined($ENV{'HADOOP_HOME'})) { - die "Cannot run hive when HADOOP_HOME environment variable is not set."; + die "Cannot run hcat when HADOOP_HOME environment variable is not set."; } $outfile = $log if (!defined($outfile)); $errfile = $log if (!defined($errfile)); # unset HADOOP_CLASSPATH - $ENV{'HADOOP_CLASSPATH'} = ""; - $ENV{'HADOOP_CLASSPATH'} = $cfg->{'pigjar'}; +# $ENV{'HADOOP_CLASSPATH'} = ""; + $ENV{'HADOOP_CLASSPATH'} = $cfg->{'hbaseconf'}; + $ENV{'HCAT_CLASSPATH'} = Util::getHBaseLibs($cfg, $log); my @cmd; if (defined($sql)) { - @cmd = ("$cfg->{'hcathome'}/bin/hcat", "-f", $sql); + @cmd = ("$cfg->{'hcatbin'}", "-f", $sql); } else { - @cmd = ("$cfg->{'hcathome'}/bin/hcat"); + @cmd = ("$cfg->{'hcatbin'}"); } my $envStr; for my $k (keys(%ENV)) { - $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/); + $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/ || + $k =~ /HCAT/); } $envStr .= " "; print $log "Going to run hcat command [" . join(" ", @cmd) . @@ -327,198 +272,33 @@ sub getPigCmd { - my $subName = (caller(0))[3]; - my $jarkey = shift; - my ( $properties ) = @_; - my $isPigSqlEnabled= 0; - my @baseCmd; - die "$0.$subName: null properties" if (! 
$properties ); + my ( $cfg, $log ) = @_; -show_call_stack(); #UGLY HACK for pig sql support - if ( $jarkey =~ /testsql/ ) { + my @cmd = ("$cfg->{'pigbin'}"); - $isPigSqlEnabled= 1; - $jarkey = "testjar"; - + + # sets the queue, for example "grideng" + if(defined($cfg->{'queue'})) { + push( @cmd,'-Dmapred.job.queue.name='.$cfg->{'queue'}); } - - my $cmd; - if ( $properties->{'use-pig.pl'} ) { - # The directive gives that - # 1) the 'pig' command will be called, as opposed to java - # 2) the conf file has full control over what options and parameters are - # passed to pig. - # I.e. no parameters should be passed automatically by the script here. - # - # This allows for testing of the pig script as installed, and for testin of - # the pig script's options, including error testing. - -print 'use-pig.pl?????'; - - $cmd = $properties->{'gridstack.root'} . "/pig/" . $properties->{'pigTestBuildName'} . "/bin/pig"; - if ( ! -x "$cmd" ) { - print STDERR "\n$0::$subName WARNING: Can't find pig command: $cmd\n"; - $cmd = `which pig`; - chomp $cmd; - print STDERR "$0::$subName WARNING: Instead using command: $cmd\n"; - } - die "\n$0::$subName FATAL: Pig command does not exist: $cmd\n" if ( ! -x $cmd ); - push (@baseCmd, $cmd ); - - if(defined($properties->{'additionaljars'})) { - push( @baseCmd,'-Dpig.additional.jars='.$properties->{'additionaljars'}); - } - $ENV{'PIG_CLASSPATH'}=$properties->{'additionaljars'}; - - if ( $properties->{'use-pig.pl'} eq 'raw' ) { # add _no_ arguments automatically - # !!! - return @baseCmd; - } - - } else { - $cmd="java"; - -print 'not use-pig.pl?????'; - # Set JAVA options - - # User can provide only one of - # (-c ) OR (-testjar -testconfigpath ) - # "-c " is allowed only in non local mode - if(defined($properties->{'cluster.name'})) { - # use provided cluster - @baseCmd = ($cmd, '-c', $properties->{'cluster.name'}); - } else { - die "\n$0::$subName FATAL: The jar file name must be passed in at the command line or defined in the configuration file\n" if ( !defined( $properties->{$jarkey} ) ); - die "\n$0::$subName FATAL: The jar file does not exist.\n" . $properties->{$jarkey}."\n" if ( ! -e $properties->{$jarkey} ); + my $cp = Util::getHCatLibs($cfg, $log) . Util::getHiveLibsForPig($cfg, $log) . + Util::getHBaseLibs($cfg, $log); + push(@cmd, ('-Dpig.additional.jars='. $cp)); + $cp .= ':' . $cfg->{'hiveconf'}; + $cp .= ':' . $cfg->{'hbaseconf'}; + $ENV{'PIG_CLASSPATH'} = $cp; - # use user provided jar - my $classpath; + # sets the permissions on the jobtracker for the logs + push( @cmd,'-Dmapreduce.job.acl-view-job=*'); - if (defined $properties->{'jythonjar'}) { - $classpath = "$classpath:" . $properties->{'jythonjar'}; - } - if( $properties->{'exectype'} eq "local") { - # in local mode, we should not use - # any hadoop-site.xml - $classpath= "$classpath:" . $properties->{$jarkey}; - $classpath= "$classpath:$properties->{'classpath'}" if ( defined( $properties->{'classpath'} ) ); - @baseCmd = ($cmd, '-cp', $classpath, '-Xmx1024m'); - - } else { - - # non local mode, we also need to specify - # location of hadoop-site.xml - die "\n$0::$subName FATAL: The hadoop configuration file name must be passed in at the command line or defined in the configuration file\n" - if ( !defined( $properties->{'testconfigpath'} ) ); - die "\n$0::$subName FATAL $! " . $properties->{'testconfigpath'}."\n\n" - if (! -e $properties->{'testconfigpath'} ); - $classpath= "$classpath:" .
$properties->{$jarkey}.":".$properties->{'testconfigpath'}; - $classpath= "$classpath:$properties->{'classpath'}" if ( defined( $properties->{'classpath'} ) ); - $classpath= "$classpath:$properties->{'howl.jar'}" if ( defined( $properties->{'howl.jar'} ) ); - @baseCmd = ($cmd, '-cp', $classpath ); - } - } - - # sets the queue, for exampel "grideng" - if(defined($properties->{'queue'})) { - push( @baseCmd,'-Dmapred.job.queue.name='.$properties->{'queue'}); - } - - if(defined($properties->{'additionaljars'})) { - push( @baseCmd,'-Dpig.additional.jars='.$properties->{'additionaljars'}); - } - - if( ( $isPigSqlEnabled == 1 ) ){ - - if(defined($properties->{'metadata.uri'})) { - push( @baseCmd, '-Dmetadata.uri='.$properties->{'metadata.uri'}); - } - - if(defined($properties->{'metadata.impl'})) { - push( @baseCmd, '-Dmetadata.impl='.$properties->{'metadata.impl'}); - }else{ - push( @baseCmd, '-Dmetadata.impl=org.apache.hadoop.owl.pig.metainterface.OwlPigMetaTables'); - } - } - - # Add howl support - if(defined($properties->{'howl.metastore.uri'})) { - push( @baseCmd, '-Dhowl.metastore.uri='.$properties->{'howl.metastore.uri'}); - } - - # Set local mode property - # if ( defined($properties->{'exectype'}) && $properties->{'exectype'}=~ "local" ) { - # Removed above 'if...' for Pig 8. - my $java=`which java`; - my $version=`file $java`; - if ( $version =~ '32-bit' ){ - push(@baseCmd,'-Djava.library.path='.$ENV{HADOOP_HOME}.'/lib/native/Linux-i386-32'); - } else { - push(@baseCmd,'-Djava.library.path='.$ENV{HADOOP_HOME}.'/lib/native/Linux-amd64-64'); - } - # } - - - # Add user provided java options if they exist - if (defined($properties->{'java_params'})) { - push(@baseCmd, @{$properties->{'java_params'}}); - } - - if(defined($properties->{'hod'})) { - push( @baseCmd, '-Dhod.server='); - } - - # sets the permissions on the jobtracker for the logs - push( @baseCmd,'-Dmapreduce.job.acl-view-job=*'); - - - # Add Main - push(@baseCmd, 'org.apache.pig.Main'); - # Set local mode PIG option - if ( defined($properties->{'exectype'}) && $properties->{'exectype'}=~ "local" ) { - push(@baseCmd, '-x'); - push(@baseCmd, 'local'); - } - - # Set Pig SQL options - if( ( $isPigSqlEnabled == 1 ) && defined($properties->{'metadata.uri'})) { - - if ( defined($properties->{'testoutpath'}) ) { - push( @baseCmd, '-u' ); - push( @baseCmd, $properties->{'testoutpath'} ); - } - - push( @baseCmd, '-s' ); - push( @baseCmd, '-f' ); - } - - } # end else of if use-pig.pl - - - # Add -latest or -useversion - if ( $cmd =~ 'pig$' ) { - # Add -latest, or -useversion if 'current' is not target build - if ( defined($properties->{'pigTestBuildName'})) { - if ($properties->{'pigTestBuildName'} eq 'latest') { - push(@baseCmd, '-latest'); - } elsif ($properties->{'pigTestBuildName'} ne 'current') { - push(@baseCmd, '-useversion', "$properties->{'pigTestBuildName'}"); - } - } - } elsif ( $cmd =~ 'java' ) { - - # is this ever used: ??? 
- # Add latest if it's there - if (defined($properties->{'latest'})) { - push(@baseCmd, '-latest'); - } + if ( defined($cfg->{'exectype'}) && $cfg->{'exectype'}=~ "local" ) { + push(@cmd, ('-x', 'local')); } - return @baseCmd; + return @cmd; } @@ -527,18 +307,18 @@ my $locale= shift; # $locale = "en_US.UTF-8" if ( !$locale ); $locale = "ja_JP.utf8" if ( !$locale ); - $ENV[LC_CTYPE]="$locale"; - $ENV[LC_NUMERIC]="$locale"; - $ENV[LC_TIME]="$locale"; - $ENV[LC_COLLATE]="$locale"; - $ENV[LC_MONETARY]="$locale"; - $ENV[LC_MESSAGES]="$locale"; - $ENV[LC_PAPER]="$locale"; - $ENV[LC_NAME]="$locale"; - $ENV[LC_ADDRESS]="$locale"; - $ENV[LC_TELEPHONE]="$locale"; - $ENV[LC_MEASUREMENT]="$locale"; - $ENV[LC_IDENTIFICATION]="$locale"; + $ENV{'LC_CTYPE'}="$locale"; + $ENV{'LC_NUMERIC'}="$locale"; + $ENV{'LC_TIME'}="$locale"; + $ENV{'LC_COLLATE'}="$locale"; + $ENV{'LC_MONETARY'}="$locale"; + $ENV{'LC_MESSAGES'}="$locale"; + $ENV{'LC_PAPER'}="$locale"; + $ENV{'LC_NAME'}="$locale"; + $ENV{'LC_ADDRESS'}="$locale"; + $ENV{'LC_TELEPHONE'}="$locale"; + $ENV{'LC_MEASUREMENT'}="$locale"; + $ENV{'LC_IDENTIFICATION'}="$locale"; } sub getLocaleCmd @@ -560,4 +340,154 @@ ."export LC_IDENTIFICATION=\"$locale\""; } +sub replaceParameters +{ + + my ($cmd, $outfile, $testCmd, $log) = @_; + + # $self +# $cmd =~ s/:LATESTOUTPUTPATH:/$self->{'latestoutputpath'}/g; + + # $outfile + $cmd =~ s/:OUTPATH:/$outfile/g; + + # $ENV + $cmd =~ s/:PIGHARNESS:/$ENV{HARNESS_ROOT}/g; + + # $testCmd + $cmd =~ s/:INPATH:/$testCmd->{'inpathbase'}/g; + $cmd =~ s/:OUTPATH:/$outfile/g; + $cmd =~ s/:FUNCPATH:/$testCmd->{'funcjarPath'}/g; + $cmd =~ s/:PIGPATH:/$testCmd->{'pighome'}/g; + $cmd =~ s/:RUNID:/$testCmd->{'UID'}/g; + $cmd =~ s/:USRHOMEPATH:/$testCmd->{'userhomePath'}/g; + $cmd =~ s/:MAPREDJARS:/$testCmd->{'mapredjars'}/g; + $cmd =~ s/:SCRIPTHOMEPATH:/$testCmd->{'scriptPath'}/g; + $cmd =~ s/:DBUSER:/$testCmd->{'dbuser'}/g; + $cmd =~ s/:DBNAME:/$testCmd->{'dbdb'}/g; +# $cmd =~ s/:LOCALINPATH:/$testCmd->{'localinpathbase'}/g; +# $cmd =~ s/:LOCALOUTPATH:/$testCmd->{'localoutpathbase'}/g; +# $cmd =~ s/:LOCALTESTPATH:/$testCmd->{'localpathbase'}/g; + $cmd =~ s/:BMPATH:/$testCmd->{'benchmarkPath'}/g; + $cmd =~ s/:TMP:/$testCmd->{'tmpPath'}/g; + $cmd =~ s/:HDFSTMP:/tmp\/$testCmd->{'runid'}/g; + $cmd =~ s/:HCAT_JAR:/$testCmd->{'libjars'}/g; + + if ( $testCmd->{'hadoopSecurity'} eq "secure" ) { + $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteSecureCluster'}/g; + } else { + $cmd =~ s/:REMOTECLUSTER:/$testCmd->{'remoteNotSecureCluster'}/g; + } + + return $cmd; +} + +sub getHiveLibs($$) +{ + my ($cfg, $log) = @_; + + my $cp; + opendir(LIB, $cfg->{'hivelib'}) or die "Cannot open $cfg->{'hivelib'}, $!\n"; + my @jars = readdir(LIB); + foreach (@jars) { + /\.jar$/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; + }; + } + closedir(LIB); + return $cp; +} + +# Pig needs a limited set of the Hive libs, since they include some of the same jars +# and we get version mismatches if it picks up all the libraries. +sub getHiveLibsForPig($$) +{ + my ($cfg, $log) = @_; + + my $cp; + opendir(LIB, $cfg->{'hivelib'}) or die "Cannot open $cfg->{'hivelib'}, $!\n"; + my @jars = readdir(LIB); + foreach (@jars) { + /hive-.*\.jar$/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; + }; + /libfb303.jar/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; + }; + /libthrift.jar/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; + }; + /datanucleus-.*\.jar$/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ .
':'; + }; + /jdo2-api-.*\.jar$/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; + }; + /commons-dbcp-.*\.jar$/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; + }; + /commons-pool-.*\.jar$/ && do { + $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; + }; +# /hbase-.*\.jar$/ && do { +# $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; +# }; +# /zookeeper-.*\.jar$/ && do { +# $cp .= $cfg->{'hivelib'} . '/' . $_ . ':'; +# }; + } + closedir(LIB); + return $cp; +} + +sub getHBaseLibs($$) +{ + my ($cfg, $log) = @_; + + my $cp; + opendir(LIB, $cfg->{'hbaselibdir'}) or die "Cannot open $cfg->{'hbaselibdir'}, $!\n"; + my @jars = readdir(LIB); + foreach (@jars) { + /hbase-.*\.jar$/ && do { + $cp .= $cfg->{'hbaselibdir'} . '/' . $_ . ':'; + }; + } + closedir(LIB); + opendir(LIB, $cfg->{'zklibdir'}) or die "Cannot open $cfg->{'zklibdir'}, $!\n"; + my @jars = readdir(LIB); + foreach (@jars) { + /zookeeper.*\.jar$/ && do { + $cp .= $cfg->{'zklibdir'} . '/' . $_ . ':'; + }; + } + closedir(LIB); + return $cp; +} + + +sub getHCatLibs($$) +{ + my ($cfg, $log) = @_; + + my $cp; + opendir(LIB, $cfg->{'hcatshare'}) or die "Cannot open $cfg->{'hcatshare'}, $!\n"; + my @jars = readdir(LIB); + foreach (@jars) { + /hcatalog-[0-9].*\.jar$/ && do { + $cp .= $cfg->{'hcatshare'} . '/' . $_ . ':'; + }; + } + closedir(LIB); + opendir(LIB, $cfg->{'hcatlib'}) or die "Cannot open $cfg->{'hcatlib'}, $!\n"; + my @jars = readdir(LIB); + foreach (@jars) { + /hbase-storage-handler.*\.jar$/ && do { + $cp .= $cfg->{'hcatlib'} . '/' . $_ . ':'; + }; + } + closedir(LIB); + return $cp; +} + + 1; Index: src/test/e2e/hcatalog/build.xml =================================================================== --- src/test/e2e/hcatalog/build.xml (revision 1325085) +++ src/test/e2e/hcatalog/build.xml (working copy) @@ -17,9 +17,8 @@ - - - + @@ -41,12 +40,14 @@ - - - - + + + + + + @@ -60,13 +61,15 @@ - + + - + + @@ -142,83 +145,71 @@ - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + @@ -241,28 +232,15 @@ - + - - - - - - - - - - - - - - - - - - - + + + + + + @@ -275,29 +253,21 @@ - - + - - - - - - - - - - - - - + + + + + + @@ -319,35 +289,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Index: src/docs/src/documentation/content/xdocs/site.xml =================================================================== --- src/docs/src/documentation/content/xdocs/site.xml (revision 1325085) +++ src/docs/src/documentation/content/xdocs/site.xml (working copy) @@ -40,7 +40,7 @@ - + Index: src/docs/src/documentation/content/xdocs/install.xml =================================================================== --- src/docs/src/documentation/content/xdocs/install.xml (revision 1325085) +++ src/docs/src/documentation/content/xdocs/install.xml (working copy) @@ -82,13 +82,17 @@

Database Setup

+

If you do not already have Hive installed with MySQL as its metastore database, the following will + walk you through how to do so. If you have already set this up, you can skip + this step.

+

Select a machine to install the database on. This need not be the same machine as the Thrift server, which we will set up later. For large clusters we recommend that they not be the same machine. For the purposes of these instructions we will refer to this machine as - hcatdb.acme.com

+ hivedb.acme.com

-

Install MySQL server on hcatdb.acme.com. You can obtain +

Install MySQL server on hivedb.acme.com. You can obtain packages for MySQL from MySQL's download site. We have developed and tested with versions 5.1.46 and 5.1.48. We suggest you use these versions or later. @@ -98,22 +102,25 @@ user, and replace dbpassword in the following commands with it.

mysql -u root

-

mysql> CREATE USER 'hive'@'hcatdb.acme.com' IDENTIFIED BY 'dbpassword';

+

mysql> CREATE USER 'hive'@'hivedb.acme.com' IDENTIFIED BY 'dbpassword';

mysql> CREATE DATABASE hivemetastoredb DEFAULT CHARACTER SET latin1 DEFAULT COLLATE latin1_swedish_ci;

-

mysql> GRANT ALL PRIVILEGES ON hivemetastoredb.* TO 'hive'@'hcatdb.acme.com' WITH GRANT OPTION;

+

mysql> GRANT ALL PRIVILEGES ON hivemetastoredb.* TO 'hive'@'hivedb.acme.com' WITH GRANT OPTION;

mysql> flush privileges;

mysql> quit;
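To confirm that the new account and its grants work, you can log back in as the hive user (from hivedb.acme.com) and check that the database is visible; this is only a quick sanity check and can be skipped.

mysql -u hive -p

mysql> SHOW DATABASES;

mysql> quit;

hivemetastoredb should appear in the output of SHOW DATABASES.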

-

In a temporary directory, untar the HCatalog installation tarball.

+

Use the database installation script found in the Hive package to create the + database. hive_home in the line below refers to the directory + where you have installed Hive. If you are using Hive RPMs, then this will + be /usr/lib/hive.

-

tar xzf hcatalog-0.4.0.tar.gz

+

mysql -u hive -D hivemetastoredb -hhivedb.acme.com -p < hive_home/scripts/metastore/upgrade/mysql/hive-schema-0.9.0.mysql.sql
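If the script completes without errors, the metastore schema is in place. A quick way to confirm is to list the tables it created; you should see metastore tables such as DBS and TBLS:

mysql -u hive -D hivemetastoredb -hhivedb.acme.com -p -e "SHOW TABLES;"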

-

Use the database installation script found in the package to create the - database.

-

mysql -u hive -D hivemetastoredb -hhcatdb.acme.com -p < share/hcatalog/hive/external/metastore/scripts/upgrade/mysql/hive-schema-0.8.0.mysql.sql

-

Thrift Server Setup

+

If you do not already have Hive running a metastore server using Thrift, + you can use the following instructions to set up and run one. You may skip + this step if you are already using a Hive metastore server.

+

Select a machine to install your Thrift server on. For smaller and test installations this can be the same machine as the database. For the purposes of these instructions we will refer to this machine as hcatsvr.acme.com.

Select a user to run the Thrift server as. This user should not be a human user, and must be able to act as a proxy for other users. We suggest - the name "hcat" for the user. Throughout the rest of this documentation - we will refer to this user as hcat. If necessary, add the user to + the name "hive" for the user. Throughout the rest of this documentation + we will refer to this user as hive. If necessary, add the user to hcatsvr.acme.com.

Select a root directory for your installation of HCatalog. This - directory must be owned by the hcat user. We recommend - /usr/local/hcat. If necessary, create the directory. You will - need to be the hcat user for the operations described in the remainder + directory must be owned by the hive user. We recommend + /usr/local/hive. If necessary, create the directory. You will + need to be the hive user for the operations described in the remainder of this Thrift Server Setup section.

Copy the HCatalog installation tarball into a temporary directory, and untar @@ -150,7 +157,7 @@

cd hcatalog-0.4.0

share/hcatalog/scripts/hcat_server_install.sh -r root -d dbroot -h hadoop_home -p portnum
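As an illustration only, a filled-in invocation might look like the following; the paths and port shown here are placeholders, so substitute the root, dbroot, hadoop_home, and portnum values that match your installation:

share/hcatalog/scripts/hcat_server_install.sh -r /usr/local/hive -d /usr/lib/hive/lib -h /usr/lib/hadoop -p 9933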

-

Now you need to edit your root/etc/hcatalog/hive-site.xml file. +

Now you need to edit your hive_home/conf/hive-site.xml file. Open this file in your favorite text editor. The following table shows the values you need to configure.

@@ -160,32 +167,54 @@
        Value to Set it to

+        hive.metastore.local
+        false
+
        javax.jdo.option.ConnectionURL
-        In the JDBC connection string, change DBHOSTNAME to the name
-        of the machine you put the MySQL server on.
+        jdbc:mysql://hostname/hivemetastoredb?createDatabaseIfNotExist=true
+        where hostname is the name of the machine you installed MySQL on.

+        javax.jdo.option.ConnectionDriverName
+        com.mysql.jdbc.Driver
+
+        javax.jdo.option.ConnectionUserName
+        hive
+
        javax.jdo.option.ConnectionPassword
        dbpassword value you used in setting up the MySQL server above.

+        hive.semantic.analyzer.factory.impl
+        org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory
+
+        hadoop.clientside.fs.operations
+        true
+
        hive.metastore.warehouse.dir
        The directory can be a URI or an absolute file path. If it is an absolute file path, it will be resolved to a URI by the metastore:

-- If default hdfs was specified in core-site.xml, path resolves to HDFS location.

-- Otherwise, path is resolved as local file: URI.

This setting becomes effective when creating new tables (it takes precedence over default DBS.DB_LOCATION_URI at the time of table creation).

+

You only need to set this if you have not yet configured Hive to run on your system.

        hive.metastore.uris
-        Set the hostname of your Thrift server by replacing SVRHOST
-        with the name of the machine you are installing the Thrift server on.
-        You can also change the port the Thrift server runs on by changing
-        the default value of 3306.
+        thrift://hostname:portnum where hostname is the name of the
+        machine hosting the Thrift server, and portnum is the port number
+        used above in the installation script.

+        hive.metastore.execute.setugi
+        true
+
        hive.metastore.sasl.enabled
        Set to true if you are using Kerberos security with your Hadoop cluster, false otherwise.
@@ -220,8 +249,10 @@
Starting the Server -

Start the HCatalog server by switching directories to - root and invoking sbin/hcat_server.sh start

+

To start your server, HCatalog needs to know where Hive is installed. + This is communicated by setting the environment variable HIVE_HOME + to the location where you installed Hive. Start the HCatalog server by switching directories to + root and invoking HIVE_HOME=hive_home sbin/hcat_server.sh start
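For reference, after working through the table above, the new server-side entries in hive-site.xml might look like the following sketch; hivedb.acme.com, 9083, hcatsvr.acme.com, and dbpassword are placeholders to replace with your own values:

<property>
  <name>hive.metastore.local</name>
  <value>false</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://hivedb.acme.com/hivemetastoredb?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>dbpassword</value>
</property>
<property>
  <name>hive.semantic.analyzer.factory.impl</name>
  <value>org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory</value>
</property>
<property>
  <name>hadoop.clientside.fs.operations</name>
  <value>true</value>
</property>
<property>
  <name>hive.metastore.uris</name>
  <value>thrift://hcatsvr.acme.com:9083</value>
</property>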

@@ -253,40 +284,16 @@

Copy the HCatalog installation tarball into a temporary directory, and untar it.

-

tar zxf hcatalog-version.tar.gz

+

tar zxf hcatalog-0.4.0.tar.gz

-

Now you need to edit your root/etc/hcatalog/hive-site.xml file. - Open this file in your favorite text editor. The following table shows the - values you need to configure. These values should match the values set on - the HCatalog server. Do NOT copy the configuration file - from your server installation as that contains the password to your - database, which you should not distribute to your clients.

+

Now you need to edit your hive_home/conf/hive-site.xml file. + You can use the same file as on the server, except that the value of + javax.jdo.option.ConnectionPassword should be + removed. This avoids having the password available in plain text on + all of your clients.

-        Parameter
-        Value to Set it to
-
-        hive.metastore.warehouse.dir
-        The directory can be a URI or an absolute file path. If it is an
-        absolute file path, it will be resolved to a URI by the metastore:
-
-        -- If default hdfs was specified in core-site.xml, path resolves to HDFS location.
-
-        -- Otherwise, path is resolved as local file: URI.
-
-        This setting becomes effective when creating new tables (it takes
-        precedence over default DBS.DB_LOCATION_URI at the time of table creation).
-
-        hive.metastore.uris
-        Set the hostname of your Thrift server by replacing SVRHOST
-        with the name of the machine you are installing the Thrift server on.
-        You can also change the port the Thrift server runs on by changing
-        the default value of 3306.
-

The HCatalog command line interface (CLI) can now be invoked as - root/bin/hcat.

+ HIVE_HOME=hive_home root/bin/hcat.

Index: src/docs/src/documentation/content/xdocs/cli.xml
===================================================================
--- src/docs/src/documentation/content/xdocs/cli.xml	(revision 1325085)
+++ src/docs/src/documentation/content/xdocs/cli.xml	(working copy)
@@ -26,9 +26,15 @@
Set Up -

The HCatalog command line interface (CLI) can be invoked as hcat.

+

The HCatalog command line interface (CLI) can be invoked as +HIVE_HOME=hive_home hcat_home/bin/hcat +where hive_home is the directory where Hive has been installed and +hcat_home is the directory where HCatalog has been installed.

+

If you are using BigTop's rpms or debs you can invoke the CLI by running +/usr/bin/hcat.
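As a quick usage sketch, running a single DDL command through the CLI might look like the following; the paths are illustrative, using the install locations suggested earlier:

HIVE_HOME=/usr/lib/hive /usr/local/hive/bin/hcat -e "show tables;"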

+
Property changes on: hive
___________________________________________________________________
Modified: svn:externals
   - http://svn.apache.org/repos/asf/hive/trunk external
   +

Index: hive/README
===================================================================
--- hive/README	(revision 1325085)
+++ hive/README	(working copy)
@@ -1,16 +0,0 @@
-The Hive code is pulled in via an SVN external definition. This allows
-us to have a local version of hive to build against, including an
-up to date trunk version, without needing to fork the code.
-
-The external definition is pegged to an SVN revision, so that
-changes in Hive's code base do not cause suprising changes in HCatalog.
-The SVN revision should be updated regularly.
-
-For release branches, the SVN revision should be pegged against a Hive release
-tag.
-
-The external definition can be edited by doing:
-
-svn propedit svn:externals hive
-
-in the top level directory.
Index: build.xml
===================================================================
--- build.xml	(revision 1325085)
+++ build.xml	(working copy)
@@ -44,7 +44,6 @@
@@ -69,7 +68,6 @@
@@ -150,23 +148,10 @@
@@ -529,34 +526,16 @@
@@ -569,9 +548,6 @@
@@ -594,132 +570,6 @@
Index: storage-handlers/hbase/ivy.xml
===================================================================
--- storage-handlers/hbase/ivy.xml	(revision 1325085)
+++ storage-handlers/hbase/ivy.xml	(working copy)
@@ -47,10 +47,81 @@
Index: storage-handlers/hbase/ivy/ivysettings.xml
===================================================================
--- storage-handlers/hbase/ivy/ivysettings.xml	(revision 1325085)
+++ storage-handlers/hbase/ivy/ivysettings.xml	(working copy)
@@ -1,65 +0,0 @@
Index: storage-handlers/hbase/ivy/libraries.properties
===================================================================
--- storage-handlers/hbase/ivy/libraries.properties	(revision 1325085)
+++ storage-handlers/hbase/ivy/libraries.properties	(working copy)
@@ -1,23 +0,0 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#This properties file lists the versions of the various artifacts used by hadoop and components.
-#It drives ivy and the generation of a maven POM
-
-junit.version=3.8.1
-ivy.version=2.2.0
-rats-lib.version=0.5.1
-hbase.version=0.92.0
-zookeeper.version=3.4.3
-guava.version=11.0
-jetty.version=6.1.26
-high-scale-lib.version=1.1.1
Index: storage-handlers/hbase/build.xml
===================================================================
--- storage-handlers/hbase/build.xml	(revision 1325085)
+++ storage-handlers/hbase/build.xml	(working copy)
@@ -75,7 +75,7 @@
@@ -103,29 +103,14 @@
Index: ivy/ivysettings.xml
===================================================================
--- ivy/ivysettings.xml	(revision 1325085)
+++ ivy/ivysettings.xml	(working copy)
@@ -30,12 +30,22 @@
           http://www.ibiblio.net/pub/packages/maven2
   -->
Index: ivy/libraries.properties
===================================================================
--- ivy/libraries.properties	(revision 1325085)
+++ ivy/libraries.properties	(working copy)
@@ -13,15 +13,38 @@
 #This properties file lists the versions of the various artifacts used by hadoop and components.
 #It drives ivy and the generation of a maven POM
-junit.version=3.8.1
+activemq.version=5.5.0
+antlr.version=3.0.1
+commons-cli.version=1.2
+commons-configuration.version=1.6
+commons-dbcp.version=1.4
+commons-httpclient.version=3.0.1
+commons-lang.version=2.4
+commons-logging.version=1.0.4
+commons-pool.version=1.5.4
+datanucleus-connectionpool.version=2.0.3
+datanucleus-core.version=2.0.3
+datanucleus-enhancer.version=2.0.3
+datanucleus-rdbms.version=2.0.3
+derby.version=10.4.2.0
+fb303.version=0.7.0
+guava.version=11.0
+hadoop-core.version=1.0.1
+hadoop-test.version=1.0.1
+hadoop-tools.version=1.0.1
+hbase.version=0.92.0
+high-scale-lib.version=1.1.1
+hive.version=0.9.0-SNAPSHOT
 ivy.version=2.1.0
+jackson.version=1.7.3
+javax-mgmt.version=1.1-rev-1
+jdeb.version=0.8
+jdo.version=2.3-ec
+jetty.version=6.1.26
+jms.version=1.1
+junit.version=4.10
+log4j.version=1.2.16
 pig.version=0.8.0
-commons-cli.version=1.2
-#hadoop-core.version=0.20.2 Waiting for a secure version of hadoop in maven
-hadoop-tools.version=0.20.205.0
-jms.version=1.1
-activemq.version=5.5.0
-javax-mgmt.version=1.1-rev-1
 rats-lib.version=0.5.1
-jdeb.version=0.8
-jackson.version=1.7.3
+slf4j.version=1.6.1
+zookeeper.version=3.4.3
Index: bin/hcat
===================================================================
--- bin/hcat	(revision 1325085)
+++ bin/hcat	(working copy)
@@ -106,7 +106,7 @@
 HCAT_JAR=`ls $HCAT_PREFIX/share/hcatalog/hcatalog-[0-9]*.jar`
 
 # Find the storage-handler jars.
-for jar in ${HCAT_PREFIX}/lib/*.jar ; do
+for jar in ${HCAT_PREFIX}/share/hcatalog/lib/*.jar ; do
   HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$jar
 done
Index: bin/hcat_server.sh
===================================================================
--- bin/hcat_server.sh	(revision 1325085)
+++ bin/hcat_server.sh	(working copy)
@@ -1,25 +1,27 @@
 #!/usr/bin/env bash
 # Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 bin=`which $0`
 bin=`dirname ${bin}`
 bin=`cd "$bin"; pwd`
 
+HCAT_LOG_DIR="$bin"/../var/log
+
 if [ -e "$bin/../libexec/hcat-config.sh" ]; then
   . "$bin"/../libexec/hcat-config.sh
 else
@@ -34,7 +36,7 @@
 
 function start_hcat() {
   # back ground the metastore service and record the pid
-  PID_FILE=${HCAT_PID_DIR}/hcat.pid
+  PID_FILE=${HCAT_LOG_DIR}/hcat.pid
   SLEEP_TIME_AFTER_START=15
 
   # check if service is already running, if so exit
@@ -45,7 +47,7 @@
     exit 1
   fi
 
-  HIVE_SITE_XML=${HCAT_CONF_DIR}/hive-site.xml
+  HIVE_SITE_XML=${HIVE_HOME}/conf/hive-site.xml
   if [ ! -e $HIVE_SITE_XML ]
   then
     echo "Missing hive-site.xml, expected at [$HIVE_SITE_XML]";
@@ -72,6 +74,10 @@
     AUX_CLASSPATH=${AUX_CLASSPATH}:$f
   done
 
+  for f in ${HCAT_PREFIX}/share/hcatalog/*.jar ; do
+    AUX_CLASSPATH=${AUX_CLASSPATH}:$f
+  done
+
   # echo AUX_CLASSPATH = ${AUX_CLASSPATH}
   export AUX_CLASSPATH=${AUX_CLASSPATH}
 
@@ -80,7 +86,7 @@
   export HADOOP_OPTS="${HADOOP_OPTS} -server -XX:+UseConcMarkSweepGC -XX:ErrorFile=${HCAT_LOG_DIR}/hcat_err_pid%p.log -Xloggc:${HCAT_LOG_DIR}/hcat_gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
   export HADOOP_HEAPSIZE=2048 # 8G is better if you have it
   export METASTORE_PORT=${METASTORE_PORT}
-  nohup ${HCAT_PREFIX}/bin/hive --service metastore >${HCAT_LOG_DIR}/hcat.out 2>${HCAT_LOG_DIR}/hcat.err &
+  nohup ${HIVE_HOME}/bin/hive --service metastore >${HCAT_LOG_DIR}/hcat.out 2>${HCAT_LOG_DIR}/hcat.err &
 
   PID=$!
 
@@ -105,7 +111,7 @@
 function stop_hcat() {
   SLEEP_TIME_AFTER_KILL=30
 
-  PID_FILE=${HCAT_PID_DIR}/hcat.pid
+  PID_FILE=${HCAT_LOG_DIR}/hcat.pid
   echo looking for $PID_FILE
 
   # check if service is already running, if so exit
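With the PID file relocated into the log directory as above, a quick start-and-check sequence from the installation root might look like the following sketch; /usr/local/hive and /usr/lib/hive are the illustrative install locations used earlier:

cd /usr/local/hive
HIVE_HOME=/usr/lib/hive sbin/hcat_server.sh start
cat var/log/hcat.pid     # the pid file now sits beside the logs
tail var/log/hcat.err    # any startup errors land here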