Index: src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GetAbsolutePath.java
===================================================================
--- src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GetAbsolutePath.java (revision 0)
+++ src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GetAbsolutePath.java (revision 0)
@@ -0,0 +1,29 @@
+package org.apache.hcatalog.utils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class GetAbsolutePath extends Configured implements Tool {
+    // args[0] is a separator-delimited list of paths; args[1] is the separator
+    public int run(String[] args) throws Exception {
+        String[] paths = args[0].split(args[1]);
+        String newPath = "";
+        for (int i = 0; i < paths.length; i++) {
+            // Qualify each path against its FileSystem, yielding a full hdfs:// URL
+            Path path = new Path(paths[i]);
+            newPath += path.makeQualified(path.getFileSystem(getConf())).toString();
+            if (i != paths.length - 1) {
+                newPath += args[1];
+            }
+        }
+        System.out.println(newPath);
+        return 0;
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.exit(ToolRunner.run(new Configuration(), new GetAbsolutePath(), args));
+    }
+}
Index: src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm
===================================================================
--- src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm (revision 1344030)
+++ src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm (working copy)
@@ ... @@
+    $self->shipJarsToHDFS($globalHash, $log);
 }
 
 ##############################################################################
@@ -350,4 +352,33 @@
     die "Failed running " . join(" ", @$cmd) . "\n";
 }
 
+##############################################################################
+# Ship necessary jars to HDFS so we have them for use in the distributed cache.
+#
+# Parameters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# Nothing. This method should die with an appropriate error message if
+# there is an issue.
+sub shipJarsToHDFS($$$)
+{
+    my ($self, $globalHash, $log) = @_;
+    my $additionalJars = Util::getHBaseLibs($globalHash, $log);
+    $additionalJars .= Util::getHiveLibs($globalHash, $log);
+    $additionalJars .= Util::getHCatLibs($globalHash, $log);
+    my @jars = split(/:/, $additionalJars);
+    # Ignore failure; the directory may already exist from a previous run
+    eval {
+        Util::runHadoopCmd($globalHash, $log, "fs -mkdir tmp/cache");
+    };
+    foreach my $jar (@jars) {
+        # Ignore failure; the jar may already have been copied
+        eval {
+            Util::runHadoopCmd($globalHash, $log, "fs -copyFromLocal " . $jar . " tmp/cache");
+        };
+    }
+}
+
 1;
Index: src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm
===================================================================
--- src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm (revision 1344030)
+++ src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm (working copy)
@@ -26,6 +26,7 @@
 use Digest::MD5 qw(md5_hex);
 use Util;
 use File::Path;
+use File::Basename;
 use Cwd;
 use English;
 
@@ -255,8 +256,21 @@
     my $additionalJars = Util::getHBaseLibs($testCmd, $log); #hbase before hive for precedence over bundled hbase
     $additionalJars .= Util::getHiveLibs($testCmd, $log);
     $additionalJars .= Util::getHCatLibs($testCmd, $log);
-    $testCmd->{'libjars'} = $additionalJars;
-    $testCmd->{'libjars'} =~ s/:/,/g;
+    my @jars = split(/:/, $additionalJars);
+    my $hdfsJars = "";
+    foreach my $jar (@jars) {
+        # The deployer shipped these jars to tmp/cache on HDFS
+        my $basename = basename( $jar );
+        $hdfsJars .= "tmp/cache/" . $basename . ",";
+    }
+    # Run the GetAbsolutePath tool to expand the relative HDFS paths into
+    # fully qualified hdfs:// URLs; the result is the last line of path.out
+    open(my $fh, ">", "path.out") or die "Cannot open path.out: $!";
+    Util::runHadoopCmd($testCmd, $fh, "jar lib/java/testudf.jar org.apache.hcatalog.utils.GetAbsolutePath " . $hdfsJars . " ,");
" ,"); + $hdfsJars = `tail -1 path.out`; + chomp $hdfsJars; + + $testCmd->{'libjars'} = $hdfsJars; my $hadoopcmd = Util::replaceParameters( $testCmd->{'hadoop'}, $outfile, $testCmd, $log ); # adjust for the leading and trailing new line often seen in the conf file's command directives Index: src/docs/src/documentation/content/xdocs/inputoutput.xml =================================================================== --- src/docs/src/documentation/content/xdocs/inputoutput.xml (revision 1344030) +++ src/docs/src/documentation/content/xdocs/inputoutput.xml (working copy) @@ -169,6 +169,28 @@ <main_class> -libjars $LIB_JARS <program_arguments> +

+<p>This works, but Hadoop will ship the libjars every time you run the MapReduce program, which is inefficient and may fill up the Hadoop distributed cache. You can optimize this by copying the libjars to HDFS once and referencing them by their HDFS locations; Hadoop will then reuse the entries already in the distributed cache across runs.</p>

+<source>
+bin/hadoop fs -copyFromLocal $HCAT_HOME/share/hcatalog/hcatalog-0.4.0.jar /tmp
+bin/hadoop fs -copyFromLocal $HIVE_HOME/lib/hive-metastore-0.9.0.jar /tmp
+bin/hadoop fs -copyFromLocal $HIVE_HOME/lib/libthrift-0.7.0.jar /tmp
+bin/hadoop fs -copyFromLocal $HIVE_HOME/lib/hive-exec-0.9.0.jar /tmp
+bin/hadoop fs -copyFromLocal $HIVE_HOME/lib/libfb303-0.7.0.jar /tmp
+bin/hadoop fs -copyFromLocal $HIVE_HOME/lib/jdo2-api-2.3-ec.jar /tmp
+bin/hadoop fs -copyFromLocal $HIVE_HOME/lib/slf4j-api-1.6.1.jar /tmp
+
+export LIB_JARS=hdfs:///tmp/hcatalog-0.4.0.jar,hdfs:///tmp/hive-metastore-0.9.0.jar,hdfs:///tmp/libthrift-0.7.0.jar,hdfs:///tmp/hive-exec-0.9.0.jar,hdfs:///tmp/libfb303-0.7.0.jar,hdfs:///tmp/jdo2-api-2.3-ec.jar,hdfs:///tmp/slf4j-api-1.6.1.jar
+</source>
+<p>The other statements remain the same.</p>
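+<p>For example, the job submission command itself does not change; only the
+contents of LIB_JARS differ. A sketch, assuming a hypothetical application
+jar myapp.jar with main class com.example.MyJob:</p>
+<source>
+bin/hadoop jar myapp.jar com.example.MyJob -libjars $LIB_JARS <program_arguments>
+</source>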

 <title>Authentication</title>