Index: test-e2e/tools/test/floatpostprocessor.pl =================================================================== --- test-e2e/tools/test/floatpostprocessor.pl (revision 0) +++ test-e2e/tools/test/floatpostprocessor.pl (revision 0) @@ -0,0 +1,111 @@ +#!/usr/bin/env perl + +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# A simple tool to make sure all floats in the output are written the same way. +# It is assumed that the data in question is being read from stdin. 
+#
+#
+
+use strict;
+
+our @floats;
+our $delim;
+
+sub parseLine($)
+{
+    my $line = shift;
+    chomp $line;
+    return split(/$delim/, $line);
+}
+
+sub postprocess($)
+{
+    my @fields = parseLine(shift);
+
+    for (my $i = 0; $i < @fields; $i++) {
+        if ($i != 0) { print($delim); }
+        if ($floats[$i]) {
+            printf("%.2f", $fields[$i]);
+        } else {
+            print($fields[$i]);
+        }
+    }
+    print "\n";
+}
+
+sub is_float {
+    my $n = shift;
+    if(!defined $n || $n eq ""){
+        return 0;
+    }
+    if($n =~ /^[+-]?\d+\.\d+([eE][-+]?[0-9]+)?$/){
+        return 1;
+    }
+
+    my $abs = abs($n);
+    if ($abs - int($abs) > 0) {
+        return 1;
+    }
+    return 0;
+}
+
+
+# main
+{
+    $delim = shift;
+    if (!defined($delim)) {
+        die "Usage: $0 delimiter\n";
+    }
+
+    my @sampled;
+    my $line;
+    # read away any empty lines into the sample
+    do {
+        $line = <STDIN>;
+        push(@sampled, $line);
+    } while($line && $line =~ /^\s*$/);
+    # Sample the next thousand lines to figure out which columns have floats.
+    for (my $i = 0; $i < 1000 && ($line = <STDIN>); $i++) {
+        push(@sampled, $line);
+    }
+    foreach my $line (@sampled) {
+        my @fields = parseLine($line);
+        for (my $j = 0; $j < @fields; $j++) {
+            if(is_float($fields[$j])){
+                $floats[$j] = 1;
+            }
+
+
+        }
+    }
+
+    # Now, play each of the sampled lines through the postprocessor
+    foreach my $line (@sampled) {
+        postprocess($line);
+    }
+
+    while (<STDIN>) {
+        postprocess($_);
+    }
+
+}
+
+
+
Index: test-e2e/tools/generate/generate_data.pl
===================================================================
--- test-e2e/tools/generate/generate_data.pl (revision 0)
+++ test-e2e/tools/generate/generate_data.pl (revision 0)
@@ -0,0 +1,582 @@
+#!/usr/bin/env perl
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A utility to generate test data for pig test harness tests. +# +# + +use strict; +use charnames (); +use Cwd; + +our @firstName = ("alice", "bob", "calvin", "david", "ethan", "fred", + "gabriella", "holly", "irene", "jessica", "katie", "luke", "mike", "nick", + "oscar", "priscilla", "quinn", "rachel", "sarah", "tom", "ulysses", "victor", + "wendy", "xavier", "yuri", "zach"); + +our @lastName = ("allen", "brown", "carson", "davidson", "ellison", "falkner", + "garcia", "hernandez", "ichabod", "johnson", "king", "laertes", "miller", + "nixon", "ovid", "polk", "quirinius", "robinson", "steinbeck", "thompson", + "underhill", "van buren", "white", "xylophone", "young", "zipper"); + +sub randomName() +{ + return sprintf("%s %s", $firstName[int(rand(26))], + $lastName[int(rand(26))]); +} + +our @city = ("albuquerque", "bombay", "calcutta", "danville", "eugene", + "frankfurt", "grenoble", "harrisburg", "indianapolis", + "jerusalem", "kellogg", "lisbon", "marseilles", + "nice", "oklohoma city", "paris", "queensville", "roswell", + "san francisco", "twin falls", "umatilla", "vancouver", "wheaton", + "xacky", "youngs town", "zippy"); + +sub randomCity() +{ + return $city[int(rand(26))]; +} + +our @state = ( "AL", "AK", "AS", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", + "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", + "MA", "MI", "MN", "MS", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", 
"NC", + "ND", "OH", "OK", "OR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", + "WA", "WV", "WI", "WY"); + +sub randomState() +{ + return $state[int(rand(50))]; +} + +our @classname = ("american history", "biology", "chemistry", "debate", + "education", "forestry", "geology", "history", "industrial engineering", + "joggying", "kindergarten", "linguistics", "mathematics", "nap time", + "opthamology", "philosophy", "quiet hour", "religion", "study skills", + "topology", "undecided", "values clariffication", "wind surfing", + "xylophone band", "yard duty", "zync studies"); + +sub randomClass() +{ + return $classname[int(rand(26))]; +} + +our @grade = ("A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", + "F"); + +sub randomGrade() +{ + return $grade[int(rand(int(@grade)))]; +} + +our @registration = ("democrat", "green", "independent", "libertarian", + "republican", "socialist"); + +sub randomRegistration() +{ + return $registration[int(rand(int(@registration)))]; +} + +sub randomAge() +{ + return (int(rand(60)) + 18); +} + +sub randomGpa() +{ + return rand(4.0); +} + +our @street = ("A", "B", "C", "D", "E", "F", "G", "H", "I", + "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", + "T", "U", "V", "W", "X", "Y", "Z"); + +sub randomStreet() +{ + return sprintf("%d %s st", int(rand(1000)), $street[int(rand(26))]); +} + +sub randomZip() +{ + return int(rand(100000)); +} + +sub randomContribution() +{ + return sprintf("%.2f", rand(1000)); +} + +our @numLetter = ("1", "09", "09a"); + +sub randomNumLetter() +{ + return $numLetter[int(rand(int(@numLetter)))]; +} + +our @greekLetter = ( "alpha", "beta", "gamma", "delta", "epsilon", "zeta", + "eta", "theta", "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", + "pi", "rho", "sigma", "tau", "upsilon", "chi", "phi", "psi", "omega" ); + +sub randomGreekLetter() +{ + return $greekLetter[int(rand(int(@greekLetter)))]; +} + +sub randomNameAgeGpaMap() +{ + my $size = int(rand(3)); + my @mapValues = ( "name#" . 
randomName(), "age#" . randomAge(), "gpa#" . randomGpa() );
+    $size = ($size == 0 ? 1 : $size);
+    my $map;
+    for(my $i = 0; $i <= $size; $i++) {
+        $map .= $mapValues[$i];
+        if($i != $size) {
+            $map .= ",";
+        }
+    }
+    return $map;
+}
+
+sub getMapFields($) {
+    my $mapString = shift;
+    # remove the enclosing square brackets
+    $mapString =~ s/[\[\]]//g;
+    # get individual map fields
+    my @fields = split(/,/, $mapString);
+    # get only the values
+    my $hash;
+    for my $field (@fields) {
+        if($field =~ /(\S+)#(.*)/) {
+            $hash->{$1} = $2;
+        }
+    }
+    return $hash;
+}
+
+sub randomNameAgeGpaTuple()
+{
+    my $gpa = sprintf("%0.2f", randomGpa());
+    return randomName() . "," . randomAge() . "," . $gpa ;
+}
+
+sub randomList()
+{
+    my $size = int(rand(int(3))) + 1;
+    my $bag;
+    for(my $i = 0; $i <= $size; $i++) {
+        $bag .= randomAge();
+        $bag .= "," if ($i != $size);
+    }
+    return $bag;
+}
+
+sub randomEscape()
+{
+    my $r = rand(1);
+    if ($r < 0.16) {
+        return '\"';
+    } elsif ($r < 0.32) {
+        return '\\\\';
+    } elsif ($r < 0.48) {
+        return '\/';
+    } elsif ($r < 0.64) {
+        return '\n';
+    } elsif ($r < 0.80) {
+        return '\t';
+    } else {
+        return randomUnicodeHex();
+    }
+}
+
+
+sub randomJsonString()
+{
+    my $r = rand(1);
+    if ($r < 0.05) {
+        return "null";
+    } elsif ($r < 0.10) {
+        return '"' . randomName() . randomEscape() . randomName() . '"';
+    } else {
+        return '"' . randomName() . '"';
+    }
+}
+
+sub randomNullBoolean()
+{
+    my $r = rand(1);
+    if ($r < 0.05) {
+        return 'null';
+    } elsif ($r < 0.525) {
+        return 'true';
+    } else {
+        return 'false';
+    }
+}
+
+sub randomJsonMap()
+{
+    if (rand(1) < 0.05) {
+        return 'null';
+    }
+
+    # NOTE(review): a JSON map must be an object, not an array; "[...]" with
+    # "key" : "value" members is invalid JSON, so use braces here.
+    my $str = "{";
+    my $num = rand(5) + 1;
+    for (my $i = 0; $i < $num; $i++) {
+        $str .= "," unless $i == 0;
+        $str .= '"' . randomCity() . '" : "' . randomName() . '"';
+    }
+    $str .= "}";
+}
+
+sub randomJsonBag()
+{
+    if (rand(1) < 0.05) {
+        return 'null';
+    }
+
+    my $str = "[";
+    my $num = rand(5) + 1;
+    for (my $i = 0; $i < $num; $i++) {
+        $str .= "," unless $i == 0;
+        $str .= '{"a":' . int(rand(2**32) - rand(2**31)) . ', "b":' .
+            randomJsonString() . '}';
+    }
+    $str .= "]";
+}
+
+sub usage()
+{
+    warn "Usage: $0 filetype numrows tablename hdfstargetdir\n";
+    warn "\tValid filetypes [studenttab, studentparttab, \n";
+    warn "\t\tstudentnull, allscalars, studentcomplextab, \n";
+    warn "\t\tvoternulltab, votertab, unicode, json]\n";
+    warn "hdfstargetdir is the directory in hdfs that data will be copied to for loading into tables\n";
+}
+
+our @greekUnicode = ("\N{U+03b1}", "\N{U+03b2}", "\N{U+03b3}", "\N{U+03b4}",
+    "\N{U+03b5}", "\N{U+03b6}", "\N{U+03b7}", "\N{U+03b8}", "\N{U+03b9}",
+    "\N{U+03ba}", "\N{U+03bb}", "\N{U+03bc}", "\N{U+03bd}", "\N{U+03be}",
+    "\N{U+03bf}", "\N{U+03c0}", "\N{U+03c1}", "\N{U+03c2}", "\N{U+03c3}",
+    "\N{U+03c4}", "\N{U+03c5}", "\N{U+03c6}", "\N{U+03c7}", "\N{U+03c8}",
+    "\N{U+03c9}");
+
+sub randomUnicodeNonAscii()
+{
+    my $name = $firstName[int(rand(int(@firstName)))] .
+        $greekUnicode[int(rand(int(@greekUnicode)))];
+    return $name;
+}
+
+sub randomUnicodeHex()
+{
+    return sprintf "\\u%04x", 0x3b1 + int(rand(25));
+}
+
+my $testvar = "\N{U+03b1}\N{U+03b3}\N{U+03b1}\N{U+03c0}\N{U+03b7}";
+
+sub getBulkCopyCmd($$;$)
+{
+    my ($tableName, $delimeter, $filename) = @_;
+
+    $filename = $tableName if (!defined($filename));
+
+    return "load data infile '" . cwd . "/$filename'
+        into table $tableName
+        columns terminated by '$delimeter';"
+}
+
+
+# main
+{
+    # explicitly call srand so we get the same data every time
+    # we generate it.  However, we set it individually for each table type.
+    # Otherwise we'd be generating the same data sets regardless of size,
+    # and this would really skew our joins.
+ + my $filetype = shift; + my $numRows = shift; + my $tableName = shift; + my $hdfsTargetDir= shift; + + die usage() if (!defined($filetype) || !defined($numRows) || !defined($tableName) || !defined($hdfsTargetDir)); + + if ($numRows <= 0) { usage(); } + + open(HDFS, "> $tableName") or die("Cannot open file $tableName, $!\n"); + open(MYSQL, "> $tableName.mysql.sql") or die("Cannot open file $tableName.mysql.sql, $!\n"); + open(HIVE, "> $tableName.hive.sql") or die("Cannot open file $tableName.hive.sql, $!\n"); + + if ($filetype eq "studenttab") { + srand(3.14159 + $numRows); + print MYSQL "create table IF NOT EXISTS $tableName (name varchar(100), age integer, gpa float(3));\n"; + print MYSQL &getBulkCopyCmd($tableName, "\t"); + print HIVE "create external table IF NOT EXISTS $tableName( + name string, + age int, + gpa double) + row format delimited + fields terminated by '\\t' + stored as textfile + location '$hdfsTargetDir/$tableName';\n"; + for (my $i = 0; $i < $numRows; $i++) { + my $name = randomName(); + my $age = randomAge(); + my $gpa = randomGpa(); + printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa; + } + + } elsif ($filetype eq "studentparttab") { + srand(3.14159 + $numRows); + print MYSQL "create table IF NOT EXISTS $tableName (name varchar(100), age integer, gpa float(3), ds char(8));\n"; + print MYSQL &getBulkCopyCmd($tableName, "\t", "$tableName.mysql"); + print HIVE "create external table IF NOT EXISTS $tableName( + name string, + age int, + gpa double) + partitioned by (ds string) + row format delimited + fields terminated by '\\t' + stored as textfile + location '$hdfsTargetDir/$tableName'; + alter table $tableName add partition (ds='20110924') location '$hdfsTargetDir/$tableName/$tableName.20110924'; + alter table $tableName add partition (ds='20110925') location '$hdfsTargetDir/$tableName/$tableName.20110925'; + alter table $tableName add partition (ds='20110926') location '$hdfsTargetDir/$tableName/$tableName.20110926'; + "; + open(MYSQLDATA, "> 
$tableName.mysql") or die("Cannot open file $tableName.mysql, $!\n"); + for (my $ds = 20110924; $ds < 20110927; $ds++) { + close(HDFS); + open(HDFS, "> $tableName.$ds") or die("Cannot open file $tableName.$ds, $!\n"); + for (my $i = 0; $i < $numRows; $i++) { + my $name = randomName(); + my $age = randomAge(); + my $gpa = randomGpa(); + printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa; + printf MYSQLDATA "%s\t%d\t%.3f\t%d\n", $name, $age, $gpa, $ds; + } + } + close(MYSQLDATA); + + } elsif ($filetype eq "studentnull") { + srand(3.14159 + $numRows); + print MYSQL "create table IF NOT EXISTS $tableName (name varchar(100), age integer, gpa float(3));\n"; + print HIVE "create external table IF NOT EXISTS $tableName( + name string, + age int, + gpa double) + row format delimited + fields terminated by '\\001' + stored as textfile + location '$hdfsTargetDir/$tableName';\n"; + for (my $i = 0; $i < $numRows; $i++) { + # generate nulls in a random fashion + my $name = rand(1) < 0.05 ? '' : randomName(); + my $age = rand(1) < 0.05 ? '' : randomAge(); + my $gpa = rand(1) < 0.05 ? '' : randomGpa(); + printf MYSQL "insert into $tableName (name, age, gpa) values("; + print MYSQL ($name eq ''? "null, " : "'$name', "), ($age eq ''? 
"null, " : "$age, "); + if($gpa eq '') { + print MYSQL "null);\n" + } else { + printf MYSQL "%.2f);\n", $gpa; + } + print HDFS "$name$age"; + if($gpa eq '') { + print HDFS "\n" + } else { + printf HDFS "%.2f\n", $gpa; + } + + } + print MYSQL "commit;\n"; + + } elsif ($filetype eq "allscalars") { + srand(2.718281828459 + $numRows); + print MYSQL "create table IF NOT EXISTS $tableName (t tinyint, si smallint, i int, b + bigint, bool boolean, f float, d double, s varchar(25));\n"; + print MYSQL &getBulkCopyCmd($tableName, ':'); + print HIVE "create external table IF NOT EXISTS $tableName( + t tinyint, + si smallint, + i int, + b bigint, + bool boolean, + f float, + d double, + s string) + row format delimited + fields terminated by ':' + stored as textfile + location '$hdfsTargetDir/$tableName';\n"; + for (my $i = 0; $i < $numRows; $i++) { + printf HDFS "%d:%d:%d:%ld:%s:%.2f:%.2f:%s\n", + (int(rand(2**8) - 2**7)), + (int(rand(2**16) - 2**15)), + (int(rand(2**32) - 2**31)), + (int(rand(2**64) - 2**61)), + rand() >= 0.5 ? "true" : "false", + rand(100000.0) - 50000.0, + rand(10000000.0) - 5000000.0, + randomName(); + } + } elsif ($filetype eq "studentcomplextab") { + srand(3.14159 + $numRows); + print MYSQL "create table IF NOT EXISTS $tableName (nameagegpamap varchar(500), nameagegpatuple varchar(500), nameagegpabag varchar(500), nameagegpamap_name varchar(500), nameagegpamap_age integer, nameagegpamap_gpa float(3));\n"; + print MYSQL "begin transaction;\n"; + print HIVE "create external table IF NOT EXISTS $tableName( + nameagegpamap map, + struct , + array ) + row format delimited + fields terminated by '\\t' + collection items terminated by ',' + map keys terminated by '#' + stored as textfile + location '$hdfsTargetDir/$tableName';\n"; + for (my $i = 0; $i < $numRows; $i++) { + # generate nulls in a random fashion + my $map = rand(1) < 0.05 ? '' : randomNameAgeGpaMap(); + my $tuple = rand(1) < 0.05 ? '' : randomNameAgeGpaTuple(); + my $bag = rand(1) < 0.05 ? 
'' : randomList(); + printf MYSQL "insert into $tableName (nameagegpamap, nameagegpatuple, nameagegpabag, nameagegpamap_name, nameagegpamap_age, nameagegpamap_gpa) values("; + my $mapHash; + if($map ne '') { + $mapHash = getMapFields($map); + } + + print MYSQL ($map eq ''? "null, " : "'$map', "), + ($tuple eq ''? "null, " : "'$tuple', "), + ($bag eq '' ? "null, " : "'$bag', "), + ($map eq '' ? "null, " : (exists($mapHash->{'name'}) ? "'".$mapHash->{'name'}."', " : "null, ")), + ($map eq '' ? "null, " : (exists($mapHash->{'age'}) ? "'".$mapHash->{'age'}."', " : "null, ")), + ($map eq '' ? "null);\n" : (exists($mapHash->{'gpa'}) ? "'".$mapHash->{'gpa'}."');\n" : "null);\n")); + print HDFS "$map\t$tuple\t$bag\n"; + } + print MYSQL "commit;\n"; + + } elsif ($filetype eq "votertab") { + srand(299792458 + $numRows); + print MYSQL "create table IF NOT EXISTS $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n"; + print MYSQL &getBulkCopyCmd($tableName, "\t"); + print HIVE "create external table IF NOT EXISTS $tableName( + name string, + age int, + registration string, + contributions float) + row format delimited + fields terminated by '\\t' + stored as textfile + location '$hdfsTargetDir/$tableName';\n"; +for (my $i = 0; $i < $numRows; $i++) { + my $name = randomName(); + my $age = randomAge(); + my $registration = randomRegistration(); + my $contribution = randomContribution(); + printf HDFS "%s\t%d\t%s\t%.2f\n", $name, $age, + $registration, $contribution; + } + + } elsif ($filetype eq "voternulltab") { + srand(299792458 + $numRows); + print MYSQL "create table IF NOT EXISTS $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n"; + print MYSQL "begin transaction;\n"; + print HIVE "create external table IF NOT EXISTS $tableName( + name string, + age int, + registration string, + contributions float) + row format delimited + fields terminated by '\\t' + stored as textfile + location 
'$hdfsTargetDir/$tableName';\n";
+        for (my $i = 0; $i < $numRows; $i++) {
+            # generate nulls in a random fashion
+            my $name = rand(1) < 0.05 ? '' : randomName();
+            my $age = rand(1) < 0.05 ? '' : randomAge();
+            my $registration = rand(1) < 0.05 ? '' : randomRegistration();
+            my $contribution = rand(1) < 0.05 ? '' : randomContribution();
+            printf MYSQL "insert into $tableName (name, age, registration, contributions) values(";
+            print MYSQL ($name eq ''? "null, " : "'$name', "),
+                ($age eq ''? "null, " : "$age, "),
+                ($registration eq ''? "null, " : "'$registration', ");
+            if($contribution eq '') {
+                print MYSQL "null);\n"
+            } else {
+                printf MYSQL "%.2f);\n", $contribution;
+            }
+            print HDFS "$name\t$age\t$registration\t";
+            if($contribution eq '') {
+                print HDFS "\n"
+            } else {
+                printf HDFS "%.2f\n", $contribution;
+            }
+        }
+        print MYSQL "commit;\n";
+
+    } elsif ($filetype eq "unicode") {
+        srand(1.41421 + $numRows);
+        print MYSQL "create table IF NOT EXISTS $tableName (name varchar(255));\n";
+        print MYSQL "begin transaction;\n";
+        print HIVE "create external table IF NOT EXISTS $tableName(
+            name string)
+            row format delimited
+            fields terminated by '\\t'
+            stored as textfile
+            location '$hdfsTargetDir/$tableName';\n";
+        for (my $i = 0; $i < $numRows; $i++) {
+            my $name = randomUnicodeNonAscii();
+            printf MYSQL "insert into $tableName (name) values('%s');\n", $name;
+            printf HDFS "%s\n", $name;
+        }
+        print MYSQL "commit;\n";
+    } elsif ($filetype eq "json") {
+        srand(6.0221415 + $numRows);
+        print MYSQL "create table IF NOT EXISTS $tableName (s varchar(200),
+            i int, d double, b boolean, m varchar(2048),
+            bb varchar(2048));\n";
+        print MYSQL "begin transaction;\n";
+        print HIVE "create external table IF NOT EXISTS $tableName(
+            s string,
+            i int,
+            d double,
+            b boolean,
+            m map<string, string>,
+            bb array<struct<a: int, b: string>>)
+            stored as textfile
+            location '$hdfsTargetDir/$tableName'
+            TBLPROPERTIES (
+                'hcat.isd'='org.apache.hcatalog.json.JsonInputDriver',
+
'hcat.osd'='org.apache.hcatalog.json.JsonOutputDriver'
+            );\n";
+        for (my $i = 0; $i < $numRows; $i++) {
+            my $s = randomJsonString();
+            my $iv = rand(1) < 0.05 ? 'null' : (int(rand(2**32) - 2**31));
+            my $d = rand(1) < 0.05 ? 'null' : (rand(2**10) - 2**9);
+            my $b = randomNullBoolean();
+            my $m = randomJsonMap();
+            my $bb = randomJsonBag();
+
+#           printf MYSQL "insert into $tableName (name) values('%s');\n", $name;
+            print HDFS qq@{"s":$s, "i":$iv, "d":$d, "b":$b, "m":$m, "bb":$bb}\n@;
+        }
+        print MYSQL "commit;\n";
+
+    } else {
+        warn "Unknown filetype $filetype\n";
+        usage();
+    }
+}
+
+
Property changes on: test-e2e/tools/generate/generate_data.pl
___________________________________________________________________
Added: svn:executable
   + *

Index: test-e2e/tests/cmdline.conf
===================================================================
--- test-e2e/tests/cmdline.conf (revision 0)
+++ test-e2e/tests/cmdline.conf (revision 0)
@@ -0,0 +1,56 @@
+#!/usr/bin/env perl
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+# Nightly tests for hive.
+# +# + +$cfg = { + 'driver' => 'HiveCmdLine', + 'groups' => [ { + 'name' => 'Example', + 'tests' => [ { + 'num' => 1, + 'sql' => "describe studenttab10k;", + 'rc' => 0, + # Don't mess with the tabs in the next few lines, they're important + 'expected_out' => 'name string +age int +gpa double +', + 'not_expected_regex_err' => 'FAILED' + }, { + 'num' => 2, + 'hivecmdargs' => ['-e', 'show tables;'], + 'expected_regex_out' => 'studenttab10k', + 'rc' => 0, + }, { + 'num' => 3, + 'sql' => "describe nosuchtable;", + 'rc' => 0, + 'expected_regex_out' => 'Table nosuchtable does not exist', + }, + ] + } + ], +}, +; + + + Index: test-e2e/tests/nightly.conf =================================================================== --- test-e2e/tests/nightly.conf (revision 0) +++ test-e2e/tests/nightly.conf (revision 0) @@ -0,0 +1,1018 @@ +#!/usr/bin/env perl + +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############################################################################### +# Nightly tests for hive. 
+# +# + +$cfg = { + 'driver' => 'Hive', + 'groups' => [ { + 'name' => 'Checkin', + 'tests' => [ { + 'num' => 1, + 'sql' => q\select * from studenttab10k;\, + 'floatpostprocess' => 1, + 'delimiter' => ' ', + }, { + 'num' => 2, + 'sql' => q\select registration, sum(contributions) s + from studenttab10k s join votertab10k v + on (s.name = v.name and s.age = v.age) + where s.age < 50 and v.age < 50 + group by registration + order by s\, + 'sortArgs' => ['-t', ' ', '+1', '-2'], + 'floatpostprocess' => 1, + 'delimiter' => ' ', + }, { + 'num' => 3, + 'sql' => q\drop table if exists checkin_3; + create table checkin_3 as + select name, count(1) + from studenttab10k + group by name + having count(1) > 5\, + 'result_table' => 'checkin_3', + 'verify_sql' =>q\select name, count(1) + from studenttab10k + group by name + having count(1) > 5\, + }, { + 'num' => 4, + 'sql' => q\select avg(gpa) average + from studentparttab30k + where age > 50\, + 'floatpostprocess' => 1, + 'delimiter' => ' ', + } + ] + },{ + 'name' => 'SelectExpression', + 'tests' => [ { + 'num' => 1, + 'sql' => "select t + 10, t - 10, t * 10, t / 10, t % 10 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 2, + 'sql' => "select t + 10.10, t - 10.10, t * 10.10, t / 10.0 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 3, + 'sql' => "select si + 10, si - 10, si * 10, si / 10, si % 10 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 4, + 'sql' => "select si + 10.10, si - 10.10, si * 10.10, si / 10.0 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 5, + 'sql' => "select i + 10, i - 10, i * -1, i / 10, i % 10 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 6, + 'sql' => "select i + 10.10, i - 10.10, i * 10.10, i / 10.0 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ # division removed because I can't get hive and mysql to 
do their + # floating point arithmetic in the same way. + 'num' => 7, + 'sql' => "select b + 10, b - 10, b * -1, b % 10 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', +# },{ has floating point precision issues +# 'num' => 8, +# 'sql' => "select b + 10.10, b - 10.10, b * 10.10, b / 10.0 +# from all100k;", +# 'floatpostprocess' => 1, +# 'delimiter' => ' ', + },{ + 'num' => 9, + 'sql' => "select f + 10, f - 10, f * 1.01, f / 10 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 10, + 'sql' => "select f + 10.10, f - 10.10, f * 10.10, f / 10.0 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 11, + 'sql' => "select d + 10, d - 10, d * 10, d / 10 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 12, + 'sql' => "select d + 10.10, d - 10.10, d * 1.01, d / 10.0 + from all100k;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + }, + ] + },{ + 'name' => 'WhereExpression', + 'tests' => [ { + 'num' => 1, + 'sql' => "select t + from all100k + where t = -91 and si = -19299 and i = -1591211872 + and b = -4485904205832126464 and bool = false + and s = 'katie young';", + },{ + 'num' => 2, + 'sql' => "select t + from all100k + where f = 48308.98;" + },{ + 'num' => 3, + 'sql' => "select t + from all100k + where d = -2806609.87;", + },{ + 'num' => 4, + 'sql' => "select t + from all100k + where t = 87 or si = 4931;", + },{ + 'num' => 5, + 'sql' => "select t + from all100k + where i <> 1096589477;", + },{ + 'num' => 6, + 'sql' => "select t + from all100k + where t > 0 and si > 0 and i > 0 and b > 0 and f > 0.0 and + d > 0.0 and s > 'm';", + },{ + 'num' => 7, + 'sql' => "select t + from all100k + where t >= 0 and si >= 0 and i >= 0 and b >= 0 and f >= 0.0 + and d >= 0.0 and s >= 'm';", + },{ + 'num' => 8, + 'sql' => "select t + from all100k + where t < 0 and si < 0 and i < 0 and b < 0 and f < 0.0 and + d < 0.0 and s < 'm';", + },{ + 'num' => 9, + 'sql' => "select t + 
from all100k + where t <= 0 and si <= 0 and i <= 0 and b <= 0 and f <= 0.0 + and d <= 0.0 and s <= 'm';", + },{ + 'num' => 10, + 'sql' => "select name + from studentnull10k + where age is null;", + 'nullpostprocess' => 1, + },{ + 'num' => 11, + 'sql' => "select name + from studentnull10k + where age is not null;", + 'nullpostprocess' => 1, + },{ + 'num' => 12, + 'sql' => "select age + from studenttab10k + where name like '_uke king';", + },{ + 'num' => 13, + 'sql' => "select age + from studenttab10k + where name like '% king';", + },{ + 'num' => 14, + 'sql' => "select age + from studenttab10k + where name regexp '.* king';", + },{ + 'num' => 15, + 'sql' => "select age + from studenttab10k + where name rlike '.* king';", + 'verify_sql' =>"select age + from studenttab10k + where name regexp '.* king';", + } + ] + },{ + 'name' => 'WherePartition', + 'tests' => [ { + 'num' => 1, + 'sql' => "select name + from studentparttab30k + where ds = '20110924';", + },{ + 'num' => 2, + 'sql' => "select name + from studentparttab30k + where ds = '20110924' or ds = '20110925';", + },{ + 'num' => 3, + 'sql' => "select name + from studentparttab30k + where ds > '20110924';", + },{ + 'num' => 4, + 'sql' => "select name + from studentparttab30k + where ds >= '20110924';", + },{ + 'num' => 5, + 'sql' => "select name + from studentparttab30k + where ds <= '20110926';", + },{ + 'num' => 6, + 'sql' => "select name + from studentparttab30k + where ds < '20110926';", + },{ + 'num' => 7, + 'sql' => "select name + from studentparttab30k + where ds <> '20110926';", + } + ] + },{ + 'name' => 'Distinct', + 'tests' => [ { + 'num' => 1, + 'sql' => "select distinct name + from studenttab10k;", + },{ + 'num' => 2, + 'sql' => "select distinct name, age + from studenttab10k;", + } + ] + },{ + 'name' => 'All', + 'tests' => [ { + 'num' => 1, + 'sql' => "select all name + from studenttab10k;", + } + ] + },{ + 'name' => 'Join', + 'tests' => [ { + 'num' => 1, + 'sql' => "select registration + from 
studenttab10k s join votertab10k v + on (s.name = v.name);", + },{ + 'num' => 2, + 'sql' => "select registration + from studenttab10k s join votertab10k v + on (s.name = v.name and s.age = v.age);", + },{ + 'num' => 3, + 'sql' => "select registration + from studenttab10k s join votertab10k v + on (s.name = v.name) join studentparttab30k p + on (p.name = v.name) + where s.age < 25 and v.age < 25 and p.age < 25;", + },{ + 'num' => 4, + 'sql' => "select registration + from studenttab10k s left outer join votertab10k v + on (s.name = v.name);", + 'nullpostprocess' => 1, + },{ + 'num' => 5, + 'sql' => "select registration + from studenttab10k s right outer join votertab10k v + on (s.name = v.name);", + 'nullpostprocess' => 1, +# },{ +# 'num' => 6, +# 'sql' => "select registration +# from studenttab10k s full outer join votertab10k v +# on (s.name = v.name);", +# 'nullpostprocess' => 1, +# 'verify_sql' => "select registration +# from studenttab10k s left join votertab10k v +# on (s.name = v.name) +# union all +# select registration +# from votertab10k v left join studenttab10k s +# on (s.name = v.name);", + },{ + 'num' => 7, + 'sql' => "select registration + from studenttab10k s join votertab10k v + where s.age < 25 and v.age < 25;", + } + ] + },{ + 'name' => 'GroupBy', + 'tests' => [ { + 'num' => 1, + 'sql' => "select count(*) + from studentparttab30k;", + },{ + 'num' => 2, + 'sql' => "select name, count(*) + from studenttab10k + group by name;", + },{ + 'num' => 3, + 'sql' => "select name, avg(age) + from studentparttab30k + group by name;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 4, + 'sql' => "select name, sum(contributions) + from votertab10k + group by name;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 5, + 'sql' => "select name, age, max(contributions) + from votertab10k + where registration = 'democrat' + group by name, age;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 6, + 'sql' => "select 
name, min(contributions) + from votertab10k + where registration = 'green' + group by name;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 7, + 'sql' => "select name, min(age) + from votertab10k + group by name;", + },{ + 'num' => 8, + 'sql' => "select name, max(age) + from votertab10k + group by name;", + },{ + 'num' => 9, + 'sql' => "select age, max(name) + from votertab10k + group by age;", + },{ + 'num' => 10, + 'sql' => "select age, min(name) + from votertab10k + group by age;", + },{ + 'num' => 11, + 'sql' => "select registration, sum(contributions) + from studenttab10k s join votertab10k v + on (s.name = v.name) + group by registration;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + } + ] + },{ + 'name' => 'GroupByDistinct', + 'tests' => [ { + 'num' => 1, + 'sql' => "select name, count(distinct registration) + from votertab10k + group by name;", + },{ + 'num' => 2, + 'sql' => "select name, count(distinct registration), count(age) + from votertab10k + group by name;", + },{ + 'num' => 3, + 'sql' => "select name, count(distinct registration), count(distinct age) + from votertab10k + group by name;", + },{ + 'num' => 4, + 'sql' => "select s.name, count(distinct registration) + from studenttab10k s join votertab10k v + on (s.name = v.name) + group by s.name;", + } + ] + },{ + 'name' => 'Having', + 'tests' => [ { + 'num' => 1, + 'sql' => "select name, sum(age) + from votertab10k + group by name + having sum(age) > 1000;", + },{ + 'num' => 2, + 'sql' => "select age + from votertab10k + group by age + having sum(age) > 1000;", + },{ + 'num' => 3, + 'sql' => "select age, count(distinct name) + from votertab10k + group by age + having count(distinct name) > 50;", + },{ + 'num' => 4, + 'sql' => "select registration, sum(contributions) + from studenttab10k s join votertab10k v + on (s.name = v.name) + group by registration + having sum(contributions) > 100.0;", + 'floatpostprocess' => 1, + 'delimiter' => ' ', + } + ] + },{ + 'name' => 
'OrderBy', + 'tests' => [ { + 'num' => 1, + 'sql' => "select name + from studenttab10k + order by name;", + 'sortArgs' => ['-t', ' ', '+0', '-1'], + },{ + 'num' => 2, + 'sql' => "select age + from studenttab10k + order by age;", + 'sortArgs' => ['-t', ' ', '+0n', '-1'], + },{ + 'num' => 3, + 'sql' => "select gpa + from studenttab10k + order by gpa;", + 'sortArgs' => ['-t', ' ', '+0n', '-1'], + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 4, + 'sql' => "select age + from studentnull10k + order by age;", + 'sortArgs' => ['-t', ' ', '+0n', '-1'], + },{ + 'num' => 5, + 'sql' => "select t + from all100k + order by t;", + 'sortArgs' => ['-t', ' ', '+0n', '-1'], + },{ + 'num' => 6, + 'sql' => "select si + from all100k + order by si;", + 'sortArgs' => ['-t', ' ', '+0n', '-1'], + },{ + 'num' => 7, + 'sql' => "select b + from all100k + order by b;", + 'sortArgs' => ['-t', ' ', '+0n', '-1'], + },{ +# 'num' => 8, bools not loaded in mysql correctly +# 'sql' => "select bool +# from all100k +# order by bool;", +# },{ + 'num' => 9, + 'sql' => "select d + from all100k + order by d;", + 'sortArgs' => ['-t', ' ', '+0n', '-1'], + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 10, + 'sql' => "select name, count(*) cnt + from studenttab10k + group by name + order by cnt;", + 'sortArgs' => ['-t', ' ', '+1n', '-2'], + },{ + 'num' => 11, + 'sql' => "select name, age + from studenttab10k + order by name, age;", + 'sortArgs' => ['-t', ' ', '+0', '-1', '+1n', '-2'], + },{ + 'num' => 12, + 'sql' => "select name + from studenttab10k + order by name desc;", + 'sortArgs' => ['-t', ' ', '-r', '+0', '-1'], + },{ + 'num' => 13, + 'sql' => "select age + from studenttab10k + order by age desc;", + 'sortArgs' => ['-t', ' ', '+0rn', '-1'], + },{ + 'num' => 14, + 'sql' => "select gpa + from studenttab10k + order by gpa desc;", + 'sortArgs' => ['-t', ' ', '+0rn', '-1'], + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 15, + 'sql' => "select age + from 
studentnull10k + order by age desc;", + 'sortArgs' => ['-t', ' ', '+0rn', '-1'], + },{ + 'num' => 16, + 'sql' => "select t + from all100k + order by t desc;", + 'sortArgs' => ['-t', ' ', '+0rn', '-1'], + },{ + 'num' => 17, + 'sql' => "select si + from all100k + order by si desc;", + 'sortArgs' => ['-t', ' ', '+0rn', '-1'], + },{ + 'num' => 18, + 'sql' => "select b + from all100k + order by b desc;", + 'sortArgs' => ['-t', ' ', '+0rn', '-1'], + },{ +# 'num' => 19, bools not loaded into mysql correctly +# 'sql' => "select bool +# from all100k +# order by bool desc;", +# },{ + 'num' => 20, + 'sql' => "select d + from all100k + order by d desc;", + 'sortArgs' => ['-t', ' ', '+0rn', '-1'], + 'floatpostprocess' => 1, + 'delimiter' => ' ', + },{ + 'num' => 21, + 'sql' => "select name, age + from studenttab10k + order by name, age desc;", + 'sortArgs' => ['-t', ' ', '+0', '-1', '+1nr', '-2'], + },{ + 'num' => 22, + 'sql' => "select name, age + from studenttab10k + order by name desc, age;", + 'sortArgs' => ['-t', ' ', '+0r', '-1', '+1n', '-2'], + },{ + 'num' => 23, + 'sql' => "select name, age + from studenttab10k + order by name desc, age desc;", + 'sortArgs' => ['-t', ' ', '+0r', '-1', '+1rn', '-2'], + },{ + 'num' => 24, + 'sql' => "select registration, s.name + from studenttab10k s join votertab10k v + on (s.name = v.name) + order by s.name;", + 'sortArgs' => ['-t', ' ', '+1', '-2'], + } + ] + },{ + 'name' => 'Insert', + 'tests' => [ { + 'num' => 1, # insert map only + 'sql' => "drop table if exists insert_1; + create table insert_1 ( + name string, + age int) + row format delimited + fields terminated by '\\t' + stored as textfile; + insert overwrite table insert_1 + select name, age + from studenttab10k + where age > 50;", + 'result_table' => 'insert_1', + 'verify_sql' =>"select name, age + from studenttab10k + where age > 50;", + },{ + 'num' => 2, # insert reduce side + 'sql' => "drop table if exists insert_2; + create table insert_2 ( + name string, + avgage double) 
+ row format delimited + fields terminated by '\\t' + stored as textfile; + insert overwrite table insert_2 + select name, avg(age) as avgage + from studenttab10k + group by name;", + 'result_table' => 'insert_2', + 'floatpostprocess' => 1, + 'delimiter' => ' ', + 'verify_sql' =>"select name, avg(age) + from studenttab10k + group by name;", +# },{ # Commented out until we switch to Hive 0.8 +# 'num' => 3, # insert map only overwrite +# 'sql' => "create table if not exists insert_3 ( +# name string, +# age int) +# row format delimited +# fields terminated by '\\t' +# stored as textfile; +# insert into table insert_3 +# select name, age +# from studenttab10k +# where age > 50; +# insert overwrite table insert_3 +# select name, age +# from studenttab10k +# where age > 30;", +# 'result_table' => 'insert_3', +# 'verify_sql' =>"select name, age +# from studenttab10k +# where age > 30;", +# },{# Commented out until we switch to Hive 0.8 +# 'num' => 4, # insert reduce side overwrite +# 'sql' => "create table if not exists insert_4 ( +# name string, +# age double) +# row format delimited +# fields terminated by '\\t' +# stored as textfile; +# insert into table insert_4 +# select name, avg(age) as avgage +# from studenttab10k +# group by name; +# insert overwrite table insert_4 +# select name, avg(contributions) +# from votertab10k +# group by name;", +# 'result_table' => 'insert_4', +# 'verify_sql' =>"select name, avg(contributions) +# from votertab10k +# group by name;", + },{ + 'num' => 5, # insert partition + 'sql' => "drop table if exists insert_5; + create table insert_5 ( + name string, + age int) + partitioned by (ds string) + row format delimited + fields terminated by '\\t' + stored as textfile; + insert overwrite table insert_5 partition (ds='20110924') + select name, age + from studentparttab30k + where ds = '20110924' + order by name;", + 'result_table' => 'insert_5', + 'verify_sql' =>"select name, age, ds + from studentparttab30k + where ds = '20110924';", +# 
},{# Commented out until we switch to Hive 0.8 +# 'num' => 6, # insert partition overwrite +# 'sql' => "create table if not exists insert_6 ( +# name string, +# age int) +# partitioned by (ds string) +# row format delimited +# fields terminated by '\\t' +# stored as textfile; +# insert into table insert_6 partition (ds='20110925') +# select name, age +# from studenttab10k +# order by name; +# insert overwrite table insert_6 partition (ds='20110925') +# select name, age +# from studentparttab30k +# where ds = '20110925' +# order by name;", +# 'result_table' => 'insert_6', +# 'verify_sql' =>"select name, age, ds +# from studentparttab30k +# where ds = '20110925';", + },{ + 'num' => 7, # insert multiple partitions + 'sql' => "drop table if exists insert_7; + create table insert_7 ( + name string, + age int) + partitioned by (ds string) + row format delimited + fields terminated by '\\t' + stored as textfile; + insert overwrite table insert_7 partition (ds) + select name, age, ds + from studentparttab30k + order by name;", + 'result_table' => 'insert_7', + 'verify_sql' =>"select name, age, ds + from studentparttab30k;", + 'hiveconf' => [ "hive.exec.dynamic.partition.mode=nonstrict", + "hive.exec.dynamic.partition=true"], + } + ] + },{ + 'name' => 'MultiInsert', + 'tests' => [ { + 'num' => 1, # insert map only + 'sql' => "drop table if exists multi_insert_1_1; + drop table if exists multi_insert_1_2; + drop table if exists multi_insert_1_3; + + create table multi_insert_1_1 ( + name string, + ds string) + row format delimited + fields terminated by '\\t' + stored as textfile; + + create table multi_insert_1_2 ( + name string, + ds string) + row format delimited + fields terminated by '\\t' + stored as textfile; + + create table multi_insert_1_3 ( + name string, + ds string) + row format delimited + fields terminated by '\\t' + stored as textfile; + + from studentparttab30k + insert overwrite table multi_insert_1_1 + select name, ds + where ds = '20110924' + + insert 
overwrite table multi_insert_1_2 + select name, ds + where ds = '20110925' + + insert overwrite table multi_insert_1_3 + select name, ds + where ds = '20110926';", + 'result_table' => ['multi_insert_1_1', + 'multi_insert_1_2', + 'multi_insert_1_3'], + 'verify_sql' =>["select name, ds + from studentparttab30k + where ds = '20110924';", + "select name, ds + from studentparttab30k + where ds = '20110925';", + "select name, ds + from studentparttab30k + where ds = '20110926';"] + },{ + 'num' => 2, # insert reduce side + 'sql' => "drop table if exists multi_insert_2_1; + drop table if exists multi_insert_2_2; + drop table if exists multi_insert_2_3; + + create table multi_insert_2_1 ( + name string, + avgage double) + row format delimited + fields terminated by '\\t' + stored as textfile; + + create table multi_insert_2_2 ( + name string, + age int, + sumgpa double) + row format delimited + fields terminated by '\\t' + stored as textfile; + + create table multi_insert_2_3 ( + name string, + distage bigint) + row format delimited + fields terminated by '\\t' + stored as textfile; + + from studenttab10k + insert overwrite table multi_insert_2_1 + select name, avg(age) as avgage + group by name + + insert overwrite table multi_insert_2_2 + select name, age, sum(gpa) as sumgpa + group by name, age + + insert overwrite table multi_insert_2_3 + select name, count(distinct age) as distage + group by name; + ", + 'result_table' => ['multi_insert_2_1', + 'multi_insert_2_2', + 'multi_insert_2_3'], + 'floatpostprocess' => 1, + 'delimiter' => ' ', + 'verify_sql' =>["select name, avg(age) + from studenttab10k + group by name;", + "select name, age, sum(gpa) + from studenttab10k + group by name, age;", + "select name, count(distinct age) + from studenttab10k + group by name;"], + },{ + 'num' => 3, # partition + 'sql' => "drop table if exists multi_insert_3; + + create table multi_insert_3 ( + name string) + partitioned by (ds string) + row format delimited + fields terminated by 
'\\t' + stored as textfile; + + from studentparttab30k + insert overwrite table multi_insert_3 + partition (ds = '20110924') + select name + where ds = '20110924' + + insert overwrite table multi_insert_3 + partition (ds = '20110925') + select name + where ds = '20110925' + + insert overwrite table multi_insert_3 + partition (ds = '20110926') + select name + where ds = '20110926';", + 'result_table' => 'multi_insert_3', + 'verify_sql' =>"select name, ds + from studentparttab30k;", + } + ] + },{ + 'name' => 'Subquery', + 'tests' => [ { + 'num' => 1, + 'sql' => "select name, age + from (select name, age, gpa + from studenttab10k + union all + select name, age, gpa + from studentparttab30k + where ds = '20110924') t1 + where age < 25;", + 'verify_sql' => "select name, age + from studenttab10k + where age < 25 + union all + select name, age + from studentparttab30k + where ds = '20110924' and age < 25;" + }, + ] + },{ + 'name' => 'Limit', + 'tests' => [ { + 'num' => 1, + 'sql' => "select name + from studentparttab30k + where ds = '20110924' + limit 5 ;", + },{ + 'num' => 2, #This test fails. 
Need to investigate more + 'sql' => "select name,age + from studenttab10k + order by name desc, age limit 10; ", + } + ] + },{ + 'name' => 'SortBy', + 'tests' => [ { + 'num' => 1, + 'sql' => "select name + from studenttab10k + sort by name;", + 'verify_sql' =>"select name + from studenttab10k + order by name;", + } + ] + },{ + 'name' => 'SelectRegex', + 'tests' => [ { + 'num' => 1, + 'sql' => "select `a[g]+e` + from studenttab10k + order by age;", + 'verify_sql' => "select age + from studenttab10k + order by age;", + },{ + 'num' => 2, + 'sql' => "select `n.*` + from studenttab10k + order by name;", + 'verify_sql' => "select name + from studenttab10k + order by name;", + },{ + 'num' => 3, + 'sql' => "select `(n|a)+.+` + from studenttab10k + order by name;", + 'verify_sql' => "select name, age + from studenttab10k + order by name;", + },{ + 'num' => 4, + 'sql' => "select `[l-o]+.+` + from studenttab10k + order by name;", + 'verify_sql' => "select name + from studenttab10k + order by name;", + },{ + 'num' => 5, + 'sql' => "select `(n|a)?.+e` + from studenttab10k + order by name;", + 'verify_sql' => "select name,age + from studenttab10k + order by name;", + } + ] + } + # Need to test multiple insert - Need harness enhancements + # Need to test insert into directory - Need harness enhancements + # Need to test casts + # Need to test all built in expressions and UDF (see https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF) + # Need to test xpath functionality + # Need to test regular expression based projection + # Need to test semi joins - Mysql doesn't support, how do I express semi-join? 
+ # Need to test map side group by + # Need to test limit + # Need to test sort by + # Need to test distribute by + # Need to test cluster by + # Need to test transforms + # Need to test lateral transforms + # Need to test subqueries + ], +}, +; + + + Index: test-e2e/conf/testpropertiesfile.conf =================================================================== --- test-e2e/conf/testpropertiesfile.conf (revision 0) +++ test-e2e/conf/testpropertiesfile.conf (revision 0) @@ -0,0 +1,21 @@ +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +$cfg = { + 'harness.log' => './out/harness.log' + ,'harness.log.level' => 'DEBUG' + ,'harness.console.level'=> 'INFO' +}; Index: test-e2e/conf/existing_deployer.conf =================================================================== --- test-e2e/conf/existing_deployer.conf (revision 0) +++ test-e2e/conf/existing_deployer.conf (revision 0) @@ -0,0 +1,39 @@ +#!/usr/bin/env perl + +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############################################################################### +# Test for TestHarness itself. +# +# + +$cfg = { + 'deployer' => 'HiveExistingClusterDeployer', + + # hadoop values + 'hadoopconfdir' => $ENV{'PH_CLUSTER_CONF'}, + 'hadoopbin' => $ENV{'PH_CLUSTER_BIN'}, + 'load_hive_only' => $ENV{'PH_LOAD_HIVE_ONLY'}, + # generate values + 'gentool' => './libexec/generate_data.pl', + + # hive values +} +; + + + Index: test-e2e/conf/default.conf =================================================================== --- test-e2e/conf/default.conf (revision 0) +++ test-e2e/conf/default.conf (revision 0) @@ -0,0 +1,64 @@ +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +my $me = `whoami`; +chomp $me; + +# The contents of this file can be rewritten to fit your installation. +# Also, you can define the following environment variables and set things up as in the test setup +# PH_ROOT Root directory where test harness is installed +# PH_LOCAL Root directory for input and output for local mode tests +# PH_OUT Root directory where output data will be stored (on local disk, not HDFS) +# PH_HOST Host for the metadata server +# PH_PORT Port for the metadata server +# PH_PASSWD Password for the metadata db +# PH_THRIFT URI for metadata thrift server + +$cfg = { + #HIVE + 'hive_data_dir' => '/user/hive/tests/data', + 'metastore_host' => $ENV{'PH_METASTORE_HOST'}, + 'metastore_port' => $ENV{'PH_METASTORE_PORT'}, + 'metastore_db' => 'hivemetastoredb', + 'metastore_driver' => 'com.mysql.jdbc.Driver', + 'metastore_user' => 'hive', + 'metastore_passwd' => $ENV{'PH_METASTORE_PASSWD'}, + 'metastore_thrift' => $ENV{'PH_METASTORE_THRIFT'}, + 'hivehome' => $ENV{'PH_HIVE_HOME'} + + #LOCAL + , 'localinpathbase' => "$ENV{PH_LOCAL}/in" + , 'localoutpathbase' => "$ENV{PH_LOCAL}/out/log" + , 'localpathbase' => "$ENV{PH_LOCAL}/out/pigtest/$me" + + #TEST + , 'benchmarkPath' => "$ENV{PH_OUT}/benchmarks", + 'resultsPath' => "$ENV{PH_OUT}/results", + + # TESTDB + 'dbuser' => 'hivetest', + 'dbhost' => 'localhost', + 'dbpasswd' => 'hivetest', + 'dbdb' => 'hivetestdb', + + , 'userhomePath' => "$ENV{HOME}" + ,'local.bin' => '/usr/bin' + + ,'logDir' => "$ENV{PH_OUT}/log" + ,'propertiesFile' => "./conf/testpropertiesfile.conf" + ,'harness.console.level' => 
'ERROR' + +}; Index: test-e2e/deployers/HiveExistingClusterDeployer.pm =================================================================== --- test-e2e/deployers/HiveExistingClusterDeployer.pm (revision 0) +++ test-e2e/deployers/HiveExistingClusterDeployer.pm (revision 0) @@ -0,0 +1,325 @@ +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package HiveExistingClusterDeployer; + +use IPC::Run qw(run); +use TestDeployer; +use Util; + +use strict; +use English; + +our @ISA = "TestDeployer"; + +########################################################################### +# Class: HiveExistingClusterDeployer +# Deploy the Pig harness to a cluster and database that already exists. + +############################################################################## +# Sub: new +# Constructor +# +# Paramaters: +# None +# +# Returns: +# None. +sub new +{ + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = {}; + + bless($self, $class); + + return $self; +} + +############################################################################## +# Sub: checkPrerequisites +# Check any prerequisites before a deployment is begun. 
For example if a +# particular deployment required the use of a database system it could +# check here that the db was installed and accessible. +# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub checkPrerequisites +{ + my ($self, $cfg, $log) = @_; + + if (! defined $ENV{'HADOOP_HOME'} || $ENV{'HADOOP_HOME'} eq "") { + print $log "You must set the environment variable HADOOP_HOME"; + die "HADOOP_HOME not defined"; + } + + # Set up values for the metastore + Util::setupHiveProperties($cfg, $log); + # Run a quick and easy Hadoop command to make sure we can + Util::runHadoopCmd($cfg, $log, "fs -ls /"); + +} + +############################################################################## +# Sub: deploy +# Deploy any required packages +# This is a no-op in this case because we're assuming both the cluster and the +# database already exist +# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub deploy +{ +} + +############################################################################## +# Sub: start +# Start any software modules that are needed. +# This is a no-op in this case because we're assuming both the cluster and the +# database already exist +# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub start +{ +} + +############################################################################## +# Sub: generateData +# Generate any data needed for this test run. 
+# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub generateData +{ + my ($self, $cfg, $log) = @_; + my @tables = ( + { + 'name' => "studenttab10k", + 'filetype' => "studenttab", + 'rows' => 10000, + 'hdfs' => "studenttab10k", + }, { + 'name' => "votertab10k", + 'filetype' => "votertab", + 'rows' => 10000, + 'hdfs' => "votertab10k", + }, { + 'name' => "studentparttab30k", + 'filetype' => "studentparttab", + 'rows' => 10000, + 'hdfs' => "studentparttab30k", + 'partitions' => ['20110924', '20110925', '20110926'] + },{ + 'name' => "studentnull10k", + 'filetype' => "studentnull", + 'rows' => 10000, + 'hdfs' => "studentnull10k", + },{ + 'name' => "all100k", + 'filetype' => "allscalars", + 'rows' => 100000, + 'hdfs' => "all100k", + } + ); + + + if (defined($cfg->{'load_hive_only'}) && $cfg->{'load_hive_only'} == 1) { + return $self->hiveMetaOnly($cfg, $log, \@tables); + } + + # Create the HDFS directories + Util::runHadoopCmd($cfg, $log, "fs -mkdir $cfg->{'hive_data_dir'}"); + + foreach my $table (@tables) { + print "Generating data for $table->{'name'}\n"; + # Generate the data + my @cmd = ($cfg->{'gentool'}, $table->{'filetype'}, $table->{'rows'}, + $table->{'name'}, $cfg->{'hive_data_dir'}); + $self->runCmd($log, \@cmd); + + # Copy the data to HDFS + my $hadoop = "fs -mkdir $cfg->{'hive_data_dir'}/$table->{'hdfs'}"; + Util::runHadoopCmd($cfg, $log, $hadoop); + + if (defined($table->{'partitions'})) { + foreach my $part (@{$table->{'partitions'}}) { + my $hadoop = "fs -mkdir + $cfg->{'hive_data_dir'}/$table->{'hdfs'}/$table->{'name'}.$part"; + Util::runHadoopCmd($cfg, $log, $hadoop); + my $hadoop = "fs -copyFromLocal $table->{'name'}.$part " . + "$cfg->{'hive_data_dir'}/$table->{'hdfs'}/$table->{'name'}.$part/$table->{'name'}.$part"; + Util::runHadoopCmd($cfg, $log, $hadoop); + } + } else { + my $hadoop = "fs -copyFromLocal $table->{'name'} ". 
+ "$cfg->{'hive_data_dir'}/$table->{'hdfs'}/$table->{'name'}"; + Util::runHadoopCmd($cfg, $log, $hadoop); + } + + print "Loading data into Hive for $table->{'name'}\n"; + Util::runHiveCmdFromFile($cfg, $log, + "./" . $table->{'name'} . ".hive.sql"); + + print "Loading data into MySQL for $table->{'name'}\n"; + Util::runDbCmd($cfg, $log, $table->{'name'} . ".mysql.sql"); + } + +} + +########################################################################### +# Sub: hiveMetaOnly +# Load metadata into Hive, but don't load Mysql or HDFS, as we assume +# these have already been loaded. +# +# Paramaters: +# cfg - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub hiveMetaOnly +{ + my ($self, $cfg, $log, $tables) = @_; + foreach my $table (@{$tables}) { + print "Generating data for $table->{'name'}\n"; + # Generate the data + my @cmd = ($cfg->{'gentool'}, $table->{'filetype'}, $table->{'rows'}, + $table->{'name'}, $cfg->{'hive_data_dir'}); + $self->runCmd($log, \@cmd); + + print "Loading data into Hive for $table->{'name'}\n"; + Util::runHiveCmdFromFile($cfg, $log, "./" . $table->{'name'} . + ".hive.sql"); + } +} + +############################################################################## +# Sub: confirmDeployment +# Run checks to confirm that the deployment was successful. When this is +# done the testing environment should be ready to run. +# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# Nothing +# This method should die with an appropriate error message if there is +# an issue. +# +sub confirmDeployment +{ +} + +############################################################################## +# Sub: deleteData +# Remove any data created that will not be removed by undeploying. 
+# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub deleteData +{ +} + +############################################################################## +# Sub: stop +# Stop any servers or systems that are no longer needed once testing is +# completed. +# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub stop +{ +} + +############################################################################## +# Sub: undeploy +# Remove any packages that were installed as part of the deployment. +# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# None +# +sub undeploy +{ +} + +############################################################################## +# Sub: confirmUndeployment +# Run checks to confirm that the undeployment was successful. When this is +# done anything that must be turned off or removed should be turned off or +# removed. +# +# Paramaters: +# globalHash - hash from config file, including deployment config +# log - log file handle +# +# Returns: +# Nothing +# This method should die with an appropriate error message if there is +# an issue. +# +sub confirmUndeployment +{ + die "$0 INFO : confirmUndeployment is a virtual function!"; +} + +sub runCmd($$$) +{ + my ($self, $log, $cmd) = @_; + + print $log "Going to run [" . join(" ", @$cmd) . "]\n"; + + run($cmd, \undef, $log, $log) or + die "Failed running " . join(" ", @$cmd) . 
"\n"; +} + +1; Index: test-e2e/scripts/create_test_db.sql =================================================================== --- test-e2e/scripts/create_test_db.sql (revision 0) +++ test-e2e/scripts/create_test_db.sql (revision 0) @@ -0,0 +1,5 @@ +CREATE USER 'hivetest'@'localhost' IDENTIFIED BY 'hivetest'; +CREATE DATABASE hivetestdb DEFAULT CHARACTER SET latin1 DEFAULT COLLATE latin1_swedish_ci; +GRANT ALL PRIVILEGES ON hivetestdb.* TO 'hivetest'@'localhost' WITH GRANT OPTION; +GRANT FILE ON *.* TO 'hivetest'@'localhost' IDENTIFIED BY 'hivetest'; +flush privileges; Index: test-e2e/harness.tar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: test-e2e/harness.tar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: test-e2e/build.xml =================================================================== --- test-e2e/build.xml (revision 0) +++ test-e2e/build.xml (revision 0) @@ -0,0 +1,198 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index: test-e2e/drivers/TestDriverHive.pm =================================================================== --- test-e2e/drivers/TestDriverHive.pm (revision 0) +++ test-e2e/drivers/TestDriverHive.pm (revision 0) @@ -0,0 +1,383 @@ +package TestDriverHive; + +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
# See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

###############################################################################
# Test driver for hive nightly tests.
#
#

use TestDriver;
use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
use Digest::MD5 qw(md5_hex);
use Util;
use File::Path;
use Cwd;

use strict;
use English;

our $className= "TestDriver";
our @ISA = "$className";
our $ROOT = (defined $ENV{'HARNESS_ROOT'} ? $ENV{'HARNESS_ROOT'} : die "ERROR: You must set environment variable HARNESS_ROOT\n");
our $toolpath = "$ROOT/libexec";

my $passedStr = 'passed';
my $failedStr = 'failed';
my $abortedStr = 'aborted';
my $skippedStr = 'skipped';
my $dependStr = 'failed_dependency';

##############################################################################
# Sub: new
# Construct a driver instance; all state comes from the parent TestDriver.
sub new
{
    # Call our parent
    my ($proto) = @_;
    my $class = ref($proto) || $proto;
    my $self = $class->SUPER::new;

    bless($self, $class);
    return $self;
}

##############################################################################
# Sub: replaceParameters
# Substitute the harness' :PLACEHOLDER: tokens in a command string with
# concrete values for this run and return the substituted string.
sub replaceParameters
{
##!!! Move this to Util.pm

    my ($self, $cmd, $outfile, $testCmd, $log) = @_;

    # $self
    $cmd =~ s/:LATESTOUTPUTPATH:/$self->{'latestoutputpath'}/g;

    # $outfile
    $cmd =~ s/:OUTPATH:/$outfile/g;

    # $ENV
    $cmd =~ s/:HARNESS:/$ENV{HARNESS_ROOT}/g;

    # $testCmd
    $cmd =~ s/:INPATH:/$testCmd->{'inpathbase'}/g;

    return $cmd;
}

##############################################################################
# Sub: globalSetup
# One-time per-run setup: derive metastore properties, compute a unique
# runid from the user name and time, and create the local, benchmark, and
# results directories.  Dies if any directory cannot be created.
sub globalSetup
{
    my ($self, $globalHash, $log) = @_;
    my $subName = (caller(0))[3];

    # Set up values for the metastore
    Util::setupHiveProperties($globalHash, $log);

    # Setup the output path
    my $me = `whoami`;
    chomp $me;
    $globalHash->{'runid'} = $me . "." . time;

    $globalHash->{'localpath'} = $globalHash->{'localpathbase'} . "/" . $globalHash->{'runid'} . "/";

    IPC::Run::run(['mkdir', '-p', $globalHash->{'localpath'}], \undef, $log, $log) or
        die "Cannot create localpath directory " . $globalHash->{'localpath'} .
            " " . "$ERRNO\n";

    IPC::Run::run(['mkdir', '-p', $globalHash->{'benchmarkPath'}], \undef, $log, $log) or
        die "Cannot create benchmark directory " . $globalHash->{'benchmarkPath'} .
            " " . "$ERRNO\n";

    $globalHash->{'thisResultsPath'} = $globalHash->{'localpath'} . "/"
        . $globalHash->{'resultsPath'};
    IPC::Run::run(['mkdir', '-p', $globalHash->{'thisResultsPath'}], \undef, $log, $log) or
        die "Cannot create results directory " . $globalHash->{'thisResultsPath'} .
            " " . "$ERRNO\n";
}

##############################################################################
# Sub: globalCleanup
# Intentionally a no-op; nothing created by globalSetup is removed here.
sub globalCleanup
{
    my ($self, $globalHash, $log) = @_;
}


##############################################################################
# Sub: runTest
# Run a single Hive test: write the test's 'sql' to a script file, run it
# through Hive, and post-process each output file.
# Returns a hash ref with keys:
#   rc             - exit status of the (first) Hive invocation
#   output         - array ref of post-processed (sorted) output files
#   originalOutput - array ref of the raw output files
#   sortArgs       - copied from the test so compare() can check sort order
sub runTest
{
    my ($self, $testCmd, $log) = @_;

    my %result;

    my @hivefiles = ();
    my @outfiles = ();
    # Write the hive script to a file.
    $hivefiles[0] = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
        $testCmd->{'num'} . ".0.sql";
    $outfiles[0] = $testCmd->{'thisResultsPath'} . "/" . $testCmd->{'group'} .
        "_" . $testCmd->{'num'} . ".0.out";

    open(FH, "> $hivefiles[0]") or
        die "Unable to open file $hivefiles[0] to write SQL script, $ERRNO\n";
    print FH $testCmd->{'sql'} . "\n";
    close(FH);

    # If the results are written to a table run the command and then
    # run another Hive command to dump the results of the table.
    if (defined($testCmd->{'result_table'})) {
        Util::runHiveCmdFromFile($testCmd, $log, $hivefiles[0]);
        $result{'rc'} = $? >> 8;

        my @results = ();
        if (ref($testCmd->{'result_table'}) ne 'ARRAY') {
            $results[0] = $testCmd->{'result_table'};
        } else {
            @results = @{$testCmd->{'result_table'}};
        }
        # NOTE(review): this overwrites $hivefiles[0] with the dump-table
        # script; that is safe only because the original test script was
        # already run above and its rc captured.
        for (my $i = 0; $i < @results; $i++) {
            $hivefiles[$i] = $testCmd->{'localpath'} .
                $testCmd->{'group'} . "_" . $testCmd->{'num'} .
                ".dumptable.$i.sql";
            $outfiles[$i] = $testCmd->{'thisResultsPath'} . "/" .
                $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".$i.out";
            open(FH, "> $hivefiles[$i]") or
                die "Unable to open file $hivefiles[$i] to write SQL " .
                    "script, $ERRNO\n";
            print FH "select * from " . $results[$i] . ";\n";
            close(FH);
        }
    }

    my @originalOutputs = ();
    my @outputs = ();
    $result{'originalOutput'} = \@originalOutputs;
    $result{'output'} = \@outputs;

    for (my $i = 0; $i < @hivefiles; $i++) {
        my $outfp;
        open($outfp, "> $outfiles[$i]") or
            die "Unable to open output file $outfiles[$i], $!\n";

        Util::runHiveCmdFromFile($testCmd, $log, $hivefiles[$i], $outfp);

        # Don't overwrite rc if we set it above
        $result{'rc'} = $? >> 8 unless defined $result{'rc'};
        close($outfp);

        $originalOutputs[$i] = $outfiles[$i];
        $outputs[$i] =
            $self->postProcessSingleOutputFile($outfiles[$i], $testCmd, $log);
    }

    # Compare doesn't get the testCmd hash, so I need to stuff the necessary
    # info about sorting into the result.
    if (defined $testCmd->{'sortArgs'} && $testCmd->{'sortArgs'}) {
        $result{'sortArgs'} = $testCmd->{'sortArgs'};
    }

    return \%result;
}


##############################################################################
# Sub: generateBenchmark
# Produce the expected output by running the test's 'verify_sql' (or its
# 'sql' when no verify_sql is given) against the benchmark database.
# Returns a hash ref with 'rc' (array ref of exit codes) and 'output'
# (array ref of post-processed benchmark files), parallel to runTest.
sub generateBenchmark
{
    my ($self, $testCmd, $log) = @_;

    my %result;

    # Write the SQL to a file.
    my @verifies = ();
    if (defined $testCmd->{'verify_sql'}) {
        if (ref($testCmd->{'verify_sql'}) eq "ARRAY") {
            @verifies = @{$testCmd->{'verify_sql'}};
        } else {
            $verifies[0] = $testCmd->{'verify_sql'};
        }
    } else {
        $verifies[0] = $testCmd->{'sql'};
    }

    my @rcs = ();
    $result{'rc'} = \@rcs;
    my @outputs = ();
    $result{'output'} = \@outputs;
    for (my $i = 0; $i < @verifies; $i++) {
        my $sqlfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
            $testCmd->{'num'} . ".benchmark.$i.sql";
        my $outfile = $testCmd->{'benchmarkPath'} . "/" .
            $testCmd->{'group'} . "_" . $testCmd->{'num'} .
            ".benchmark.$i.out";

        open(FH, "> $sqlfile") or
            die "Unable to open file $sqlfile to write SQL script, $ERRNO\n";
        print FH $verifies[$i];
        close(FH);

        my $outfp;
        open($outfp, "> $outfile") or
            die "Unable to open output file $outfile, $!\n";

        Util::runDbCmd($testCmd, $log, $sqlfile, $outfp);
        $rcs[$i] = $? >> 8;
        close($outfp);

        $outputs[$i] =
            $self->postProcessSingleOutputFile($outfile, $testCmd, $log, 1);
    }

    return \%result;
}

##############################################################################
# Sub: compare
# Compare each test output file against the matching benchmark file by
# checksumming both (via cksum); when the test supplied 'sortArgs', also
# verify the raw test output is sorted accordingly (sort -cs).  Returns
# true only when no output differs and every sort check passes.
sub compare
{
    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;

    # Make sure we have the same number of results from runTest and
    # generateBenchmark
    if (scalar(@{$testResult->{'output'}}) !=
            scalar(@{$benchmarkResult->{'output'}})) {
        die "runTest returned " . scalar(@{$testResult->{'output'}}) .
            " results, but generateBenchmark returned " .
            scalar(@{$benchmarkResult->{'output'}}) . "\n";
    }

    my $totalFailures = 0;
    for (my $i = 0; $i < @{$testResult->{'output'}}; $i++) {
        # cksum the two files to see if they are the same
        my ($testChksm, $benchmarkChksm);
        IPC::Run::run((['cat', @{$testResult->{'output'}}[$i]], '|',
            ['cksum']), \$testChksm, $log) or
            die "$0: error: cannot run cksum on test results\n";
        IPC::Run::run((['cat', @{$benchmarkResult->{'output'}}[$i]], '|',
            ['cksum']), \$benchmarkChksm, $log) or
            die "$0: error: cannot run cksum on benchmark\n";

        chomp $testChksm;
        chomp $benchmarkChksm;
        print $log
            "test cksum: $testChksm\nbenchmark cksum: $benchmarkChksm\n";

        if ($testChksm ne $benchmarkChksm) {
            print $log "Test output $i checksum does not match benchmark " .
                "checksum\n";
            print $log "Test $i checksum = <$testChksm>\n";
            print $log "Expected $i checksum = <$benchmarkChksm>\n";
            print $log "RESULTS DIFFER: vimdiff " . cwd .
                "/" . @{$testResult->{'output'}}[$i] . " " . cwd .
                "/" . @{$benchmarkResult->{'output'}}[$i] . "\n";
            $totalFailures++;
        }

        # Now, check if the sort order is specified
        if (defined($testResult->{'sortArgs'})) {
            my @sortChk = ('sort', '-cs');
            push(@sortChk, @{$testResult->{'sortArgs'}});
            push(@sortChk, @{$testResult->{'originalOutput'}}[$i]);
            print $log "Going to run sort check command: " .
                join(" ", @sortChk) . "\n";
            IPC::Run::run(\@sortChk, \undef, $log, $log);
            my $sortrc = $?;
            if ($sortrc) {
                print $log "Sort check failed\n";
                $totalFailures++;
            }
        }
    }

    return $totalFailures == 0;
}

##############################################################################
# Sub: postProcessSingleOutputFile
# Normalize one output file so it can be checksummed against a benchmark:
# optionally run it through floatpostprocessor.pl (when the test sets
# 'floatpostprocess' and 'delimiter'), strip NULLs for benchmarks (when
# 'nullpostprocess' is set), then sort.  Returns the sorted file's name.
sub postProcessSingleOutputFile
{
    my ($self, $outfile, $testCmd, $log, $isBenchmark) = @_;

    # If requested, process the data to smooth over floating point
    # differences.
    if (defined $testCmd->{'floatpostprocess'} &&
            defined $testCmd->{'delimiter'}) {
        # Move the file to a temp file and run through the pre-processor.
        my $tmpfile = "$outfile.tmp";
        link($outfile, $tmpfile) or
            die "Unable to create temporary file $tmpfile, $!\n";
        unlink($outfile) or
            die "Unable to unlink file $outfile, $!\n";
        open(IFH, "< $tmpfile") or
            die "Unable to open file $tmpfile, $!\n";
        open(OFH, "> $outfile") or
            die "Unable to open file $outfile, $!\n";
        my @cmd = ("$toolpath/floatpostprocessor.pl",
            $testCmd->{'delimiter'});
        print $log "Going to run [" . join(" ", @cmd) . "]\n";
        IPC::Run::run(\@cmd, \*IFH, \*OFH, $log) or
            die "Failed to run float postprocessor, $!\n";
        close(IFH);
        close(OFH);
        unlink($tmpfile);
    }

    if ($isBenchmark && defined $testCmd->{'nullpostprocess'}) {
        # Move the file to a temp file and run through the pre-processor.
        my $tmpfile = "$outfile.tmp";
        link($outfile, $tmpfile) or
            die "Unable to create temporary file $tmpfile, $!\n";
        unlink($outfile) or
            die "Unable to unlink file $outfile, $!\n";
        open(IFH, "< $tmpfile") or
            die "Unable to open file $tmpfile, $!\n";
        open(OFH, "> $outfile") or
            die "Unable to open file $outfile, $!\n";
        my @cmd = ("sed", "s/NULL//g");
        print $log "Going to run [" . join(" ", @cmd) . "]\n";
        # NOTE(review): the error message below says "float postprocessor"
        # but this is the NULL-stripping sed pass.
        IPC::Run::run(\@cmd, \*IFH, \*OFH, $log) or
            die "Failed to run float postprocessor, $!\n";
        close(IFH);
        close(OFH);
        unlink($tmpfile);
    }

    # Sort the results for the benchmark compare.
    my $sortfile = "$outfile.sorted";
    my @cmd = ("sort", $outfile);
    print $log "Going to run [" . join(" ", @cmd) . "]\n";
    IPC::Run::run(\@cmd, '>', "$sortfile");

    return $sortfile;
}


##############################################################################
# Count the number of stores in a Pig Latin script, so we know how many files
# we need to compare.
#
sub countStores($$)
{
    my ($self, $testCmd) = @_;

    # Special work around for queries with more than one store that are not
    # actually multiqueries.
    if (defined $testCmd->{'notmq'}) {
        return 1;
    }

    # NOTE(review): $count stays undef when no store is found; callers
    # treating the return as 0 rely on numeric context.
    my $count;

    # hope they don't have more than store per line
    # also note that this won't work if you comment out a store
    my @q = split(/\n/, $testCmd->{'pig'});
    for (my $i = 0; $i < @q; $i++) {
        $count += $q[$i] =~ /store\s+[a-zA-Z][a-zA-Z0-9_]*\s+into/i;
    }

    return $count;
}

1;
Index: test-e2e/drivers/Util.pm
===================================================================
--- test-e2e/drivers/Util.pm (revision 0)
+++ test-e2e/drivers/Util.pm (revision 0)
@@ -0,0 +1,208 @@
#!/usr/bin/env perl

############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


###########################################################################
# Class: Util
#
# A collection of helper subroutines.
#


package Util;

use IPC::Run qw(run);
use strict;

##############################################################################
# Sub: setupHiveProperties
#
# Assure that necessary values are set in config in order to set Hive
# Java properties.
#
# Parameters:
#   cfg - config hash ref; metastore_* keys are read and filled in
#   log - open log file handle (used for the missing-host error)
#
# Returns:
#   Nothing; the config hash is updated in place.
sub setupHiveProperties($$)
{
    my ($conf, $logfh) = @_;

    my $useThrift =
        defined($conf->{'metastore_thrift'}) && $conf->{'metastore_thrift'} == 1;

    if (!$useThrift) {
        # Local metastore: embedded Derby, created on first use.
        $conf->{'metastore_connection'} =
            "jdbc:derby:;databaseName=metastore_db;create=true";
        $conf->{'metastore_driver'} = "org.apache.derby.jdbc.EmbeddedDriver";
        return;
    }

    # Thrift metastore: a host is mandatory.
    unless (defined $conf->{'metastore_host'} && $conf->{'metastore_host'} ne "") {
        print $logfh "When using thrift, you must set the key " .
            " 'metastore_host' to the machine your metastore is on\n";
        die "metastore_host is not set in existing.conf\n";
    }

    $conf->{'metastore_connection'} =
        "jdbc:$conf->{'metastore_db'}://$conf->{'metastore_host'}/hivemetastoredb?createDatabaseIfNotExist=true";

    # Fall back to defaults when password or port is unset or empty.
    $conf->{'metastore_passwd'} = 'hive'
        if (!defined $conf->{'metastore_passwd'} || $conf->{'metastore_passwd'} eq "");

    $conf->{'metastore_port'} = '9933'
        if (!defined $conf->{'metastore_port'} || $conf->{'metastore_port'} eq "");

    $conf->{'metastore_uri'} =
        "thrift://$conf->{'metastore_host'}:$conf->{'metastore_port'}";
}

##############################################################################
# Sub: runHiveCmdFromFile
#
# Run the provided file using the Hive command line.
#
# cfg - The configuration file for the test
# log - reference to the log file, should be an open file pointer
# sql - name of file containing SQL to run.  Optional, if present -f $sql
#   will be appended to the command.
# outfile - open file pointer (or variable reference) to write stdout to for
#   this test.  Optional, will be written to $log if this value is not
#   provided.
# errfile - open file pointer (or variable reference) to write stderr to for
#   this test.  Optional, will be written to $log if this value is not
#   provided.
# noFailOnFail - if true, do not fail when the Hive command returns non-zero
# value.
# Returns:
#   The true value returned by IPC::Run::run on success.  On failure,
#   returns the command's exit status when noFailOnFail is set; otherwise
#   dies.
sub runHiveCmdFromFile($$;$$$$)
{
    my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_;

    if (!defined($ENV{'HADOOP_HOME'})) {
        die "Cannot run hive when HADOOP_HOME environment variable is not set.";
    }

    # Default the child's stdout/stderr to the log.
    $outfile = $log if (!defined($outfile));
    $errfile = $log if (!defined($errfile));

    my @cmd;
    if (defined($sql)) {
        @cmd = ("$cfg->{'hivehome'}/bin/hive", "-f", $sql);
    } else {
        @cmd = ("$cfg->{'hivehome'}/bin/hive");
    }

    # Add all of the modified properties we want to set
    push(@cmd,
        ("--hiveconf", "javax.jdo.option.ConnectionURL=$cfg->{'metastore_connection'}",
         "--hiveconf", "javax.jdo.option.ConnectionDriverName=$cfg->{'metastore_driver'}"));

    if (defined($cfg->{'metastore_thrift'}) && $cfg->{'metastore_thrift'} == 1) {
        push(@cmd,
            ("--hiveconf", "hive.metastore.local=false",
             "--hiveconf", "hive.metastore.uris=thrift://$cfg->{'metastore_host'}:$cfg->{'metastore_port'}",
             "--hiveconf", "javax.jdo.option.ConnectionPassword=$cfg->{'metastore_passwd'}"));
    }

    if (defined($cfg->{'additionaljarspath'})) {
        $ENV{'HIVE_AUX_JARS_PATH'} = $cfg->{'additionaljarspath'};
    }

    if (defined($cfg->{'hiveconf'})) {
        foreach my $hc (@{$cfg->{'hiveconf'}}) {
            push(@cmd, "--hiveconf", $hc);
        }
    }

    if (defined($cfg->{'hivecmdargs'})) {
        push(@cmd, @{$cfg->{'hivecmdargs'}});
    }

    if (defined($cfg->{'hiveops'})) {
        $ENV{'HIVE_OPTS'} = join(" ", @{$cfg->{'hiveops'}});
    }

    $ENV{'HIVE_HOME'} = $cfg->{'hivehome'};

    # Capture the HADOOP*/HIVE* environment for the log.
    # Fix: initialize to '' so the first .= does not concatenate undef.
    my $envStr = "";
    for my $k (keys(%ENV)) {
        $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/);
    }
    $envStr .= " ";
    print $log "Going to run hive command [" . join(" ", @cmd) .
        "] with environment set to [$envStr]\n";
    my $runrc = run(\@cmd, \undef, $outfile, $errfile);
    my $rc = $? >> 8;

    return $runrc if $runrc; # success

    if (defined($noFailOnFail) && $noFailOnFail) {
        return $rc;
    } else {
        die "Failed running hive command [" . join(" ", @cmd) . "]\n";
    }
}

##############################################################################
# Sub: runHadoopCmd
#
# Run the provided hadoop command.
#
# cfg - The configuration hash for the test (unused here but kept for
#   signature consistency with the other run* helpers).
# log - open log file handle; receives the child's stdout and stderr.
# c   - the hadoop subcommand and arguments as one space-separated string.
#
# Returns:
#   Nothing; dies if the command fails.
sub runHadoopCmd($$$)
{
    my ($cfg, $log, $c) = @_;

    my @cmd = ("$ENV{'HADOOP_HOME'}/bin/hadoop");
    push(@cmd, split(' ', $c));

    print $log "Going to run [" . join(" ", @cmd) . "]\n";

    run(\@cmd, \undef, $log, $log) or
        die "Failed running " . join(" ", @cmd) . "\n";
}

##############################################################################
# Sub: runDbCmd
#
# Run the provided mysql script against the benchmark database.
#
# cfg     - config hash; dbuser, dbdb, dbhost, dbpasswd are read.
# log     - open log file handle (receives stderr and the command trace).
# sqlfile - file of SQL statements fed to mysql on stdin.
# outfile - optional handle for mysql's stdout; defaults to the log.
#
# Returns:
#   Nothing; dies if the file cannot be opened or mysql fails.
sub runDbCmd($$$;$)
{
    my ($cfg, $log, $sqlfile, $outfile) = @_;

    $outfile = $log if (!defined($outfile));

    # Fix: three-argument open with a lexical handle; the old two-argument
    # bareword form (open(SQL, "< $sqlfile")) is mode-injection-prone.
    open(my $sqlfh, '<', $sqlfile) or
        die "Unable to open $sqlfile for reading, $!\n";

    my @cmd = ('mysql', '-u', $cfg->{'dbuser'}, '-D', $cfg->{'dbdb'},
        '-h', $cfg->{'dbhost'}, "--password=$cfg->{'dbpasswd'}",
        "--skip-column-names");

    print $log "Going to run [" . join(" ", @cmd) . "] passing in [$sqlfile]\n";

    run(\@cmd, $sqlfh, $outfile, $log) or
        die "Failed running " . join(" ", @cmd) . "\n";
    close($sqlfh);
}


1;
Index: test-e2e/drivers/TestDriverHiveCmdLine.pm
===================================================================
--- test-e2e/drivers/TestDriverHiveCmdLine.pm (revision 0)
+++ test-e2e/drivers/TestDriverHiveCmdLine.pm (revision 0)
@@ -0,0 +1,179 @@
package TestDriverHiveCmdLine;

############################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

###############################################################################
# Test driver for hive nightly tests.
#
#

use TestDriverHive;
use IPC::Run; # don't do qw(run), it screws up TestDriver which also has a run method
use Util;
use File::Path;
use Cwd;

use strict;
use English;

our $className= "TestDriverHive";
our @ISA = "$className";

##############################################################################
# Sub: new
# Construct a driver instance; all state comes from the parent class.
sub new
{
    # Call our parent
    my ($proto) = @_;
    my $class = ref($proto) || $proto;
    my $self = $class->SUPER::new;

    bless($self, $class);
    return $self;
}

##############################################################################
# Sub: runTest
# Run one command-line test.  If the test provides 'sql' it is written to a
# script file and passed to hive via -f; otherwise hive is invoked bare.
# Returns a hash ref with keys 'rc', 'stdout', and 'stderr'.
sub runTest
{
    my ($self, $testCmd, $log) = @_;

    my %result;

    my ($stdout, $stderr);

    # If they provided a hive script in 'sql', write it to a file.
    my $hivefile = undef;
    if (defined($testCmd->{'sql'})) {
        $hivefile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
            $testCmd->{'num'} . ".sql";

        open(FH, "> $hivefile") or
            die "Unable to open file $hivefile to write SQL script, $ERRNO\n";
        print FH $testCmd->{'sql'} . "\n";
        close(FH);
    }
    # noFailOnFail is set: a non-zero exit from hive is a legitimate test
    # outcome here and is checked against the expected rc in compare().
    Util::runHiveCmdFromFile($testCmd, $log, $hivefile, \$stdout, \$stderr, 1);
    $result{'rc'} = $? >> 8;

    $result{'stdout'} = $stdout;
    $result{'stderr'} = $stderr;

    return \%result;
}

##############################################################################
# Sub: generateBenchmark
# No benchmark is generated; expectations are inline in the test.
sub generateBenchmark
{
    # Intentionally empty
}

##############################################################################
# Sub: compare
# Check the test result against whichever expectation keys the test
# supplies: 'rc', plus exact / NON / regex (non-)matches on stdout and
# stderr.  Every failed check is logged; returns true only when all
# supplied checks pass.
sub compare
{
    my ($self, $testResult, $benchmarkResult, $log, $testCmd) = @_;

    my $result = 1; # until proven wrong...

    # Return Code
    if (defined $testCmd->{'rc'}) {
        if ((! defined $testResult->{'rc'}) ||
                ($testResult->{'rc'} != $testCmd->{'rc'})) {
            print $log "Check failed: rc = <" . $testCmd->{'rc'} .
                "> expected, test returned rc = <" . $testResult->{'rc'}
                . ">\n";
            $result = 0;
        }
    }

    # Standard Out
    if (defined $testCmd->{'expected_out'}) {
        if ($testResult->{'stdout'} ne $testCmd->{'expected_out'}) {
            print $log "Check failed: exact match of <" .
                $testCmd->{'expected_out'} .
                "> expected in stdout:<" . $testResult->{'stdout'}
                . ">\n";
            $result = 0;
        }
    }

    if (defined $testCmd->{'not_expected_out'}) {
        if ($testResult->{'stdout'} eq $testCmd->{'not_expected_out'}) {
            # Bug fix: log the key actually being checked
            # ('not_expected_out'); the original printed 'expected_out'.
            print $log "Check failed: NON-match of <" .
                $testCmd->{'not_expected_out'} . "> expected to stdout:<" .
                $testResult->{'stdout'} . ">\n";
            $result = 0;
        }
    }

    if (defined $testCmd->{'expected_out_regex'}) {
        if ($testResult->{'stdout'} !~ $testCmd->{'expected_out_regex'}) {
            print $log "Check failed: regex match of <" .
                $testCmd->{'expected_out_regex'} . "> expected in stdout:<" .
                $testResult->{'stdout'} . ">\n";
            $result = 0;
        }
    }

    if (defined $testCmd->{'not_expected_out_regex'}) {
        if ($testResult->{'stdout'} =~ $testCmd->{'not_expected_out_regex'}) {
            print $log "Check failed: regex NON-match of <" .
                $testCmd->{'not_expected_out_regex'} .
                "> expected in stdout:<" . $testResult->{'stdout'} . ">\n";
            $result = 0;
        }
    }

    # Standard Error
    if (defined $testCmd->{'expected_err'}) {
        if ($testResult->{'stderr'} ne $testCmd->{'expected_err'}) {
            print $log "Check failed: exact match of <" .
                $testCmd->{'expected_err'} .
                "> expected in stderr:<" . $testResult->{'stderr'}
                . ">\n";
            $result = 0;
        }
    }

    if (defined $testCmd->{'not_expected_err'}) {
        if ($testResult->{'stderr'} eq $testCmd->{'not_expected_err'}) {
            # Bug fix: log the key actually being checked
            # ('not_expected_err'); the original printed 'expected_err'.
            print $log "Check failed: NON-match of <" .
                $testCmd->{'not_expected_err'} . "> expected to stderr:<" .
                $testResult->{'stderr'} . ">\n";
            $result = 0;
        }
    }

    if (defined $testCmd->{'expected_err_regex'}) {
        if ($testResult->{'stderr'} !~ $testCmd->{'expected_err_regex'}) {
            print $log "Check failed: regex match of <" .
                $testCmd->{'expected_err_regex'} . "> expected in stderr:<" .
                $testResult->{'stderr'} . ">\n";
            $result = 0;
        }
    }

    if (defined $testCmd->{'not_expected_err_regex'}) {
        if ($testResult->{'stderr'} =~ $testCmd->{'not_expected_err_regex'}) {
            print $log "Check failed: regex NON-match of <" .
                $testCmd->{'not_expected_err_regex'} .
                "> expected in stderr:<" . $testResult->{'stderr'} . ">\n";
            $result = 0;
        }
    }


    return $result;
}

1;