Extend the HCatalog end-to-end test configurations (hadoop.conf, hive.conf, pig.conf).

Summary of the hunks below:
  * hadoop.conf: adds test 4, which runs WriteRC from all100krc into the
    sequencefile-backed table hadoop_write_4.
  * hive.conf, Hive_Read: adds reads of all100k and all100kjson ahead of the
    existing all100krc read, which is renumbered to test 3.
  * hive.conf, Hive_Write: test 1 now writes a delimited text table; tests 2-4
    write JsonSerDe, rcfile and sequencefile tables.
  * pig.conf, Pig_Read: test 4 reads a table with one text and one rc partition;
    test 5 reads from the non-default database pig_db_1.
  * pig.conf, Pig_Write: test 4 stores into a sequencefile table; test 5 stores
    into a table in pig_db_1.
  * pig.conf, Pig_Change_Schema: new group of three chained tests that add a
    column to a partitioned table between two partition writes.

Review notes:
  * The pig_write_5 prep now drops the table before creating it, the same way
    pig_read_5 and the other write tests do, so a rerun does not reuse a table
    left over from a previous run. This adds one line, so the second pig.conf
    hunk header now reads +219,83 instead of +219,82.
  * The post-image blob id on the pig.conf index line was computed before the
    pig_write_5 adjustment.
  * Line breaks and indentation inside the hunks were reconstructed from a
    whitespace-collapsed copy of this patch. Apply with whitespace-insensitive
    matching (git apply --ignore-whitespace, or patch -l).
  * NOTE(review): the 'delimiter' values appear as a single space in the copy
    this was rebuilt from. Confirm against the target files whether a tab
    character is intended.

diff --git src/test/e2e/hcatalog/tests/hadoop.conf src/test/e2e/hcatalog/tests/hadoop.conf
index f4411df..89921c5 100644
--- src/test/e2e/hcatalog/tests/hadoop.conf
+++ src/test/e2e/hcatalog/tests/hadoop.conf
@@ -180,7 +180,24 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR:
         ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\
         ,'floatpostprocess' => 1
         ,'delimiter' => ' '
-        },
+        },{
+         'num' => 4
+        ,'hcat_prep' => q\
+drop table if exists hadoop_write_4;
+create table hadoop_write_4(
+    name string,
+    age int,
+    gpa double)
+stored as sequencefile;
+\,
+        ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_4
+\,
+        ,'result_table' => 'hadoop_write_4'
+        ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\
+        ,'floatpostprocess' => 1
+        ,'delimiter' => ' '
+        }
       ],
     }, # end g
     {
diff --git src/test/e2e/hcatalog/tests/hive.conf src/test/e2e/hcatalog/tests/hive.conf
index de19f60..e0fc2d5 100644
--- src/test/e2e/hcatalog/tests/hive.conf
+++ src/test/e2e/hcatalog/tests/hive.conf
@@ -94,6 +94,22 @@ $cfg = {
     'name' => 'Hive_Read',
     'tests' => [ {
         'num' => 1,
+        # float and double columns removed because mysql and hive can't agree
+        # on how to round, even using floor/truncate functions
+        'sql' => q\select t, si, i, b, s from all100k;\,
+        'verify_sql' => q\select t, si, i, b, s from all100k;\,
+        'floatpostprocess' => 1,
+        'delimiter' => ' ',
+        },{
+        'num' => 2,
+        # double column removed because mysql and hive can't agree
+        # on how to round, even using floor/truncate functions
+        'sql' => q\select i, s from all100kjson;\,
+        'verify_sql' => q\select i, s from all100kjson;\,
+        'floatpostprocess' => 1,
+        'delimiter' => ' ',
+        },{
+        'num' => 3,
         'sql' => q\select name, age, floor(gpa) from all100krc;\,
         'verify_sql' => q\select name, age, truncate(gpa, 0) from all100krc;\,
         'floatpostprocess' => 1,
@@ -104,12 +120,44 @@ $cfg = {
     'name' => 'Hive_Write',
     'tests' => [ {
         'num' => 1,
+        # float and double columns removed because mysql and hive can't agree
+        # on how to round, even using floor/truncate functions
         'sql' => q\
 drop table if exists hive_write_1;
-create table hive_write_1 (name string, age int, gpa double) stored as rcfile;
-insert into TABLE hive_write_1 select name, age, floor(gpa) + 0.1 from all100krc;\,
+create table hive_write_1 (t tinyint, si smallint, i int, b bigint, s string) row format delimited stored as textfile;
+insert into TABLE hive_write_1 select t, si, i, b, s from all100k;\,
         'result_table' => 'hive_write_1',
-        'verify_sql' =>"select name, age, floor(gpa) + 0.1 from all100krc;",
+        'verify_sql' => q\select t, si, i, b, s from all100k;\,
+        'floatpostprocess' => 1,
+        'delimiter' => ' ',
+        },{
+        'num' => 2,
+        'sql' => q\
+drop table if exists hive_write_2;
+create table hive_write_2 (name string, age int, gpa double) row format serde 'org.apache.hcatalog.data.JsonSerDe' stored as textfile;
+insert into TABLE hive_write_2 select s, i, 0.1 from all100kjson;\,
+        'result_table' => 'hive_write_2',
+        'verify_sql' =>"select s, i, 0.1 from all100kjson;",
+        'floatpostprocess' => 1,
+        'delimiter' => ' ',
+        },{
+        'num' => 3,
+        'sql' => q\
+drop table if exists hive_write_3;
+create table hive_write_3 (name string, age int, gpa double) stored as rcfile;
+insert into TABLE hive_write_3 select name, age, 1.1 from all100krc;\,
+        'result_table' => 'hive_write_3',
+        'verify_sql' =>"select name, age, 1.1 from all100krc;",
+        'floatpostprocess' => 1,
+        'delimiter' => ' ',
+        },{
+        'num' => 4,
+        'sql' => q\
+drop table if exists hive_write_4;
+create table hive_write_4 (name string, age int, gpa double) stored as sequencefile;
+insert into TABLE hive_write_4 select name, age, 1.1 from studenttab10k;\,
+        'result_table' => 'hive_write_4',
+        'verify_sql' =>"select name, age, 1.1 from studenttab10k;",
         'floatpostprocess' => 1,
         'delimiter' => ' ',
         } ]
diff --git src/test/e2e/hcatalog/tests/pig.conf src/test/e2e/hcatalog/tests/pig.conf
index 54e6897..05c8863 100644
--- src/test/e2e/hcatalog/tests/pig.conf
+++ src/test/e2e/hcatalog/tests/pig.conf
@@ -137,6 +137,34 @@ store b into ':OUTPATH:';\,
         ,'sql' => q\select name, age from all100krc;\
         ,'floatpostprocess' => 1
         ,'delimiter' => ' '
+        },
+        {
+        # A table with one partition in text and one in rc
+         'num' => 4
+        ,'hcat_prep'=>q?drop table if exists pig_read_4;
+create table pig_read_4 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile;
+alter table pig_read_4 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';
+alter table pig_read_4 set fileformat rcfile;
+alter table pig_read_4 add partition (b='2') location '/user/hcat/tests/data/all100krc';?
+        ,'pig' => q\a = load 'pig_read_4' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate name, age, b;
+store b into ':OUTPATH:';\,
+        ,'sql' => q\(select name, age, 1 from studenttab10k)
+    union all
+    (select name, age, 2 from all100krc);\
+        },
+        {
+        # Read from a table in the non-default database
+         'num' => 5
+        ,'hcat_prep'=>q?create database if not exists pig_db_1;
+drop table if exists pig_db_1.pig_read_5;
+create table pig_db_1.pig_read_5 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile;
+use pig_db_1;
+alter table pig_read_5 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';?
+        ,'pig' => q\a = load 'pig_db_1.pig_read_5' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate name, age, b;
+store b into ':OUTPATH:';\,
+        ,'sql' => q\select name, age, 1 from studenttab10k;\
         }
       ],
     }, # end g
@@ -191,10 +219,83 @@ store b into 'pig_write_3' using org.apache.hcatalog.pig.HCatStorer();\,
         ,'result_table' => 'pig_write_3'
         ,'floatpostprocess' => 1
         ,'delimiter' => ' '
+        },
+        {
+        # Store in a sequence file
+         'num' => 4
+        ,'hcat_prep'=>q\drop table if exists pig_write_4;
+create table pig_write_4(
+    name string,
+    age int,
+    gpa double)
+stored as sequencefile;
+\
+        ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate name, age, 0.1;
+c = foreach b generate name, age, $2 as gpa;
+store c into 'pig_write_4' using org.apache.hcatalog.pig.HCatStorer();\,
+        ,'sql' => q\select name, age, 0.1 from studenttab10k;\
+        ,'result_table' => 'pig_write_4'
+        ,'floatpostprocess' => 1
+        ,'delimiter' => ' '
+        },
+        {
+        # Write to a table in the non-default database
+         'num' => 5
+        ,'hcat_prep'=>q?create database if not exists pig_db_1;
+drop table if exists pig_db_1.pig_write_5;
+create table pig_db_1.pig_write_5 (name string, age int) row format delimited fields terminated by '\t' stored as textfile;?
+        ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate name, age;
+store b into 'pig_db_1.pig_write_5' using org.apache.hcatalog.pig.HCatStorer();\,
+        ,'sql' => q\select name, age from studenttab10k;\
+        ,'result_table' => 'pig_db_1.pig_write_5'
         }
       ],
     }, # end g
     {
+      'name' => 'Pig_Change_Schema',
+      'tests' => [
+        {
+        # I don't like this, I'm using one test to set up for the next. But I don't know how else to do this.
+         'num' => 1
+        ,'hcat_prep'=>q\drop table if exists pig_change_schema_1;
+create table pig_change_schema_1 (name string) partitioned by (ds string) STORED AS TEXTFILE;\
+        ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by ds == '20110924';
+c = foreach b generate name;
+store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110924');\,
+        ,'result_table' => 'pig_change_schema_1'
+        ,'sql' => q\select name, ds from studentparttab30k where ds='20110924';\
+        },
+        {
+        # I don't like this, I'm using one test to set up for the next. But I don't know how else to do this.
+         'num' => 2
+        ,'depends_on' => 'Pig_Change_Schema_1'
+        ,'hcat_prep'=>q\alter table pig_change_schema_1 add columns (age int);\
+        ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by ds == '20110925';
+c = foreach b generate name, age;
+store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110925');\,
+        ,'result_table' => 'pig_change_schema_1'
+        ,'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924')
+    union all
+    (select name, age, ds from studentparttab30k where ds = '20110925');\
+        },
+        {
+        # I don't like this, this test depends on the table the previous two tests set up. But I don't know how else to do this.
+         'num' => 3
+        , 'depends_on' => 'Pig_Change_Schema_2'
+        ,'pig' => q\a = load 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatLoader();
+c = foreach a generate name, age, ds;
+store c into ':OUTPATH:';\
+        ,'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924')
+    union all
+    (select name, age, ds from studentparttab30k where ds = '20110925');\
+        }
+      ],
+    },
+    {
       'name' => 'Pig_HBase',
       'tests' => [
         {