From ea166f1ece8a00acd86cc393838dbb1e8e3f2939 Mon Sep 17 00:00:00 2001 From: "David Z. Chen" Date: Mon, 21 Jul 2014 04:17:48 -0700 Subject: [PATCH] HIVE-7457: Minor HCatalog Pig Adapter test cleanup. --- .../hive/hcatalog/pig/HCatStorerWrapper.java | 17 +- .../org/apache/hive/hcatalog/pig/MockLoader.java | 9 +- .../org/apache/hive/hcatalog/pig/MyPigStorage.java | 1 - .../apache/hive/hcatalog/pig/TestE2EScenarios.java | 107 ++-- .../apache/hive/hcatalog/pig/TestHCatLoader.java | 289 ++++++---- .../hcatalog/pig/TestHCatLoaderComplexSchema.java | 173 +++--- .../hive/hcatalog/pig/TestHCatLoaderStorer.java | 133 +++-- .../apache/hive/hcatalog/pig/TestHCatStorer.java | 641 ++++++++++++--------- .../hive/hcatalog/pig/TestHCatStorerMulti.java | 64 +- .../hive/hcatalog/pig/TestHCatStorerWrapper.java | 38 +- .../hive/hcatalog/pig/TestOrcHCatLoader.java | 1 - .../hive/hcatalog/pig/TestOrcHCatPigStorer.java | 31 - .../hive/hcatalog/pig/TestOrcHCatStorer.java | 37 +- .../hive/hcatalog/pig/TestOrcHCatStorerMulti.java | 27 + .../apache/hive/hcatalog/pig/TestPigHCatUtil.java | 28 +- 15 files changed, 891 insertions(+), 705 deletions(-) delete mode 100644 hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatPigStorer.java create mode 100644 hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorerMulti.java diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java index b06e9b4..b39a7ef 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java @@ -28,12 +28,13 @@ /** * This class is used to test the HCAT_PIG_STORER_EXTERNAL_LOCATION property used in HCatStorer. - * When this property is set, HCatStorer writes the output to the location it specifies. Since - * the property can only be set in the UDFContext, we need this simpler wrapper to do three things: + * When this property is set, HCatStorer writes the output to the location it specifies. Since the + * property can only be set in the UDFContext, we need this simpler wrapper to do three things: *
- * <br>
- * 1. save the external dir specified in the Pig script
- * <br>
- * 2. set the same UDFContext signature as HCatStorer
- * <br>
- * 3. before {@link HCatStorer#setStoreLocation(String, Job)}, set the external dir in the UDFContext.
+ * <br>
+ * 1. save the external dir specified in the Pig script
+ * <br>
+ * 2. set the same UDFContext signature as HCatStorer
+ * <br>
+ * 3. before {@link HCatStorer#setStoreLocation(String, Job)}, set the external dir in the
+ * UDFContext.
*/ public class HCatStorerWrapper extends HCatStorer { @@ -51,15 +52,15 @@ public HCatStorerWrapper(String partSpecs, String externalDir) throws Exception this.externalDir = externalDir; } - public HCatStorerWrapper(String externalDir) throws Exception{ + public HCatStorerWrapper(String externalDir) throws Exception { super(); this.externalDir = externalDir; } @Override public void setStoreLocation(String location, Job job) throws IOException { - Properties udfProps = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[] { sign }); + Properties udfProps = + UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[] { sign }); udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION, externalDir); super.setStoreLocation(location, job); } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java index c87b95a..bf1b126 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java @@ -63,7 +63,7 @@ public float getProgress() throws IOException, InterruptedException { @Override public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException, - InterruptedException { + InterruptedException { } @Override @@ -84,7 +84,7 @@ public MockInputSplit(String location) { @Override public String[] getLocations() throws IOException, InterruptedException { - return new String[]{location}; + return new String[] { location }; } @Override @@ -123,7 +123,7 @@ public MockInputFormat(String location) { @Override public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1) - throws IOException, InterruptedException { + throws IOException, InterruptedException { return new MockRecordReader(); } @@ -133,7 +133,8 @@ public List getSplits(JobContext arg0) throws IOException, InterruptedException } } - private static final Map> locationToData = new HashMap>(); + private static final Map> locationToData = + new HashMap>(); public static void setData(String location, Iterable data) { locationToData.put(location, data); diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java index d056910..4972a78 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java @@ -24,7 +24,6 @@ import org.apache.pig.data.Tuple; public class MyPigStorage extends PigStorage { - String arg2; public MyPigStorage(String arg1, String arg2) throws IOException { diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java index a4b55c8..d274a0a 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java @@ -23,9 +23,8 @@ import java.util.HashMap; import java.util.Iterator; -import junit.framework.TestCase; - import org.apache.commons.io.FileUtils; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; 
import org.apache.hadoop.hive.cli.CliSessionState; @@ -42,6 +41,7 @@ import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; + import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.common.HCatContext; @@ -51,12 +51,16 @@ import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; import org.apache.hive.hcatalog.mapreduce.HCatMapRedUtil; + import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.data.Tuple; -public class TestE2EScenarios extends TestCase { +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +public class TestE2EScenarios { private static final String TEST_DATA_DIR = System.getProperty("java.io.tmpdir") + File.separator + TestHCatLoader.class.getCanonicalName() + "-" + System.currentTimeMillis(); private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; @@ -69,14 +73,13 @@ protected String storageFormat() { return "orc"; } - @Override - protected void setUp() throws Exception { - + @Before + public void setUp() throws Exception { File f = new File(TEST_WAREHOUSE_DIR); if (f.exists()) { FileUtil.fullyDelete(f); } - if(!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { + if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); } @@ -90,8 +93,8 @@ protected void setUp() throws Exception { } - @Override - protected void tearDown() throws Exception { + @After + public void tearDown() throws Exception { try { dropTable("inpy"); dropTable("rc5318"); @@ -105,14 +108,15 @@ private void dropTable(String tablename) throws IOException, CommandNeedRetryExc driver.run("drop table " + tablename); } - private void createTable(String tablename, String schema, String partitionedBy, String storageFormat) throws IOException, CommandNeedRetryException { + private void createTable(String tablename, String schema, String partitionedBy, + String storageFormat) throws IOException, CommandNeedRetryException { String createTable; createTable = "create table " + tablename + "(" + schema + ") "; if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - if (storageFormat != null){ - createTable = createTable + "stored as " +storageFormat; + if (storageFormat != null) { + createTable = createTable + "stored as " + storageFormat; } driverRun(createTable); } @@ -120,8 +124,8 @@ private void createTable(String tablename, String schema, String partitionedBy, private void driverRun(String cmd) throws IOException, CommandNeedRetryException { int retCode = driver.run(cmd).getResponseCode(); if (retCode != 0) { - throw new IOException("Failed to run [" - + cmd + "], return code from hive driver : [" + retCode + "]"); + throw new IOException("Failed to run [" + cmd + "], return code from hive driver : [" + + retCode + "]"); } } @@ -129,64 +133,63 @@ private void pigDump(String tableName) throws IOException { PigServer server = new PigServer(ExecType.LOCAL); System.err.println("==="); - System.err.println(tableName+":"); + System.err.println(tableName + ":"); server.registerQuery("X = load '" + tableName - + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); Iterator XIter = server.openIterator("X"); while 
(XIter.hasNext()) { Tuple t = XIter.next(); - for (Object o : t.getAll()){ - System.err.print( - "\t(" + o.getClass().getName() + ":" - + o.toString() + ")" - ); + for (Object o : t.getAll()) { + System.err.print("\t(" + o.getClass().getName() + ":" + o.toString() + ")"); } System.err.println(""); } System.err.println("==="); } - private void copyTable(String in, String out) throws IOException, InterruptedException { Job ijob = new Job(); Job ojob = new Job(); HCatInputFormat inpy = new HCatInputFormat(); - inpy.setInput(ijob , null, in); + inpy.setInput(ijob, null, in); HCatOutputFormat oupy = new HCatOutputFormat(); - oupy.setOutput(ojob, - OutputJobInfo.create(null, out, new HashMap() - )); + oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap())); // Test HCatContext - System.err.println("HCatContext INSTANCE is present : " +HCatContext.INSTANCE.getConf().isPresent()); - if (HCatContext.INSTANCE.getConf().isPresent()){ - System.err.println("HCatContext tinyint->int promotion says " + - HCatContext.INSTANCE.getConf().get().getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)); + System.err.println("HCatContext INSTANCE is present : " + + HCatContext.INSTANCE.getConf().isPresent()); + if (HCatContext.INSTANCE.getConf().isPresent()) { + System.err.println("HCatContext tinyint->int promotion says " + + HCatContext.INSTANCE + .getConf() + .get() + .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)); } HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration()); - System.err.println("Copying from ["+in+"] to ["+out+"] with schema : "+ tableSchema.toString()); + System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + + tableSchema.toString()); oupy.setSchema(ojob, tableSchema); oupy.checkOutputSpecs(ojob); OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration())); oc.setupJob(ojob); - for (InputSplit split : inpy.getSplits(ijob)){ + for (InputSplit split : inpy.getSplits(ijob)) { TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration()); TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration()); - RecordReader rr = inpy.createRecordReader(split, rtaskContext); + RecordReader rr = + inpy.createRecordReader(split, rtaskContext); rr.initialize(split, rtaskContext); OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext); taskOc.setupTask(wtaskContext); RecordWriter, HCatRecord> rw = oupy.getRecordWriter(wtaskContext); - while(rr.nextKeyValue()){ + while (rr.nextKeyValue()) { rw.write(rr.getCurrentKey(), rr.getCurrentValue()); } rw.close(wtaskContext); @@ -199,35 +202,33 @@ private void copyTable(String in, String out) throws IOException, InterruptedExc private TaskAttemptContext createTaskAttemptContext(Configuration tconf) { Configuration conf = (tconf == null) ? 
(new Configuration()) : tconf; - TaskAttemptID taskId = HCatMapRedUtil.createTaskAttemptID(new JobID("200908190029", 1), false, 1, 1); + TaskAttemptID taskId = + HCatMapRedUtil.createTaskAttemptID(new JobID("200908190029", 1), false, 1, 1); conf.setInt("mapred.task.partition", taskId.getId()); conf.set("mapred.task.id", taskId.toString()); - TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf , taskId); + TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf, taskId); return rtaskContext; } - + @Test public void testReadOrcAndRCFromPig() throws Exception { String tableSchema = "ti tinyint, si smallint,i int, bi bigint, f float, d double, b boolean"; - HcatTestUtils.createTestDataFile(TEXTFILE_LOCN, - new String[]{ - "-3\0019001\00186400\0014294967297\00134.532\0012184239842983489.1231231234\001true" - ,"0\0010\0010\0010\0010\0010\001false" - } - ); + HcatTestUtils.createTestDataFile(TEXTFILE_LOCN, new String[] { + "-3\0019001\00186400\0014294967297\00134.532\0012184239842983489.1231231234\001true", + "0\0010\0010\0010\0010\0010\001false" }); // write this out to a file, and import it into hive - createTable("inpy",tableSchema,null,"textfile"); - createTable("rc5318",tableSchema,null,"rcfile"); - createTable("orc5318",tableSchema,null,"orc"); - driverRun("LOAD DATA LOCAL INPATH '"+TEXTFILE_LOCN+"' OVERWRITE INTO TABLE inpy"); + createTable("inpy", tableSchema, null, "textfile"); + createTable("rc5318", tableSchema, null, "rcfile"); + createTable("orc5318", tableSchema, null, "orc"); + driverRun("LOAD DATA LOCAL INPATH '" + TEXTFILE_LOCN + "' OVERWRITE INTO TABLE inpy"); // write it out from hive to an rcfile table, and to an orc table -// driverRun("insert overwrite table rc5318 select * from inpy"); - copyTable("inpy","rc5318"); -// driverRun("insert overwrite table orc5318 select * from inpy"); - copyTable("inpy","orc5318"); + // driverRun("insert overwrite table rc5318 select * from inpy"); + copyTable("inpy", "rc5318"); + // driverRun("insert overwrite table orc5318 select * from inpy"); + copyTable("inpy", "orc5318"); pigDump("inpy"); pigDump("rc5318"); diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java index 82fc8a9..4980941 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java @@ -71,8 +71,11 @@ public class TestHCatLoader { private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoader.class); - private static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName(System.getProperty("java.io.tmpdir") + - File.separator + TestHCatLoader.class.getCanonicalName() + "-" + System.currentTimeMillis()); + private static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName( + System.getProperty("java.io.tmpdir") + + File.separator + + TestHCatLoader.class.getCanonicalName() + + "-" + System.currentTimeMillis()); private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; private static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; @@ -86,51 +89,62 @@ private Map> basicInputData; protected String storageFormat() { - return "RCFILE 
tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; } private void dropTable(String tablename) throws IOException, CommandNeedRetryException { dropTable(tablename, driver); } - static void dropTable(String tablename, Driver driver) throws IOException, CommandNeedRetryException { + + static void dropTable(String tablename, Driver driver) throws IOException, + CommandNeedRetryException { driver.run("drop table if exists " + tablename); } - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + private void createTable(String tablename, String schema, String partitionedBy) + throws IOException, CommandNeedRetryException { createTable(tablename, schema, partitionedBy, driver, storageFormat()); } - static void createTable(String tablename, String schema, String partitionedBy, Driver driver, String storageFormat) - throws IOException, CommandNeedRetryException { + + static void createTable(String tablename, String schema, String partitionedBy, Driver driver, + String storageFormat) throws IOException, CommandNeedRetryException { String createTable; createTable = "create table " + tablename + "(" + schema + ") "; if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - createTable = createTable + "stored as " +storageFormat; + createTable = createTable + "stored as " + storageFormat; executeStatementOnDriver(createTable, driver); } - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + private void createTable(String tablename, String schema) throws IOException, + CommandNeedRetryException { createTable(tablename, schema, null); } + /** * Execute Hive CLI statement + * * @param cmd arbitrary statement to execute */ - static void executeStatementOnDriver(String cmd, Driver driver) throws IOException, CommandNeedRetryException { + static void executeStatementOnDriver(String cmd, Driver driver) throws IOException, + CommandNeedRetryException { LOG.debug("Executing: " + cmd); CommandProcessorResponse cpr = driver.run(cmd); - if(cpr.getResponseCode() != 0) { - throw new IOException("Failed to execute \"" + cmd + "\". Driver returned " + cpr.getResponseCode() + " Error: " + cpr.getErrorMessage()); + if (cpr.getResponseCode() != 0) { + throw new IOException("Failed to execute \"" + cmd + "\". 
Driver returned " + + cpr.getResponseCode() + " Error: " + cpr.getErrorMessage()); } } + private static void checkProjection(FieldSchema fs, String expectedName, byte expectedPigType) { assertEquals(fs.alias, expectedName); - assertEquals("Expected " + DataType.findTypeName(expectedPigType) + "; got " + - DataType.findTypeName(fs.type), expectedPigType, fs.type); + assertEquals( + "Expected " + DataType.findTypeName(expectedPigType) + "; got " + + DataType.findTypeName(fs.type), expectedPigType, fs.type); } - + @Before public void setup() throws Exception { @@ -138,7 +152,7 @@ public void setup() throws Exception { if (f.exists()) { FileUtil.fullyDelete(f); } - if(!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { + if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); } @@ -151,11 +165,9 @@ public void setup() throws Exception { SessionState.start(new CliSessionState(hiveConf)); createTable(BASIC_TABLE, "a int, b string"); - createTable(COMPLEX_TABLE, - "name string, studentid int, " + createTable(COMPLEX_TABLE, "name string, studentid int, " + "contact struct, " - + "currently_registered_courses array, " - + "current_grades map, " + + "currently_registered_courses array, " + "current_grades map, " + "phnos array>"); createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); @@ -176,29 +188,40 @@ public void setup() throws Exception { } } HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); - HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, - new String[]{ - //"Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", - //"Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", - } - ); + HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, new String[] { + // "Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", + // "Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", + }); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); int i = 0; server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);", ++i); - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); - server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); + server.registerQuery("store A into '" + BASIC_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); + server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); server.registerQuery("B = foreach A generate a,b;", ++i); server.registerQuery("B2 = filter B by a < 2;", ++i); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');", ++i); - + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');", ++i); + server.registerQuery("C = foreach A generate a,b;", ++i); server.registerQuery("C2 = filter C by a >= 2;", ++i); - server.registerQuery("store C2 into '" + 
PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');", ++i); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');", ++i); - server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});", ++i); - server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); + server.registerQuery( + "D = load '" + COMPLEX_FILE_NAME + "' as (" + + "name:chararray, " + + "studentid:int, " + + "contact:tuple(phno:chararray,email:chararray), " + + "currently_registered_courses:bag{innertup:tuple(course:chararray)}, " + + "current_grades:map[ ], " + + "phnos:bag{innertup:tuple(phno:chararray,type:chararray)});", + ++i); + server.registerQuery("store D into '" + COMPLEX_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); server.executeBatch(); } @@ -218,11 +241,11 @@ public void tearDown() throws Exception { @Test public void testSchemaLoadBasic() throws IOException { - PigServer server = new PigServer(ExecType.LOCAL); // test that schema was loaded correctly - server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("X = load '" + BASIC_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); Schema dumpedXSchema = server.dumpSchema("X"); List Xfields = dumpedXSchema.getFields(); assertEquals(2, Xfields.size()); @@ -232,6 +255,7 @@ public void testSchemaLoadBasic() throws IOException { assertTrue(Xfields.get(1).type == DataType.CHARARRAY); } + /** * Test that we properly translate data types in Hive/HCat table schema into Pig schema */ @@ -239,6 +263,7 @@ public void testSchemaLoadBasic() throws IOException { public void testSchemaLoadPrimitiveTypes() throws IOException { AllTypesTable.testSchemaLoadPrimitiveTypes(); } + /** * Test that value from Hive table are read properly in Pig */ @@ -251,7 +276,8 @@ public void testReadDataPrimitiveTypes() throws Exception { public void testReadDataBasic() throws IOException { PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("X = load '" + BASIC_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); Iterator XIter = server.openIterator("X"); int numTuplesRead = 0; while (XIter.hasNext()) { @@ -270,11 +296,11 @@ public void testReadDataBasic() throws IOException { @Test public void testSchemaLoadComplex() throws IOException { - PigServer server = new PigServer(ExecType.LOCAL); // test that schema was loaded correctly - server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("K = load '" + COMPLEX_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); Schema dumpedKSchema = server.dumpSchema("K"); List Kfields = dumpedKSchema.getFields(); assertEquals(6, Kfields.size()); @@ -303,10 +329,11 @@ public void testSchemaLoadComplex() throws IOException { assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type); assertNotNull(Kfields.get(3).schema.getFields().get(0).schema); assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size()); - 
assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type); - // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - // commented out, because the name becomes "innerfield" by default - we call it "course" in pig, - // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine + assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields() + .get(0).type); + //assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + // Commented out, because the name becomes "innerfield" by default - we call it "course" in + // pig, but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine } assertEquals(DataType.MAP, Kfields.get(4).type); assertEquals("current_grades", Kfields.get(4).alias.toLowerCase()); @@ -318,12 +345,15 @@ public void testSchemaLoadComplex() throws IOException { assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type); assertNotNull(Kfields.get(5).schema.getFields().get(0).schema); assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type); - assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type); - assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields() + .get(0).type); + assertEquals("phno", + Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields() + .get(1).type); + assertEquals("type", + Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); } - } @Test @@ -335,7 +365,8 @@ public void testReadPartitionedBasic() throws IOException, CommandNeedRetryExcep driver.getResults(valuesReadFromHiveDriver); assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size()); - server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("W = load '" + PARTITIONED_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); Schema dumpedWSchema = server.dumpSchema("W"); List Wfields = dumpedWSchema.getFields(); assertEquals(3, Wfields.size()); @@ -366,7 +397,8 @@ public void testReadPartitionedBasic() throws IOException, CommandNeedRetryExcep } assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size()); - server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("P1 = load '" + PARTITIONED_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); server.registerQuery("P1filter = filter P1 by bkt == '0';"); Iterator P1Iter = server.openIterator("P1filter"); int count1 = 0; @@ -379,7 +411,8 @@ public void testReadPartitionedBasic() throws IOException, CommandNeedRetryExcep } assertEquals(3, count1); - server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("P2 = load '" + PARTITIONED_TABLE + 
+ "' using org.apache.hive.hcatalog.pig.HCatLoader();"); server.registerQuery("P2filter = filter P2 by bkt == '1';"); Iterator P2Iter = server.openIterator("P2filter"); int count2 = 0; @@ -400,7 +433,8 @@ public void testProjectionsBasic() throws IOException { // projections are handled by using generate, not "as" on the Load - server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("Y1 = load '" + BASIC_TABLE + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); server.registerQuery("Y2 = foreach Y1 generate a;"); server.registerQuery("Y3 = foreach Y1 generate b,a;"); Schema dumpedY2Schema = server.dumpSchema("Y2"); @@ -444,7 +478,7 @@ public void testProjectionsBasic() throws IOException { @Test public void testColumnarStorePushdown() throws Exception { - String PIGOUTPUT_DIR = TEST_DATA_DIR+ "/colpushdownop"; + String PIGOUTPUT_DIR = TEST_DATA_DIR + "/colpushdownop"; String PIG_FILE = "test.pig"; String expectedCols = "0,1"; PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); @@ -457,20 +491,19 @@ public void testColumnarStorePushdown() throws Exception { try { String[] args = { "-x", "local", PIG_FILE }; PigStats stats = PigRunner.run(args, null); - //Pig script was successful + // Pig script was successful assertTrue(stats.isSuccessful()); - //Single MapReduce job is launched + // Single MapReduce job is launched OutputStats outstats = stats.getOutputStats().get(0); - assertTrue(outstats!= null); - assertEquals(expectedCols,outstats.getConf() - .get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - //delete output file on exit + assertTrue(outstats != null); + assertEquals(expectedCols, + outstats.getConf().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); + // delete output file on exit FileSystem fs = FileSystem.get(outstats.getConf()); - if (fs.exists(new Path(PIGOUTPUT_DIR))) - { + if (fs.exists(new Path(PIGOUTPUT_DIR))) { fs.delete(new Path(PIGOUTPUT_DIR), true); } - }finally { + } finally { new File(PIG_FILE).delete(); } } @@ -497,27 +530,30 @@ public void testConvertBooleanToInt() throws Exception { File inputDataDir = new File(inputFileName).getParentFile(); inputDataDir.mkdir(); - String[] lines = new String[]{"llama\ttrue", "alpaca\tfalse"}; + String[] lines = new String[] { "llama\ttrue", "alpaca\tfalse" }; HcatTestUtils.createTestDataFile(inputFileName, lines); assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); - assertEquals(0, driver.run("create external table " + tbl + - " (a string, b boolean) row format delimited fields terminated by '\t'" + - " stored as textfile location 'file:///" + - inputDataDir.getPath().replaceAll("\\\\", "/") + "'").getResponseCode()); + assertEquals( + 0, + driver.run( + "create external table " + tbl + + " (a string, b boolean) row format delimited fields terminated by '\t'" + + " stored as textfile location 'file:///" + + inputDataDir.getPath().replaceAll("\\\\", "/") + "'").getResponseCode()); Properties properties = new Properties(); properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); PigServer server = new PigServer(ExecType.LOCAL, properties); - server.registerQuery( - "data = load 'test_convert_boolean_to_int' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server + .registerQuery("data = load 'test_convert_boolean_to_int' using org.apache.hive.hcatalog.pig.HCatLoader();"); Schema schema = server.dumpSchema("data"); assertEquals(2, schema.getFields().size()); assertEquals("a", 
schema.getField(0).alias); assertEquals(DataType.CHARARRAY, schema.getField(0).type); assertEquals("b", schema.getField(1).alias); - if (PigHCatUtil.pigHasBooleanSupport()){ + if (PigHCatUtil.pigHasBooleanSupport()) { assertEquals(DataType.BOOLEAN, schema.getField(1).type); } else { assertEquals(DataType.INTEGER, schema.getField(1).type); @@ -534,44 +570,50 @@ public void testConvertBooleanToInt() throws Exception { } /** - * basic tests that cover each scalar type - * https://issues.apache.org/jira/browse/HIVE-5814 + * basic tests that cover each scalar type https://issues.apache.org/jira/browse/HIVE-5814 */ private static final class AllTypesTable { private static final String ALL_TYPES_FILE_NAME = TEST_DATA_DIR + "/alltypes.input.data"; private static final String ALL_PRIMITIVE_TYPES_TABLE = "junit_unparted_alltypes"; - private static final String ALL_TYPES_SCHEMA = "( c_boolean boolean, " + //0 - "c_tinyint tinyint, " + //1 - "c_smallint smallint, " + //2 - "c_int int, " + //3 - "c_bigint bigint, " + //4 - "c_float float, " + //5 - "c_double double, " + //6 - "c_decimal decimal(5,2), " +//7 - "c_string string, " + //8 - "c_char char(10), " + //9 - "c_varchar varchar(20), " + //10 - "c_binary binary, " + //11 - "c_date date, " + //12 - "c_timestamp timestamp)"; //13 + private static final String ALL_TYPES_SCHEMA = "( c_boolean boolean, " + // 0 + "c_tinyint tinyint, " + // 1 + "c_smallint smallint, " + // 2 + "c_int int, " + // 3 + "c_bigint bigint, " + // 4 + "c_float float, " + // 5 + "c_double double, " + // 6 + "c_decimal decimal(5,2), " + // 7 + "c_string string, " + // 8 + "c_char char(10), " + // 9 + "c_varchar varchar(20), " + // 10 + "c_binary binary, " + // 11 + "c_date date, " + // 12 + "c_timestamp timestamp)"; // 13 /** - * raw data for #ALL_PRIMITIVE_TYPES_TABLE - * All the values are within range of target data type (column) + * raw data for #ALL_PRIMITIVE_TYPES_TABLE All the values are within range of target data type + * (column) */ private static final Object[][] primitiveRows = new Object[][] { - {Boolean.TRUE,Byte.MAX_VALUE,Short.MAX_VALUE, Integer.MAX_VALUE,Long.MAX_VALUE,Float.MAX_VALUE,Double.MAX_VALUE,555.22,"Kyiv","char(10)xx","varchar(20)","blah".getBytes(),Date.valueOf("2014-01-13"),Timestamp.valueOf("2014-01-13 19:26:25.0123")}, - {Boolean.FALSE,Byte.MIN_VALUE,Short.MIN_VALUE, Integer.MIN_VALUE,Long.MIN_VALUE,Float.MIN_VALUE,Double.MIN_VALUE,-555.22,"Saint Petersburg","char(xx)00","varchar(yy)","doh".getBytes(),Date.valueOf("2014-01-14"), Timestamp.valueOf("2014-01-14 19:26:25.0123")} - }; + { Boolean.TRUE, Byte.MAX_VALUE, Short.MAX_VALUE, Integer.MAX_VALUE, Long.MAX_VALUE, + Float.MAX_VALUE, Double.MAX_VALUE, 555.22, "Kyiv", "char(10)xx", "varchar(20)", + "blah".getBytes(), Date.valueOf("2014-01-13"), + Timestamp.valueOf("2014-01-13 19:26:25.0123") }, + { Boolean.FALSE, Byte.MIN_VALUE, Short.MIN_VALUE, Integer.MIN_VALUE, Long.MIN_VALUE, + Float.MIN_VALUE, Double.MIN_VALUE, -555.22, "Saint Petersburg", "char(xx)00", + "varchar(yy)", "doh".getBytes(), Date.valueOf("2014-01-14"), + Timestamp.valueOf("2014-01-14 19:26:25.0123") } }; + /** * Test that we properly translate data types in Hive/HCat table schema into Pig schema */ private static void testSchemaLoadPrimitiveTypes() throws IOException { PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("X = load '" + ALL_PRIMITIVE_TYPES_TABLE + "' using " + HCatLoader.class.getName() + "();"); + server.registerQuery("X = load '" + ALL_PRIMITIVE_TYPES_TABLE + "' using " + + 
HCatLoader.class.getName() + "();"); Schema dumpedXSchema = server.dumpSchema("X"); List Xfields = dumpedXSchema.getFields(); - assertEquals("Expected " + HCatFieldSchema.Type.numPrimitiveTypes() + " fields, found " + - Xfields.size(), HCatFieldSchema.Type.numPrimitiveTypes(), Xfields.size()); + assertEquals("Expected " + HCatFieldSchema.Type.numPrimitiveTypes() + " fields, found " + + Xfields.size(), HCatFieldSchema.Type.numPrimitiveTypes(), Xfields.size()); checkProjection(Xfields.get(0), "c_boolean", DataType.BOOLEAN); checkProjection(Xfields.get(1), "c_tinyint", DataType.INTEGER); checkProjection(Xfields.get(2), "c_smallint", DataType.INTEGER); @@ -587,58 +629,69 @@ private static void testSchemaLoadPrimitiveTypes() throws IOException { checkProjection(Xfields.get(12), "c_date", DataType.DATETIME); checkProjection(Xfields.get(13), "c_timestamp", DataType.DATETIME); } + /** * Test that value from Hive table are read properly in Pig */ private static void testReadDataPrimitiveTypes() throws Exception { PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("X = load '" + ALL_PRIMITIVE_TYPES_TABLE + "' using " + HCatLoader.class.getName() + "();"); + server.registerQuery("X = load '" + ALL_PRIMITIVE_TYPES_TABLE + "' using " + + HCatLoader.class.getName() + "();"); Iterator XIter = server.openIterator("X"); int numTuplesRead = 0; while (XIter.hasNext()) { Tuple t = XIter.next(); assertEquals(HCatFieldSchema.Type.numPrimitiveTypes(), t.size()); int colPos = 0; - for(Object referenceData : primitiveRows[numTuplesRead]) { - if(referenceData == null) { - assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data is null; actual " + - t.get(colPos), t.get(colPos) == null); - } - else if(referenceData instanceof java.util.Date) { - assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + ((java.util.Date)referenceData).getTime() + " actual=" + - ((DateTime)t.get(colPos)).getMillis() + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")", - ((java.util.Date)referenceData).getTime()== ((DateTime)t.get(colPos)).getMillis()); - //note that here we ignore nanos part of Hive Timestamp since nanos are dropped when reading Hive from Pig by design - } - else { - assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + referenceData + " actual=" + - t.get(colPos) + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")", + for (Object referenceData : primitiveRows[numTuplesRead]) { + if (referenceData == null) { + assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + + " Reference data is null; actual " + t.get(colPos), t.get(colPos) == null); + } else if (referenceData instanceof java.util.Date) { + assertTrue( + "rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + + ((java.util.Date) referenceData).getTime() + " actual=" + + ((DateTime) t.get(colPos)).getMillis() + "; types=(" + + referenceData.getClass() + "," + t.get(colPos).getClass() + ")", + ((java.util.Date) referenceData).getTime() == ((DateTime) t.get(colPos)) + .getMillis()); + // note that here we ignore nanos part of Hive Timestamp since nanos are dropped when + // reading Hive from Pig by design + } else { + assertTrue( + "rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + + referenceData + " actual=" + t.get(colPos) + "; types=(" + + referenceData.getClass() + "," + t.get(colPos).getClass() + ")", referenceData.toString().equals(t.get(colPos).toString())); - //doing 
String comps here as value objects in Hive in Pig are different so equals() doesn't work + // doing String comps here as value objects in Hive in Pig are different so equals() + // doesn't work } colPos++; } numTuplesRead++; } - assertTrue("Expected " + primitiveRows.length + "; found " + numTuplesRead, numTuplesRead == primitiveRows.length); + assertTrue("Expected " + primitiveRows.length + "; found " + numTuplesRead, + numTuplesRead == primitiveRows.length); } + private static void setupAllTypesTable(Driver driver) throws Exception { String[] primitiveData = new String[primitiveRows.length]; - for(int i = 0; i < primitiveRows.length; i++) { + for (int i = 0; i < primitiveRows.length; i++) { Object[] rowData = primitiveRows[i]; StringBuilder row = new StringBuilder(); - for(Object cell : rowData) { + for (Object cell : rowData) { row.append(row.length() == 0 ? "" : "\t").append(cell == null ? null : cell); } primitiveData[i] = row.toString(); } HcatTestUtils.createTestDataFile(ALL_TYPES_FILE_NAME, primitiveData); - String cmd = "create table " + ALL_PRIMITIVE_TYPES_TABLE + ALL_TYPES_SCHEMA + - "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'" + - " STORED AS TEXTFILE"; + String cmd = + "create table " + ALL_PRIMITIVE_TYPES_TABLE + ALL_TYPES_SCHEMA + + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'" + " STORED AS TEXTFILE"; executeStatementOnDriver(cmd, driver); - cmd = "load data local inpath '" + HCatUtil.makePathASafeFileName(ALL_TYPES_FILE_NAME) + - "' into table " + ALL_PRIMITIVE_TYPES_TABLE; + cmd = + "load data local inpath '" + HCatUtil.makePathASafeFileName(ALL_TYPES_FILE_NAME) + + "' into table " + ALL_PRIMITIVE_TYPES_TABLE; executeStatementOnDriver(cmd, driver); } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java index eadbf20..06f4d08 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java @@ -51,9 +51,9 @@ public class TestHCatLoaderComplexSchema { - //private static MiniCluster cluster = MiniCluster.buildCluster(); + // private static MiniCluster cluster = MiniCluster.buildCluster(); private static Driver driver; - //private static Properties props; + // private static Properties props; private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderComplexSchema.class); private void dropTable(String tablename) throws IOException, CommandNeedRetryException { @@ -61,11 +61,12 @@ private void dropTable(String tablename) throws IOException, CommandNeedRetryExc } protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; } - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + private void createTable(String tablename, String schema, String partitionedBy) + throws IOException, CommandNeedRetryException { String createTable; createTable = "create table " + tablename + "(" + schema + ") "; if ((partitionedBy != null) && 
(!partitionedBy.trim().isEmpty())) { @@ -76,11 +77,13 @@ private void createTable(String tablename, String schema, String partitionedBy) CommandProcessorResponse result = driver.run(createTable); int retCode = result.getResponseCode(); if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); + throw new IOException("Failed to create table. [" + createTable + + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); } } - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + private void createTable(String tablename, String schema) throws IOException, + CommandNeedRetryException { createTable(tablename, schema, null); } @@ -93,8 +96,8 @@ public static void setUpBeforeClass() throws Exception { hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); driver = new Driver(hiveConf); SessionState.start(new CliSessionState(hiveConf)); - //props = new Properties(); - //props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); + // props = new Properties(); + // props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); } @@ -111,71 +114,69 @@ private DataBag b(Tuple... objects) { /** * artificially complex nested schema to test nested schema conversion + * * @throws Exception */ @Test public void testSyntheticComplexSchema() throws Exception { String pigSchema = - "a: " + - "(" + - "aa: chararray, " + - "ab: long, " + - "ac: map[], " + - "ad: { t: (ada: long) }, " + - "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + - "af: (afa: chararray, afb: long) " + - ")," + - "b: chararray, " + - "c: long, " + - "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; + "a: " + "(" + + "aa: chararray, " + + "ab: long, " + + "ac: map[], " + + "ad: { t: (ada: long) }, " + + "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + + "af: (afa: chararray, afb: long) " + + ")," + + "b: chararray, " + + "c: long, " + + "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; // with extra structs String tableSchema = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array>, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>>"; + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array>, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>>"; // without extra structs String tableSchema2 = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>"; + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>"; List data = new ArrayList(); for (int i = 0; i < 10; i++) { Tuple t = t( - t( - "aa test", - 2l, - new HashMap() { - { - put("ac test1", "test 1"); - put("ac test2", "test 2"); - } - }, - b(t(3l), t(4l)), - b(t(5l, t("aeba test", 6l))), - t("afa test", 7l) - ), - "b test", - (long) i, - b(t(8l, t("dba test", 9l), b(t(10l))))); + t("aa test", + 2l, + new HashMap() { + { + put("ac test1", "test 1"); + put("ac test2", "test 
2"); + } + }, + b(t(3l), t(4l)), + b(t(5l, t("aeba test", 6l))), + t("afa test", 7l)), + "b test", + (long) i, + b(t(8l, t("dba test", 9l), b(t(10l))))); data.add(t); } @@ -183,28 +184,30 @@ public void testSyntheticComplexSchema() throws Exception { verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, false); verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, true); verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, false); - } - private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List data, boolean provideSchemaToStorer) - throws IOException, CommandNeedRetryException, ExecException, FrontendException { + private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, + List data, boolean provideSchemaToStorer) throws IOException, + CommandNeedRetryException, ExecException, FrontendException { MockLoader.setData(tablename + "Input", data); try { createTable(tablename, tableSchema); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); - server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); + server.registerQuery("A = load '" + tablename + + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); Schema dumpedASchema = server.dumpSchema("A"); - server.registerQuery("STORE A into '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatStorer(" - + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") - + ");"); + server.registerQuery("STORE A into '" + tablename + + "' using org.apache.hive.hcatalog.pig.HCatStorer(" + + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") + ");"); ExecJob execJob = server.executeBatch().get(0); if (!execJob.getStatistics().isSuccessful()) { throw new RuntimeException("Import failed", execJob.getException()); } // test that schema was loaded correctly - server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("X = load '" + tablename + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); server.dumpSchema("X"); Iterator it = server.openIterator("X"); int i = 0; @@ -216,16 +219,14 @@ private void verifyWriteRead(String tablename, String pigSchema, String tableSch } Schema dumpedXSchema = server.dumpSchema("X"); - Assert.assertEquals( - "expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", - "", - compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); + Assert.assertEquals("expected " + dumpedASchema + " but was " + dumpedXSchema + + " (ignoring field names)", "", compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); } finally { dropTable(tablename); } } - + private void compareTuples(Tuple t1, Tuple t2) throws ExecException { Assert.assertEquals("Tuple Sizes don't match", t1.size(), t2.size()); for (int i = 0; i < t1.size(); i++) { @@ -237,7 +238,7 @@ private void compareTuples(Tuple t1, Tuple t2) throws ExecException { Assert.assertEquals(msg, noOrder(f1.toString()), noOrder(f2.toString())); } } - + private String noOrder(String s) { char[] chars = s.toCharArray(); Arrays.sort(chars); @@ -253,14 +254,16 @@ private String compareIgnoreFiledNames(Schema expected, Schema got) throws Front } } if (expected.size() != got.size()) { - return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")"; + return "\nsize expected " + expected.size() + " (" + 
expected + ") got " + got.size() + " (" + + got + ")"; } String message = ""; for (int i = 0; i < expected.size(); i++) { FieldSchema expectedField = expected.getField(i); FieldSchema gotField = got.getField(i); if (expectedField.type != gotField.type) { - message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")"; + message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + + gotField.type + " (" + gotField + ")"; } else { message += compareIgnoreFiledNames(expectedField.schema, gotField.schema); } @@ -269,8 +272,8 @@ private String compareIgnoreFiledNames(Schema expected, Schema got) throws Front } /** - * tests that unnecessary tuples are drop while converting schema - * (Pig requires Tuples in Bags) + * tests that unnecessary tuples are drop while converting schema (Pig requires Tuples in Bags) + * * @throws Exception */ @Test @@ -285,7 +288,6 @@ public void testTupleInBagInTupleInBag() throws Exception { data.add(t(b(t(b(t(300l), t(301l)))))); data.add(t(b(t(b(t(400l))), t(b(t(410l), t(411l), t(412l)))))); - verifyWriteRead("TupleInBagInTupleInBag1", pigSchema, tableSchema, data, true); verifyWriteRead("TupleInBagInTupleInBag2", pigSchema, tableSchema, data, false); @@ -304,19 +306,16 @@ public void testMapWithComplexData() throws Exception { List data = new ArrayList(); for (int i = 0; i < 10; i++) { - Tuple t = t( - (long) i, - new HashMap() { - { - put("b test 1", t(1l, "test 1")); - put("b test 2", t(2l, "test 2")); - } - }); + Tuple t = t((long) i, new HashMap() { + { + put("b test 1", t(1l, "test 1")); + put("b test 2", t(2l, "test 2")); + } + }); data.add(t); } verifyWriteRead("testMapWithComplexData", pigSchema, tableSchema, data, true); verifyWriteRead("testMapWithComplexData2", pigSchema, tableSchema, data, false); - } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java index 7162584..b883310 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java @@ -39,15 +39,15 @@ import java.util.List; /** - * Test that require both HCatLoader and HCatStorer. For read or write only functionality, - * please consider @{link TestHCatLoader} or @{link TestHCatStorer}. + * Test that require both HCatLoader and HCatStorer. For read or write only functionality, please + * consider @{link TestHCatLoader} or @{link TestHCatStorer}. */ public class TestHCatLoaderStorer extends HCatBaseTest { private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderStorer.class); /** - * Test round trip of smallint/tinyint: Hive->Pig->Hive. This is a more general use case in HCatalog: - * 'read some data from Hive, process it in Pig, write result back to a Hive table' + * Test round trip of smallint/tinyint: Hive->Pig->Hive. 
This is a more general use case in + * HCatalog: 'read some data from Hive, process it in Pig, write result back to a Hive table' */ @Test public void testReadWrite() throws Exception { @@ -59,15 +59,15 @@ public void testReadWrite() throws Exception { final String INPUT_FILE_NAME = dataDir + "/inputtrw.data"; TestHCatLoader.dropTable(tblName, driver); - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[]{"40\t1"}); + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { "40\t1" }); - TestHCatLoader.executeStatementOnDriver("create external table " + tblName + - " (my_small_int smallint, my_tiny_int tinyint)" + - " row format delimited fields terminated by '\t' stored as textfile location '" + - dataDir + "'", driver); + TestHCatLoader.executeStatementOnDriver("create external table " + tblName + + " (my_small_int smallint, my_tiny_int tinyint)" + + " row format delimited fields terminated by '\t' stored as textfile location '" + dataDir + + "'", driver); TestHCatLoader.dropTable(tblName2, driver); - TestHCatLoader.createTable(tblName2, "my_small_int smallint, my_tiny_int tinyint", null, driver, - "textfile"); + TestHCatLoader.createTable(tblName2, "my_small_int smallint, my_tiny_int tinyint", null, + driver, "textfile"); LOG.debug("File=" + INPUT_FILE_NAME); TestHCatStorer.dumpFile(INPUT_FILE_NAME); @@ -75,29 +75,30 @@ public void testReadWrite() throws Exception { try { int queryNumber = 1; logAndRegister(server, - "A = load '" + tblName + - "' using org.apache.hive.hcatalog.pig.HCatLoader() as (my_small_int:int, my_tiny_int:int);", - queryNumber++); + "A = load '" + + tblName + + "' using org.apache.hive.hcatalog.pig.HCatLoader() as (my_small_int:int, my_tiny_int:int);", + queryNumber++); logAndRegister(server, - "b = foreach A generate my_small_int + my_tiny_int as my_small_int, my_tiny_int;", - queryNumber++); - logAndRegister(server, "store b into '" + tblName2 + - "' using org.apache.hive.hcatalog.pig.HCatStorer();", queryNumber); - //perform simple checksum here; make sure nothing got turned to NULL + "b = foreach A generate my_small_int + my_tiny_int as my_small_int, my_tiny_int;", + queryNumber++); + logAndRegister(server, "store b into '" + tblName2 + + "' using org.apache.hive.hcatalog.pig.HCatStorer();", queryNumber); + // perform simple checksum here; make sure nothing got turned to NULL TestHCatLoader.executeStatementOnDriver("select my_small_int from " + tblName2, driver); - ArrayList l = new ArrayList(); - driver.getResults(l); - for(Object t : l) { - LOG.debug("t=" + t); + ArrayList results = new ArrayList(); + driver.getResults(results); + for (Object tuple : results) { + LOG.debug("t=" + tuple); } - Assert.assertEquals("Expected '1' rows; got '" + l.size() + "'", 1, l.size()); - int result = Integer.parseInt((String)l.get(0)); + Assert.assertEquals("Expected '1' rows; got '" + results.size() + "'", 1, results.size()); + int result = Integer.parseInt((String) results.get(0)); Assert.assertEquals("Expected value '41'; got '" + result + "'", 41, result); - } - finally { + } finally { server.shutdown(); } } + /** * Ensure Pig can read/write tinyint/smallint columns. 
*/ @@ -114,22 +115,27 @@ public void testSmallTinyInt() throws Exception { FileUtil.fullyDelete(dataDir); // Might not exist Assert.assertTrue(dataDir.mkdir()); - HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); + HcatTestUtils.createTestDataFile( + dataFile.getAbsolutePath(), + new String[] { + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); // Create a table with smallint/tinyint columns, load data, and query from Hive. Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create external table " + readTblName + - " (my_small_int smallint, my_tiny_int tinyint)" + - " row format delimited fields terminated by '\t' stored as textfile").getResponseCode()); - Assert.assertEquals(0, driver.run("load data local inpath '" + - dataDir.getPath().replaceAll("\\\\", "/") + "' into table " + readTblName).getResponseCode()); + Assert.assertEquals(0, driver.run( + "create external table " + readTblName + + " (my_small_int smallint, my_tiny_int tinyint)" + + " row format delimited fields terminated by '\t' stored as textfile") + .getResponseCode()); + Assert.assertEquals(0, driver.run( + "load data local inpath '" + dataDir.getPath().replaceAll("\\\\", "/") + + "' into table " + readTblName).getResponseCode()); PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery( - "data = load '" + readTblName + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("data = load '" + readTblName + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); // Ensure Pig schema is correct. Schema schema = server.dumpSchema("data"); @@ -152,47 +158,54 @@ public void testSmallTinyInt() throws Exception { // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the // bounds of the column type are written, and values outside throw an exception. Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create table " + writeTblName + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create table " + writeTblName + + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile") + .getResponseCode()); // Values within the column type bounds. - HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); - smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), ExecJob.JOB_STATUS.COMPLETED); + HcatTestUtils.createTestDataFile( + writeDataFile.getAbsolutePath(), + new String[] { + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); + smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), + ExecJob.JOB_STATUS.COMPLETED); // Values outside the column type bounds will fail at runtime. 
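+    // Each case below stores a single out-of-range row (the helper passes '-onOutOfRangeValue Throw') and expects the job status to be FAILED.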
- HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{ - String.format("%d\t%d", Short.MIN_VALUE - 1, 0)}); + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", + new String[] { String.format("%d\t%d", Short.MIN_VALUE - 1, 0) }); smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{ - String.format("%d\t%d", Short.MAX_VALUE + 1, 0)}); + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", + new String[] { String.format("%d\t%d", Short.MAX_VALUE + 1, 0) }); smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED); - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)}); + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", + new String[] { String.format("%d\t%d", 0, Byte.MIN_VALUE - 1) }); smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)}); + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", + new String[] { String.format("%d\t%d", 0, Byte.MAX_VALUE + 1) }); smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED); } private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) - throws Exception { + throws Exception { Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode()); - Assert.assertEquals(0, driver.run("create table test_tbl" + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create table test_tbl" + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile") + .getResponseCode()); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); - server.registerQuery("data = load '" + data + - "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); + server.registerQuery("data = load '" + data + + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); server.registerQuery( - "store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer('','','-onOutOfRangeValue Throw');"); + "store data into 'test_tbl' " + + "using org.apache.hive.hcatalog.pig.HCatStorer('','','-onOutOfRangeValue Throw');"); List jobs = server.executeBatch(); Assert.assertEquals(expectedStatus, jobs.get(0).getStatus()); - } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java index fcfc642..9ae6614 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java @@ -41,10 +41,13 @@ import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.util.LogUtils; + import org.joda.time.DateTime; import org.joda.time.DateTimeZone; + import org.junit.Assert; import org.junit.Test; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,162 +56,186 @@ private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - 
//Start: tests that check values from Pig that are out of range for target column + // Start: tests that check values from Pig that are out of range for target column @Test public void testWriteTinyint() throws Exception { - pigValueRangeTest("junitTypeTest1", "tinyint", "int", null, Integer.toString(1), Integer.toString(1)); + pigValueRangeTest("junitTypeTest1", "tinyint", "int", null, Integer.toString(1), + Integer.toString(1)); pigValueRangeTestOverflow("junitTypeTest1", "tinyint", "int", null, Integer.toString(300)); - pigValueRangeTestOverflow("junitTypeTest2", "tinyint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - Integer.toString(300)); - pigValueRangeTestOverflow("junitTypeTest3", "tinyint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - Integer.toString(300)); + pigValueRangeTestOverflow("junitTypeTest2", "tinyint", "int", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, Integer.toString(300)); + pigValueRangeTestOverflow("junitTypeTest3", "tinyint", "int", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, Integer.toString(300)); } + @Test public void testWriteSmallint() throws Exception { pigValueRangeTest("junitTypeTest1", "smallint", "int", null, Integer.toString(Short.MIN_VALUE), - Integer.toString(Short.MIN_VALUE)); - pigValueRangeTestOverflow("junitTypeTest2", "smallint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - Integer.toString(Short.MAX_VALUE + 1)); - pigValueRangeTestOverflow("junitTypeTest3", "smallint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - Integer.toString(Short.MAX_VALUE + 1)); + Integer.toString(Short.MIN_VALUE)); + pigValueRangeTestOverflow("junitTypeTest2", "smallint", "int", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, Integer.toString(Short.MAX_VALUE + 1)); + pigValueRangeTestOverflow("junitTypeTest3", "smallint", "int", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, Integer.toString(Short.MAX_VALUE + 1)); } + @Test public void testWriteChar() throws Exception { pigValueRangeTest("junitTypeTest1", "char(5)", "chararray", null, "xxx", "xxx "); pigValueRangeTestOverflow("junitTypeTest1", "char(5)", "chararray", null, "too_long"); - pigValueRangeTestOverflow("junitTypeTest2", "char(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - "too_long"); - pigValueRangeTestOverflow("junitTypeTest3", "char(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - "too_long2"); + pigValueRangeTestOverflow("junitTypeTest2", "char(5)", "chararray", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, "too_long"); + pigValueRangeTestOverflow("junitTypeTest3", "char(5)", "chararray", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, "too_long2"); } + @Test public void testWriteVarchar() throws Exception { pigValueRangeTest("junitTypeTest1", "varchar(5)", "chararray", null, "xxx", "xxx"); pigValueRangeTestOverflow("junitTypeTest1", "varchar(5)", "chararray", null, "too_long"); - pigValueRangeTestOverflow("junitTypeTest2", "varchar(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - "too_long"); - pigValueRangeTestOverflow("junitTypeTest3", "varchar(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - "too_long2"); + pigValueRangeTestOverflow("junitTypeTest2", "varchar(5)", "chararray", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, "too_long"); + pigValueRangeTestOverflow("junitTypeTest3", "varchar(5)", "chararray", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, "too_long2"); } + @Test public void testWriteDecimalXY() throws Exception { - pigValueRangeTest("junitTypeTest1", "decimal(5,2)", "bigdecimal", null, 
BigDecimal.valueOf(1.2).toString(), - BigDecimal.valueOf(1.2).toString()); - pigValueRangeTestOverflow("junitTypeTest1", "decimal(5,2)", "bigdecimal", null, BigDecimal.valueOf(12345.12).toString()); - pigValueRangeTestOverflow("junitTypeTest2", "decimal(5,2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - BigDecimal.valueOf(500.123).toString()); - pigValueRangeTestOverflow("junitTypeTest3", "decimal(5,2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - BigDecimal.valueOf(500.123).toString()); + pigValueRangeTest("junitTypeTest1", "decimal(5,2)", "bigdecimal", null, BigDecimal.valueOf(1.2) + .toString(), BigDecimal.valueOf(1.2).toString()); + pigValueRangeTestOverflow("junitTypeTest1", "decimal(5,2)", "bigdecimal", null, BigDecimal + .valueOf(12345.12).toString()); + pigValueRangeTestOverflow("junitTypeTest2", "decimal(5,2)", "bigdecimal", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, BigDecimal.valueOf(500.123).toString()); + pigValueRangeTestOverflow("junitTypeTest3", "decimal(5,2)", "bigdecimal", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(500.123).toString()); } + @Test public void testWriteDecimalX() throws Exception { - //interestingly decimal(2) means decimal(2,0) - pigValueRangeTest("junitTypeTest1", "decimal(2)", "bigdecimal", null, BigDecimal.valueOf(12).toString(), - BigDecimal.valueOf(12).toString()); - pigValueRangeTestOverflow("junitTypeTest2", "decimal(2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - BigDecimal.valueOf(50.123).toString()); - pigValueRangeTestOverflow("junitTypeTest3", "decimal(2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - BigDecimal.valueOf(50.123).toString()); + // interestingly decimal(2) means decimal(2,0) + pigValueRangeTest("junitTypeTest1", "decimal(2)", "bigdecimal", null, BigDecimal.valueOf(12) + .toString(), BigDecimal.valueOf(12).toString()); + pigValueRangeTestOverflow("junitTypeTest2", "decimal(2)", "bigdecimal", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, BigDecimal.valueOf(50.123).toString()); + pigValueRangeTestOverflow("junitTypeTest3", "decimal(2)", "bigdecimal", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(50.123).toString()); } + @Test public void testWriteDecimal() throws Exception { - //decimal means decimal(10,0) - pigValueRangeTest("junitTypeTest1", "decimal", "bigdecimal", null, BigDecimal.valueOf(1234567890).toString(), - BigDecimal.valueOf(1234567890).toString()); - pigValueRangeTestOverflow("junitTypeTest2", "decimal", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - BigDecimal.valueOf(12345678900L).toString()); - pigValueRangeTestOverflow("junitTypeTest3", "decimal", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - BigDecimal.valueOf(12345678900L).toString()); + // decimal means decimal(10,0) + pigValueRangeTest("junitTypeTest1", "decimal", "bigdecimal", null, + BigDecimal.valueOf(1234567890).toString(), BigDecimal.valueOf(1234567890).toString()); + pigValueRangeTestOverflow("junitTypeTest2", "decimal", "bigdecimal", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, BigDecimal.valueOf(12345678900L).toString()); + pigValueRangeTestOverflow("junitTypeTest3", "decimal", "bigdecimal", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(12345678900L).toString()); } + /** - * because we want to ignore TZ which is included in toString() - * include time to make sure it's 0 + * Because we want to ignore the TZ (which is included in toString()), include time to make sure it's 0. */ private static final String FORMAT_4_DATE = 
"yyyy-MM-dd HH:mm:ss"; + @Test public void testWriteDate() throws Exception { - DateTime d = new DateTime(1991,10,11,0,0); + DateTime d = new DateTime(1991, 10, 11, 0, 0); pigValueRangeTest("junitTypeTest1", "date", "datetime", null, d.toString(), - d.toString(FORMAT_4_DATE), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - d.plusHours(2).toString(), FORMAT_4_DATE);//time != 0 - pigValueRangeTestOverflow("junitTypeTest3", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - d.plusMinutes(1).toString(), FORMAT_4_DATE);//time != 0 - d = new DateTime(1991,10,11,0,0,DateTimeZone.forOffsetHours(-11)); + d.toString(FORMAT_4_DATE), FORMAT_4_DATE); + + // time != 0 + pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE); + + // time != 0 + pigValueRangeTestOverflow("junitTypeTest3", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); + + d = new DateTime(1991, 10, 11, 0, 0, DateTimeZone.forOffsetHours(-11)); pigValueRangeTest("junitTypeTest4", "date", "datetime", null, d.toString(), - d.toString(FORMAT_4_DATE), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest5", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - d.plusHours(2).toString(), FORMAT_4_DATE);//date out of range due to time != 0 - pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - d.plusMinutes(1).toString(), FORMAT_4_DATE);//date out of range due to time!=0 + d.toString(FORMAT_4_DATE), FORMAT_4_DATE); + + // date out of range due to time != 0 + pigValueRangeTestOverflow("junitTypeTest5", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE); + + // date out of range due to time !=0 + pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); } + @Test public void testWriteDate3() throws Exception { - DateTime d = new DateTime(1991,10,11,23,10,DateTimeZone.forOffsetHours(-11)); + DateTime d = new DateTime(1991, 10, 11, 23, 10, DateTimeZone.forOffsetHours(-11)); FrontendException fe = null; - //expect to fail since the time component is not 0 - pigValueRangeTestOverflow("junitTypeTest4", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - d.toString(), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest5", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - d.plusHours(2).toString(), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - d.plusMinutes(1).toString(), FORMAT_4_DATE); + // expect to fail since the time component is not 0 + pigValueRangeTestOverflow("junitTypeTest4", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.toString(), FORMAT_4_DATE); + pigValueRangeTestOverflow("junitTypeTest5", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE); + pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); } + @Test public void testWriteDate2() throws Exception { - DateTime d = new DateTime(1991,11,12,0,0, DateTimeZone.forID("US/Eastern")); + DateTime d = new DateTime(1991, 11, 12, 0, 0, 
DateTimeZone.forID("US/Eastern")); pigValueRangeTest("junitTypeTest1", "date", "datetime", null, d.toString(), - d.toString(FORMAT_4_DATE), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - d.plusHours(2).toString(), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - d.plusMillis(20).toString(), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - d.plusMillis(12).toString(), FORMAT_4_DATE); - pigValueRangeTestOverflow("junitTypeTest3", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, - d.plusMinutes(1).toString(), FORMAT_4_DATE); + d.toString(FORMAT_4_DATE), FORMAT_4_DATE); + pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE); + pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusMillis(20).toString(), FORMAT_4_DATE); + pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMillis(12).toString(), FORMAT_4_DATE); + pigValueRangeTestOverflow("junitTypeTest3", "date", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); } + /** - * Note that the value that comes back from Hive will have local TZ on it. Using local is - * arbitrary but DateTime needs TZ (or will assume default) and Hive does not have TZ. - * So if you start with Pig value in TZ=x and write to Hive, when you read it back the TZ may - * be different. The millis value should match, of course. - * + * Note that the value that comes back from Hive will have local TZ on it. Using local is + * arbitrary but DateTime needs TZ (or will assume default) and Hive does not have TZ. So if you + * start with Pig value in TZ=x and write to Hive, when you read it back the TZ may be different. + * The millis value should match, of course. 
+ * * @throws Exception */ @Test public void testWriteTimestamp() throws Exception { - DateTime d = new DateTime(1991,10,11,14,23,30, 10);//uses default TZ - pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), - d.toDateTime(DateTimeZone.getDefault()).toString()); + DateTime d = new DateTime(1991, 10, 11, 14, 23, 30, 10);// uses default TZ + pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), + d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.plusHours(2); - pigValueRangeTest("junitTypeTest2", "timestamp", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, - d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); + pigValueRangeTest("junitTypeTest2", "timestamp", "datetime", + HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.toString(), + d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.toDateTime(DateTimeZone.UTC); - pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), - d.toDateTime(DateTimeZone.getDefault()).toString()); + pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), + d.toDateTime(DateTimeZone.getDefault()).toString()); - d = new DateTime(1991,10,11,23,24,25, 26); - pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), - d.toDateTime(DateTimeZone.getDefault()).toString()); + d = new DateTime(1991, 10, 11, 23, 24, 25, 26); + pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), + d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.toDateTime(DateTimeZone.UTC); - pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), - d.toDateTime(DateTimeZone.getDefault()).toString()); + pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), + d.toDateTime(DateTimeZone.getDefault()).toString()); } - //End: tests that check values from Pig that are out of range for target column + // End: tests that check values from Pig that are out of range for target column private void pigValueRangeTestOverflow(String tblName, String hiveType, String pigType, - HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String format) throws Exception { + HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String format) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, null, format); } + private void pigValueRangeTestOverflow(String tblName, String hiveType, String pigType, - HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue) throws Exception { + HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, null, null); } + private void pigValueRangeTest(String tblName, String hiveType, String pigType, - HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, - String expectedValue) throws Exception { + HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue) + throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, expectedValue, null); } @@ -218,135 +245,137 @@ private void pigValueRangeTest(String tblName, String hiveType, String pigType, String getStorageFormat() { return "RCFILE"; } + /** - * This is used to test how Pig values of various data types which are out of range for Hive target - * column are handled. Currently the options are to raise an error or write NULL. - * 1. create a data file with 1 column, 1 row - * 2. load into pig - * 3. 
use pig to store into Hive table - * 4. read from Hive table using Pig - * 5. check that read value is what is expected + * This is used to test how Pig values of various data types that are out of range for the Hive + * target column are handled. Currently the options are to raise an error or write NULL. + * 1. create a data file with 1 column, 1 row + * 2. load into Pig + * 3. use Pig to store into a Hive table + * 4. read from the Hive table using Pig + * 5. check that the read value is what is expected + * + * @param tblName Hive table name to create * @param hiveType datatype to use for the single column in table * @param pigType corresponding Pig type when loading file into Pig * @param goal how out-of-range values from Pig are handled by HCat, may be {@code null} * @param inputValue written to file which is read by Pig, thus must be something Pig can read - * (e.g. DateTime.toString(), rather than java.sql.Date) + * (e.g. DateTime.toString(), rather than java.sql.Date) * @param expectedValue what Pig should see when reading Hive table * @param format date format to use for comparison of values since default DateTime.toString() - * includes TZ which is meaningless for Hive DATE type + * includes TZ which is meaningless for Hive DATE type */ - private void pigValueRangeTest(String tblName, String hiveType, String pigType, - HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, String format) - throws Exception { + private void pigValueRangeTest(String tblName, String hiveType, String pigType, + HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, + String format) throws Exception { TestHCatLoader.dropTable(tblName, driver); final String field = "f1"; TestHCatLoader.createTable(tblName, field + " " + hiveType, null, driver, getStorageFormat()); - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] {inputValue}); + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { inputValue }); LOG.debug("File=" + INPUT_FILE_NAME); dumpFile(INPUT_FILE_NAME); PigServer server = createPigServer(true); int queryNumber = 1; logAndRegister(server, - "A = load '" + INPUT_FILE_NAME + "' as (" + field + ":" + pigType + ");", queryNumber++); + "A = load '" + INPUT_FILE_NAME + "' as (" + field + ":" + pigType + ");", queryNumber++); Iterator firstLoad = server.openIterator("A"); - if(goal == null) { - logAndRegister(server, - "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++); - } - else { + if (goal == null) { + logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + + "();", queryNumber++); + } else { FrontendException fe = null; try { - logAndRegister(server, - "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "('','','-" + - HCatStorer.ON_OOR_VALUE_OPT + " " + goal + "');", - queryNumber++); - } - catch(FrontendException e) { + logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + + "('','','-" + HCatStorer.ON_OOR_VALUE_OPT + " " + goal + "');", queryNumber++); + } catch (FrontendException e) { fe = e; } switch (goal) { - case Null: - //do nothing, fall through and verify the data - break; - case Throw: - Assert.assertTrue("Expected a FrontendException", fe != null); - Assert.assertEquals("Expected a different FrontendException.", fe.getMessage(), "Unable to store alias A"); - return;//this test is done - default: - Assert.assertFalse("Unexpected goal: " + goal, 1 == 1); + case Null: + // do nothing, fall through 
and verify the data + break; + case Throw: + Assert.assertTrue("Expected a FrontendException", fe != null); + Assert.assertEquals("Expected a different FrontendException.", fe.getMessage(), + "Unable to store alias A"); + return;// this test is done + default: + Assert.assertFalse("Unexpected goal: " + goal, 1 == 1); } } - logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber); + logAndRegister(server, + "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber); CommandProcessorResponse cpr = driver.run("select * from " + tblName); - LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage() + - " for table " + tblName); + LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage() + + " for table " + tblName); List l = new ArrayList(); driver.getResults(l); LOG.debug("Dumping rows via SQL from " + tblName); - for(Object t : l) { + for (Object t : l) { LOG.debug(t == null ? null : t.toString() + " t.class=" + t.getClass()); } Iterator itr = server.openIterator("B"); int numRowsRead = 0; - while(itr.hasNext()) { + while (itr.hasNext()) { Tuple t = itr.next(); - if("date".equals(hiveType)) { - DateTime dateTime = (DateTime)t.get(0); + if ("date".equals(hiveType)) { + DateTime dateTime = (DateTime) t.get(0); Assert.assertTrue(format != null); - Assert.assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, dateTime== null ? null : dateTime.toString(format)); - } - else { - Assert.assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, t.isNull(0) ? null : t.get(0).toString()); + Assert.assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, + dateTime == null ? null : dateTime.toString(format)); + } else { + Assert.assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, + t.isNull(0) ? null : t.get(0).toString()); } - //see comment at "Dumping rows via SQL..." for why this doesn't work - //Assert.assertEquals("Comparing Pig to Hive", t.get(0), l.get(0)); + // see comment at "Dumping rows via SQL..." for why this doesn't work + // Assert.assertEquals("Comparing Pig to Hive", t.get(0), l.get(0)); numRowsRead++; } - Assert.assertEquals("Expected " + 1 + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME + "; table " + - tblName, 1, numRowsRead); - /* Misc notes: - Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a String - thus the timestamp in 't' doesn't match rawData*/ + Assert.assertEquals("Expected " + 1 + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME + + "; table " + tblName, 1, numRowsRead); + /* + * Misc notes: Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a + * String thus the timestamp in 't' doesn't match rawData + */ } + /** - * Create a data file with datatypes added in 0.13. Read it with Pig and use - * Pig + HCatStorer to write to a Hive table. Then read it using Pig and Hive - * and make sure results match. + * Create a data file with datatypes added in 0.13. Read it with Pig and use Pig + HCatStorer to + * write to a Hive table. Then read it using Pig and Hive and make sure results match. 
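+ * Note the char5 value is exactly 5 characters wide, so Hive's fixed-length padding does not alter it on the round trip.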
*/ @Test public void testDateCharTypes() throws Exception { final String tblName = "junit_date_char"; TestHCatLoader.dropTable(tblName, driver); TestHCatLoader.createTable(tblName, - "id int, char5 char(5), varchar10 varchar(10), dec52 decimal(5,2)", null, driver, getStorageFormat()); + "id int, char5 char(5), varchar10 varchar(10), dec52 decimal(5,2)", null, driver, + getStorageFormat()); int NUM_ROWS = 5; String[] rows = new String[NUM_ROWS]; - for(int i = 0; i < NUM_ROWS; i++) { - //since the file is read by Pig, we need to make sure the values are in format that Pig understands - //otherwise it will turn the value to NULL on read + for (int i = 0; i < NUM_ROWS; i++) { + // since the file is read by Pig, we need to make sure the values are in format that Pig + // understands + // otherwise it will turn the value to NULL on read rows[i] = i + "\txxxxx\tyyy\t" + 5.2; } HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows); LOG.debug("File=" + INPUT_FILE_NAME); -// dumpFile(INPUT_FILE_NAME); + // dumpFile(INPUT_FILE_NAME); PigServer server = createPigServer(true); int queryNumber = 1; + logAndRegister(server, "A = load '" + INPUT_FILE_NAME + + "' as (id:int, char5:chararray, varchar10:chararray, dec52:bigdecimal);", queryNumber++); + logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + + "();", queryNumber++); logAndRegister(server, - "A = load '" + INPUT_FILE_NAME + "' as (id:int, char5:chararray, varchar10:chararray, dec52:bigdecimal);", - queryNumber++); - logAndRegister(server, - "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++); - logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", - queryNumber); + "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber); CommandProcessorResponse cpr = driver.run("select * from " + tblName); LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage()); List l = new ArrayList(); driver.getResults(l); LOG.debug("Dumping rows via SQL from " + tblName); - /*Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a String - * thus the timestamp in 't' doesn't match rawData*/ - for(Object t : l) { + /* + * Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a String thus + * the timestamp in 't' doesn't match rawData + */ + for (Object t : l) { LOG.debug(t == null ? null : t.toString()); } Iterator itr = server.openIterator("B"); @@ -354,32 +383,37 @@ public void testDateCharTypes() throws Exception { while (itr.hasNext()) { Tuple t = itr.next(); StringBuilder rowFromPig = new StringBuilder(); - for(int i = 0; i < t.size(); i++) { + for (int i = 0; i < t.size(); i++) { rowFromPig.append(t.get(i)).append("\t"); } rowFromPig.setLength(rowFromPig.length() - 1); Assert.assertEquals("Comparing Pig to Raw data", rows[numRowsRead], rowFromPig.toString()); - //see comment at "Dumping rows via SQL..." for why this doesn't work (for all types) - //Assert.assertEquals("Comparing Pig to Hive", rowFromPig.toString(), l.get(numRowsRead)); + // see comment at "Dumping rows via SQL..." 
for why this doesn't work (for all types) + // Assert.assertEquals("Comparing Pig to Hive", rowFromPig.toString(), l.get(numRowsRead)); numRowsRead++; } - Assert.assertEquals("Expected " + NUM_ROWS + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME, NUM_ROWS, numRowsRead); + Assert.assertEquals("Expected " + NUM_ROWS + " rows; got " + numRowsRead + " file=" + + INPUT_FILE_NAME, NUM_ROWS, numRowsRead); } + static void dumpFile(String fileName) throws Exception { File file = new File(fileName); BufferedReader reader = new BufferedReader(new FileReader(file)); String line = null; LOG.debug("Dumping raw file: " + fileName); - while((line = reader.readLine()) != null) { + while ((line = reader.readLine()) != null) { LOG.debug(line); } reader.close(); } + @Test public void testPartColsInData() throws IOException, CommandNeedRetryException { driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as " + getStorageFormat(); + String createTable = + "create table junit_unparted(a int) partitioned by (b string) stored as " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -392,8 +426,10 @@ public void testPartColsInData() throws IOException, CommandNeedRetryException { HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + server.registerQuery("store A into 'default.junit_unparted' using " + + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + + "();"); Iterator itr = server.openIterator("B"); int i = 0; @@ -414,32 +450,41 @@ public void testPartColsInData() throws IOException, CommandNeedRetryException { public void testMultiPartColsInData() throws IOException, CommandNeedRetryException { driver.run("drop table employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + getStorageFormat(); + String createTable = + "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + String[] inputData = { + "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + "111240\tKavya\t01/01/2002\tF\tIN\tAP" + }; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + + "' USING 
PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); - pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); - pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); - pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); - pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); + pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + + "('emp_country=IN,emp_state=TN');"); + pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + + "('emp_country=IN,emp_state=KA');"); + pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + + "('emp_country=IN,emp_state=KL');"); + pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + + "('emp_country=IN,emp_state=AP');"); pig.executeBatch(); driver.run("select * from employee"); ArrayList results = new ArrayList(); @@ -455,9 +500,10 @@ public void testMultiPartColsInData() throws IOException, CommandNeedRetryExcept @Test public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as " + getStorageFormat(); + String createTable = + "create table junit_unparted(a int) partitioned by (b string) stored as " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -470,8 +516,10 @@ public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryExce HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + server.registerQuery("store A into 'default.junit_unparted' using " + + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + + "();"); Iterator itr = server.openIterator("B"); int i = 0; @@ -491,7 +539,9 @@ public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryExce @Test public void testNoAlias() throws IOException, CommandNeedRetryException { driver.run("drop table junit_parted"); - String createTable = "create table junit_parted(a int, b string) partitioned by (ds string) stored as " + getStorageFormat(); + String createTable = + "create table junit_parted(a int, b string) partitioned by (ds string) stored as " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -502,13 +552,16 @@ public void testNoAlias() throws IOException, CommandNeedRetryException { 
server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); server.registerQuery("B = foreach A generate a+10, b;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + + "('ds=20100101');"); server.executeBatch(); } catch (PigException fe) { PigException pe = LogUtils.getPigException(fe); Assert.assertTrue(pe instanceof FrontendException); Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.")); + Assert.assertTrue(pe.getMessage().contains( + "Column name for a field is not specified. Please provide the full schema as an " + + "argument to HCatStorer.")); errCaught = true; } Assert.assertTrue(errCaught); @@ -517,13 +570,15 @@ public void testNoAlias() throws IOException, CommandNeedRetryException { server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);"); server.registerQuery("B = foreach A generate a, B;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + + "('ds=20100101');"); server.executeBatch(); } catch (PigException fe) { PigException pe = LogUtils.getPigException(fe); Assert.assertTrue(pe instanceof FrontendException); Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column names should all be in lowercase. Invalid name found: B")); + Assert.assertTrue(pe.getMessage().contains( + "Column names should all be in lowercase. 
Invalid name found: B")); errCaught = true; } driver.run("drop table junit_parted"); @@ -534,7 +589,8 @@ public void testNoAlias() throws IOException, CommandNeedRetryException { public void testStoreMultiTables() throws IOException, CommandNeedRetryException { driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); + String createTable = + "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -560,9 +616,11 @@ public void testStoreMultiTables() throws IOException, CommandNeedRetryException server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); server.registerQuery("B = filter A by a < 2;"); - server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + + "();"); server.registerQuery("C = filter A by a >= 2;"); - server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + "();"); + server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + + "();"); server.executeBatch(); driver.run("select * from junit_unparted"); @@ -582,14 +640,13 @@ public void testStoreMultiTables() throws IOException, CommandNeedRetryException } Assert.assertFalse(itr.hasNext()); - } @Test public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); + String createTable = "create table junit_unparted(a int, b string) stored as " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -608,7 +665,8 @@ public void testStoreWithNoSchema() throws IOException, CommandNeedRetryExceptio PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');"); + server.registerQuery("store A into 'default.junit_unparted' using " + + HCatStorer.class.getName() + "('');"); server.executeBatch(); driver.run("select * from junit_unparted"); @@ -621,14 +679,13 @@ public void testStoreWithNoSchema() throws IOException, CommandNeedRetryExceptio } Assert.assertFalse(itr.hasNext()); - } @Test public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); + String createTable = "create table junit_unparted(a int, b string) stored as " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -647,7 +704,8 @@ public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryExcept PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + 
server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + + "();"); server.executeBatch(); driver.run("select * from junit_unparted"); @@ -660,14 +718,13 @@ public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryExcept } Assert.assertFalse(itr.hasNext()); - } @Test public void testEmptyStore() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); + String createTable = + "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -687,7 +744,8 @@ public void testEmptyStore() throws IOException, CommandNeedRetryException { server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); server.registerQuery("B = filter A by a > 100;"); - server.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.registerQuery("store B into 'default.junit_unparted' using " + + HCatStorer.class.getName() + "('','a:int,b:chararray');"); server.executeBatch(); driver.run("select * from junit_unparted"); @@ -696,29 +754,44 @@ public void testEmptyStore() throws IOException, CommandNeedRetryException { driver.run("drop table junit_unparted"); Iterator itr = res.iterator(); Assert.assertFalse(itr.hasNext()); - } @Test public void testBagNStruct() throws IOException, CommandNeedRetryException { driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(b string,a struct, arr_of_struct array, " + - "arr_of_struct2 array>, arr_of_struct3 array>) stored as " + getStorageFormat(); + String createTable = "create table junit_unparted(" + + "b string," + + "a struct, " + + "arr_of_struct array, " + + "arr_of_struct2 array>, " + + "arr_of_struct3 array>) " + + "stored as " + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } - String[] inputData = new String[]{"zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", - "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}"}; + String[] inputData = new String[] { + "zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", + "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}" + }; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','b:chararray, a:tuple(a1:int)," + - " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (" + + "b:chararray, " + + "a:tuple(a1:int), " + + "arr_of_struct:bag{mytup:tuple(s1:chararray)}, " + + "arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, " + + "arr_of_struct3:bag{t3:tuple(s3:chararray)});"); + server.registerQuery("store A 
into 'default.junit_unparted' using " + + HCatStorer.class.getName() + + "('','b:chararray, a:tuple(a1:int), " + + "arr_of_struct:bag{mytup:tuple(s1:chararray)}, " + + "arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, " + + "arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); server.executeBatch(); driver.run("select * from junit_unparted"); @@ -726,17 +799,25 @@ public void testBagNStruct() throws IOException, CommandNeedRetryException { driver.getResults(res); driver.run("drop table junit_unparted"); Iterator itr = res.iterator(); - Assert.assertEquals("zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", itr.next()); - Assert.assertEquals("chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", itr.next()); + Assert.assertEquals( + "zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t" + + "[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", + itr.next()); + Assert.assertEquals( + "chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t" + + "[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", + itr.next()); Assert.assertFalse(itr.hasNext()); } @Test public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) stored as " + getStorageFormat(); + String createTable = + "create table junit_unparted(" + + "a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) " + + "stored as " + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -744,18 +825,25 @@ public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryEx int i = 0; String[] input = new String[3]; - input[i++] = "0\t\t\t\t\t\t\t"; //Empty values except first column - input[i++] = "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "true" + "\tbinary-data"; //First column empty - input[i++] = i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "false" + "\tbinary-data"; + input[i++] = "0\t\t\t\t\t\t\t"; // Empty values except first column + input[i++] = + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "true" + + "\tbinary-data"; // First column empty + input[i++] = + i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "false" + + "\tbinary-data"; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray);"); - //null gets stored into column g which is a binary field. - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray');"); - server.executeBatch(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + + "' as (a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray);"); + // null gets stored into column g which is a binary field. 
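+ // (the table declares 8 columns but the Pig schema lists only 7, so g is never written)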
+ server.registerQuery("store A into 'default.junit_unparted' using " + + HCatStorer.class.getName() + + "('','a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray');"); + server.executeBatch(); driver.run("select * from junit_unparted"); ArrayList res = new ArrayList(); @@ -763,9 +851,10 @@ public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryEx Iterator itr = res.iterator(); String next = itr.next(); - Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", next ); + Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", next); Assert.assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\ttrue\tbinary-data\tNULL", itr.next()); - Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL", itr.next()); + Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL", + itr.next()); Assert.assertFalse(itr.hasNext()); server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";"); @@ -789,9 +878,9 @@ public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryEx @Test public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); + String createTable = + "create table junit_unparted(a int, b string) stored as " + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -810,7 +899,8 @@ public void testStoreFuncSimple() throws IOException, CommandNeedRetryException PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.registerQuery("store A into 'default.junit_unparted' using " + + HCatStorer.class.getName() + "('','a:int,b:chararray');"); server.executeBatch(); driver.run("select * from junit_unparted"); @@ -825,33 +915,38 @@ public void testStoreFuncSimple() throws IOException, CommandNeedRetryException } } Assert.assertFalse(itr.hasNext()); - } @Test - public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { - + public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, + CommandNeedRetryException { driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + getStorageFormat(); + String createTable = + "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + String[] inputData = { + "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + "111240\tKavya\t01/01/2002\tF\tIN\tAP" + }; 
HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + + "('emp_country=IN');"); pig.executeBatch(); driver.run("select * from employee"); ArrayList results = new ArrayList(); @@ -866,27 +961,33 @@ public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOExc } @Test - public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { + public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, + CommandNeedRetryException { driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + getStorageFormat(); + String createTable = "CREATE TABLE employee (" + + "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + String[] inputData = { + "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + "111240\tKavya\t01/01/2002\tF\tIN\tAP" + }; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (" + + "emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); pig.executeBatch(); @@ -903,11 +1004,13 @@ public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOExceptio } @Test - public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { - + public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, + CommandNeedRetryException { driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + getStorageFormat(); + String createTable = 
"CREATE TABLE employee (" + + "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + "PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { @@ -919,8 +1022,9 @@ public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOEx PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (" + + "emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); pig.executeBatch(); @@ -930,12 +1034,13 @@ public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOEx Assert.assertEquals(0, results.size()); driver.run("drop table employee"); } + @Test - public void testPartitionPublish() - throws IOException, CommandNeedRetryException { + public void testPartitionPublish() throws IOException, CommandNeedRetryException { driver.run("drop table ptn_fail"); - String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as " + getStorageFormat(); + String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) " + + "stored as " + getStorageFormat(); int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); @@ -949,20 +1054,17 @@ public void testPartitionPublish() HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME - + "' as (a:int, c:chararray);"); - server.registerQuery("B = filter A by " + FailEvalFunc.class.getName() - + "($0);"); - server.registerQuery("store B into 'ptn_fail' using " - + HCatStorer.class.getName() + "('b=math');"); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, c:chararray);"); + server.registerQuery("B = filter A by " + FailEvalFunc.class.getName() + "($0);"); + server.registerQuery("store B into 'ptn_fail' using " + HCatStorer.class.getName() + + "('b=math');"); server.executeBatch(); String query = "show partitions ptn_fail"; retCode = driver.run(query).getResponseCode(); if (retCode != 0) { - throw new IOException("Error " + retCode + " running query " - + query); + throw new IOException("Error " + retCode + " running query " + query); } ArrayList res = new ArrayList(); @@ -971,21 +1073,20 @@ public void testPartitionPublish() // Make sure the partitions directory is not in hdfs. 
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java
index 76080f7..c362237 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java
@@ -25,23 +25,31 @@
 import java.util.HashMap;
 import java.util.Map;
 
-import junit.framework.TestCase;
-
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hive.cli.CliSessionState;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.session.SessionState;
+
 import org.apache.hive.hcatalog.common.HCatUtil;
 import org.apache.hive.hcatalog.data.Pair;
+
 import org.apache.pig.ExecType;
 import org.apache.pig.PigServer;
 
-public class TestHCatStorerMulti extends TestCase {
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestHCatStorerMulti {
   public static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName(
-      System.getProperty("user.dir") + "/build/test/data/" +
-      TestHCatStorerMulti.class.getCanonicalName() + "-" + System.currentTimeMillis());
+      System.getProperty("user.dir")
+          + "/build/test/data/"
+          + TestHCatStorerMulti.class.getCanonicalName()
+          + "-" + System.currentTimeMillis());
   private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse";
   private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data";
@@ -52,15 +60,16 @@ private static Map<Integer, Pair<Integer, String>> basicInputData;
 
   protected String storageFormat() {
-    return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," +
-      "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')";
+    return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver',"
+        + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')";
   }
 
   private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
     driver.run("drop table " + tablename);
   }
 
-  private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException {
+  private void createTable(String tablename, String schema, String partitionedBy)
+      throws IOException, CommandNeedRetryException {
     String createTable;
     createTable = "create table " + tablename + "(" + schema + ") ";
     if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) {
@@ -69,16 +78,18 @@ private void createTable(String tablename, String schema, String partitionedBy)
     createTable = createTable + "stored as " + storageFormat();
     int retCode = driver.run(createTable).getResponseCode();
     if (retCode != 0) {
-      throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]");
+      throw new IOException("Failed to create table. [" + createTable
+          + "], return code from hive driver : [" + retCode + "]");
     }
   }
 
-  private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException {
+  private void createTable(String tablename, String schema) throws IOException,
+      CommandNeedRetryException {
     createTable(tablename, schema, null);
   }
 
-  @Override
-  protected void setUp() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     if (driver == null) {
       HiveConf hiveConf = new HiveConf(this.getClass());
       hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
@@ -92,14 +103,13 @@ protected void setUp() throws Exception {
     cleanup();
   }
 
-  @Override
-  protected void tearDown() throws Exception {
+  @After
+  public void tearDown() throws Exception {
     cleanup();
   }
 
+  @Test
   public void testStoreBasicTable() throws Exception {
-
-
     createTable(BASIC_TABLE, "a int, b string");
 
     populateBasicFile();
@@ -107,7 +117,8 @@ public void testStoreBasicTable() throws Exception {
     PigServer server = new PigServer(ExecType.LOCAL);
     server.setBatchOn();
     server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
-    server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();");
+    server.registerQuery("store A into '" + BASIC_TABLE
+        + "' using org.apache.hive.hcatalog.pig.HCatStorer();");
 
     server.executeBatch();
 
@@ -117,6 +128,7 @@ public void testStoreBasicTable() throws Exception {
     assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size());
   }
 
+  @Test
   public void testStorePartitionedTable() throws Exception {
     createTable(PARTITIONED_TABLE, "a int, b string", "bkt string");
 
@@ -127,9 +139,11 @@ public void testStorePartitionedTable() throws Exception {
     server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
 
     server.registerQuery("B2 = filter A by a < 2;");
-    server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');");
+    server.registerQuery("store B2 into '" + PARTITIONED_TABLE
+        + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');");
     server.registerQuery("C2 = filter A by a >= 2;");
-    server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');");
+    server.registerQuery("store C2 into '" + PARTITIONED_TABLE
+        + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');");
 
     server.executeBatch();
 
@@ -139,9 +153,8 @@ public void testStorePartitionedTable() throws Exception {
     assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size());
   }
 
+  @Test
   public void testStoreTableMulti() throws Exception {
-
-
     createTable(BASIC_TABLE, "a int, b string");
     createTable(PARTITIONED_TABLE, "a int, b string", "bkt string");
 
@@ -150,12 +163,15 @@ public void testStoreTableMulti() throws Exception {
     PigServer server = new PigServer(ExecType.LOCAL);
     server.setBatchOn();
     server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
-    server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();");
+    server.registerQuery("store A into '" + BASIC_TABLE
+        + "' using org.apache.hive.hcatalog.pig.HCatStorer();");
 
     server.registerQuery("B2 = filter A by a < 2;");
-    server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');");
+    server.registerQuery("store B2 into '" + PARTITIONED_TABLE
+        + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');");
     server.registerQuery("C2 = filter A by a >= 2;");
-    server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');");
+    server.registerQuery("store C2 into '" + PARTITIONED_TABLE
+        + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');");
 
     server.executeBatch();
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java
index 7f0bca7..08efb6b 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java
@@ -33,42 +33,44 @@ import org.junit.Test;
 
 /**
- * This test checks the {@link HCatConstants#HCAT_PIG_STORER_EXTERNAL_LOCATION} that we can set in the
- * UDFContext of {@link HCatStorer} so that it writes to the specified external location.
+ * This test checks the {@link HCatConstants#HCAT_PIG_STORER_EXTERNAL_LOCATION} that we can set in
+ * the UDFContext of {@link HCatStorer} so that it writes to the specified external location.
  *
- * Since {@link HCatStorer} does not allow extra parameters in the constructor, we use {@link HCatStorerWrapper}
- * that always treats the last parameter as the external path.
+ * Since {@link HCatStorer} does not allow extra parameters in the constructor, we use
+ * {@link HCatStorerWrapper} that always treats the last parameter as the external path.
  */
 public class TestHCatStorerWrapper extends HCatBaseTest {
 
   private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data";
 
   @Test
-  public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException{
-
+  public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException {
     File tmpExternalDir = new File(TEST_DATA_DIR, UUID.randomUUID().toString());
     tmpExternalDir.deleteOnExit();
 
     String part_val = "100";
 
     driver.run("drop table junit_external");
-    String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE";
+    String createTable =
+        "create external table junit_external(a int, b string) "
+            + "partitioned by (c string) stored as RCFILE";
     Assert.assertEquals(0, driver.run(createTable).getResponseCode());
 
     int LOOP_SIZE = 3;
-    String[] inputData = new String[LOOP_SIZE*LOOP_SIZE];
+    String[] inputData = new String[LOOP_SIZE * LOOP_SIZE];
     int k = 0;
-    for(int i = 1; i <= LOOP_SIZE; i++) {
+    for (int i = 1; i <= LOOP_SIZE; i++) {
       String si = i + "";
-      for(int j=1;j<=LOOP_SIZE;j++) {
-        inputData[k++] = si + "\t"+j;
+      for (int j = 1; j <= LOOP_SIZE; j++) {
+        inputData[k++] = si + "\t" + j;
       }
     }
     HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
     PigServer server = new PigServer(ExecType.LOCAL);
     server.setBatchOn();
-    logAndRegister(server, "A = load '"+INPUT_FILE_NAME+"' as (a:int, b:chararray);");
-    logAndRegister(server, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName()
+    logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
+    logAndRegister(server,
+        "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName()
         + "('c=" + part_val + "','" + tmpExternalDir.getPath().replaceAll("\\\\", "/") + "');");
     server.executeBatch();
 
@@ -77,8 +79,8 @@ public void testStoreExternalTableWithExternalDir() throws IOException, CommandN
     boolean found = false;
     File[] f = tmpExternalDir.listFiles();
     if (f != null) {
-      for (File fin : f){
-        if (fin.getPath().contains("part-m-00000")){
+      for (File fin : f) {
+        if (fin.getPath().contains("part-m-00000")) {
           found = true;
         }
       }
@@ -91,10 +93,10 @@ public void testStoreExternalTableWithExternalDir() throws IOException, CommandN
     driver.getResults(res);
     driver.run("drop table junit_external");
     Iterator<String> itr = res.iterator();
-    for(int i = 1; i <= LOOP_SIZE; i++) {
+    for (int i = 1; i <= LOOP_SIZE; i++) {
       String si = i + "";
-      for(int j=1;j<=LOOP_SIZE;j++) {
-        Assert.assertEquals( si + "\t" + j + "\t" + part_val,itr.next());
+      for (int j = 1; j <= LOOP_SIZE; j++) {
+        Assert.assertEquals(si + "\t" + j + "\t" + part_val, itr.next());
       }
     }
     Assert.assertFalse(itr.hasNext());
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java
index 82eb0d7..0740272 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java
@@ -26,4 +26,3 @@ protected String storageFormat() {
   }
 }
 
-
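Background for the wrapper test above: Pig's UDFContext keys each UDF's Properties by the UDF class plus a signature string, so a value stored on the Pig frontend is only found later if both sides use the same (class, signature) pair; that is presumably why the wrapper has to reuse the storer's signature. A self-contained sketch of that keying, with illustrative names (in a real job the property is written at plan time and read back on the task side):

import java.util.Properties;

import org.apache.pig.impl.util.UDFContext;

public class UdfContextKeyingDemo {
  public static void main(String[] args) {
    String signature = "store-1";             // illustrative signature string

    // "Frontend": stash a value under the (class, signature) key.
    Properties front = UDFContext.getUDFContext()
        .getUDFProperties(UdfContextKeyingDemo.class, new String[] { signature });
    front.setProperty("external.location", "/tmp/external");

    // "Backend" (same JVM here, for illustration): the same (class, signature)
    // key yields the same Properties object, so the value is visible again.
    Properties back = UDFContext.getUDFContext()
        .getUDFProperties(UdfContextKeyingDemo.class, new String[] { signature });
    System.out.println(back.getProperty("external.location"));   // prints /tmp/external
  }
}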
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatPigStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatPigStorer.java
deleted file mode 100644
index a9b4521..0000000
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatPigStorer.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package org.apache.hive.hcatalog.pig;
-
-import org.apache.hadoop.hive.ql.CommandNeedRetryException;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import java.io.IOException;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-public class TestOrcHCatPigStorer extends TestHCatStorer {
-  @Override String getStorageFormat() {
-    return "ORC";
-  }
-}
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java
index 1084092..36a2906 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java
@@ -1,28 +1,29 @@
 /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
  */
+
 package org.apache.hive.hcatalog.pig;
 
-public class TestOrcHCatStorer extends TestHCatStorerMulti {
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.io.IOException;
+
+public class TestOrcHCatStorer extends TestHCatStorer {
   @Override
-  protected String storageFormat() {
-    return "orc";
+  String getStorageFormat() {
+    return "ORC";
   }
 }
-
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorerMulti.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorerMulti.java
new file mode 100644
index 0000000..658cf02
--- /dev/null
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorerMulti.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.pig;
+
+public class TestOrcHCatStorerMulti extends TestHCatStorerMulti {
+
+  @Override
+  protected String storageFormat() {
+    return "orc";
+  }
+}
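The ORC test classes above stay close to empty by design: the base suites build their CREATE TABLE statements around an overridable storage-format hook, so supporting a new format is a one-method subclass. The same template-method shape reduced to plain, runnable Java (class names here are illustrative, not taken from the patch):

public class StorageFormatTemplateSketch {

  static class BaseStorerTest {
    protected String storageFormat() {
      return "RCFILE";                       // default used by the base suite
    }

    String createTableDdl(String table, String schema) {
      // every table the suite creates is routed through the hook
      return "create table " + table + "(" + schema + ") stored as " + storageFormat();
    }
  }

  static class OrcStorerTest extends BaseStorerTest {
    @Override
    protected String storageFormat() {
      return "orc";                          // the only thing the subclass changes
    }
  }

  public static void main(String[] args) {
    System.out.println(new OrcStorerTest().createTableDdl("t", "a int, b string"));
    // -> create table t(a int, b string) stored as orc
  }
}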
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java
index a8ce61a..70367ab 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java
@@ -37,21 +37,23 @@ public void testGetBagSubSchema() throws Exception {
 
     // Define the expected schema.
     ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
-    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
-      .setDescription("The tuple in the bag").setType(DataType.TUPLE);
+    bagSubFieldSchemas[0] =
+        new ResourceFieldSchema().setName("innertuple").setDescription("The tuple in the bag")
+            .setType(DataType.TUPLE);
 
     ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
     innerTupleFieldSchemas[0] =
-      new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);
+        new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);
 
     bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
     ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
 
     // Get the actual converted schema.
-    HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList(
-      new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
+    HCatSchema hCatSchema =
+        new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama",
+            HCatFieldSchema.Type.STRING, null)));
     HCatFieldSchema hCatFieldSchema =
-      new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null);
+        new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null);
     ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema);
 
     Assert.assertEquals(expected.toString(), actual.toString());
@@ -69,21 +71,23 @@ public void testGetBagSubSchemaConfigured() throws Exception {
 
     // Define the expected schema.
     ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
-    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t")
-      .setDescription("The tuple in the bag").setType(DataType.TUPLE);
+    bagSubFieldSchemas[0] =
+        new ResourceFieldSchema().setName("t").setDescription("The tuple in the bag")
+            .setType(DataType.TUPLE);
 
     ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
     innerTupleFieldSchemas[0] =
-      new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
+        new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
 
     bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
     ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
 
     // Get the actual converted schema.
-    HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(
-      new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
+    HCatSchema actualHCatSchema =
+        new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama",
+            HCatFieldSchema.Type.STRING, null)));
     HCatFieldSchema actualHCatFieldSchema =
-      new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
+        new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
     ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);
 
     Assert.assertEquals(expected.toString(), actual.toString());
-- 
1.8.5.2 (Apple Git-48)