diff --git src/main/docbkx/developer.xml src/main/docbkx/developer.xml index 2628a7b..68d5cb0 100644 --- src/main/docbkx/developer.xml +++ src/main/docbkx/developer.xml @@ -1133,33 +1133,322 @@ pecularity that is probably fixable but we've not spent the time trying to figur
Developing
Codelines - Most development is done on the master branch (TRUNK). - However, there are branches for minor releases (e.g., 0.90.1, 0.90.2, and 0.90.3 are on the 0.90 branch). + Most development is done on the master branch, which is named + master in the Git repository. Previously, HBase used Subversion, in + which the master branch was called TRUNK. Branches exist for minor + releases, and important features and bug fixes are often back-ported.
-
- Unit Tests - In HBase we use JUnit 4. - If you need to run miniclusters of HDFS, ZooKeeper, HBase, or MapReduce testing, - be sure to checkout the HBaseTestingUtility. - Alex Baranau of Sematext describes how it can be used in - HBase Case-Study: Using HBaseTestingUtility for Local Testing and Development (2010). - -
- Mockito - Sometimes you don't need a full running server - unit testing. For example, some methods can make do with a - a org.apache.hadoop.hbase.Server instance - or a org.apache.hadoop.hbase.master.MasterServices - Interface reference rather than a full-blown - org.apache.hadoop.hbase.master.HMaster. - In these cases, you maybe able to get away with a mocked - Server instance. For example: - - TODO... - - -
+
+ Unit Tests + The following information is from http://blog.cloudera.com/blog/2013/09/how-to-test-hbase-applications-using-popular-tools/. + The following sections discuss JUnit, Mockito, MRUnit, and HBaseTestingUtility. + +
+ JUnit + HBase uses JUnit 4 for unit tests. + This example will add unit tests to the following example class: + +public class MyHBaseDAO { + + public static void insertRecord(HTableInterface table, HBaseTestObj obj) + throws Exception { + Put put = createPut(obj); + table.put(put); + } + + private static Put createPut(HBaseTestObj obj) { + Put put = new Put(Bytes.toBytes(obj.getRowKey())); + put.add(Bytes.toBytes("CF"), Bytes.toBytes("CQ-1"), + Bytes.toBytes(obj.getData1())); + put.add(Bytes.toBytes("CF"), Bytes.toBytes("CQ-2"), + Bytes.toBytes(obj.getData2())); + return put; + } +} + + The first step is to add JUnit dependencies to your Maven POM file: + + junit + junit + 4.11 + test + + ]]> + Next, add some unit tests to your code. Tests are annotated with + @Test. Here, the unit tests are in bold. + +public class TestMyHbaseDAOData { + @Test + public void testCreatePut() throws Exception { + HBaseTestObj obj = new HBaseTestObj(); + obj.setRowKey("ROWKEY-1"); + obj.setData1("DATA-1"); + obj.setData2("DATA-2"); + Put put = MyHBaseDAO.createPut(obj); + assertEquals(obj.getRowKey(), Bytes.toString(put.getRow())); + assertEquals(obj.getData1(), Bytes.toString(put.get(Bytes.toBytes("CF"), Bytes.toBytes("CQ-1")).get(0).getValue())); + assertEquals(obj.getData2(), Bytes.toString(put.get(Bytes.toBytes("CF"), Bytes.toBytes("CQ-2")).get(0).getValue())); + } +} + + These tests ensure that your createPut method creates, populates, + and returns a Put object with expected values. Of course, JUnit can + do much more than this. For an introduction to JUnit, see https://github.com/junit-team/junit/wiki/Getting-started. + 
+ +
+ Mockito + Mockito is a mocking framework. It goes further than JUnit by allowing you to + test the interactions between objects without having to replicate the entire + environment. You can read more about Mockito at its project site, https://code.google.com/p/mockito/. + You can use Mockito to do unit testing on smaller units. For instance, you can + mock an org.apache.hadoop.hbase.Server instance or an + org.apache.hadoop.hbase.master.MasterServices + interface reference rather than a full-blown + org.apache.hadoop.hbase.master.HMaster. + This example builds upon the example code in the preceding JUnit section, to test the insertRecord + method. + First, add a dependency for Mockito to your Maven POM file. + + org.mockito + mockito-all + 1.9.5 + test + + ]]> + Next, add a @RunWith annotation to your test class, to direct it + to use Mockito. + +@RunWith(MockitoJUnitRunner.class) +public class TestMyHBaseDAO{ + @Mock + private HTableInterface table; + @Mock + private HTablePool hTablePool; + @Captor + private ArgumentCaptor putCaptor; + + @Test + public void testInsertRecord() throws Exception { + //return mock table when getTable is called + when(hTablePool.getTable("tablename")).thenReturn(table); + //create test object and make a call to the DAO that needs testing + HBaseTestObj obj = new HBaseTestObj(); + obj.setRowKey("ROWKEY-1"); + obj.setData1("DATA-1"); + obj.setData2("DATA-2"); + MyHBaseDAO.insertRecord(table, obj); + verify(table).put(putCaptor.capture()); + Put put = putCaptor.getValue(); + + assertEquals(Bytes.toString(put.getRow()), obj.getRowKey()); + assert(put.has(Bytes.toBytes("CF"), Bytes.toBytes("CQ-1"))); + assert(put.has(Bytes.toBytes("CF"), Bytes.toBytes("CQ-2"))); + assertEquals(Bytes.toString(put.get(Bytes.toBytes("CF"),Bytes.toBytes("CQ-1")).get(0).getValue()), "DATA-1"); + assertEquals(Bytes.toString(put.get(Bytes.toBytes("CF"),Bytes.toBytes("CQ-2")).get(0).getValue()), "DATA-2"); + } +} + + This code populates HBaseTestObj with “ROWKEY-1”, “DATA-1”, + “DATA-2” as 
values. It then inserts the record into the mocked table. The Put + that the DAO would have inserted is captured, and values are tested to verify + that they are what you expected them to be. + The key here is to manage HTablePool and HTable instance creation outside the + DAO. This allows you to mock them cleanly and test Puts as shown above. + Similarly, you can now expand into other operations such as Get, Scan, or + Delete. + 
+
+ MRUnit + Apache MRUnit is a library + that allows you to unit-test MapReduce jobs. You can use it to test HBase jobs + in the same way as other MapReduce jobs. + Given a MapReduce job that writes to an HBase table called + MyTest, which has one column family called + CF, the reducer of such a job could look like the + following: + { + public static final byte[] CF = "CF".getBytes(); + public static final byte[] QUALIFIER = "CQ-1".getBytes(); + public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { + //bunch of processing to extract data to be inserted, in our case, lets say we are simply + //appending all the records we receive from the mapper for this particular + //key and insert one record into HBase + StringBuffer data = new StringBuffer(); + Put put = new Put(Bytes.toBytes(key.toString())); + for (Text val : values) { + data = data.append(val); + } + put.add(CF, QUALIFIER, Bytes.toBytes(data.toString())); + //write to HBase + context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put); + } + } ]]> + + To test this code, the first step is to add a dependency on MRUnit to your + Maven POM file. + + org.apache.mrunit + mrunit + 1.0.0 + test + + ]]> + Next, use the ReduceDriver provided by MRUnit to test your Reducer job. 
+ reduceDriver; + byte[] CF = "CF".getBytes(); + byte[] QUALIFIER = "CQ-1".getBytes(); + + @Before + public void setUp() { + MyReducer reducer = new MyReducer(); + reduceDriver = ReduceDriver.newReduceDriver(reducer); + } + + @Test + public void testHBaseInsert() throws IOException { + String strKey = "RowKey-1", strValue = "DATA", strValue1 = "DATA1", +strValue2 = "DATA2"; + List list = new ArrayList(); + list.add(new Text(strValue)); + list.add(new Text(strValue1)); + list.add(new Text(strValue2)); + //since in our case all that the reducer is doing is appending the records that the mapper + //sends it, we should get the following back + String expectedOutput = strValue + strValue1 + strValue2; + //Setup Input, mimic what mapper would have passed + //to the reducer and run test + reduceDriver.withInput(new Text(strKey), list); + //run the reducer and get its output + List> result = reduceDriver.run(); + + //extract key from result and verify + assertEquals(Bytes.toString(result.get(0).getFirst().get()), strKey); + + //extract value for CF/QUALIFIER and verify + Put a = (Put)result.get(0).getSecond(); + String c = Bytes.toString(a.get(CF, QUALIFIER).get(0).getValue()); + assertEquals(expectedOutput,c ); + } + +} + ]]> + Your MRUnit test verifies that the output is as expected, the Put that is + inserted into HBase has the correct value, and the ColumnFamily and + ColumnQualifier have the correct values. + MRUnit includes a MapDriver to test mapping jobs, and you can use MRUnit to + test other operations, including reading from HBase, processing data, or writing + to HDFS. + 
+ +
+ Integration Testing with an HBase Mini-Cluster + HBase ships with HBaseTestingUtility, which makes it easy to write integration + tests using a mini-cluster. The first step is to add some + dependencies to your Maven POM file. Check the versions to be sure they are + appropriate. + + org.apache.hadoop + hadoop-common + 2.0.0 + test-jar + test + + + + org.apache.hbase + hbase + 0.98.3 + test-jar + test + + + + org.apache.hadoop + hadoop-hdfs + 2.0.0 + test-jar + test + + + + org.apache.hadoop + hadoop-hdfs + 2.0.0 + test + + ]]> + This code represents an integration test for the MyHBaseDAO insert shown in the JUnit section. + +public class MyHBaseIntegrationTest { + private static HBaseTestingUtility utility; + byte[] CF = "CF".getBytes(); + byte[] QUALIFIER = "CQ-1".getBytes(); + + @Before + public void setup() throws Exception { + utility = new HBaseTestingUtility(); + utility.startMiniCluster(); + } + + @Test + public void testInsert() throws Exception { + HTableInterface table = utility.createTable(Bytes.toBytes("MyTest"), + Bytes.toBytes("CF")); + HBaseTestObj obj = new HBaseTestObj(); + obj.setRowKey("ROWKEY-1"); + obj.setData1("DATA-1"); + obj.setData2("DATA-2"); + MyHBaseDAO.insertRecord(table, obj); + Get get1 = new Get(Bytes.toBytes(obj.getRowKey())); + get1.addColumn(CF, CQ1); + Result result1 = table.get(get1); + assertEquals(Bytes.toString(result1.getRow()), obj.getRowKey()); + assertEquals(Bytes.toString(result1.value()), obj.getData1()); + Get get2 = new Get(Bytes.toBytes(obj.getRowKey())); + get2.addColumn(CF, CQ2); + Result result2 = table.get(get2); + assertEquals(Bytes.toString(result2.getRow()), obj.getRowKey()); + assertEquals(Bytes.toString(result2.value()), obj.getData2()); + } +} + + This code creates an HBase mini-cluster and starts it. Next, it creates a + table called MyTest with one column family, + CF. A record is inserted, a Get is performed from the + same table, and the insertion is verified. 
+ + Starting the mini-cluster takes about 20-30 seconds, but that should be + appropriate for integration testing. + + To use an HBase mini-cluster on Microsoft Windows, you need to use a Cygwin + environment. + See the paper at HBase + Case-Study: Using HBaseTestingUtility for Local Testing and + Development (2010) for more information about + HBaseTestingUtility. +
+