From 19e955a7fb3f808c1e04938b9a2716afbea22863 Mon Sep 17 00:00:00 2001 From: khemani Date: Thu, 15 Dec 2011 11:43:15 -0800 Subject: [PATCH] HBASE-5029 [jira] TestDistributedLogSplitting fails on occasion Summary: HBASE-5029 TestDistributedLogSplitting fails on occasion This is how it usually fails: https://builds.apache.org/view/G-L/view/HBase/job/HBase-0.92/lastCompletedBuild/testReport/org.apache.hadoop.hbase.master/TestDistributedLogSplitting/testWorkerAbort/ Assigning mighty Prakash since he offered to take a looksee. Test Plan: EMPTY Reviewers: JIRA --- .../hbase/master/TestDistributedLogSplitting.java | 34 +++++++++++++------ 1 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java b/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java index 3c77215..e490b9b 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java @@ -19,12 +19,10 @@ */ package org.apache.hadoop.hbase.master; -import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_final_transistion_failed; -import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_acquired; -import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_err; -import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_resigned; +import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.*; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.IOException; import java.util.Iterator; @@ -246,6 +244,15 @@ public class TestDistributedLogSplitting { assertEquals(NUM_LOG_LINES, count); } + /** + * The original intention of this test was to force an abort of a region + * server and to make sure that the failure path in the region 
servers is + * properly evaluated. But it is difficult to ensure that the region server + * doesn't finish the log splitting before it aborts. Also, there is now + * a code path where the master will preempt the region server when the master + * detects that the region server has aborted. + * @throws Exception + */ @Test (timeout=300000) public void testWorkerAbort() throws Exception { LOG.info("testWorkerAbort"); @@ -279,21 +286,26 @@ public class TestDistributedLogSplitting { slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch); //waitForCounter but for one of the 2 counters long curt = System.currentTimeMillis(); - long endt = curt + 30000; + long waitTime = 30000; + long endt = curt + waitTime; while (curt < endt) { if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() + - tot_wkr_final_transistion_failed.get()) == 0) { + tot_wkr_final_transistion_failed.get() + tot_wkr_task_done.get() + + tot_wkr_preempt_task.get()) == 0) { Thread.yield(); curt = System.currentTimeMillis(); } else { - assertEquals(1, (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() + - tot_wkr_final_transistion_failed.get())); + assertEquals(1, (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() + + tot_wkr_final_transistion_failed.get() + tot_wkr_task_done.get() + + tot_wkr_preempt_task.get())); return; } } - assertEquals(1, batch.done); - // fail("region server completed the split before aborting"); - return; + fail("none of the following counters went up in " + waitTime + + " milliseconds - " + + "tot_wkr_task_resigned, tot_wkr_task_err, " + + "tot_wkr_final_transistion_failed, tot_wkr_task_done, " + + "tot_wkr_preempt_task"); } HTable installTable(ZooKeeperWatcher zkw, String tname, String fname, -- 1.7.7.2