Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Fixed
-
None
-
None
-
None
Description
Enhancement: Oozie should verify that fork and join nodes are correctly paired when the user submits the job. This should be a static check performed at submission time, not a check made while the workflow is running.
Current logic bug:
A workflow with a different number of forks and joins was run. The wf job should have been killed, but it succeeded. Also, strangely, one of its actions (java12 in the tests below) was killed.
The following tests were run with varying delays; the result of each test is listed with its delay settings.
test1: wf job SUCCEEDED, action java12 KILLED.
delay11=11
delay12=12
delay121=1
delay122=2
delay21=1
delay22=1
test2: wf job SUCCEEDED, action java12 KILLED.
delay11=1
delay12=12
delay121=1
delay122=2
delay21=1
delay22=1
test3: wf job SUCCEEDED, all actions OK. Question: why does the wf job always pass in this scenario, even though the forks and joins are not paired?
delay11=10
delay12=10
delay121=15
delay122=15
delay21=20
delay22=20
workflow.xml
============
<workflow-app xmlns="uri:oozie:workflow:0.1" name="fork-join-4735180-wf">
  <!--
    Control-flow summary:
      start goes to fork1
      fork1 starts java11 and fork12 in parallel
      java11 then java12, which transitions to join1
      fork12 starts java121 and java122, which both transition to join12
      join12 goes to fork2
      fork2 starts java21 and java22, which both transition to join1
      join1 goes to end
    The workflow declares three forks (fork1, fork12, fork2) but only
    two joins (join12, join1).
    Every action runs the same main class with a single delay argument
    and routes its error transition to the "fail" kill node.
  -->
  <start to="fork1"/>

  <!-- Outer fork: one path is an action chain, the other is a nested fork. -->
  <fork name="fork1">
    <path start="java11"/>
    <path start="fork12"/>
  </fork>

  <!-- First path of fork1: java11 followed by java12. -->
  <action name="java11">
    <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay11}</arg>
    </java>
    <ok to="java12"/>
    <error to="fail"/>
  </action>

  <action name="java12">
    <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay12}</arg>
    </java>
    <ok to="join1"/>
    <error to="fail"/>
  </action>

  <!-- Second path of fork1: a nested fork whose branches meet at join12. -->
  <fork name="fork12">
    <path start="java121"/>
    <path start="java122"/>
  </fork>

  <action name="java121">
    <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay121}</arg>
    </java>
    <ok to="join12"/>
    <error to="fail"/>
  </action>

  <action name="java122">
    <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay122}</arg>
    </java>
    <ok to="join12"/>
    <error to="fail"/>
  </action>

  <!-- join12 hands off directly to another fork rather than to an action. -->
  <join name="join12" to="fork2"/>

  <fork name="fork2">
    <path start="java21"/>
    <path start="java22"/>
  </fork>

  <action name="java21">
    <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay21}</arg>
    </java>
    <ok to="join1"/>
    <error to="fail"/>
  </action>

  <action name="java22">
    <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay22}</arg>
    </java>
    <ok to="join1"/>
    <error to="fail"/>
  </action>

  <!-- join1 is the target of java12 (fork1 path) and of java21/java22 (fork2 paths). -->
  <join name="join1" to="end"/>

  <!-- The message text spans two lines; the second line is kept at column 0
       so the element's text content is not altered by indentation. -->
  <kill name="fail">
    <message>Streaming Map/Reduce failed, error
message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  </kill>

  <end name="end"/>
</workflow-app>