Uploaded image for project: 'Oozie'
  1. Oozie
  2. OOZIE-636

Check fork and join in the workflow at submission time

    XML, Word, Printable, JSON

Details

    • Bug
    • Status: Closed
    • Major
    • Resolution: Fixed
    • None
    • 3.2.0
    • None
    • None

    Description

      Enhancement: Oozie should check that fork nodes and join nodes are correctly paired when the user submits the job. This should be a static check at submission time, not a check performed while the workflow is running.

      Current logic bug:
      A workflow with a different number of forks and joins was run. The wf job should have been killed, but it succeeded. Also, strangely, one of its actions (java12) was killed.
      Following are the different types of tests run and their results with varying delays.

      test1: wf job SUCCEEDED, action java12 KILLED.
      delay11=11
      delay12=12
      delay121=1
      delay122=2
      delay21=1
      delay22=1

      test2: wf job SUCCEEDED, action java12 KILLED.
      delay11=1
      delay12=12
      delay121=1
      delay122=2
      delay21=1
      delay22=1

      test3: wf job SUCCEEDED, all actions OK. Question: why does the wf job always pass in this scenario, even when fork and join are not
      paired?
      delay11=10
      delay12=10
      delay121=15
      delay122=15
      delay21=20
      delay22=20

      workflow.xml
      ============
      <workflow-app xmlns='uri:oozie:workflow:0.1' name='fork-join-4735180-wf'>
      <!--
        Reproduction workflow for the fork/join pairing problem described in this issue.
        It declares three forks (fork1, fork12, fork2) but only two joins (join12, join1).
        fork12 is closed by join12; branches started under fork1 and under fork2 all
        converge on join1. Every action runs qa.test.tests.testsleep with one delay
        parameter and routes errors to the "fail" kill node.
      -->
      <!-- Entry point: control goes straight to fork1. -->
      <start to='fork1' />

      <!-- fork1 starts two paths: action java11 and the nested fork12. -->
      <fork name="fork1">
      <path start="java11" />
      <path start="fork12" />
      </fork>

      <!-- First path of fork1: passes ${delay11} as the argument; on success continues to java12. -->
      <action name='java11'>
      <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
      <property>
      <name>mapred.job.queue.name</name>
      <value>${queueName}</value>
      </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay11}</arg>
      </java>
      <ok to="java12" />
      <error to="fail" />
      </action>
      <!--
        Continuation of fork1's first path: passes ${delay12}; on success transitions to join1.
        This is the action reported as KILLED in test1 and test2.
      -->
      <action name='java12'>
      <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
      <property>
      <name>mapred.job.queue.name</name>
      <value>${queueName}</value>
      </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay12}</arg>
      </java>
      <ok to="join1" />
      <error to="fail" />
      </action>

      <!-- Second path of fork1: a nested fork that starts java121 and java122. -->
      <fork name="fork12">
      <path start="java121" />
      <path start="java122" />
      </fork>
      <!-- Branch of fork12: passes ${delay121}; on success transitions to join12. -->
      <action name='java121'>
      <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
      <property>
      <name>mapred.job.queue.name</name>
      <value>${queueName}</value>
      </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay121}</arg>
      </java>
      <ok to="join12" />
      <error to="fail" />
      </action>
      <!-- Branch of fork12: passes ${delay122}; on success transitions to join12. -->
      <action name='java122'>
      <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
      <property>
      <name>mapred.job.queue.name</name>
      <value>${queueName}</value>
      </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay122}</arg>
      </java>
      <ok to="join12" />
      <error to="fail" />
      </action>

      <!-- join12 collects both branches of fork12, then transitions directly into fork2. -->
      <join name="join12" to="fork2" />

      <!-- fork2 starts java21 and java22; both target join1, the same join targeted by java12. -->
      <fork name="fork2">
      <path start="java21" />
      <path start="java22" />
      </fork>

      <!-- Branch of fork2: passes ${delay21}; on success transitions to join1. -->
      <action name='java21'>
      <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
      <property>
      <name>mapred.job.queue.name</name>
      <value>${queueName}</value>
      </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay21}</arg>
      </java>
      <ok to="join1" />
      <error to="fail" />
      </action>
      <!-- Branch of fork2: passes ${delay22}; on success transitions to join1. -->
      <action name='java22'>
      <java>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <configuration>
      <property>
      <name>mapred.job.queue.name</name>
      <value>${queueName}</value>
      </property>
      </configuration>
      <main-class>qa.test.tests.testsleep</main-class>
      <arg>${delay22}</arg>
      </java>
      <ok to="join1" />
      <error to="fail" />
      </action>

      <!--
        join1 is the target of three actions: java12 (started under fork1) and
        java21 and java22 (started under fork2). It is the only join available to
        close both of those forks, which is the mismatch this issue reports.
        After join1 the workflow transitions to the end node.
      -->
      <join name="join1" to="end" />

      <!--
        Error target of every action in this workflow.
        NOTE(review): the message text mentions Streaming Map/Reduce although all
        actions above are java actions; presumably copied from another workflow.
      -->
      <kill name="fail">
      <message>Streaming Map/Reduce failed, error
      message[${wf:errorMessage(wf:lastErrorNode())}]</message>
      </kill>
      <!-- Terminal node reached from join1. -->
      <end name='end' />
      </workflow-app>

      Attachments

        Activity

          People

            virag Virag Kothari
            virag Virag Kothari
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: