Uploaded image for project: 'Falcon'
  1. Falcon
  2. FALCON-372

Process that has HCat partitions with '/' in the partition name does not succeed

    XML | Word | Printable | JSON

Details

    • Bug
    • Status: Open
    • Major
    • Resolution: Unresolved
    • None
    • None
    • None
    • None

    Description

      Test name: HCatProcessTest#OneHCatInputOneHCatOutput, with separator '/'
      Here are the entity definitions:

      <?xml version="1.0" encoding="UTF-8"?>
      <cluster xmlns="uri:falcon:cluster:0.1" name="corp-65636c71-a466-4906-aabc-3ce40f7a9850" description="" colo="default">
        <interfaces>
          <interface type="readonly" endpoint="webhdfs://rkg1-2:50070" version="0.20.2"/>
          <interface type="write" endpoint="hdfs://rkg1-2:8020" version="0.20.2"/>
          <interface type="execute" endpoint="rkg1-2:8032" version="0.20.2"/>
          <interface type="workflow" endpoint="http://rkg1-2:11000/oozie/" version="3.1"/>
          <interface type="messaging" endpoint="tcp://rkg1-2:61616?daemon=true" version="5.1.6"/>
          <interface type="registry" endpoint="thrift://rkg1-2.cs1cloud.internal:9083" version="0.11.0"/>
        </interfaces>
        <locations>
          <location name="staging" path="/projects/ivory/staging"/>
          <location name="temp" path="/tmp"/>
          <location name="working" path="/projectsTest/ivory/working"/>
        </locations>
        <properties>
          <property name="field1" value="value1"/>
          <property name="field2" value="value2"/>
          <property name="dfs.namenode.kerberos.principal" value="nn/rkg1-2@EXAMPLE.COM"/>
          <property name="hive.metastore.kerberos.principal" value="hive/rkg1-2@EXAMPLE.COM"/>
        </properties>
      </cluster>
      
      <?xml version="1.0" encoding="UTF-8"?>
      <feed xmlns="uri:falcon:feed:0.1" name="raaw-logs16-b36bdd1e-8b81-47f5-bfef-42fec8ecc68a" description="clicks log">
        <frequency>hours(1)</frequency>
        <timezone>UTC</timezone>
        <late-arrival cut-off="hours(6)"/>
        <clusters>
          <cluster name="corp-65636c71-a466-4906-aabc-3ce40f7a9850" type="source">
            <validity start="2010-01-01T20:00Z" end="2010-01-02T04:00Z"/>
            <retention limit="months(9000)" action="delete"/>
          </cluster>
        </clusters>
        <table uri="catalog:default:hcatprocesstest_input_table#dt=${YEAR}/${MONTH}/${DAY}/${HOUR}"/>
        <ACL owner="testuser" group="group" permission="0x755"/>
        <schema location="hcat" provider="hcat"/>
        <properties>
          <property name="field1" value="value1"/>
          <property name="field2" value="value2"/>
        </properties>
      </feed>
      
      <?xml version="1.0" encoding="UTF-8"?>
      <feed xmlns="uri:falcon:feed:0.1" name="agregated-logs16-621e03ea-dee5-4a47-b6f5-09523b8fc0ee" description="clicks log">
        <frequency>hours(1)</frequency>
        <timezone>UTC</timezone>
        <late-arrival cut-off="hours(6)"/>
        <clusters>
          <cluster name="corp-65636c71-a466-4906-aabc-3ce40f7a9850" type="source">
            <validity start="2010-01-01T20:00Z" end="2010-01-02T04:00Z"/>
            <retention limit="hours(6)" action="delete"/>
          </cluster>
        </clusters>
        <table uri="catalog:default:hcatprocesstest_output_table#dt=${YEAR}/${MONTH}/${DAY}/${HOUR}"/>
        <ACL owner="testuser" group="group" permission="0x755"/>
        <schema location="hcat" provider="hcat"/>
        <properties>
          <property name="field1" value="value1"/>
          <property name="field2" value="value2"/>
        </properties>
      </feed>
      
      <process xmlns="uri:falcon:process:0.1" name="agregator-coord16-fa1776e0-1b9a-4bb9-815e-3b43c147aee4">
        <clusters>
          <cluster name="corp-65636c71-a466-4906-aabc-3ce40f7a9850">
            <validity start="2010-01-01T20:00Z" end="2010-01-02T04:00Z"/>
          </cluster>
        </clusters>
        <parallel>1</parallel>
        <order>FIFO</order>
        <frequency>hours(1)</frequency>
        <timezone>UTC</timezone>
        <inputs>
          <input name="inputData" feed="raaw-logs16-b36bdd1e-8b81-47f5-bfef-42fec8ecc68a" start="now(0,0)" end="now(0,0)"/>
        </inputs>
        <outputs>
          <output name="outputData" feed="agregated-logs16-621e03ea-dee5-4a47-b6f5-09523b8fc0ee" instance="now(0,0)"/>
        </outputs>
        <properties>
          <property name="queueName" value="default"/>
        </properties>
        <workflow engine="hive" path="/tmp/falcon-regression/workflows/hive/script.hql"/>
        <retry policy="periodic" delay="minutes(3)" attempts="3"/>
      </process>
      

      Stacktrace from oozie logs:

      org.apache.oozie.command.CommandException: E1012: Coord Job Materialization Error: E0906: URI parsing error : hcat://rkg1-2.cs1cloud.internal:9083/default/hcatprocesstest_input_table/dt=2010/01/01/20
              at org.apache.oozie.command.coord.CoordMaterializeTransitionXCommand.materialize(CoordMaterializeTransitionXCommand.java:282)
              at org.apache.oozie.command.MaterializeTransitionXCommand.execute(MaterializeTransitionXCommand.java:72)
              at org.apache.oozie.command.MaterializeTransitionXCommand.execute(MaterializeTransitionXCommand.java:28)
              at org.apache.oozie.command.XCommand.call(XCommand.java:280)
              at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175)
              at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
              at java.lang.Thread.run(Thread.java:662)
      Caused by: org.apache.oozie.dependency.URIHandlerException: E0906: URI parsing error : hcat://rkg1-2.cs1cloud.internal:9083/default/hcatprocesstest_input_table/dt=2010/01/01/20
              at org.apache.oozie.dependency.HCatURIHandler.validate(HCatURIHandler.java:169)
              at org.apache.oozie.command.coord.CoordCommandUtils.createEarlyURIs(CoordCommandUtils.java:365)
              at org.apache.oozie.command.coord.CoordCommandUtils.separateResolvedAndUnresolved(CoordCommandUtils.java:303)
              at org.apache.oozie.command.coord.CoordCommandUtils.materializeDataEvents(CoordCommandUtils.java:577)
              at org.apache.oozie.command.coord.CoordCommandUtils.materializeOneInstance(CoordCommandUtils.java:444)
              at org.apache.oozie.command.coord.CoordMaterializeTransitionXCommand.materializeActions(CoordMaterializeTransitionXCommand.java:381)
              at org.apache.oozie.command.coord.CoordMaterializeTransitionXCommand.materialize(CoordMaterializeTransitionXCommand.java:263)
              ... 7 more
      Caused by: java.net.URISyntaxException: URI path is not in expected format: hcat://rkg1-2.cs1cloud.internal:9083/default/hcatprocesstest_input_table/dt=2010/01/01/20
              at org.apache.oozie.util.HCatURI.parse(HCatURI.java:66)
              at org.apache.oozie.util.HCatURI.<init>(HCatURI.java:52)
              at org.apache.oozie.util.HCatURI.<init>(HCatURI.java:48)
              at org.apache.oozie.dependency.HCatURIHandler.validate(HCatURIHandler.java:166)
              ... 13 more
      

      Attachments

        Activity

          People

            Unassigned Unassigned
            raghavgautam Raghav Kumar Gautam
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated: