Apache Hudi / HUDI-2234

MERGE INTO works only ON primary key


Details

    • Type: Task
    • Status: Closed
    • Priority: Blocker
    • Resolution: Resolved
    • Affects Version/s: None
    • Fix Version/s: 0.10.0
    • Component/s: spark
    • Labels: None

    Description

      -- Source table (non-partitioned), keyed on id
      drop table if exists hudi_gh_ext_fixed;
      
      create table hudi_gh_ext_fixed (id int, name string, price double, ts long) using hudi options(primaryKey = 'id', precombineField = 'ts') location 'file:///tmp/hudi-h4-fixed';
      
      insert into hudi_gh_ext_fixed values(3, 'AMZN', 300, 120);
      insert into hudi_gh_ext_fixed values(2, 'UBER', 300, 120);
      insert into hudi_gh_ext_fixed values(4, 'GOOG', 300, 120);
      
      update hudi_gh_ext_fixed set price = 150.0 where name = 'UBER';
      
      -- Target table, partitioned by ts, keyed on id
      drop table if exists hudi_fixed;
      
      create table hudi_fixed (id int, name string, price double, ts long) using hudi options(primaryKey = 'id', precombineField = 'ts') partitioned by (ts) location 'file:///tmp/hudi-h4-part-fixed';
      
      insert into hudi_fixed values(2, 'UBER', 200, 120);
      
      -- MERGE with the ON condition on a non-primary-key column (name) fails
      MERGE INTO hudi_fixed
      USING (select id, name, price, ts from hudi_gh_ext_fixed) updates
      ON hudi_fixed.name = updates.name
      WHEN MATCHED THEN
        UPDATE SET *
      WHEN NOT MATCHED
        THEN INSERT *;
      
      -- java.lang.IllegalArgumentException: Merge Key[name] is not Equal to the defined primary key[id] in table hudi_fixed
      -- 	at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.buildMergeIntoConfig(MergeIntoHoodieTableCommand.scala:425)
      -- 	at org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand.run(MergeIntoHoodieTableCommand.scala:146)
      -- 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
      -- 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
      -- 	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
      -- 	at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
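
      The exception above is thrown by buildMergeIntoConfig, which currently requires the merge condition to reference the table's defined primary key. For reference only (not part of the original report), a sketch of the ON clause that the check accepts, assuming the same tables as above:

      -- Sketch, not from the original report: expressing the merge condition on the
      -- declared primary key (id) passes the current merge-key check.
      MERGE INTO hudi_fixed
      USING (select id, name, price, ts from hudi_gh_ext_fixed) updates
      ON hudi_fixed.id = updates.id
      WHEN MATCHED THEN
        UPDATE SET *
      WHEN NOT MATCHED
        THEN INSERT *;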
      


            People

              biyan900116@gmail.com Yann Byron
              codope Sagar Sumit
              Shiyan Xu
              Votes: 0
              Watchers: 4
