Apache Hudi / HUDI-1064

Trim the hoodie table name value (the value is not trimmed, so the check is not robust)


Details

    • Type: Improvement
    • Status: Closed
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 0.6.0
    • Component/s: None

    Description

      Step 1: Insert data into Hudi. Note the trailing space in the table-name values:

      val writer = dataFrame.write.format("hudi")
      writer.option("hoodie.datasource.hive_sync.table", "hudi_class_test_2 ") // trailing space in the Hive sync table name
        .option("hoodie.datasource.hive_sync.database", "default")
        .option("hoodie.datasource.hive_sync.partition_fields", "member_id")
        .option("hoodie.datasource.hive_sync.enable", true)
        .option("hoodie.datasource.hive_sync.jdbcurl", "jdbc:hive2://XX.XX.XX.XXX:10000")
        .option("hoodie.memory.merge.max.size", "2004857600000")
        .option("hoodie.datasource.hive_sync.partition_extractor_class", "org.apache.hudi.hive.MultiPartKeysValueExtractor")
        .option("hoodie.datasource.write.partitionpath.field", "member_id")
        .option("hoodie.datasource.write.recordkey.field", "member_id")
        .option("hoodie.datasource.write.precombine.field", "modify_time")
        .option("hoodie.table.name", "hudi_class_test_2 ") // trailing space in hoodie.table.name as well
        .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
        .option("hoodie.datasource.write.operation", "insert")
        .mode(SaveMode.Append)
        .save("/tmp/12037701/hivehudi/hudi_class_test_2")
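      The table-name values above end with a space. Which side of Hudi's internal comparison keeps the space is not shown in this report, but the underlying string problem can be sketched in plain Scala (the names and object below are illustrative only, not Hudi code):

      // Standalone sketch: two names that differ only by a trailing space compare
      // as different strings, which is enough to break an exact-match name check.
      object TableNameTrimDemo extends App {
        val configuredName = "hudi_class_test_2 " // value passed in the options above
        val existingName   = "hudi_class_test_2"  // the same name without the trailing space
        println(configuredName == existingName)      // false
        println(configuredName.trim == existingName) // true once the value is trimmed
      }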

      Step 2: Write to the same base path a second time; the save fails with the exception below.

      // code placeholder
      

      org.apache.hudi.exception.HoodieException: hoodie table with name hudi_class_test_2 already exist at /tmp/12037701/hivehudi/hudi_class_test_2
        at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:89)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:108)
        at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:46)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
        at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131)
        at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
        at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:676)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:78)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
        at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
        at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:290)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
        at com.ly.test.SparkHudiTest.insertPartition(SparkHudiTest.scala:148)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
        at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
        at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
        at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
        at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
        at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
        at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
        at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
        at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
        at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229)
        at org.junit.runners.ParentRunner.run(ParentRunner.java:309)
        at org.junit.runner.JUnitCore.run(JUnitCore.java:160)
        at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
        at com.intellij.rt.execution.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:47)
        at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:242)
        at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:70)
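      The improvement asked for here is to trim the table-name value so this pre-write check is robust to stray whitespace. A minimal sketch of such a check, assuming the existing name is the one read back from the table at the base path; the method and parameter names are illustrative, not the actual HoodieSparkSqlWriter code:

      // Sketch only: trim both sides before the equality check that currently throws.
      import org.apache.hudi.exception.HoodieException

      def validateTableName(configuredName: String, existingName: String, basePath: String): Unit = {
        if (configuredName.trim != existingName.trim) {
          throw new HoodieException(
            s"hoodie table with name ${existingName.trim} already exist at $basePath")
        }
      }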

       

      Step 3: Verify in Hive that the table already exists:

      hive> show tables;
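      To see the exact name stored with the table (and whether it kept the trailing space), one can also read hoodie.properties under the .hoodie directory of the base path. A sketch in Scala, assuming the Hadoop client can reach the path and that the file records the configured hoodie.table.name; the brackets in the output make any trailing whitespace visible:

      import java.util.Properties
      import org.apache.hadoop.conf.Configuration
      import org.apache.hadoop.fs.{FileSystem, Path}

      val propsPath = new Path("/tmp/12037701/hivehudi/hudi_class_test_2/.hoodie/hoodie.properties")
      val fs = FileSystem.get(new Configuration())
      val props = new Properties()
      val in = fs.open(propsPath)
      try props.load(in) finally in.close()
      println("[" + props.getProperty("hoodie.table.name") + "]")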

       

       

          People

            Assignee: linshan linshan-ma
            Reporter: linshan linshan-ma
            Votes: 0
            Watchers: 1
