Uploaded image for project: 'CarbonData'
  1. CarbonData
  2. CARBONDATA-1630

Loading data into a Hive table fails

    XML | Word | Printable | JSON

Details

    • Bug
    • Status: Closed
    • Critical
    • Resolution: Fixed
    • 1.2.0
    • None
    • hive-integration
    • None
    • hive.version: 1.1.0-cdh5.10.0
      hadoop.version: 2.6.0-cdh5.10.0

    Description

      import org.apache.spark.sql.SparkSession
      import org.apache.spark.sql.CarbonSession._
      val rootPath = "hdfs://namenodeb:8020/app/carbondata"
      val storeLocation = s"$rootPath/store"
      val warehouse = s"$rootPath/warehouse"
      val metastoredb = s"$rootPath/metastore_db"
      val carbon = SparkSession.builder().enableHiveSupport().config("spark.sql.warehouse.dir", warehouse).config(org.apache.carbondata.core.constants.CarbonCommonConstants.STORE_LOCATION, storeLocation).getOrCreateCarbonSession(storeLocation, metastoredb)
      import org.apache.spark.sql.types._
      import org.apache.spark.sql.Row
      val rdd = sc.textFile("/data/home/hadoop/test.txt");
      val schemaString = "id name city"
      val fields = schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true))
      val schema = StructType(fields)
      val rowRDD = rdd.map(_.split(",")).map(attributes => Row(attributes(0),attributes(1),attributes(2)))
      val peopleDF = spark.createDataFrame(rowRDD, schema)
      peopleDF.createOrReplaceTempView("tmp_table")
      spark.sql("insert into target_table SELECT * FROM tmp_table")

      java.lang.RuntimeException: Failed to add entry in table status for default.target_table
      at scala.sys.package$.error(package.scala:27)
      at org.apache.carbondata.spark.util.CommonUtil$.readAndUpdateLoadProgressInTableMeta(CommonUtil.scala:533)
      at org.apache.spark.sql.execution.command.LoadTable.processData(carbonTableSchema.scala:928)
      at org.apache.spark.sql.execution.command.LoadTable.run(carbonTableSchema.scala:754)
      at org.apache.spark.sql.execution.command.LoadTableByInsert.processData(carbonTableSchema.scala:651)
      at org.apache.spark.sql.execution.command.LoadTableByInsert.run(carbonTableSchema.scala:637)
      at org.apache.spark.sql.CarbonDatasourceHadoopRelation.insert(CarbonDatasourceHadoopRelation.scala:98)
      at org.apache.spark.sql.execution.datasources.InsertIntoDataSourceCommand.run(InsertIntoDataSourceCommand.scala:43)
      at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
      at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
      at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
      at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
      at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
      at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
      at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
      at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
      at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
      at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
      at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
      at org.apache.spark.sql.Dataset.<init>(Dataset.scala:185)
      at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
      at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
      ... 52 elided

      Attachments

        Activity

          People

            Unassigned Unassigned
            xujie19852006 xujie
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: