CarbonData / CARBONDATA-3248

Spark carbon file format can't read transactional table segment path


Details

    • Type: Improvement
    • Status: Open
    • Priority: Major
    • Resolution: Unresolved

    Description

      Code:

              val tableNameForAllTypeOriginal = "alluxio_table_all_type_original"
              val tableNameForAllType = "alluxio_table_all_type"
              try {
      
                  sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal)
                  sql(
                      s"""create table $tableNameForAllTypeOriginal(
                         | smallIntField SMALLINT,
                         | intField INT,
                         | bigIntField BIGINT,
                         | floatField FLOAT,
                         | doubleField DOUBLE,
                         | decimalField DECIMAL(25, 4),
                         | timestampField TIMESTAMP,
                         | dateField DATE,
                         | stringField STRING,
                         | varcharField VARCHAR(10),
                         | charField CHAR(10),
                         | arrayField ARRAY<string>,
                         | structField STRUCT<col1:STRING, col2:STRING, col3:STRING>,
                         | booleanField BOOLEAN)
                         | using carbondata
                   """.stripMargin)
      
                  val path = localAlluxioCluster.getMasterURI + allDataTypeRemote
      
                  try {
                      sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE $tableNameForAllTypeOriginal " +
                              "options('COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'=':')")
      
                      sql(s"select * from $tableNameForAllTypeOriginal").show()
                    assert(false) // unreachable: the select above is expected to throw
                    // Reading like this isn't supported yet; TODO: analyse whether it can be supported
                  } catch {
                      case e: Exception =>
      //                    e.printStackTrace()
                          assert(true)
                  } finally {
                      sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal)
                  }
      
                  println("\n\n\n\n")
                  sql("DROP TABLE IF EXISTS " + tableNameForAllType)
                  sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal)
                  sql(
                      s"""create table $tableNameForAllTypeOriginal(
                         | smallIntField SMALLINT,
                         | intField INT,
                         | bigIntField BIGINT,
                         | floatField FLOAT,
                         | doubleField DOUBLE,
                         | decimalField DECIMAL(25, 4),
                         | timestampField TIMESTAMP,
                         | dateField DATE,
                         | stringField STRING,
                         | varcharField VARCHAR(10),
                         | charField CHAR(10),
                         | arrayField ARRAY<string>,
                         | structField STRUCT<col1:STRING, col2:STRING, col3:STRING>,
                         | booleanField BOOLEAN)
                         | stored by 'carbondata'
                   """.stripMargin)
      
      
                  sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE $tableNameForAllTypeOriginal " +
                          "options('COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'=':')")
      
                  fileSystemShell.run("ls", carbonAndAlluxio + "/default")
                  val externalTablePath = localAlluxioCluster.getMasterURI + carbonAndAlluxio + "/default/" + tableNameForAllTypeOriginal + "/Fact/Part0/Segment_0"
      
                  fileSystemShell.run("ls",externalTablePath)
                  sql(s"CREATE TABLE $tableNameForAllType using carbon" +
                          s" LOCATION '$externalTablePath'")

            } finally {
                // closes the outer try opened at the top of the snippet;
                // cleanup mirrors the inner block above
                sql("DROP TABLE IF EXISTS " + tableNameForAllType)
                sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal)
            }

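      In short: the failing step is pointing the Spark carbon file format ("using carbon") at a single
      segment directory of a transactional table. A minimal sketch of the same pattern without the
      Alluxio fixtures (the table names and warehouse path below are hypothetical, for illustration only):

              // Create and load a transactional table, then map the Spark carbon
              // file format directly onto one of its segment directories.
              sql("CREATE TABLE src_tbl(intField INT, stringField STRING) STORED BY 'carbondata'")
              sql("INSERT INTO src_tbl SELECT 1, 'a'")

              // Transactional tables lay segments out as <tablePath>/Fact/Part0/Segment_<n>
              val segmentPath = "/tmp/warehouse/src_tbl/Fact/Part0/Segment_0"

              // The CREATE succeeds, but the query fails at read time with
              // "All the files doesn't have same schema."
              sql(s"CREATE TABLE seg_tbl USING carbon LOCATION '$segmentPath'")
              sql("SELECT * FROM seg_tbl").show()
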
      Exception:

      2019-01-14 15:09:10 AUDIT audit:93 - {"time":"January 13, 2019 11:09:10 PM PST","username":"xubo","opName":"CREATE TABLE","opId":"15248775671301","opStatus":"SUCCESS","opTime":"140 ms","table":"default.alluxio_table_all_type_original","extraInfo":{"bad_record_path":"","local_dictionary_enable":"true","external":"false","sort_columns":"","comment":""}}
      2019-01-14 15:09:10 AUDIT audit:72 - {"time":"January 13, 2019 11:09:10 PM PST","username":"xubo","opName":"LOAD DATA","opId":"15248921660444","opStatus":"START"}
      2019-01-14 15:09:10 AUDIT audit:93 - {"time":"January 13, 2019 11:09:10 PM PST","username":"xubo","opName":"LOAD DATA","opId":"15248921660444","opStatus":"SUCCESS","opTime":"511 ms","table":"default.alluxio_table_all_type_original","extraInfo":{"SegmentId":"0","DataSize":"5.07KB","IndexSize":"2.48KB"}}
      drwxr-xr-x xubo           staff                        3       PERSISTED 01-13-2019 23:09:10:129  DIR /CarbonAndAlluxio/default/alluxio_table_all_type_original
      -rw-r--r-- xubo           staff                     2588       PERSISTED 01-13-2019 23:09:10:498 100% /CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/0_1547449750488.carbonindexmerge
      -rw-r--r-- xubo           staff                     5187       PERSISTED 01-13-2019 23:09:10:303   0% /CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/part-0-0_batchno0-0-0-1547449750082.carbondata
      2019-01-14 15:09:10 ERROR AbstractQueryExecutor:280 - Schema of alluxio://xubodembp:52203/CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/part-0-0_batchno0-0-0-1547449750082.carbondata doesn't match with the table's schema
      2019-01-14 15:09:10 ERROR Executor:91 - Exception in task 0.0 in stage 5.0 (TID 5)
      java.io.IOException: All the files doesn't have same schema. Unsupported operation on nonTransactional table. Check logs.
      	at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:281)
      	at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234)
      	at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
      	at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:406)
      	at org.apache.carbondata.core.scan.executor.impl.DetailQueryExecutor.execute(DetailQueryExecutor.java:47)
      	at org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:112)
      	at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:427)
      	at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:381)
      	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
      	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:174)
      	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105)
      	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
      	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
      	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)
      	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:234)
      	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:228)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
      	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
      	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
      	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
      	at org.apache.spark.scheduler.Task.run(Task.scala:108)
      	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
      	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
      	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
      	at java.lang.Thread.run(Thread.java:748)
      2019-01-14 15:09:10 ERROR TaskSetManager:70 - Task 0 in stage 5.0 failed 1 times; aborting job
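
      Per the log above, the file format path treats the LOCATION as a non-transactional store
      ("Unsupported operation on nonTransactional table"), and the schema it infers from the segment's
      files is rejected against the table's schema. A hedged workaround sketch, reusing the fixtures
      from the repro above: point an external table at the table path instead of one segment directory,
      so segments are resolved through the table's own metadata. (Newer releases spell the clause
      STORED AS carbondata; whether this sidesteps the limitation here is exactly what this ticket
      should confirm.)

              // Sketch, not verified: external table over the *table* path rather
              // than a single segment directory. tablePath is derived from the
              // same fixtures used in the repro above.
              sql("DROP TABLE IF EXISTS " + tableNameForAllType)
              val tablePath = localAlluxioCluster.getMasterURI + carbonAndAlluxio +
                      "/default/" + tableNameForAllTypeOriginal
              sql(s"CREATE EXTERNAL TABLE $tableNameForAllType STORED BY 'carbondata'" +
                      s" LOCATION '$tablePath'")
              sql(s"SELECT * FROM $tableNameForAllType").show()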
      

People

    Assignee: Unassigned
    Reporter: xubo245 (Bo Xu)
