  Spark / SPARK-22386 Data Source V2 improvements / SPARK-25280

Add support for USING syntax for DataSourceV2


    Details

    • Type: Sub-task
    • Status: In Progress
    • Priority: Major
    • Resolution: Unresolved
    • Affects Version/s: 2.4.0
    • Fix Version/s: None
    • Component/s: SQL
    • Labels: None

      Description

      
      import org.apache.spark.SparkFunSuite
      import org.apache.spark.sql.SparkSession

      class SourcesTest extends SparkFunSuite {
        val spark = SparkSession.builder().master("local").getOrCreate()

        test("Test DataFrameReader - v1") {
          spark.read.format(classOf[SimpleDataSourceV1].getCanonicalName).load()
        }

        test("Test CREATE TABLE ... USING - v1") {
          spark.sql(s"CREATE TABLE tableA USING ${classOf[SimpleDataSourceV1].getCanonicalName}")
        }

        test("Test DataFrameReader - v2") {
          spark.read.format(classOf[SimpleDataSourceV2].getCanonicalName).load()
        }

        test("Test CREATE TABLE ... USING - v2") {
          // Only this case fails; the v2 source loads fine through the DataFrameReader path.
          spark.sql(s"CREATE TABLE tableB USING ${classOf[SimpleDataSourceV2].getCanonicalName}")
        }
      }
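
      The SimpleDataSourceV1 and SimpleDataSourceV2 classes referenced above are not shown in this report; a rough, minimal sketch of what such classes could look like against the Spark 2.4 APIs (the schema and partition logic here are placeholders, not the actual test classes) is:

      import java.util.{Collections, List => JList}

      import org.apache.spark.rdd.RDD
      import org.apache.spark.sql.{Row, SQLContext}
      import org.apache.spark.sql.catalyst.InternalRow
      import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, TableScan}
      import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, ReadSupport}
      import org.apache.spark.sql.sources.v2.reader.{DataSourceReader, InputPartition}
      import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

      // V1 source: implements RelationProvider, which DataSource.resolveRelation understands,
      // so both the DataFrameReader path and CREATE TABLE ... USING work.
      class SimpleDataSourceV1 extends RelationProvider {
        override def createRelation(
            ctx: SQLContext,
            parameters: Map[String, String]): BaseRelation = new BaseRelation with TableScan {
          override def sqlContext: SQLContext = ctx
          override def schema: StructType = StructType(StructField("i", IntegerType) :: Nil)
          override def buildScan(): RDD[Row] = ctx.sparkContext.parallelize(Seq(Row(0), Row(1)))
        }
      }

      // V2 source: implements only the v2 read API (no RelationProvider), which is enough for
      // spark.read.format(...).load() but not for CREATE TABLE ... USING today.
      class SimpleDataSourceV2 extends DataSourceV2 with ReadSupport {
        override def createReader(options: DataSourceOptions): DataSourceReader = new DataSourceReader {
          override def readSchema(): StructType = StructType(StructField("i", IntegerType) :: Nil)
          override def planInputPartitions(): JList[InputPartition[InternalRow]] =
            Collections.emptyList()
        }
      }

      With classes like these, the v2 DataFrameReader test passes, while the v2 CREATE TABLE ... USING test fails with: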
      
      org.apache.spark.sql.sources.v2.SimpleDataSourceV2 is not a valid Spark SQL Data Source.;
      org.apache.spark.sql.AnalysisException: org.apache.spark.sql.sources.v2.SimpleDataSourceV2 is not a valid Spark SQL Data Source.;
      	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:385)
      	at org.apache.spark.sql.execution.command.CreateDataSourceTableCommand.run(createDataSourceTables.scala:78)
      	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
      	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
      	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
      	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
      	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
      	at org.apache.spark.sql.Dataset$$anonfun$54.apply(Dataset.scala:3296)
      	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
      	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
      	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
      	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3295)
      	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:190)
      	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75)
      	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641)
      	at org.apache.spark.sql.sources.v2.SourcesTest$$anonfun$4.apply(DataSourceV2Suite.scala:45)
      	at org.apache.spark.sql.sources.v2.SourcesTest$$anonfun$4.apply(DataSourceV2Suite.scala:45)
      	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
      	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
      	at org.scalatest.Transformer.apply(Transformer.scala:22)
      	at org.scalatest.Transformer.apply(Transformer.scala:20)
      	at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
      	at org.scalatest.TestSuite$class.withFixture(TestSuite.scala:196)
      	at org.scalatest.FunSuite.withFixture(FunSuite.scala:1560)
      	at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183)
      	at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
      	at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
      	at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289)
      	at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196)
      	at org.scalatest.FunSuite.runTest(FunSuite.scala:1560)
      	at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
      	at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
      	at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:396)
      	at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:384)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
      	at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:379)
      	at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461)
      	at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229)
      	at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
      	at org.scalatest.Suite$class.run(Suite.scala:1147)
      	at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
      	at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
      	at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
      	at org.scalatest.SuperEngine.runImpl(Engine.scala:521)
      	at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233)
      	at org.scalatest.FunSuite.run(FunSuite.scala:1560)
      	at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
      	at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
      	at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1334)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334)
      	at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
      	at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
      	at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500)
      	at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
      	at org.scalatest.tools.Runner$.run(Runner.scala:850)
      	at org.scalatest.tools.Runner.run(Runner.scala)
      	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131)
      	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
      

      Currently, the CREATE TABLE ... USING syntax does not work with Data Source V2 implementations; only the DataFrameReader path resolves them. To let Data Source V1 developers migrate easily, and to let users keep the USING syntax they already rely on with Data Source V1, we should support this case for V2 as well.
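
      To make the goal concrete, a small sketch of the behaviour this issue asks for (tableB and SimpleDataSourceV2 are the names used in the test above; today the first statement throws the AnalysisException shown above):

      // Desired end state: a pure v2 source should be resolvable from SQL, and the resulting
      // catalog table should be readable by name, just as with a v1 source.
      spark.sql(s"CREATE TABLE tableB USING ${classOf[SimpleDataSourceV2].getCanonicalName}")
      spark.table("tableB").show()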

      There is also a discussion thread about this on the dev mailing list: http://apache-spark-developers-list.1001551.n3.nabble.com/DISCUSS-USING-syntax-for-Datasource-V2-td24754.html

            People

            • Assignee: Unassigned
            • Reporter: Hyukjin Kwon (hyukjin.kwon)
            • Votes: 1
            • Watchers: 5
