SPARK-5852

Fail to convert a newly created empty metastore parquet table to a data source parquet table.

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 1.3.0
    • Component/s: SQL
    • Labels: None

    Description

      To reproduce the exception, try

      val rdd = sc.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""))
      sqlContext.jsonRDD(rdd).registerTempTable("jt")
      sqlContext.sql("create table test stored as parquet as select * from jt")
      

      ParquetConversions tries to convert the write path of this CTAS statement to the data source API write path. But the newly created metastore Parquet table contains no data files yet, so ParquetRelation2.readSchema ends up reducing over an empty collection of file schemas, and the following exception is thrown:

      java.lang.UnsupportedOperationException: empty.reduceLeft
      	at scala.collection.TraversableOnce$class.reduceLeft(TraversableOnce.scala:167)
      	at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$reduceLeft(ArrayBuffer.scala:47)
      	at scala.collection.IndexedSeqOptimized$class.reduceLeft(IndexedSeqOptimized.scala:68)
      	at scala.collection.mutable.ArrayBuffer.reduceLeft(ArrayBuffer.scala:47)
      	at scala.collection.TraversableOnce$class.reduce(TraversableOnce.scala:195)
      	at scala.collection.AbstractTraversable.reduce(Traversable.scala:105)
      	at org.apache.spark.sql.parquet.ParquetRelation2$.readSchema(newParquet.scala:633)
      	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache.org$apache$spark$sql$parquet$ParquetRelation2$MetadataCache$$readSchema(newParquet.scala:349)
      	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$refresh$8.apply(newParquet.scala:290)
      	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$refresh$8.apply(newParquet.scala:290)
      	at scala.Option.getOrElse(Option.scala:120)
      	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache.refresh(newParquet.scala:290)
      	at org.apache.spark.sql.parquet.ParquetRelation2.<init>(newParquet.scala:354)
      	at org.apache.spark.sql.hive.HiveMetastoreCatalog.org$apache$spark$sql$hive$HiveMetastoreCatalog$$convertToParquetRelation(HiveMetastoreCatalog.scala:218)
      	at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$$anonfun$apply$4.apply(HiveMetastoreCatalog.scala:440)
      	at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$$anonfun$apply$4.apply(HiveMetastoreCatalog.scala:439)
      	at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:51)
      	at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:60)
      	at scala.collection.mutable.ArrayBuffer.foldLeft(ArrayBuffer.scala:47)
      	at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$.apply(HiveMetastoreCatalog.scala:439)
      	at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$.apply(HiveMetastoreCatalog.scala:416)
      	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
      	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
      	at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
      	at scala.collection.immutable.List.foldLeft(List.scala:84)
      	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
      	at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
      	at scala.collection.immutable.List.foreach(List.scala:318)
      	at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
      	at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:917)
      	at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:917)
      	at org.apache.spark.sql.SQLContext$QueryExecution.withCachedData$lzycompute(SQLContext.scala:918)
      	at org.apache.spark.sql.SQLContext$QueryExecution.withCachedData(SQLContext.scala:918)
      	at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan$lzycompute(SQLContext.scala:919)
      	at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan(SQLContext.scala:919)
      	at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:924)
      	at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:922)
      	at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:928)
      	at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:928)
      	at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:931)
      	at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:931)
      	at org.apache.spark.sql.hive.execution.CreateTableAsSelect.run(CreateTableAsSelect.scala:71)
      	at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:55)
      	at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:55)
      	at org.apache.spark.sql.execution.ExecutedCommand.execute(commands.scala:65)
      	at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:931)
      	at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:931)
      	at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:75)
      	at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:58)
      	at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:35)
      	at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:77)
      	at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)
      	at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)
      	at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
      	at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:30)
      	at $iwC$$iwC$$iwC$$iwC.<init>(<console>:32)
      	at $iwC$$iwC$$iwC.<init>(<console>:34)
      	at $iwC$$iwC.<init>(<console>:36)
      	at $iwC.<init>(<console>:38)
      	at <init>(<console>:40)
      	at .<init>(<console>:44)
      	at .<clinit>(<console>)
      	at .<init>(<console>:7)
      	at .<clinit>(<console>)
      	at $print(<console>)
      	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
      	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
      	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
      	at java.lang.reflect.Method.invoke(Method.java:606)
      	at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
      	at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
      	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
      	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
      	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
      	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:856)
      	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901)
      	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:874)
      	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901)
      	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:813)
      	at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:656)
      	at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:664)
      	at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:669)
      	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:996)
      	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
      	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
      	at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
      	at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:944)
      	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1058)
      	at org.apache.spark.repl.Main$.main(Main.scala:31)
      	at org.apache.spark.repl.Main.main(Main.scala)
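
      The root cause is generic Scala behavior: calling reduce/reduceLeft on an empty collection always throws UnsupportedOperationException. Here, ParquetRelation2.readSchema reduces over the schemas collected from the table's Parquet footers, and a freshly created table has no files to read footers from. A minimal standalone sketch of the failure mode and a guarded alternative (the String schema stand-in and the merge function below are placeholders, not the actual Spark code):

      // Stand-in for the per-file schemas collected from Parquet footers;
      // a newly created table has no files, so the collection is empty.
      val footerSchemas = Seq.empty[String]

      // Mirrors the failing call in readSchema:
      // footerSchemas.reduce(_ + _)   // throws UnsupportedOperationException: empty.reduceLeft

      // A guarded variant returns None instead of throwing when no files exist yet:
      val merged: Option[String] = footerSchemas.reduceLeftOption(_ + _)
      println(merged)                  // prints "None" for an empty, newly created table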
      

    People

      Assignee: Yin Huai (yhuai)
      Reporter: Yin Huai (yhuai)
