Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-10113

Support for unsigned Parquet logical types

    XMLWordPrintableJSON

Details

    • Improvement
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • 1.5.0
    • 1.6.0
    • SQL
    • None

    Description

      Add support for unsigned Parquet logical types UINT_16, UINT_32 and UINT_64.

      org.apache.spark.sql.AnalysisException: Illegal Parquet type: INT64 (UINT_64);
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.illegalType$1(CatalystSchemaConverter.scala:130)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertPrimitiveField(CatalystSchemaConverter.scala:169)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:115)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:742)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
      	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
      	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200)
      	at scala.Option.fold(Option.scala:158)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertGroupField(CatalystSchemaConverter.scala:200)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:116)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:742)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
      	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
      	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convert(CatalystSchemaConverter.scala:91)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734)
      	at scala.Option.getOrElse(Option.scala:121)
      	at org.apache.spark.sql.parquet.ParquetRelation$.readSchemaFromFooter(ParquetRelation.scala:734)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:714)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:713)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:742)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
      	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
      	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:713)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:692)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
      	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
      	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
      	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
      	at org.apache.spark.scheduler.Task.run(Task.scala:88)
      	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
      	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      	at java.lang.Thread.run(Thread.java:745)
      15/08/19 07:10:26 WARN TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, localhost): org.apache.spark.sql.AnalysisException: Illegal Parquet type: INT64 (UINT_64);
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.illegalType$1(CatalystSchemaConverter.scala:130)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertPrimitiveField(CatalystSchemaConverter.scala:169)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:115)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:742)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
      	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
      	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$convertGroupField$1.apply(CatalystSchemaConverter.scala:200)
      	at scala.Option.fold(Option.scala:158)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertGroupField(CatalystSchemaConverter.scala:200)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convertField(CatalystSchemaConverter.scala:116)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:97)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter$$anonfun$2.apply(CatalystSchemaConverter.scala:94)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:742)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
      	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
      	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.org$apache$spark$sql$parquet$CatalystSchemaConverter$$convert(CatalystSchemaConverter.scala:94)
      	at org.apache.spark.sql.parquet.CatalystSchemaConverter.convert(CatalystSchemaConverter.scala:91)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$readSchemaFromFooter$2.apply(ParquetRelation.scala:734)
      	at scala.Option.getOrElse(Option.scala:121)
      	at org.apache.spark.sql.parquet.ParquetRelation$.readSchemaFromFooter(ParquetRelation.scala:734)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:714)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28$$anonfun$apply$8.apply(ParquetRelation.scala:713)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:742)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
      	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      	at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
      	at scala.collection.AbstractTraversable.map(Traversable.scala:104)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:713)
      	at org.apache.spark.sql.parquet.ParquetRelation$$anonfun$28.apply(ParquetRelation.scala:692)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
      	at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:706)
      	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
      	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
      	at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
      	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
      	at org.apache.spark.scheduler.Task.run(Task.scala:88)
      	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
      	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      	at java.lang.Thread.run(Thread.java:745)
      

      Attachments

        Issue Links

          Activity

            People

              gurwls223 Hyukjin Kwon
              jwthomas113 Jordan Thomas
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: