Uploaded image for project: 'Parquet'
  1. Parquet
  2. PARQUET-1488

UserDefinedPredicate throws NPE

    XMLWordPrintableJSON

Details

    Description

      A NullPointerException is thrown when using a UserDefinedPredicate after upgrading Parquet to 1.11.0.

      The UserDefinedPredicate is:

      // Anonymous predicate over Binary column values that implements a
      // "starts with prefix" filter. The prefix comes from `v`, which is
      // defined outside this snippet.
      new UserDefinedPredicate[Binary] with Serializable {                                  
        // The prefix as a Binary, and its length; `size` caps how much of each
        // statistic is compared below.
        private val strToBinary = Binary.fromReusedByteArray(v.getBytes)                    
        private val size = strToBinary.length                                               
                                                                                            
        // Returns true when the statistics' max, cut to at most `size` elements,
        // compares below the prefix, or the min, cut the same way, compares above
        // it. `slice(0, n)` is taken here to yield the leading n bytes.
        // NOTE(review): getMax/getMin are dereferenced without a null check;
        // whether they can be null here is not visible in this snippet.
        override def canDrop(statistics: Statistics[Binary]): Boolean = {                   
          val comparator = PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR   
          val max = statistics.getMax                                                       
          val min = statistics.getMin                                                       
          comparator.compare(max.slice(0, math.min(size, max.length)), strToBinary) < 0 ||  
            comparator.compare(min.slice(0, math.min(size, min.length)), strToBinary) > 0   
        }                                                                                   
                                                                                            
        // Returns true only when both the cut max and the cut min compare equal
        // to the prefix.
        override def inverseCanDrop(statistics: Statistics[Binary]): Boolean = {            
          val comparator = PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR   
          val max = statistics.getMax                                                       
          val min = statistics.getMin                                                       
          comparator.compare(max.slice(0, math.min(size, max.length)), strToBinary) == 0 && 
            comparator.compare(min.slice(0, math.min(size, min.length)), strToBinary) == 0  
        }                                                                                   
                                                                                            
        // Per-value check: true when the value's bytes start with the prefix bytes.
        // NOTE(review): the stack trace reported with this snippet has its top
        // frames in keep(), called from ColumnIndexFilter.visit. Presumably
        // `value` is null on that call path, so value.getBytes throws - confirm.
        override def keep(value: Binary): Boolean = {                                       
          UTF8String.fromBytes(value.getBytes).startsWith(                                  
            UTF8String.fromBytes(strToBinary.getBytes))                                     
        }                                                                                   
      }                                                                                     
      

      The stack trace is:

      java.lang.NullPointerException
      	at org.apache.spark.sql.execution.datasources.parquet.ParquetFilters$$anon$1.keep(ParquetFilters.scala:573)
      	at org.apache.spark.sql.execution.datasources.parquet.ParquetFilters$$anon$1.keep(ParquetFilters.scala:552)
      	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:152)
      	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
      	at org.apache.parquet.filter2.predicate.Operators$UserDefined.accept(Operators.java:377)
      	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:181)
      	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
      	at org.apache.parquet.filter2.predicate.Operators$And.accept(Operators.java:309)
      	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
      	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
      

      Attachments

        Issue Links

          Activity

            People

              gszadovszky Gabor Szadovszky
              yumwang Yuming Wang
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: