Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-15205

Codegen can compile the same source code more than twice

Details

    • Improvement
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • 2.0.0
    • 1.6.2, 2.0.0
    • SQL
    • None

    Description

      Sometimes the code generator produces pieces of generated code that are identical except for their comments.

      One example is here.

      val df = sc.parallelize(1 to 10).toDF
      df.selectExpr("value + 1").show // query1
      df.selectExpr("value + 2").show // query2
      

      The following is one piece of the code generated when query1 above is executed.

      /* 001 */ 
      /* 002 */ public java.lang.Object generate(Object[] references) {
      /* 003 */   return new SpecificSafeProjection(references);
      /* 004 */ }
      /* 005 */ 
      /* 006 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
      /* 007 */   
      /* 008 */   private Object[] references;
      /* 009 */   private MutableRow mutableRow;
      /* 010 */   private Object[] values;
      /* 011 */   private org.apache.spark.sql.types.StructType schema;
      /* 012 */   
      /* 013 */   
      /* 014 */   public SpecificSafeProjection(Object[] references) {
      /* 015 */     this.references = references;
      /* 016 */     mutableRow = (MutableRow) references[references.length - 1];
      /* 017 */     
      /* 018 */     this.schema = (org.apache.spark.sql.types.StructType) references[0];
      /* 019 */   }
      /* 020 */   
      /* 021 */   public java.lang.Object apply(java.lang.Object _i) {
      /* 022 */     InternalRow i = (InternalRow) _i;
      /* 023 */     /* createexternalrow(if (isnull(input[0, int])) null else input[0, int], StructField((value + 1),IntegerType,false)) */
      /* 024 */     values = new Object[1];
      /* 025 */     /* if (isnull(input[0, int])) null else input[0, int] */
      /* 026 */     /* isnull(input[0, int]) */
      /* 027 */     /* input[0, int] */
      /* 028 */     int value3 = i.getInt(0);
      /* 029 */     boolean isNull1 = false;
      /* 030 */     int value1 = -1;
      /* 031 */     if (!false && false) {
      /* 032 */       /* null */
      /* 033 */       final int value4 = -1;
      /* 034 */       isNull1 = true;
      /* 035 */       value1 = value4;
      /* 036 */     } else {
      /* 037 */       /* input[0, int] */
      /* 038 */       int value5 = i.getInt(0);
      /* 039 */       isNull1 = false;
      /* 040 */       value1 = value5;
      /* 041 */     }
      /* 042 */     if (isNull1) {
      /* 043 */       values[0] = null;
      /* 044 */     } else {
      /* 045 */       values[0] = value1;
      /* 046 */     }
      /* 047 */     
      /* 048 */     final org.apache.spark.sql.Row value = new org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, this.schema);
      /* 049 */     if (false) {
      /* 050 */       mutableRow.setNullAt(0);
      /* 051 */     } else {
      /* 052 */       
      /* 053 */       mutableRow.update(0, value);
      /* 054 */     }
      /* 055 */     
      /* 056 */     return mutableRow;
      /* 057 */   }
      /* 058 */ }
      /* 059 */ 
      

      On the other hand, the following code is for query2.

      /* 001 */ 
      /* 002 */ public java.lang.Object generate(Object[] references) {
      /* 003 */   return new SpecificSafeProjection(references);
      /* 004 */ }
      /* 005 */ 
      /* 006 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
      /* 007 */   
      /* 008 */   private Object[] references;
      /* 009 */   private MutableRow mutableRow;
      /* 010 */   private Object[] values;
      /* 011 */   private org.apache.spark.sql.types.StructType schema;
      /* 012 */   
      /* 013 */   
      /* 014 */   public SpecificSafeProjection(Object[] references) {
      /* 015 */     this.references = references;
      /* 016 */     mutableRow = (MutableRow) references[references.length - 1];
      /* 017 */     
      /* 018 */     this.schema = (org.apache.spark.sql.types.StructType) references[0];
      /* 019 */   }
      /* 020 */   
      /* 021 */   public java.lang.Object apply(java.lang.Object _i) {
      /* 022 */     InternalRow i = (InternalRow) _i;
      /* 023 */     /* createexternalrow(if (isnull(input[0, int])) null else input[0, int], StructField((value + 2),IntegerType,false)) */
      /* 024 */     values = new Object[1];
      /* 025 */     /* if (isnull(input[0, int])) null else input[0, int] */
      /* 026 */     /* isnull(input[0, int]) */
      /* 027 */     /* input[0, int] */
      /* 028 */     int value3 = i.getInt(0);
      /* 029 */     boolean isNull1 = false;
      /* 030 */     int value1 = -1;
      /* 031 */     if (!false && false) {
      /* 032 */       /* null */
      /* 033 */       final int value4 = -1;
      /* 034 */       isNull1 = true;
      /* 035 */       value1 = value4;
      /* 036 */     } else {
      /* 037 */       /* input[0, int] */
      /* 038 */       int value5 = i.getInt(0);
      /* 039 */       isNull1 = false;
      /* 040 */       value1 = value5;
      /* 041 */     }
      /* 042 */     if (isNull1) {
      /* 043 */       values[0] = null;
      /* 044 */     } else {
      /* 045 */       values[0] = value1;
      /* 046 */     }
      /* 047 */     
      /* 048 */     final org.apache.spark.sql.Row value = new org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, this.schema);
      /* 049 */     if (false) {
      /* 050 */       mutableRow.setNullAt(0);
      /* 051 */     } else {
      /* 052 */       
      /* 053 */       mutableRow.update(0, value);
      /* 054 */     }
      /* 055 */     
      /* 056 */     return mutableRow;
      /* 057 */   }
      /* 058 */ }
      /* 059 */ 
      

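      As you can see, the two pieces of generated code are essentially identical (they differ only in the comment on line 023, which embeds the expression text), but they are not equal as String objects, so each one is compiled separately.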

      Attachments

        Activity

          Transition Time In Source Status Execution Times
          Apache Spark made transition -
          Open In Progress
          22m 26s 1
          Kousuke Saruta made transition -
          In Progress Resolved
          13d 6h 3m 1

          People

            sarutak Kousuke Saruta
            sarutak Kousuke Saruta
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: