Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-15205

Codegen can compile the same source code more than once

    Details

    • Type: Improvement
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 2.0.0
    • Fix Version/s: 1.6.2, 2.0.0
    • Component/s: SQL
    • Labels:
      None
    • Target Version/s:

      Description

      Sometimes codegen produces pieces of generated code that are identical except for their comments.

      Here is one example.

      val df = sc.parallelize(1 to 10).toDF
      df.selectExpr("value + 1").show // query1
      df.selectExpr("value + 2").show // query2
      

      The following is one of the pieces of code generated when query1 above is executed.

      /* 001 */ 
      /* 002 */ public java.lang.Object generate(Object[] references) {
      /* 003 */   return new SpecificSafeProjection(references);
      /* 004 */ }
      /* 005 */ 
      /* 006 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
      /* 007 */   
      /* 008 */   private Object[] references;
      /* 009 */   private MutableRow mutableRow;
      /* 010 */   private Object[] values;
      /* 011 */   private org.apache.spark.sql.types.StructType schema;
      /* 012 */   
      /* 013 */   
      /* 014 */   public SpecificSafeProjection(Object[] references) {
      /* 015 */     this.references = references;
      /* 016 */     mutableRow = (MutableRow) references[references.length - 1];
      /* 017 */     
      /* 018 */     this.schema = (org.apache.spark.sql.types.StructType) references[0];
      /* 019 */   }
      /* 020 */   
      /* 021 */   public java.lang.Object apply(java.lang.Object _i) {
      /* 022 */     InternalRow i = (InternalRow) _i;
      /* 023 */     /* createexternalrow(if (isnull(input[0, int])) null else input[0, int], StructField((value + 1),IntegerType,false)) */
      /* 024 */     values = new Object[1];
      /* 025 */     /* if (isnull(input[0, int])) null else input[0, int] */
      /* 026 */     /* isnull(input[0, int]) */
      /* 027 */     /* input[0, int] */
      /* 028 */     int value3 = i.getInt(0);
      /* 029 */     boolean isNull1 = false;
      /* 030 */     int value1 = -1;
      /* 031 */     if (!false && false) {
      /* 032 */       /* null */
      /* 033 */       final int value4 = -1;
      /* 034 */       isNull1 = true;
      /* 035 */       value1 = value4;
      /* 036 */     } else {
      /* 037 */       /* input[0, int] */
      /* 038 */       int value5 = i.getInt(0);
      /* 039 */       isNull1 = false;
      /* 040 */       value1 = value5;
      /* 041 */     }
      /* 042 */     if (isNull1) {
      /* 043 */       values[0] = null;
      /* 044 */     } else {
      /* 045 */       values[0] = value1;
      /* 046 */     }
      /* 047 */     
      /* 048 */     final org.apache.spark.sql.Row value = new org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, this.schema);
      /* 049 */     if (false) {
      /* 050 */       mutableRow.setNullAt(0);
      /* 051 */     } else {
      /* 052 */       
      /* 053 */       mutableRow.update(0, value);
      /* 054 */     }
      /* 055 */     
      /* 056 */     return mutableRow;
      /* 057 */   }
      /* 058 */ }
      /* 059 */ 
      

      On the other hand, the following code is for query2.

      /* 001 */ 
      /* 002 */ public java.lang.Object generate(Object[] references) {
      /* 003 */   return new SpecificSafeProjection(references);
      /* 004 */ }
      /* 005 */ 
      /* 006 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
      /* 007 */   
      /* 008 */   private Object[] references;
      /* 009 */   private MutableRow mutableRow;
      /* 010 */   private Object[] values;
      /* 011 */   private org.apache.spark.sql.types.StructType schema;
      /* 012 */   
      /* 013 */   
      /* 014 */   public SpecificSafeProjection(Object[] references) {
      /* 015 */     this.references = references;
      /* 016 */     mutableRow = (MutableRow) references[references.length - 1];
      /* 017 */     
      /* 018 */     this.schema = (org.apache.spark.sql.types.StructType) references[0];
      /* 019 */   }
      /* 020 */   
      /* 021 */   public java.lang.Object apply(java.lang.Object _i) {
      /* 022 */     InternalRow i = (InternalRow) _i;
      /* 023 */     /* createexternalrow(if (isnull(input[0, int])) null else input[0, int], StructField((value + 2),IntegerType,false)) */
      /* 024 */     values = new Object[1];
      /* 025 */     /* if (isnull(input[0, int])) null else input[0, int] */
      /* 026 */     /* isnull(input[0, int]) */
      /* 027 */     /* input[0, int] */
      /* 028 */     int value3 = i.getInt(0);
      /* 029 */     boolean isNull1 = false;
      /* 030 */     int value1 = -1;
      /* 031 */     if (!false && false) {
      /* 032 */       /* null */
      /* 033 */       final int value4 = -1;
      /* 034 */       isNull1 = true;
      /* 035 */       value1 = value4;
      /* 036 */     } else {
      /* 037 */       /* input[0, int] */
      /* 038 */       int value5 = i.getInt(0);
      /* 039 */       isNull1 = false;
      /* 040 */       value1 = value5;
      /* 041 */     }
      /* 042 */     if (isNull1) {
      /* 043 */       values[0] = null;
      /* 044 */     } else {
      /* 045 */       values[0] = value1;
      /* 046 */     }
      /* 047 */     
      /* 048 */     final org.apache.spark.sql.Row value = new org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, this.schema);
      /* 049 */     if (false) {
      /* 050 */       mutableRow.setNullAt(0);
      /* 051 */     } else {
      /* 052 */       
      /* 053 */       mutableRow.update(0, value);
      /* 054 */     }
      /* 055 */     
      /* 056 */     return mutableRow;
      /* 057 */   }
      /* 058 */ }
      /* 059 */ 
      

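      As you can see, the two pieces of generated code are essentially identical: they differ only in the comment on generated line 023 (`value + 1` vs. `value + 2`). However, they are not equal as String objects, so each one is compiled separately.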

        Attachments

          Activity

            People

            • Assignee:
              sarutak Kousuke Saruta
              Reporter:
              sarutak Kousuke Saruta
            • Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved: