Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-25974

Optimize the generated bytecode for ordering based on the given order

    XMLWordPrintableJSON

Details

    • Improvement
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • 2.4.1
    • 3.0.0
    • SQL
    • None

    Description

      Currently, when generating the code for ordering based on the given order, too many variables and assignment statements are generated, which is unnecessary. This PR eliminates the redundant variables, thereby optimizing the bytecode generated for ordering based on the given order.
      For example, given the following query, the generated code before and after this PR is shown below:

      spark.range(1).selectExpr(
           "id as key",
           "(id & 1023) as value1",
      "cast(id & 1023 as double) as value2",
      "cast(id & 1023 as int) as value3"
      ).select("value1", "value2", "value3").orderBy("value1", "value2").collect()

      Before PR (codegen size: 178)

      Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, double, false] ASC NULLS FIRST:
      /* 001 */ public SpecificOrdering generate(Object[] references) {
      /* 002 */   return new SpecificOrdering(references);
      /* 003 */ }
      /* 004 */
      /* 005 */ class SpecificOrdering extends org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
      /* 006 */
      /* 007 */   private Object[] references;
      /* 008 */
      /* 009 */
      /* 010 */   public SpecificOrdering(Object[] references) { /* 011 */     this.references = references; /* 012 */ /* 013 */   }
      /* 014 */
      /* 015 */   public int compare(InternalRow a, InternalRow b) {
      /* 016 */
      /* 017 */     InternalRow i = null;
      /* 018 */
      /* 019 */     i = a;
      /* 020 */     boolean isNullA_0;
      /* 021 */     long primitiveA_0;
      /* 022 */     { /* 023 */       long value_0 = i.getLong(0); /* 024 */       isNullA_0 = false; /* 025 */       primitiveA_0 = value_0; /* 026 */     }
      /* 027 */     i = b;
      /* 028 */     boolean isNullB_0;
      /* 029 */     long primitiveB_0;
      /* 030 */     { /* 031 */       long value_0 = i.getLong(0); /* 032 */       isNullB_0 = false; /* 033 */       primitiveB_0 = value_0; /* 034 */     }
      /* 035 */     if (isNullA_0 && isNullB_0) {
      /* 036 */       // Nothing
      /* 037 */     } else if (isNullA_0) {
      /* 038 */       return -1;
      /* 039 */     } else if (isNullB_0) {
      /* 040 */       return 1;
      /* 041 */     } else {
      /* 042 */       int comp = (primitiveA_0 > primitiveB_0 ? 1 : primitiveA_0 < primitiveB_0 ? -1 : 0);
      /* 043 */       if (comp != 0) { /* 044 */         return comp; /* 045 */       }
      /* 046 */     }
      /* 047 */
      /* 048 */     i = a;
      /* 049 */     boolean isNullA_1;
      /* 050 */     double primitiveA_1;
      /* 051 */     { /* 052 */       double value_1 = i.getDouble(1); /* 053 */       isNullA_1 = false; /* 054 */       primitiveA_1 = value_1; /* 055 */     }
      /* 056 */     i = b;
      /* 057 */     boolean isNullB_1;
      /* 058 */     double primitiveB_1;
      /* 059 */     { /* 060 */       double value_1 = i.getDouble(1); /* 061 */       isNullB_1 = false; /* 062 */       primitiveB_1 = value_1; /* 063 */     }
      /* 064 */     if (isNullA_1 && isNullB_1) {
      /* 065 */       // Nothing
      /* 066 */     } else if (isNullA_1) {
      /* 067 */       return -1;
      /* 068 */     } else if (isNullB_1) {
      /* 069 */       return 1;
      /* 070 */     } else {
      /* 071 */       int comp = org.apache.spark.util.Utils.nanSafeCompareDoubles(primitiveA_1, primitiveB_1);
      /* 072 */       if (comp != 0) { /* 073 */         return comp; /* 074 */       }
      /* 075 */     }
      /* 076 */
      /* 077 */
      /* 078 */     return 0;
      /* 079 */   }
      /* 080 */
      /* 081 */
      /* 082 */ }

      After PR (codegen size: 89)
      Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, double, false] ASC NULLS FIRST:
      /* 001 */ public SpecificOrdering generate(Object[] references) { /* 002 */   return new SpecificOrdering(references); /* 003 */ }

      /* 004 */
      /* 005 */ class SpecificOrdering extends org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
      /* 006 */
      /* 007 */   private Object[] references;
      /* 008 */
      /* 009 */
      /* 010 */   public SpecificOrdering(Object[] references) {
      /* 011 */     this.references = references;
      /* 012 */
      /* 013 */   }

      /* 014 */
      /* 015 */   public int compare(InternalRow a, InternalRow b) {
      /* 016 */
      /* 017 */
      /* 018 */     long value_0 = a.getLong(0);
      /* 019 */     long value_2 = b.getLong(0);
      /* 020 */     if (false && false) {
      /* 021 */       // Nothing
      /* 022 */     } else if (false) {
      /* 023 */       return -1;
      /* 024 */     } else if (false) {
      /* 025 */       return 1;
      /* 026 */     } else {
      /* 027 */       int comp = (value_0 > value_2 ? 1 : value_0 < value_2 ? -1 : 0);
      /* 028 */       if (comp != 0) {
      /* 029 */         return comp;
      /* 030 */       }
      /* 031 */     }
      /* 032 */
      /* 033 */     double value_1 = a.getDouble(1);
      /* 034 */     double value_3 = b.getDouble(1);
      /* 035 */     if (false && false) {
      /* 036 */       // Nothing
      /* 037 */     } else if (false) {
      /* 038 */       return -1;
      /* 039 */     } else if (false) {
      /* 040 */       return 1;
      /* 041 */     } else {
      /* 042 */       int comp = org.apache.spark.util.Utils.nanSafeCompareDoubles(value_1, value_3);
      /* 043 */       if (comp != 0) {
      /* 044 */         return comp;
      /* 045 */       }
      /* 046 */     }
      /* 047 */
      /* 048 */
      /* 049 */     return 0;
      /* 050 */   }
      /* 051 */
      /* 052 */
      /* 053 */ }

      Attachments

        Activity

          People

            heary-cao caoxuewen
            heary-cao caoxuewen
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: