Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-24901

Merge the codegen of RegularHashMap and fastHashMap to reduce compiler maxCodesize when VectorizedHashMap is false

    XMLWordPrintableJSON

    Details

    • Type: Improvement
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 2.4.0
    • Fix Version/s: 3.0.0
    • Component/s: SQL
    • Labels:
      None

      Description

      Currently, the code generated for updating the UnsafeRow aggregation buffer in hash aggregation is emitted twice:
      once for the FastHashMap and once for the RegularHashMap. These two separate copies are needed only when VectorizedHashMap is true. In all other cases, the two copies can be merged into one to reduce the compiler's maxCodeSize. Thanks.
      case class DistinctAgg(a: Int, b: Float, c: Double, d: Int, e: String)
      spark.sparkContext.parallelize(
            DistinctAgg(8, 2, 3, 4, "a") ::
            DistinctAgg(9, 3, 4, 5, "b") :: Nil).toDF().createOrReplaceTempView("distinctAgg")
      val df = sql("select a,b,e, min(d) as mind, min(case when a > 10 then a else null end) as mincasea, min(a) as mina from distinctAgg group by a, b, e")
      println(org.apache.spark.sql.execution.debug.codegenString(df.queryExecution.executedPlan))
      df.show()

      The generated code looks like this:
       Before the change:
      Generated code:
      /* 001 */ public Object generate(Object[] references) {
      /* 002 */   return new GeneratedIteratorForCodegenStage1(references);
      /* 003 */ }
      /* 004 */
      ...............
      /* 354 */
      /* 355 */     if (agg_fastAggBuffer_0 != null) {
      /* 356 */       // common sub-expressions
      /* 357 */
      /* 358 */       // evaluate aggregate function
      /* 359 */       agg_agg_isNull_31_0 = true;
      /* 360 */       int agg_value_34 = -1;
      /* 361 */
      /* 362 */       boolean agg_isNull_32 = agg_fastAggBuffer_0.isNullAt(0);
      /* 363 */       int agg_value_35 = agg_isNull_32 ?
      /* 364 */       -1 : (agg_fastAggBuffer_0.getInt(0));
      /* 365 */
      /* 366 */       if (!agg_isNull_32 && (agg_agg_isNull_31_0 ||
      /* 367 */           agg_value_34 > agg_value_35)) { /* 368 */         agg_agg_isNull_31_0 = false; /* 369 */         agg_value_34 = agg_value_35; /* 370 */       }
      /* 371 */
      /* 372 */       if (!false && (agg_agg_isNull_31_0 ||
      /* 373 */           agg_value_34 > agg_expr_2_0)) { /* 374 */         agg_agg_isNull_31_0 = false; /* 375 */         agg_value_34 = agg_expr_2_0; /* 376 */       }
      /* 377 */       agg_agg_isNull_34_0 = true;
      /* 378 */       int agg_value_37 = -1;
      /* 379 */
      /* 380 */       boolean agg_isNull_35 = agg_fastAggBuffer_0.isNullAt(1);
      /* 381 */       int agg_value_38 = agg_isNull_35 ?
      /* 382 */       -1 : (agg_fastAggBuffer_0.getInt(1));
      /* 383 */
      /* 384 */       if (!agg_isNull_35 && (agg_agg_isNull_34_0 ||
      /* 385 */           agg_value_37 > agg_value_38)) { /* 386 */         agg_agg_isNull_34_0 = false; /* 387 */         agg_value_37 = agg_value_38; /* 388 */       }
      /* 389 */
      /* 390 */       byte agg_caseWhenResultState_1 = -1;
      /* 391 */       do {
      /* 392 */         boolean agg_value_40 = false;
      /* 393 */         agg_value_40 = agg_expr_0_0 > 10;
      /* 394 */         if (!false && agg_value_40) { /* 395 */           agg_caseWhenResultState_1 = (byte)(false ? 1 : 0); /* 396 */           agg_agg_value_39_0 = agg_expr_0_0; /* 397 */           continue; /* 398 */         }
      /* 399 */
      /* 400 */         agg_caseWhenResultState_1 = (byte)(true ? 1 : 0);
      /* 401 */         agg_agg_value_39_0 = -1;
      /* 402 */
      /* 403 */       } while (false);
      /* 404 */       // TRUE if any condition is met and the result is null, or no any condition is met.
      /* 405 */       final boolean agg_isNull_36 = (agg_caseWhenResultState_1 != 0);
      /* 406 */
      /* 407 */       if (!agg_isNull_36 && (agg_agg_isNull_34_0 ||
      /* 408 */           agg_value_37 > agg_agg_value_39_0)) { /* 409 */         agg_agg_isNull_34_0 = false; /* 410 */         agg_value_37 = agg_agg_value_39_0; /* 411 */       }
      /* 412 */       agg_agg_isNull_42_0 = true;
      /* 413 */       int agg_value_45 = -1;
      /* 414 */
      /* 415 */       boolean agg_isNull_43 = agg_fastAggBuffer_0.isNullAt(2);
      /* 416 */       int agg_value_46 = agg_isNull_43 ?
      /* 417 */       -1 : (agg_fastAggBuffer_0.getInt(2));
      /* 418 */
      /* 419 */       if (!agg_isNull_43 && (agg_agg_isNull_42_0 ||
      /* 420 */           agg_value_45 > agg_value_46)) { /* 421 */         agg_agg_isNull_42_0 = false; /* 422 */         agg_value_45 = agg_value_46; /* 423 */       }
      /* 424 */
      /* 425 */       if (!false && (agg_agg_isNull_42_0 ||
      /* 426 */           agg_value_45 > agg_expr_0_0)) { /* 427 */         agg_agg_isNull_42_0 = false; /* 428 */         agg_value_45 = agg_expr_0_0; /* 429 */       }
      /* 430 */       // update fast row
      /* 431 */       agg_fastAggBuffer_0.setInt(0, agg_value_34);
      /* 432 */
      /* 433 */       if (!agg_agg_isNull_34_0) { /* 434 */         agg_fastAggBuffer_0.setInt(1, agg_value_37); /* 435 */       } else { /* 436 */         agg_fastAggBuffer_0.setNullAt(1); /* 437 */       }
      /* 438 */
      /* 439 */       agg_fastAggBuffer_0.setInt(2, agg_value_45);
      /* 440 */     } else {
      /* 441 */       // common sub-expressions
      /* 442 */
      /* 443 */       // evaluate aggregate function
      /* 444 */       agg_agg_isNull_17_0 = true;
      /* 445 */       int agg_value_20 = -1;
      /* 446 */
      /* 447 */       boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
      /* 448 */       int agg_value_21 = agg_isNull_18 ?
      /* 449 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
      /* 450 */
      /* 451 */       if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
      /* 452 */           agg_value_20 > agg_value_21)) { /* 453 */         agg_agg_isNull_17_0 = false; /* 454 */         agg_value_20 = agg_value_21; /* 455 */       }
      /* 456 */
      /* 457 */       if (!false && (agg_agg_isNull_17_0 ||
      /* 458 */           agg_value_20 > agg_expr_2_0)) { /* 459 */         agg_agg_isNull_17_0 = false; /* 460 */         agg_value_20 = agg_expr_2_0; /* 461 */       }
      /* 462 */       agg_agg_isNull_20_0 = true;
      /* 463 */       int agg_value_23 = -1;
      /* 464 */
      /* 465 */       boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
      /* 466 */       int agg_value_24 = agg_isNull_21 ?
      /* 467 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
      /* 468 */
      /* 469 */       if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
      /* 470 */           agg_value_23 > agg_value_24)) { /* 471 */         agg_agg_isNull_20_0 = false; /* 472 */         agg_value_23 = agg_value_24; /* 473 */       }
      /* 474 */
      /* 475 */       byte agg_caseWhenResultState_0 = -1;
      /* 476 */       do {
      /* 477 */         boolean agg_value_26 = false;
      /* 478 */         agg_value_26 = agg_expr_0_0 > 10;
      /* 479 */         if (!false && agg_value_26) { /* 480 */           agg_caseWhenResultState_0 = (byte)(false ? 1 : 0); /* 481 */           agg_agg_value_25_0 = agg_expr_0_0; /* 482 */           continue; /* 483 */         }
      /* 484 */
      /* 485 */         agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
      /* 486 */         agg_agg_value_25_0 = -1;
      /* 487 */
      /* 488 */       } while (false);
      /* 489 */       // TRUE if any condition is met and the result is null, or no any condition is met.
      /* 490 */       final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
      /* 491 */
      /* 492 */       if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
      /* 493 */           agg_value_23 > agg_agg_value_25_0)) { /* 494 */         agg_agg_isNull_20_0 = false; /* 495 */         agg_value_23 = agg_agg_value_25_0; /* 496 */       }
      /* 497 */       agg_agg_isNull_28_0 = true;
      /* 498 */       int agg_value_31 = -1;
      /* 499 */
      /* 500 */       boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
      /* 501 */       int agg_value_32 = agg_isNull_29 ?
      /* 502 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
      /* 503 */
      /* 504 */       if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
      /* 505 */           agg_value_31 > agg_value_32)) { /* 506 */         agg_agg_isNull_28_0 = false; /* 507 */         agg_value_31 = agg_value_32; /* 508 */       }
      /* 509 */
      /* 510 */       if (!false && (agg_agg_isNull_28_0 ||
      /* 511 */           agg_value_31 > agg_expr_0_0)) { /* 512 */         agg_agg_isNull_28_0 = false; /* 513 */         agg_value_31 = agg_expr_0_0; /* 514 */       }
      /* 515 */       // update unsafe row buffer
      /* 516 */       agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
      /* 517 */
      /* 518 */       if (!agg_agg_isNull_20_0) { /* 519 */         agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23); /* 520 */       } else { /* 521 */         agg_unsafeRowAggBuffer_0.setNullAt(1); /* 522 */       }
      /* 523 */
      /* 524 */       agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
      /* 525 */
      /* 526 */     }
      /* 527 */
      /* 528 */   }
      ......................
      /* 554 */     // output the result
      /* 555 */
      /* 556 */     while (agg_fastHashMapIter_0.next()) { /* 557 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey(); /* 558 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue(); /* 559 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0); /* 560 */ /* 561 */       if (shouldStop()) return; /* 562 */     }
      /* 563 */     agg_fastHashMap_0.close();
      /* 564 */
      /* 565 */     while (agg_mapIter_0.next()) { /* 566 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey(); /* 567 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue(); /* 568 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0); /* 569 */ /* 570 */       if (shouldStop()) return; /* 571 */     }
      /* 572 */
      /* 573 */     agg_mapIter_0.close();
      /* 574 */     if (agg_sorter_0 == null) { /* 575 */       agg_hashMap_0.free(); /* 576 */     }
      /* 577 */   }
      /* 578 */
      /* 579 */ }

      Whole-stage codegen max code size: 954

       After the change:
      Generated code:
      /* 001 */ public Object generate(Object[] references) {
      /* 002 */   return new GeneratedIteratorForCodegenStage1(references);
      /* 003 */ }

      /* 004 */
      .............
      /* 350 */
      /* 351 */     if (agg_fastAggBuffer_0 != null)

      { /* 352 */       agg_unsafeRowAggBuffer_0 = agg_fastAggBuffer_0; /* 353 */     }

      /* 354 */
      /* 355 */     // common sub-expressions
      /* 356 */
      /* 357 */     // evaluate aggregate function
      /* 358 */     agg_agg_isNull_17_0 = true;
      /* 359 */     int agg_value_20 = -1;
      /* 360 */
      /* 361 */     boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
      /* 362 */     int agg_value_21 = agg_isNull_18 ?
      /* 363 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
      /* 364 */
      /* 365 */     if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
      /* 366 */         agg_value_20 > agg_value_21))

      { /* 367 */       agg_agg_isNull_17_0 = false; /* 368 */       agg_value_20 = agg_value_21; /* 369 */     }

      /* 370 */
      /* 371 */     if (!false && (agg_agg_isNull_17_0 ||
      /* 372 */         agg_value_20 > agg_expr_2_0))

      { /* 373 */       agg_agg_isNull_17_0 = false; /* 374 */       agg_value_20 = agg_expr_2_0; /* 375 */     }

      /* 376 */     agg_agg_isNull_20_0 = true;
      /* 377 */     int agg_value_23 = -1;
      /* 378 */
      /* 379 */     boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
      /* 380 */     int agg_value_24 = agg_isNull_21 ?
      /* 381 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
      /* 382 */
      /* 383 */     if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
      /* 384 */         agg_value_23 > agg_value_24))

      { /* 385 */       agg_agg_isNull_20_0 = false; /* 386 */       agg_value_23 = agg_value_24; /* 387 */     }

      /* 388 */
      /* 389 */     byte agg_caseWhenResultState_0 = -1;
      /* 390 */     do {
      /* 391 */       boolean agg_value_26 = false;
      /* 392 */       agg_value_26 = agg_expr_0_0 > 10;
      /* 393 */       if (!false && agg_value_26)

      { /* 394 */         agg_caseWhenResultState_0 = (byte)(false ? 1 : 0); /* 395 */         agg_agg_value_25_0 = agg_expr_0_0; /* 396 */         continue; /* 397 */       }

      /* 398 */
      /* 399 */       agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
      /* 400 */       agg_agg_value_25_0 = -1;
      /* 401 */
      /* 402 */     } while (false);
      /* 403 */     // TRUE if any condition is met and the result is null, or no any condition is met.
      /* 404 */     final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0);
      /* 405 */
      /* 406 */     if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
      /* 407 */         agg_value_23 > agg_agg_value_25_0))

      { /* 408 */       agg_agg_isNull_20_0 = false; /* 409 */       agg_value_23 = agg_agg_value_25_0; /* 410 */     }

      /* 411 */     agg_agg_isNull_28_0 = true;
      /* 412 */     int agg_value_31 = -1;
      /* 413 */
      /* 414 */     boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
      /* 415 */     int agg_value_32 = agg_isNull_29 ?
      /* 416 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
      /* 417 */
      /* 418 */     if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
      /* 419 */         agg_value_31 > agg_value_32))

      { /* 420 */       agg_agg_isNull_28_0 = false; /* 421 */       agg_value_31 = agg_value_32; /* 422 */     }

      /* 423 */
      /* 424 */     if (!false && (agg_agg_isNull_28_0 ||
      /* 425 */         agg_value_31 > agg_expr_0_0))

      { /* 426 */       agg_agg_isNull_28_0 = false; /* 427 */       agg_value_31 = agg_expr_0_0; /* 428 */     }

      /* 429 */     // update unsafe row buffer
      /* 430 */     agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
      /* 431 */
      /* 432 */     if (!agg_agg_isNull_20_0)

      { /* 433 */       agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23); /* 434 */     }

      else

      { /* 435 */       agg_unsafeRowAggBuffer_0.setNullAt(1); /* 436 */     }

      /* 437 */
      /* 438 */     agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
      /* 439 */
      /* 440 */   }
      /* 441 */
      ...........
      /* 466 */     // output the result
      /* 467 */
      /* 468 */     while (agg_fastHashMapIter_0.next())

      { /* 469 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_fastHashMapIter_0.getKey(); /* 470 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_fastHashMapIter_0.getValue(); /* 471 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0); /* 472 */ /* 473 */       if (shouldStop()) return; /* 474 */     }

      /* 475 */     agg_fastHashMap_0.close();
      /* 476 */
      /* 477 */     while (agg_mapIter_0.next())

      { /* 478 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey(); /* 479 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) agg_mapIter_0.getValue(); /* 480 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, agg_aggBuffer_0); /* 481 */ /* 482 */       if (shouldStop()) return; /* 483 */     }

      /* 484 */
      /* 485 */     agg_mapIter_0.close();
      /* 486 */     if (agg_sorter_0 == null)

      { /* 487 */       agg_hashMap_0.free(); /* 488 */     }

      /* 489 */   }
      /* 490 */
      /* 491 */ }

      Whole-stage codegen max code size: 598

        Attachments

          Activity

            People

            • Assignee:
              heary-cao caoxuewen
              Reporter:
              heary-cao caoxuewen
            • Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved: