Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-17733

InferFiltersFromConstraints rule never terminates for query

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Critical
    • Resolution: Fixed
    • 2.0.0
    • 2.0.2, 2.1.0
    • SQL
    • None

    Description

      The following (complicated) example becomes stuck in the InferFiltersFromConstraints rule and never runs. However, it doesn't fail with a stack overflow and doesn't hit the limit on optimization passes, so I think there's some sort of non-obvious infinite loop within the rule itself.

      Table Creation
       -- Query #0
      
      CREATE TEMPORARY VIEW table_4(float_col_1, boolean_col_2, decimal2610_col_3, boolean_col_4, timestamp_col_5, boolean_col_6, bigint_col_7, timestamp_col_8) AS VALUES
        (CAST(21.920416 AS FLOAT), false, -182.0700000000BD, true, TIMESTAMP('1996-10-24 00:00:00.0'), true, CAST(-993 AS BIGINT), TIMESTAMP('2007-01-13 00:00:00.0')),
        (CAST(722.4906 AS FLOAT), true, 497.5400000000BD, true, TIMESTAMP('2015-12-14 00:00:00.0'), false, CAST(268 AS BIGINT), TIMESTAMP('2021-04-19 00:00:00.0')),
        (CAST(534.9996 AS FLOAT), true, -470.8300000000BD, true, TIMESTAMP('1996-01-31 00:00:00.0'), false, CAST(-910 AS BIGINT), TIMESTAMP('2019-10-16 00:00:00.0')),
        (CAST(-289.6454 AS FLOAT), false, 892.2500000000BD, false, TIMESTAMP('2014-03-14 00:00:00.0'), false, CAST(-462 AS BIGINT), CAST(NULL AS TIMESTAMP)),
        (CAST(46.395535 AS FLOAT), true, -662.8900000000BD, true, TIMESTAMP('2000-10-16 00:00:00.0'), false, CAST(-656 AS BIGINT), TIMESTAMP('2024-09-01 00:00:00.0')),
        (CAST(-555.36285 AS FLOAT), true, -938.9300000000BD, true, TIMESTAMP('2007-04-10 00:00:00.0'), true, CAST(252 AS BIGINT), TIMESTAMP('2028-12-03 00:00:00.0')),
        (CAST(826.29004 AS FLOAT), true, 53.1800000000BD, false, TIMESTAMP('2004-06-11 00:00:00.0'), false, CAST(437 AS BIGINT), TIMESTAMP('1994-04-04 00:00:00.0')),
        (CAST(-15.276999 AS FLOAT), CAST(NULL AS BOOLEAN), -889.3100000000BD, true, TIMESTAMP('1991-05-23 00:00:00.0'), true, CAST(226 AS BIGINT), TIMESTAMP('2023-07-08 00:00:00.0')),
        (CAST(385.27386 AS FLOAT), CAST(NULL AS BOOLEAN), -9.9500000000BD, false, TIMESTAMP('2022-10-22 00:00:00.0'), true, CAST(430 AS BIGINT), TIMESTAMP('2013-09-29 00:00:00.0')),
        (CAST(988.7868 AS FLOAT), CAST(NULL AS BOOLEAN), 715.1700000000BD, false, TIMESTAMP('2026-10-03 00:00:00.0'), true, CAST(-696 AS BIGINT), TIMESTAMP('1990-08-10 00:00:00.0'))
           ;
      
      
       -- Query #1
      
      CREATE TEMPORARY VIEW table_1(double_col_1, boolean_col_2, timestamp_col_3, smallint_col_4, boolean_col_5, int_col_6, timestamp_col_7, varchar0008_col_8, int_col_9, string_col_10) AS VALUES
        (CAST(-147.818640624 AS DOUBLE), CAST(NULL AS BOOLEAN), TIMESTAMP('2012-10-19 00:00:00.0'), CAST(9 AS SMALLINT), false, 77, TIMESTAMP('2014-07-01 00:00:00.0'), '-945', -646, '722'),
        (CAST(594.195125271 AS DOUBLE), false, TIMESTAMP('2016-12-04 00:00:00.0'), CAST(NULL AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), TIMESTAMP('1999-12-26 00:00:00.0'), '250', -861, '55'),
        (CAST(-454.171126363 AS DOUBLE), false, TIMESTAMP('2008-12-13 00:00:00.0'), CAST(NULL AS SMALLINT), false, -783, TIMESTAMP('2010-05-28 00:00:00.0'), '211', -959, CAST(NULL AS STRING)),
        (CAST(437.670945524 AS DOUBLE), true, TIMESTAMP('2011-10-16 00:00:00.0'), CAST(952 AS SMALLINT), true, 297, TIMESTAMP('2013-01-13 00:00:00.0'), '262', CAST(NULL AS INT), '936'),
        (CAST(-387.226759334 AS DOUBLE), false, TIMESTAMP('2019-10-03 00:00:00.0'), CAST(-496 AS SMALLINT), CAST(NULL AS BOOLEAN), -925, TIMESTAMP('2028-06-27 00:00:00.0'), '-657', 948, '18'),
        (CAST(-306.138230875 AS DOUBLE), true, TIMESTAMP('1997-10-07 00:00:00.0'), CAST(332 AS SMALLINT), false, 744, TIMESTAMP('1990-09-22 00:00:00.0'), '-345', 566, '-574'),
        (CAST(675.402140308 AS DOUBLE), false, TIMESTAMP('2017-06-26 00:00:00.0'), CAST(972 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('2026-06-10 00:00:00.0'), '518', 683, '-320'),
        (CAST(734.839647174 AS DOUBLE), true, TIMESTAMP('1995-06-01 00:00:00.0'), CAST(-792 AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), TIMESTAMP('2021-07-11 00:00:00.0'), '-318', 564, '142'),
        (CAST(-836.513475295 AS DOUBLE), true, TIMESTAMP('2027-01-02 00:00:00.0'), CAST(-446 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('1993-09-01 00:00:00.0'), '771', CAST(NULL AS INT), '977'),
        (CAST(-768.883638815 AS DOUBLE), false, TIMESTAMP('1994-02-11 00:00:00.0'), CAST(-244 AS SMALLINT), true, -493, TIMESTAMP('1994-01-02 00:00:00.0'), '-921', CAST(NULL AS INT), '-409')
           ;
      
      
       -- Query #2
      
      CREATE TEMPORARY VIEW table_5(float_col_1, varchar0138_col_2, string_col_3, decimal2211_col_4, float_col_5, string_col_6, timestamp_col_7, varchar0207_col_8) AS VALUES
        (CAST(-885.7606 AS FLOAT), '-740', '680', -929.06000000000BD, CAST(NULL AS FLOAT), '-915', TIMESTAMP('1994-09-12 00:00:00.0'), CAST(NULL AS STRING)),
        (CAST(NULL AS FLOAT), '489', '692', -220.60000000000BD, CAST(939.18964 AS FLOAT), '-514', CAST(NULL AS TIMESTAMP), '181'),
        (CAST(210.7055 AS FLOAT), '44', CAST(NULL AS STRING), -174.70000000000BD, CAST(760.21045 AS FLOAT), '325', TIMESTAMP('2019-09-25 00:00:00.0'), '505'),
        (CAST(952.8074 AS FLOAT), '838', '705', CAST(NULL AS DECIMAL(22,11)), CAST(NULL AS FLOAT), '-62', TIMESTAMP('2029-05-22 00:00:00.0'), CAST(NULL AS STRING)),
        (CAST(-113.300446 AS FLOAT), '-210', '765', CAST(NULL AS DECIMAL(22,11)), CAST(-819.2468 AS FLOAT), '-829', CAST(NULL AS TIMESTAMP), '465'),
        (CAST(-739.9902 AS FLOAT), '614', '-393', -509.22000000000BD, CAST(-339.78568 AS FLOAT), '568', TIMESTAMP('2013-05-14 00:00:00.0'), '305'),
        (CAST(976.0611 AS FLOAT), '670', '71', 663.23000000000BD, CAST(-685.9362 AS FLOAT), '42', CAST(NULL AS TIMESTAMP), '150'),
        (CAST(NULL AS FLOAT), '302', '-404', -349.42000000000BD, CAST(2.113715 AS FLOAT), '-703', TIMESTAMP('2003-01-09 00:00:00.0'), '-863'),
        (CAST(-40.604317 AS FLOAT), '856', '632', 844.57000000000BD, CAST(-730.8376 AS FLOAT), '151', TIMESTAMP('2021-05-11 00:00:00.0'), '494'),
        (CAST(884.62714 AS FLOAT), '-195', '960', -664.40000000000BD, CAST(374.4844 AS FLOAT), '814', TIMESTAMP('2006-06-12 00:00:00.0'), '-900')
           ;
      
      
       -- Query #3
      
      CREATE TEMPORARY VIEW table_2(bigint_col_1, boolean_col_2, double_col_3, double_col_4, double_col_5, varchar0164_col_6) AS VALUES
        (CAST(-374 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(939.626553676 AS DOUBLE), CAST(-777.275379746 AS DOUBLE), CAST(235.613760023 AS DOUBLE), '86'),
        (CAST(324 AS BIGINT), true, CAST(-507.23760783 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(966.753434439 AS DOUBLE), '304'),
        (CAST(882 AS BIGINT), false, CAST(-366.529706229 AS DOUBLE), CAST(787.000491043 AS DOUBLE), CAST(-331.333188698 AS DOUBLE), '158'),
        (CAST(-510 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(-855.344932257 AS DOUBLE), CAST(-858.167264921 AS DOUBLE), CAST(NULL AS DOUBLE), '-419'),
        (CAST(-13 AS BIGINT), false, CAST(589.966987492 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(-653.515783257 AS DOUBLE), '970'),
        (CAST(-361 AS BIGINT), true, CAST(-413.021011259 AS DOUBLE), CAST(-716.638705947 AS DOUBLE), CAST(-936.480108205 AS DOUBLE), '807'),
        (CAST(815 AS BIGINT), true, CAST(-643.690268711 AS DOUBLE), CAST(-684.206112496 AS DOUBLE), CAST(335.557479371 AS DOUBLE), '-872'),
        (CAST(617 AS BIGINT), true, CAST(-93.3806447556 AS DOUBLE), CAST(-322.66171021 AS DOUBLE), CAST(-951.18299435 AS DOUBLE), '-167'),
        (CAST(-876 AS BIGINT), false, CAST(-481.774062168 AS DOUBLE), CAST(-204.40537387 AS DOUBLE), CAST(224.889845986 AS DOUBLE), '-986'),
        (CAST(2 AS BIGINT), false, CAST(462.843898322 AS DOUBLE), CAST(-9.85549856798 AS DOUBLE), CAST(-549.875829922 AS DOUBLE), '121')
           ;
      
      
       -- Query #4
      
      CREATE TEMPORARY VIEW table_3(string_col_1, float_col_2, timestamp_col_3, boolean_col_4, timestamp_col_5, decimal3317_col_6) AS VALUES
        ('-450', CAST(-903.6053 AS FLOAT), CAST(NULL AS TIMESTAMP), true, TIMESTAMP('2020-08-22 00:00:00.0'), -376.39000000000000000BD),
        ('698', CAST(402.56534 AS FLOAT), TIMESTAMP('2013-10-13 00:00:00.0'), true, TIMESTAMP('2012-11-06 00:00:00.0'), -498.81000000000000000BD),
        ('139', CAST(-895.7336 AS FLOAT), TIMESTAMP('2018-09-08 00:00:00.0'), true, TIMESTAMP('2019-03-13 00:00:00.0'), CAST(NULL AS DECIMAL(33,17))),
        ('616', CAST(-464.9475 AS FLOAT), TIMESTAMP('2028-05-18 00:00:00.0'), true, TIMESTAMP('2016-05-22 00:00:00.0'), -109.88000000000000000BD),
        ('943', CAST(605.42303 AS FLOAT), TIMESTAMP('1996-08-04 00:00:00.0'), false, TIMESTAMP('2028-05-18 00:00:00.0'), 201.36000000000000000BD),
        ('-764', CAST(-503.56726 AS FLOAT), TIMESTAMP('1990-02-28 00:00:00.0'), false, CAST(NULL AS TIMESTAMP), 211.25000000000000000BD),
        ('-587', CAST(84.67886 AS FLOAT), TIMESTAMP('2013-06-06 00:00:00.0'), true, TIMESTAMP('2022-05-07 00:00:00.0'), 90.75000000000000000BD),
        ('712', CAST(141.08926 AS FLOAT), TIMESTAMP('2001-05-12 00:00:00.0'), true, TIMESTAMP('2019-11-22 00:00:00.0'), 929.89000000000000000BD),
        ('948', CAST(0.74294764 AS FLOAT), TIMESTAMP('2002-06-14 00:00:00.0'), false, TIMESTAMP('1990-01-13 00:00:00.0'), -100.90000000000000000BD),
        ('-201', CAST(366.82578 AS FLOAT), TIMESTAMP('2015-11-28 00:00:00.0'), false, CAST(NULL AS TIMESTAMP), 196.33000000000000000BD)
           ;
      
      Query
      SELECT
      t1.int_col_2,
      (t1.bigint_col_7) / (t2.double_col_3) AS float_col,
      TRIM(t2.varchar0164_col_6) AS char_col
      FROM (
      SELECT
      COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7) AS int_col,
      t1.bigint_col_7,
      t2.bigint_col_7 AS int_col_1,
      t1.bigint_col_7 AS int_col_2,
      COALESCE(t2.bigint_col_7, (COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7)) - (t1.bigint_col_7), MIN(t2.bigint_col_7)) AS int_col_3
      FROM table_4 t1
      INNER JOIN table_4 t2 ON ((t2.timestamp_col_5) = (t1.timestamp_col_8)) AND ((t2.decimal2610_col_3) = (t1.decimal2610_col_3))
      WHERE
      (t1.bigint_col_7) IN (t2.bigint_col_7, t2.bigint_col_7)
      GROUP BY
      COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7),
      t1.bigint_col_7,
      t2.bigint_col_7,
      t1.bigint_col_7
      HAVING
      (MIN(COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7))) NOT IN (423.13, t2.bigint_col_7)
      UNION
      SELECT
      MIN((436) * (927)) OVER (ORDER BY (t2.int_col_6) * (NULL) DESC, (t1.smallint_col_4) - (t2.int_col_6) DESC ROWS BETWEEN 96 PRECEDING AND 16 PRECEDING) AS int_col,
      (t2.int_col_6) * (NULL) AS decimal_col,
      (t1.smallint_col_4) - (t2.int_col_6) AS int_col_1,
      LAG((449) * (-157.519107824), 46) OVER (ORDER BY (t2.int_col_6) * (NULL) DESC, (t1.smallint_col_4) - (t2.int_col_6) DESC) AS float_col,
      AVG((669) - (-773)) OVER (ORDER BY (t2.int_col_6) * (NULL) ASC, (t1.smallint_col_4) - (t2.int_col_6) ASC ROWS BETWEEN CURRENT ROW AND 62 FOLLOWING) AS float_col_1
      FROM table_1 t1
      INNER JOIN table_1 t2 ON ((t2.smallint_col_4) = (t1.int_col_9)) AND ((t2.smallint_col_4) = (t1.int_col_6))
      ) t1
      INNER JOIN table_2 t2 ON (((t2.bigint_col_1) = (t1.bigint_col_7)) AND ((t2.bigint_col_1) = (t1.int_col))) AND ((t2.bigint_col_1) = (t1.int_col_1))
      GROUP BY
      t1.int_col_2,
      (t1.bigint_col_7) / (t2.double_col_3),
      TRIM(t2.varchar0164_col_6)
      

      I attached YourKit to my Spark process and recorded some stack traces. See the attached screenshots showing the distribution of time for the hung query.

      Attachments

        Issue Links

          Activity

            People

              jiangxb1987 Xingbo Jiang
              joshrosen Josh Rosen
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: