Description
Spark Connect's Window.rowsBetween raises a TypeError when given float("-inf") / float("inf"), whereas classic PySpark coerces any value at or beyond its internal thresholds to the unbounded sentinels and therefore accepts them. The following repro (from test_window_functions in test_functions.py) passes against classic PySpark but fails under Spark Connect:

from pyspark.sql import functions as F
from pyspark.sql.window import Window

df = self.spark.createDataFrame(
    [(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]
)
w = Window.partitionBy("value").orderBy("key")
sel = df.select(
    df.value,
    df.key,
    F.max("key").over(w.rowsBetween(0, 1)),
    F.min("key").over(w.rowsBetween(0, 1)),
    # Classic PySpark treats +/-inf as unbounded; Spark Connect raises here.
    F.count("key").over(w.rowsBetween(float("-inf"), float("inf"))),
    F.row_number().over(w),
    F.rank().over(w),
    F.dense_rank().over(w),
    F.ntile(2).over(w),
)
rs = sorted(sel.collect())
Traceback (most recent call last):
  File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 821, in test_window_functions
    F.count("key").over(w.rowsBetween(float("-inf"), float("inf"))),
  File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/window.py", line 152, in rowsBetween
    raise TypeError(f"start must be a int, but got {type(start).__name__}")
TypeError: start must be a int, but got float
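Until the Connect API is aligned with classic PySpark, a portable workaround is to use the integer sentinels Window.unboundedPreceding / Window.unboundedFollowing, which both implementations accept. A minimal sketch (reusing df from the repro above):

from pyspark.sql import functions as F
from pyspark.sql.window import Window

# The integer sentinels work on both classic PySpark and Spark Connect,
# unlike float("-inf") / float("inf").
w_unbounded = (
    Window.partitionBy("value")
    .orderBy("key")
    .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)
)
counts = df.select(F.count("key").over(w_unbounded).alias("cnt"))

For parity, the fix in pyspark/sql/connect/window.py would presumably be to coerce boundary values at or beyond the frame thresholds (including the +/-inf floats) to the unbounded sentinels before the int type check, mirroring what classic PySpark's Window.rowsBetween already does.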