Description
Fitting a simple multinomial logistic regression model fails with:
17/08/02 14:53:23 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:23 ERROR LBFGS: Failure! Resetting history: breeze.optimize.FirstOrderException: Line search failed
Example repro case:
from pyspark.sql import Row
from pyspark.ml.linalg import Vectors
from pyspark.ml.classification import LogisticRegression
df = spark.createDataFrame([
Row(label=0, features=Vectors.dense([0.0, 0.0, 0.0, 0.0, 2.9, 0.0, 2.9, 2.9, 0.0, 0.0, 0.0, 0.0, 2.9, 0.0, 0.0, 2.9, 2.9, 0.0, 0.0, 0.0, 2.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.9, 2.9, 2.9, 2.9, 0.0, 2.9, 0.0, 0.0, 2.9, 0.0, 2.9, 2.9, 0.0, 2.9, 2.9, 0.0, 0.0, 2.9, 2.9, 2.9, 0.0, 2.9, 2.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.9, 2.9, 0.0, 2.9, 2.9, 2.9, 2.9, 0.0, 0.0, 2.9, 2.9, 0.0, 0.0, 0.0, 2.9, 2.9, 0.0, 2.9, 2.9, 2.9, 0.0, 0.0, 2.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])),
Row(label=1, features=Vectors.dense([1.8, 1.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.9, 1.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 1.9, 1.9, 1.9, 1.9, 1.9, 1.8, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 0.0, 1.9, 0.0, 1.9, 1.9, 0.0, 1.9, 1.9, 0.0, 1.8, 1.9, 0.0, 0.0, 1.9, 0.0, 1.9, 0.0, 1.9, 1.9, 1.9, 1.9, 0.0, 1.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.9, 1.9, 1.9, 0.0, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 0.0, 0.0, 0.0, 1.9, 0.0, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 1.9, 0.0, 0.0, 1.9, 1.9, 0.0, 0.0, 0.0])),
Row(label=2, features=Vectors.dense([0.0, 0.0, 0.0, 0.0, 0.0, 1.6, 0.0, 0.0, 0.0, 0.0, 0.0, 1.6, 0.0, 0.0, 1.6, 0.0, 0.0, 0.0, 1.6, 0.0, 0.0, 1.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.6, 1.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.6, 1.6, 1.6, 1.6, 1.6, 0.0, 1.6, 1.6, 1.6, 1.6, 1.6, 0.0, 1.6, 1.6, 0.0, 1.6, 1.6, 1.6, 0.0, 1.6, 1.6, 0.0, 1.6, 1.6, 1.6, 1.6, 0.0, 1.6, 1.6, 1.6, 1.6, 0.0, 0.0, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 0.0, 1.6, 0.0, 0.0, 0.0, 1.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 0.0, 0.0, 0.0, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 0.0, 1.6, 1.6, 0.0, 1.6, 1.6, 0.0, 0.0, 1.6])),
Row(label=3, features=Vectors.dense([0.0, 0.0, 0.0, 1.4, 0.7, 1.1, 0.0, 0.0, 0.7, 0.0, 1.4, 1.1, 1.4, 0.0, 1.1, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.7, 0.0, 0.7, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 1.4, 0.0, 0.0, 0.0, 0.0, 1.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.4, 0.7, 0.0, 0.0, 0.0, 0.0, 1.4, 0.7, 1.9, 0.0, 0.0, 0.0, 1.1, 1.4, 0.0, 0.0, 0.0, 2.1, 2.1, 2.1, 1.6, 1.9, 1.8, 2.1, 2.1, 1.9, 2.1, 1.6, 1.8, 1.6, 2.1, 1.8, 1.9, 2.1, 2.1, 2.1, 2.1, 2.1, 1.8, 2.1, 0.0, 1.9, 2.1, 0.0, 2.1, 2.1, 0.0, 1.8, 2.1, 2.1, 0.0, 1.9, 0.0, 1.9, 0.0, 2.1, 1.8, 2.1, 2.1, 0.0, 2.1, 0.0, 0.0, 1.9, 0.0, 0.0, 1.6, 0.0, 0.0, 0.0, 0.0, 1.6, 0.0, 0.0, 0.0, 0.0, 0.0, 2.1, 2.1, 2.1, 1.9, 2.1, 2.1, 2.1, 1.8, 2.1, 2.1, 2.1, 0.0, 0.0, 0.0, 1.6, 1.9, 2.1, 2.1, 2.1, 2.1, 1.6, 1.9, 0.7, 2.1, 0.0, 0.0, 1.8, 1.6, 0.0, 0.0, 2.1])),
Row(label=4, features=Vectors.dense([0.0, 2.8, 2.8, 0.0, 0.0, 2.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8, 0.0, 2.8, 0.0, 2.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8, 0.0, 0.0, 2.8, 2.8, 0.0, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 0.0, 2.8, 0.0, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 0.0, 0.0, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 0.0, 2.8, 0.0, 0.0, 0.0, 2.8, 0.0, 0.0, 2.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 0.0, 0.0, 0.0, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 2.8, 0.0, 2.8, 2.8, 0.0, 0.0, 2.8])),
Row(label=5, features=Vectors.dense([0.0, 0.0, 0.0, 0.0, 0.0, 2.6, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.4, 0.0, 0.0, 0.0, 0.0, 0.0, 2.4, 1.1, 2.6, 0.0, 0.0, 0.0, 0.0, 2.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0, 2.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.4, 0.0, 0.0, 0.0, 1.1, 2.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.6, 2.6, 2.6, 2.6, 2.6, 0.0, 2.6, 2.6, 2.6, 2.4, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 1.1, 2.6, 2.6, 0.0, 2.6, 2.6, 1.1, 2.4, 0.0, 2.6, 0.0, 2.6, 0.0, 1.1, 0.0, 0.0, 0.0, 2.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.6, 2.4, 2.6, 0.0, 2.6, 0.0, 0.0, 0.0, 2.6, 2.6, 2.6, 1.1, 2.6, 2.6, 2.6, 2.4, 0.0, 2.6, 0.0, 0.0, 2.6, 2.6, 0.0, 0.0, 2.6])),
])
lr = LogisticRegression()
model = lr.fit(df)
'''
17/08/02 14:53:21 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:22 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:22 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:22 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:22 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:22 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:22 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:23 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:23 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:23 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:23 ERROR LBFGS: Failure! Resetting history: breeze.optimize.FirstOrderException: Line search failed
17/08/02 14:53:23 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:24 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:24 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:24 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:24 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:24 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:24 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:25 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:25 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:25 ERROR StrongWolfeLineSearch: Encountered bad values in function evaluation. Decreasing step size to NaN
17/08/02 14:53:25 ERROR LBFGS: Failure again! Giving up and returning. Maybe the objective is just poorly behaved?
'''
I'm on Amazon EMR release emr-5.3.1, running Spark 2.1.0.
Attachments
Issue Links
- duplicates
- SPARK-21523 Fix bug of strong wolfe linesearch `init` parameter lose effectiveness
- Resolved