---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-14-ab497c22ec7c> in <module>()
----> 1 m = LogisticRegressionWithSGD.train(r)
/usr/iop/4.0.0.0/spark/python/pyspark/mllib/classification.py in train(cls, data, iterations, step, miniBatchFraction, initialWeights, regParam, regType, intercept)
162 bool(intercept))
163
--> 164 return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
165
166
/usr/iop/4.0.0.0/spark/python/pyspark/mllib/regression.py in _regression_train_wrapper(train_func, modelClass, data, initial_weights)
138 if initial_weights is None:
139 initial_weights = [0.0] * len(data.first().features)
--> 140 weights, intercept = train_func(data, _convert_to_vector(initial_weights))
141 return modelClass(weights, intercept)
142
/usr/iop/4.0.0.0/spark/python/pyspark/mllib/classification.py in train(rdd, i)
160 return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
161 float(step), float(miniBatchFraction), i, float(regParam), regType,
--> 162 bool(intercept))
163
164 return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
/usr/iop/4.0.0.0/spark/python/pyspark/mllib/common.py in callMLlibFunc(name, *args)
118 sc = SparkContext._active_spark_context
119 api = getattr(sc._jvm.PythonMLLibAPI(), name)
--> 120 return callJavaFunc(sc, api, *args)
121
122
/usr/iop/4.0.0.0/spark/python/pyspark/mllib/common.py in callJavaFunc(sc, func, *args)
111 """ Call Java Function """
112 args = [_py2java(sc, a) for a in args]
--> 113 return _java2py(sc, func(*args))
114
115
/usr/iop/4.0.0.0/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
536 answer = self.gateway_client.send_command(command)
537 return_value = get_return_value(answer, self.gateway_client,
--> 538 self.target_id, self.name)
539
540 for temp_arg in temp_args:
/usr/iop/4.0.0.0/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
298 raise Py4JJavaError(
299 'An error occurred while calling {0}{1}{2}.\n'.
--> 300 format(target_id, '.', name), value)
301 else:
302 raise Py4JError(
Py4JJavaError: An error occurred while calling o86.trainLogisticRegressionModelWithSGD.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 11.0 failed 1 times, most recent failure: Lost task 7.0 in stage 11.0 (TID 47, localhost): java.lang.ArrayIndexOutOfBoundsException: 2
at org.apache.spark.mllib.linalg.BLAS$.dot(BLAS.scala:136)
at org.apache.spark.mllib.linalg.BLAS$.dot(BLAS.scala:106)
at org.apache.spark.mllib.optimization.LogisticGradient.compute(Gradient.scala:169)
at org.apache.spark.mllib.optimization.GradientDescent$$anonfun$runMiniBatchSGD$1$$anonfun$1.apply(GradientDescent.scala:192)
at org.apache.spark.mllib.optimization.GradientDescent$$anonfun$runMiniBatchSGD$1$$anonfun$1.apply(GradientDescent.scala:190)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:144)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:144)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:144)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1157)
at scala.collection.TraversableOnce$class.aggregate(TraversableOnce.scala:201)
at scala.collection.AbstractIterator.aggregate(Iterator.scala:1157)
at org.apache.spark.rdd.RDD$$anonfun$28.apply(RDD.scala:988)
at org.apache.spark.rdd.RDD$$anonfun$28.apply(RDD.scala:988)
at org.apache.spark.rdd.RDD$$anonfun$29.apply(RDD.scala:989)
at org.apache.spark.rdd.RDD$$anonfun$29.apply(RDD.scala:989)
at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:634)
at org.apache.spark.rdd.RDD$$anonfun$14.apply(RDD.scala:634)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:64)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1204)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1193)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1192)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
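The root cause is visible when the two stack traces are read together. On the Python side, _regression_train_wrapper builds the initial weight vector as [0.0] * len(data.first().features) (see the regression.py frame above), so the model's dimension is taken from the first record alone. On the Java side, the task then dies inside BLAS.dot while LogisticGradient.compute takes the dot product of a training row with those weights, throwing ArrayIndexOutOfBoundsException: 2. That combination almost always means the LabeledPoints in the training RDD do not all have the same number of features: the first row implied a dimension of 2, and some later row has a feature at index 2 or beyond, so the dot product reads past the end of the weights array.

Since the construction of r is not shown in the snippet, the data below are hypothetical; this is a minimal sketch of how the mismatch arises and how to check for it before training:

    from pyspark import SparkContext
    from pyspark.mllib.regression import LabeledPoint
    from pyspark.mllib.classification import LogisticRegressionWithSGD

    sc = SparkContext(appName="dimension-check")

    # Hypothetical data reproducing the mismatch: the first point has 2
    # features, so train() sizes the weight vector at 2, but the second
    # point has 3 features and the dot product runs off the end.
    points = [
        LabeledPoint(0.0, [1.0, 2.0]),
        LabeledPoint(1.0, [1.0, 2.0, 3.0]),
    ]
    r = sc.parallelize(points)

    # Cheap sanity check before training: every row must have the same
    # feature dimension (len() works for both dense and sparse vectors).
    dims = r.map(lambda p: len(p.features)).distinct().collect()
    if len(dims) != 1:
        raise ValueError("inconsistent feature dimensions: %s" % dims)

    m = LogisticRegressionWithSGD.train(r)  # only safe once dims agree

Depending on the Spark build, the same mismatch can instead surface as an IllegalArgumentException from a size check in BLAS.dot rather than this ArrayIndexOutOfBoundsException; either way the fix is the same: make every row the same dimension, and for sparse input also make sure every index is strictly less than the declared vector size.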