Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Workaround
-
2.2.0, 2.2.1
-
None
-
None
-
Ubuntu pseudo distributed installation of Spark 2.2.0
Description
When I submit a PySpark program with the spark-submit command, the error below is thrown.
It happens for code like the following:
RDD2 = RDD1.map(lambda m: function_x(m)).reduceByKey(lambda c,v :c+v)
or
RDD2 = RDD1.flatMap(lambda m: function_x(m)).reduceByKey(lambda c,v :c+v)
or
RDD2 = RDD1.flatMap(lambda m: function_x(m)).reduce(lambda c,v :c+v)
Traceback (most recent call last):
File "/home/prateek/Project/textrank.py", line 299, in <module>
summaryRDD = sentenceTokensReduceRDD.map(lambda m: get_summary(m)).reduceByKey(lambda c,v :c+v)
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1608, in reduceByKey
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1846, in combineByKey
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1783, in partitionBy
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2455, in _jrdd
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2388, in _wrap_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 2374, in _prepare_for_python_RDD
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 460, in dumps
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 704, in dumps
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 148, in dump
File "/usr/lib/python3.5/pickle.py", line 408, in dump
self.save(obj)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 740, in save_tuple
save(element)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 255, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 292, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 725, in save_tuple
save(element)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 770, in save_list
self._batch_appends(obj)
File "/usr/lib/python3.5/pickle.py", line 794, in _batch_appends
save(x)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 255, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 292, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 725, in save_tuple
save(element)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 770, in save_list
self._batch_appends(obj)
File "/usr/lib/python3.5/pickle.py", line 794, in _batch_appends
save(x)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 255, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 292, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 725, in save_tuple
save(element)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 770, in save_list
self._batch_appends(obj)
File "/usr/lib/python3.5/pickle.py", line 794, in _batch_appends
save(x)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 255, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 292, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 725, in save_tuple
save(element)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 770, in save_list
self._batch_appends(obj)
File "/usr/lib/python3.5/pickle.py", line 797, in _batch_appends
save(tmp[0])
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 249, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 297, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 810, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.5/pickle.py", line 841, in _batch_setitems
save(v)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 249, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 297, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 810, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.5/pickle.py", line 836, in _batch_setitems
save(v)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 249, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 297, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 810, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.5/pickle.py", line 836, in _batch_setitems
save(v)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 249, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 297, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 810, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.5/pickle.py", line 841, in _batch_setitems
save(v)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 249, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 297, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 810, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.5/pickle.py", line 836, in _batch_setitems
save(v)
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 249, in save_function
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 297, in save_function_tuple
File "/usr/lib/python3.5/pickle.py", line 475, in save
f(self, obj) # Call unbound method with explicit self
File "/usr/lib/python3.5/pickle.py", line 810, in save_dict
self._batch_setitems(obj.items())
File "/usr/lib/python3.5/pickle.py", line 836, in _batch_setitems
save(v)
File "/usr/lib/python3.5/pickle.py", line 520, in save
self.save_reduce(obj=obj, *rv)
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 565, in save_reduce
pickle.PicklingError: args[0] from __newobj__ args has the wrong class
I tried replacing the cloudpickle code with the version from GitHub, but that started giving the errors "copy_reg not defined" and "copyreg not defined" (for both Python 2.7 and 3.5).