Description
Trying to create a Spark DataFrame from a pandas DataFrame with no explicit column names:
import pandas as pd

pandasDF = pd.DataFrame([[1, 2], [5, 6]])
sparkDF = sqlContext.createDataFrame(pandasDF)
***********
----> 1 sparkDF = sqlContext.createDataFrame(pandasDF)
/usr/local/Cellar/apache-spark/1.4.0/libexec/python/pyspark/sql/context.pyc in createDataFrame(self, data, schema, samplingRatio)
344
345 jrdd = self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd())
--> 346 df = self._ssql_ctx.applySchemaToPythonRDD(jrdd.rdd(), schema.json())
347 return DataFrame(df, self)
348
/usr/local/Cellar/apache-spark/1.4.0/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
536 answer = self.gateway_client.send_command(command)
537 return_value = get_return_value(answer, self.gateway_client,
--> 538 self.target_id, self.name)
539
540 for temp_arg in temp_args:
/usr/local/Cellar/apache-spark/1.4.0/libexec/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
298 raise Py4JJavaError(
299 'An error occurred while calling {0}{1}{2}.\n'.
--> 300 format(target_id, '.', name), value)
301 else:
302 raise Py4JError(
Py4JJavaError: An error occurred while calling o87.applySchemaToPythonRDD.
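Since the columns were left unnamed, pandas labels them with the integers 0 and 1, so the schema inferred by createDataFrame presumably ends up with non-string field names, which the JVM side then fails on in applySchemaToPythonRDD. A possible workaround (a sketch only, not verified against this build) is to cast the column labels to strings before handing the frame to Spark:

import pandas as pd

pandasDF = pd.DataFrame([[1, 2], [5, 6]])

# Workaround sketch: pandas' default column labels are the integers 0 and 1;
# convert them to strings so the inferred schema has string field names.
pandasDF.columns = [str(c) for c in pandasDF.columns]

sparkDF = sqlContext.createDataFrame(pandasDF)  # sqlContext comes from the PySpark shell
sparkDF.show()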