Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Invalid
-
2.1.0
-
None
-
None
-
Important
Description
The issue happens when trying to create a JSON file from a DataFrame (see code below)
from pyspark.sql import SQLContext
a = ["a1","a2"]
b = ["b1","b2","b3"]
c = ["c1","c2","c3", "c4"]
d = {'d1':1, 'd2':2}
e = {'e1':1, 'e2':2, 'e3':3}
f = ['f1','f2','f3']
g = ['g1','g2','g3','g4']
metadata_dump = dict(asi=a, basi=b, casi = c, dasi=d, fasi=f, gasi=g, easi=e)
md = sqlContext.createDataFrame([metadata_dump]).collect()
metadata = sqlContext.createDataFrame(md,['asi', 'basi', 'casi','dasi','fasi', 'gasi', 'easi'])
metadata_path = "/folder/fileNameErr"
metadata.write.mode('overwrite').json(metadata_path)
{"asi":["a1","a2"],"basi":["b1","b2","b3"],"casi":["c1","c2","c3","c4"],"dasi":{"d1":1,"d2":2{color}},"fasi":{"e1":1,"e2":2,"e3":3},"gasi":["f1","f2","f3"],"easi":["g1","g2","g3","g4"]}
When moving dictionary e to its expected position in the argument list, the output is correct:
metadata_dump = dict(asi=a, basi=b, casi = c, dasi=d, easi=e, fasi=f, gasi=g)
md = sqlContext.createDataFrame([metadata_dump]).collect()
metadata = sqlContext.createDataFrame(md,['asi', 'basi', 'casi','dasi', 'easi','fasi', 'gasi'])
metadata_path = "/folder/fileNameCorr"
metadata.write.mode('overwrite').json(metadata_path)
{"asi":["a1","a2"],"basi":["b1","b2","b3"],"casi":["c1","c2","c3","c4"],"dasi":
{"d1":1,"d2":2},"easi":{"e1":1,"e2":2,"e3":3},"fasi":["f1","f2","f3"],"gasi":["g1","g2","g3","g4"]}