// Single-row DataFrame: `id` = 1 and `field` = one string holding six JSON
// objects separated by commas. The first five carry `animal.type`
// (cat, dog, donkey, turkey, cat); the last one has `animal.NOTANIMAL`
// instead, so it has no `animal.type` key.
// The quotes are escaped twice: once for the Scala string literal and once
// for the double-quoted SQL string inside it.
var data01 = sqlContext.sql("select 1 as id, \"{\\\"animal\\\":{\\\"type\\\": \\\"cat\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"dog\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"donkey\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"turkey\\\"}},{\\\"animal\\\":{\\\"type\\\": \\\"cat\\\"}},{\\\"animal\\\":{\\\"NOTANIMAL\\\": \\\"measuring tape\\\"}}\" as field")
// Element type produced by the explode function used for data02/data03.
// Its single field, `fieldling`, is referenced by name as a column in the
// selectExpr calls that follow.
case class SubField(fieldling: String)
// Split the comma-separated `field` string of data01 into one row per piece
// (column `fieldling`, named after SubField's field), then add `animal`: the
// value at JSON path $.animal.type, extracted with get_json_object.
// NOTE: this is a plain split on ","; it relies on the JSON objects in
// `field` containing no commas of their own.
// The chain is written with trailing dots so it stays a single expression
// even when entered line by line in a REPL, where a line that begins with
// `.` may not be read as a continuation of the previous line.
var data02 = data01.
  explode(data01("field")) {
    case Row(jsonList: String) => jsonList.split(",").map(piece => SubField(piece))
  }.
  selectExpr(
    "id",
    "fieldling",
    "get_json_object(fieldling,\"$.animal.type\") as animal"
  )
// Same pipeline as data02, built as a separate DataFrame: split `field` of
// data01 on "," into one `fieldling` row per piece, then add `animal`, the
// value at JSON path $.animal.type. In the lines shown here data03 is never
// cached.
// The chain is written with trailing dots so it stays a single expression
// even when entered line by line in a REPL, where a line that begins with
// `.` may not be read as a continuation of the previous line.
var data03 = data01.
  explode(data01("field")) {
    case Row(jsonList: String) => jsonList.split(",").map(piece => SubField(piece))
  }.
  selectExpr(
    "id",
    "fieldling",
    "get_json_object(fieldling,\"$.animal.type\") as animal"
  )
// Mark data02 for caching. The physical plans recorded below read it through
// InMemoryColumnarTableScan / InMemoryRelation.
data02.cache()
// Step 1: print the physical plan for projecting the predicate
// `animal = "cat"` as a boolean column over the cached data02.
data02.select($"animal" === "cat").explain
== Physical Plan ==
Project [(animal#25 = cat) AS (animal = cat)#263]
InMemoryColumnarTableScan [animal#25], (InMemoryRelation [id#20,fieldling#24,animal#25], true, 10000, StorageLevel(true, true, false, true, 1), (TungstenProject [id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]), None)
// Step 2: evaluate the predicate as a projected column. Per the recorded
// output below it is true for two rows, false for three and null for one.
data02.select($"animal" === "cat").show
+--------------+
|(animal = cat)|
+--------------+
| true|
| false|
| false|
| false|
| true|
| null|
+--------------+
// Step 3: print the physical plan for filtering on the same predicate. In the
// recorded plan below the predicate appears both in the Filter node and in
// the argument list of InMemoryColumnarTableScan.
data02.filter($"animal" === "cat").explain
== Physical Plan ==
Filter (animal#25 = cat)
InMemoryColumnarTableScan [id#20,fieldling#24,animal#25], [(animal#25 = cat)], (InMemoryRelation [id#20,fieldling#24,animal#25], true, 10000, StorageLevel(true, true, false, true, 1), (TungstenProject [id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]), None)
// Step 4: run the filter. The recorded output below contains no rows, even
// though the projection of the same predicate reported `true` for two rows.
// This mismatch appears to be the point of the transcript.
data02.filter($"animal" === "cat").show
+---+---------+------+
| id|fieldling|animal|
+---+---------+------+
+---+---------+------+