Description
Here's a patch that reproduces the issue:
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 09c1547..29bb3db 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive
 
 import org.apache.spark.sql.{QueryTest, Row}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
+import org.apache.spark.sql.functions.{lit, struct}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 
 case class Cases(lower: String, UPPER: String)
@@ -76,4 +77,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
       }
     }
   }
+
+  test("column names including ':' characters") {
+    withTempPath { path =>
+      withTable("test_table") {
+        spark.range(0)
+          .select(struct(lit(0).as("nested:column")).as("toplevel:column"))
+          .write.format("parquet")
+          .option("path", path.getCanonicalPath)
+          .saveAsTable("test_table")
+
+        sql("CREATE VIEW test_view_1 AS SELECT `toplevel:column`.* FROM test_table")
+        sql("CREATE VIEW test_view_2 AS SELECT * FROM test_table")
+
+      }
+    }
+  }
 }
The first "CREATE VIEW" statement succeeds, but the second one fails with:
org.apache.spark.SparkException: Cannot recognize hive type string: struct<nested:column:int>