Details
-
Sub-task
-
Status: Resolved
-
Major
-
Resolution: Fixed
-
3.1.0
-
None
-
None
Description
For the following test case:
// Reproduction for SPARK-25990: pipes a row containing int, string, double,
// decimal, timestamp and date columns through an external script via
// TRANSFORM and compares the output with the plain string cast of each column.
test("SPARK-25990: TRANSFORM should handle different data types correctly") {
  // The test needs a `python` executable; `assume` guards on its availability.
  assume(TestUtils.testCommandAvailable("python"))
  val scriptFilePath = getTestResourcePath("test_script.py")

  withTempView("v") {
    // One column per data type under test.
    // Note column d's data type is Decimal(38, 18)
    val df = Seq(
      (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1), Date.valueOf("2015-05-21")),
      (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2), Date.valueOf("2015-05-22")),
      (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3), Date.valueOf("2015-05-23"))
    ).toDF("a", "b", "c", "d", "e", "f")
    df.createTempView("v")

    // No output types are declared in the AS clause.
    // NOTE(review): test_script.py is presumably a pass-through script - confirm.
    val query = sql(
      s"""
         |SELECT
         |TRANSFORM(a, b, c, d, e, f)
         |USING 'python $scriptFilePath' AS (a, b, c, d, e, f)
         |FROM v
       """.stripMargin)

    val decimalToString: Column => Column = c => c.cast("string")

    // Expected answer: every input column cast to string.
    checkAnswer(query, identity, df.select(
      'a.cast("string"),
      'b.cast("string"),
      'c.cast("string"),
      decimalToString('d),
      'e.cast("string"),
      'f.cast("string")).collect())
  }
}
we get the wrong result:
[info] - SPARK-25990: TRANSFORM should handle different data types correctly *** FAILED *** (4 seconds, 997 milliseconds)
[info] Results do not match for Spark plan:
[info] ScriptTransformation [a#19, b#20, c#21, d#22, e#23, f#24], python /Users/angerszhu/Documents/project/AngersZhu/spark/sql/core/target/scala-2.12/test-classes/test_script.py, [a#31, b#32, c#33, d#34, e#35, f#36], org.apache.spark.sql.execution.script.ScriptTransformIOSchema@1ad5a29c
[info] +- Project [_1#6 AS a#19, _2#7 AS b#20, _3#8 AS c#21, _4#9 AS d#22, _5#10 AS e#23, _6#11 AS f#24]
[info] +- LocalTableScan [_1#6, _2#7, _3#8, _4#9, _5#10, _6#11]
[info]
[info]
[info] == Results ==
[info] !== Expected Answer - 3 == == Actual Answer - 3 ==
[info] ![1,1,1.0,1.000000000000000000,1970-01-01 08:00:00.001,2015-05-21] [1,1,1.0,1.000000000000000000,1000,16576]
[info] ![2,2,2.0,2.000000000000000000,1970-01-01 08:00:00.002,2015-05-22] [2,2,2.0,2.000000000000000000,2000,16577]
[info] ![3,3,3.0,3.000000000000000000,1970-01-01 08:00:00.003,2015-05-23] [3,3,3.0,3.000000000000000000,3000,16578] (SparkPlanTest.scala:95) [