java.io.IOException: Illegal file pattern: error parsing regexp: Unclosed character class at pos 8: `['python`
at org.apache.hadoop.fs.GlobFilter.init(GlobFilter.java:71)
at org.apache.hadoop.fs.GlobFilter.<init>(GlobFilter.java:50)
at org.apache.hadoop.fs.Globber.doGlob(Globber.java:265)
at org.apache.hadoop.fs.Globber.glob(Globber.java:202)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:2124)
at org.apache.spark.deploy.SparkHadoopUtil.globPath(SparkHadoopUtil.scala:254)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$checkAndGlobPathIfNecessary$3(DataSource.scala:736)
at org.apache.spark.util.ThreadUtils$.$anonfun$parmap$2(ThreadUtils.scala:393)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1402)
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1067)
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1703)
at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:172)
Caused by: org.apache.hadoop.shaded.com.google.re2j.PatternSyntaxException: error parsing regexp: Unclosed character class at pos 8: `['python`
at org.apache.hadoop.fs.GlobPattern.error(GlobPattern.java:168)
at org.apache.hadoop.fs.GlobPattern.set(GlobPattern.java:151)
at org.apache.hadoop.fs.GlobPattern.<init>(GlobPattern.java:42)
at org.apache.hadoop.fs.GlobFilter.init(GlobFilter.java:67)
... 19 more
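
Root cause, as the Java trace suggests: over Spark Connect the list of paths appears to reach Hadoop as the Python list's repr rather than as individual paths, so the glob parser treats the leading "[" as the start of a character class and fails. A minimal sketch of that hypothesis (plain Python, no Spark required) — note that the first eight characters of the repr are exactly the fragment re2j quotes:

    # Hypothesis sketch: str() on the path list produces the bracketed repr;
    # its first 8 characters match the "pos 8" fragment quoted in the error above.
    paths = ["python/test_support/sql/text-test.txt", "python/test_support/sql/text-test.txt"]
    print(str(paths)[:8])  # prints: ['python
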
pyspark/sql/tests/test_datasources.py:123 (DataSourcesParityTests.test_read_text_file_list)
self = <pyspark.sql.tests.connect.test_parity_datasources.DataSourcesParityTests testMethod=test_read_text_file_list>
    def test_read_text_file_list(self):
        df = self.spark.read.text(
            ["python/test_support/sql/text-test.txt", "python/test_support/sql/text-test.txt"]
        )
>       count = df.count()
../test_datasources.py:128:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../connect/dataframe.py:177: in count
    pdd = self.agg(_invoke_function("count", lit(1))).toPandas()
../../connect/dataframe.py:1297: in toPandas
    return self._session.client.to_pandas(query)
../../connect/client.py:422: in to_pandas
    table, metrics = self._execute_and_fetch(req)
../../connect/client.py:593: in _execute_and_fetch
    self._handle_error(rpc_error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb160b85580>
rpc_error = <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.INTERNAL
details = "Illegal file pattern:...tatus:13, grpc_message:"Illegal file pattern: error parsing regexp: Unclosed character class at pos 8: `[\'python`"}"
>
    def _handle_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        """
        Error handling helper for dealing with GRPC Errors. On the server side, certain
        exceptions are enriched with additional RPC Status information. These are
        unpacked in this function and put into the exception.

        To avoid overloading the user with GRPC errors, this message explicitly
        swallows the error context from the call. This GRPC Error is logged however,
        and can be enabled.

        Parameters
        ----------
        rpc_error : grpc.RpcError
           RPC Error containing the details of the exception.

        Returns
        -------
        Throws the appropriate internal Python exception.
        """
        logger.exception("GRPC Error received")
        # We have to cast the value here because, a RpcError is a Call as well.
        # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
        status = rpc_status.from_call(cast(grpc.Call, rpc_error))
        if status:
            for d in status.details:
                if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                    info = error_details_pb2.ErrorInfo()
                    d.Unpack(info)
                    if info.reason == "org.apache.spark.sql.AnalysisException":
                        raise SparkConnectAnalysisException(
                            info.reason, info.metadata["message"], info.metadata["plan"]
                        ) from None
                    else:
                        raise SparkConnectException(status.message, info.reason) from None
>       raise SparkConnectException(status.message) from None
E pyspark.sql.connect.client.SparkConnectException: Illegal file pattern: error parsing regexp: Unclosed character class at pos 8: `['python`
../../connect/client.py:638: SparkConnectException
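
Until reading a list of paths works over Spark Connect, a possible workaround sketch (hypothetical, assuming single-path reads succeed on the same `spark` session the test uses): read each file separately and union the resulting single-column DataFrames.

    from functools import reduce

    paths = ["python/test_support/sql/text-test.txt", "python/test_support/sql/text-test.txt"]
    # spark.read.text(p) returns a DataFrame with one `value` column,
    # so a positional union reproduces the multi-path read.
    df = reduce(lambda a, b: a.union(b), (spark.read.text(p) for p in paths))
    print(df.count())
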