Details
-
New Feature
-
Status: Open
-
Major
-
Resolution: Unresolved
-
None
-
None
Description
It seems that when getting a value from a StructScalar, the extension type information is lost. See:
import pyarrow as pa


class ExampleScalar(pa.ExtensionScalar):
    """Extension scalar that announces every ``as_py`` conversion."""

    def as_py(self):
        """Print a trace line, then return the storage value as a Python object."""
        # The f prefix is required for the placeholder to be interpolated;
        # without it the braces are printed literally.
        print(f"ExampleScalar.as_py -> {self.value.as_py()}")
        return self.value.as_py()


class ExampleArray(pa.ExtensionArray):
    """Extension array whose indexing returns a descriptive string."""

    def __getitem__(self, item):
        """Return a string naming the index and the matching storage element."""
        return f"ExampleArray.__getitem__[{item}] -> {self.storage[item]}"

    # NOTE(review): pyarrow documents __arrow_ext_scalar_class__ as a hook on
    # ExtensionType; defined here on the array class it is presumably never
    # consulted. Left in place to keep the reproduction as reported - confirm.
    def __arrow_ext_scalar_class__(self):
        """Return the scalar class meant to represent single elements."""
        return ExampleScalar


class ExampleType(pa.ExtensionType):
    """Extension type named "ExampleExtensionType" stored as int64."""

    def __init__(self):
        pa.ExtensionType.__init__(self, pa.int64(), "ExampleExtensionType")

    def __arrow_ext_serialize__(self):
        """Return the serialized type parameters (empty: there are none)."""
        return b""

    def __arrow_ext_class__(self):
        """Return the array class used for arrays of this type."""
        return ExampleArray


example_type = ExampleType()
arr = pa.array([1, 2, 3])
example_array = pa.ExtensionArray.from_storage(example_type, arr)
example_array2 = pa.StructArray.from_arrays([example_array, arr], ["a", "b"])

# Example 1: index the extension array directly.
print("\nExample 1\n=========")
print(example_array[0])
print(example_array.type)
print(type(example_array[0]))

# Example 2: reach the same extension data through a struct scalar field.
print("\nExample 2\n=========")
print(example_array2[0])
print(example_array2[0].type)
print(example_array2[0]["a"])
print(example_array2[0]["a"].type)
Returns:

Example 1
=========
ExampleArray.__getitem__[0] -> 1
extension<ExampleExtensionType<ExampleType>>
<class 'str'>

Example 2
=========
[('a', 1), ('b', 1)]
struct<a: extension<ExampleExtensionType<ExampleType>>, b: int64>
1
extension<ExampleExtensionType<ExampleType>>