Uploaded image for project: 'Apache Arrow'
  1. Apache Arrow
  2. ARROW-18394

[CI][Python] Nightly Python pandas jobs using latest or upstream_devel fail

    XML Word Printable JSON

Details

    Description

      Currently the following jobs fail:

      test-conda-python-3.8-pandas-nightly https://github.com/ursacomputing/crossbow/actions/runs/3532562061/jobs/5927065343
      test-conda-python-3.9-pandas-upstream_devel https://github.com/ursacomputing/crossbow/actions/runs/3532562477/jobs/5927066168

      with:

      _________________ test_roundtrip_with_bytes_unicode[columns0] __________________

      columns = [b'foo']

          @pytest.mark.parametrize('columns', ([b'foo'], ['foo']))
          def test_roundtrip_with_bytes_unicode(columns):
              df = pd.DataFrame(columns=columns)
              table1 = pa.Table.from_pandas(df)
      >       table2 = pa.Table.from_pandas(table1.to_pandas())

      opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/tests/test_pandas.py:2867:
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      pyarrow/array.pxi:830: in pyarrow.lib._PandasConvertible.to_pandas
          ???
      pyarrow/table.pxi:3908: in pyarrow.lib.Table._to_pandas
          ???
      opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:819: in table_to_blockmanager
          columns = _deserialize_column_index(table, all_columns, column_indexes)
      opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:935: in _deserialize_column_index
          columns = _reconstruct_columns_from_metadata(columns, column_indexes)
      opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1154: in _reconstruct_columns_from_metadata
          level = level.astype(dtype)
      opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/indexes/base.py:1029: in astype
          return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
      opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/indexes/base.py:518: in __new__
          klass = cls._dtype_to_subclass(arr.dtype)
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ cls = <class 'pandas.core.indexes.base.Index'>, dtype = dtype('S3')    @final
          @classmethod
          def _dtype_to_subclass(cls, dtype: DtypeObj):
              # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
          
              if isinstance(dtype, ExtensionDtype):
                  if isinstance(dtype, DatetimeTZDtype):
                      from pandas import DatetimeIndex
          
                      return DatetimeIndex
                  elif isinstance(dtype, CategoricalDtype):
                      from pandas import CategoricalIndex
          
                      return CategoricalIndex
                  elif isinstance(dtype, IntervalDtype):
                      from pandas import IntervalIndex
          
                      return IntervalIndex
                  elif isinstance(dtype, PeriodDtype):
                      from pandas import PeriodIndex
          
                      return PeriodIndex
          
                  return Index
          
              if dtype.kind == "M":
                  from pandas import DatetimeIndex
          
                  return DatetimeIndex
          
              elif dtype.kind == "m":
                  from pandas import TimedeltaIndex
          
                  return TimedeltaIndex
          
              elif dtype.kind == "f":
                  from pandas.core.api import Float64Index
          
                  return Float64Index
              elif dtype.kind == "u":
                  from pandas.core.api import UInt64Index
          
                  return UInt64Index
              elif dtype.kind == "i":
                  from pandas.core.api import Int64Index
          
                  return Int64Index
          
              elif dtype.kind == "O":
                  # NB: assuming away MultiIndex
                  return Index
          
              elif issubclass(
                  dtype.type, (str, bool, np.bool_, complex, np.complex64, np.complex128)
              ):
                  return Index
          
      >       raise NotImplementedError(dtype)
      E       NotImplementedError: |S3

      opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/indexes/base.py:595: NotImplementedError

      Attachments

        Issue Links

          Activity

            People

              jorisvandenbossche Joris Van den Bossche
              raulcd Raúl Cumplido
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0h
                  0h
                  Logged:
                  Time Spent - 2h 20m
                  2h 20m