Uploaded image for project: 'Apache Arrow'
  1. Apache Arrow
  2. ARROW-16372

[Python] Tests failing on s390x because they use Parquet

    XMLWordPrintableJSON

Details

    Description

      If I understand correctly, the Parquet implementation does not work on big-endian architectures, so these tests need to be properly marked (e.g., with the `parquet` pytest marker so they are skipped when Parquet support is unavailable, such as on s390x).

      https://app.travis-ci.com/github/apache/arrow/jobs/568309096

      =================================== FAILURES ===================================
      
      ______________________________ test_dataset_join _______________________________
      
      tempdir = PosixPath('/tmp/pytest-of-root/pytest-0/test_dataset_join0')
      
          @pytest.mark.dataset
      
          def test_dataset_join(tempdir):
      
              t1 = pa.table({
      
                  "colA": [1, 2, 6],
      
                  "col2": ["a", "b", "f"]
      
              })
      
      >       ds.write_dataset(t1, tempdir / "t1", format="parquet")
      
      usr/local/lib/python3.8/dist-packages/pyarrow/tests/test_dataset.py:4428: 
      
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      usr/local/lib/python3.8/dist-packages/pyarrow/dataset.py:880: in write_dataset
      
          format = _ensure_format(format)
      
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      obj = 'parquet'
      
          def _ensure_format(obj):
      
              if isinstance(obj, FileFormat):
      
                  return obj
      
              elif obj == "parquet":
      
                  if not _parquet_available:
      
      >               raise ValueError(_parquet_msg)
      
      E               ValueError: The pyarrow installation is not built with support for the Parquet file format.
      
      usr/local/lib/python3.8/dist-packages/pyarrow/dataset.py:283: ValueError
      
      _________________________ test_dataset_join_unique_key _________________________
      
      tempdir = PosixPath('/tmp/pytest-of-root/pytest-0/test_dataset_join_unique_key0')
      
          @pytest.mark.dataset
      
          def test_dataset_join_unique_key(tempdir):
      
              t1 = pa.table({
      
                  "colA": [1, 2, 6],
      
                  "col2": ["a", "b", "f"]
      
              })
      
      >       ds.write_dataset(t1, tempdir / "t1", format="parquet")
      
      usr/local/lib/python3.8/dist-packages/pyarrow/tests/test_dataset.py:4459: 
      
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      usr/local/lib/python3.8/dist-packages/pyarrow/dataset.py:880: in write_dataset
      
          format = _ensure_format(format)
      
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      obj = 'parquet'
      
          def _ensure_format(obj):
      
              if isinstance(obj, FileFormat):
      
                  return obj
      
              elif obj == "parquet":
      
                  if not _parquet_available:
      
      >               raise ValueError(_parquet_msg)
      
      E               ValueError: The pyarrow installation is not built with support for the Parquet file format.
      
      usr/local/lib/python3.8/dist-packages/pyarrow/dataset.py:283: ValueError
      
      _________________________ test_dataset_join_collisions _________________________
      
      tempdir = PosixPath('/tmp/pytest-of-root/pytest-0/test_dataset_join_collisions0')
      
          @pytest.mark.dataset
      
          def test_dataset_join_collisions(tempdir):
      
              t1 = pa.table({
      
                  "colA": [1, 2, 6],
      
                  "colB": [10, 20, 60],
      
                  "colVals": ["a", "b", "f"]
      
              })
      
      >       ds.write_dataset(t1, tempdir / "t1", format="parquet")
      
      usr/local/lib/python3.8/dist-packages/pyarrow/tests/test_dataset.py:4491: 
      
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      usr/local/lib/python3.8/dist-packages/pyarrow/dataset.py:880: in write_dataset
      
          format = _ensure_format(format)
      
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      obj = 'parquet'
      
          def _ensure_format(obj):
      
              if isinstance(obj, FileFormat):
      
                  return obj
      
              elif obj == "parquet":
      
                  if not _parquet_available:
      
      >               raise ValueError(_parquet_msg)
      
      E               ValueError: The pyarrow installation is not built with support for the Parquet file format.
      
      usr/local/lib/python3.8/dist-packages/pyarrow/dataset.py:283: ValueError
      
      _________________________ test_parquet_invalid_version _________________________
      
      tempdir = PosixPath('/tmp/pytest-of-root/pytest-0/test_parquet_invalid_version0')
      
          def test_parquet_invalid_version(tempdir):
      
              table = pa.table({'a': [1, 2, 3]})
      
              with pytest.raises(ValueError, match="Unsupported Parquet format version"):
      
      >           _write_table(table, tempdir / 'test_version.parquet', version="2.2")
      
      E           NameError: name '_write_table' is not defined
      
      usr/local/lib/python3.8/dist-packages/pyarrow/tests/parquet/test_basic.py:52: NameError

      Attachments

        Issue Links

          Activity

            People

              amol- Alessandro Molina
              lidavidm David Li
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0h
                  0h
                  Logged:
                  Time Spent - 1h 20m
                  1h 20m