[Python-modules-team] Bug#950924: python-feather-format: FTBFS with pandas 1.0: test_boolean_object_nulls / test_sparse_dataframe fail
Rebecca N. Palmer
rebecca_palmer at zoho.com
Sat Feb 8 13:31:52 GMT 2020
Package: python3-feather-format
Version: 0.3.1+dfsg1-3
Control: tags -1 patch
Control: block 950430 by -1
Two tests fail when the package is built against pandas 1.0 (currently in experimental):
======================================================================
ERROR: test_boolean_object_nulls (feather.tests.test_reader.TestFeatherReader)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/build/python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py", line 248, in test_boolean_object_nulls
self._check_pandas_roundtrip(df, null_counts=[1 * repeats])
File "/build/python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py", line 70, in _check_pandas_roundtrip
feather.write_dataframe(df, path)
File "/build/python-feather-format-0.3.1+dfsg1/feather/api.py", line 57, in write_dataframe
raise ValueError(msg)
ValueError: cannot serialize column 0 named bools with dtype boolean
======================================================================
ERROR: test_sparse_dataframe (feather.tests.test_reader.TestFeatherReader)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/build/python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py", line 346, in test_sparse_dataframe
df = pd.DataFrame(data).to_sparse(fill_value=1)
File "/usr/lib/python3/dist-packages/pandas/core/generic.py", line 5273, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'to_sparse'
----------------------------------------------------------------------
Ran 25 tests in 0.255s
FAILED (errors=2)
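Fix (convert sparse columns to dense before writing, call infer_dtype with skipna=False, and update the sparse test to use pd.SparseDtype instead of the removed to_sparse()):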
--- python-feather-format-0.3.1+dfsg1.orig/feather/api.py
+++ python-feather-format-0.3.1+dfsg1/feather/api.py
@@ -39,9 +39,11 @@ def write_dataframe(df, path):
# TODO(wesm): pipeline conversion to Arrow memory layout
for i, name in enumerate(df.columns):
col = df.iloc[:, i]
+ if pandas.api.types.is_sparse(col):
+ col = col.sparse.to_dense()
if pdapi.is_object_dtype(col):
- inferred_type = pandas.api.types.infer_dtype(col)
+ inferred_type = pandas.api.types.infer_dtype(col, skipna=False)
msg = ("cannot serialize column {n} "
"named {name} with dtype {dtype}".format(
n=i, name=name, dtype=inferred_type))
--- python-feather-format-0.3.1+dfsg1.orig/feather/tests/test_reader.py
+++ python-feather-format-0.3.1+dfsg1/feather/tests/test_reader.py
@@ -343,8 +343,8 @@ class TestFeatherReader(unittest.TestCas
# GH #221
data = {'A': [0,1,2],
'B': [1,0,1]}
- df = pd.DataFrame(data).to_sparse(fill_value=1)
- expected = df.to_dense()
+ df = pd.DataFrame(data).astype(pd.SparseDtype(int, fill_value=1))
+ expected = df.sparse.to_dense()
self._check_pandas_roundtrip(df, expected)
def test_duplicate_columns(self):
More information about the Python-modules-team
mailing list