Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / io / test_feather.py

""" test feather-format compat """
from distutils.version import LooseVersion

import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, ensure_clean

from pandas.io.feather_format import read_feather, to_feather  # noqa:E402

# Import pyarrow through pytest so that the tests in this module are
# skipped (rather than erroring) when pyarrow is not installed.
pyarrow = pytest.importorskip('pyarrow')


# Parsed pyarrow version, usable for version comparisons.
# NOTE(review): not referenced in the visible part of this file.
pyarrow_version = LooseVersion(pyarrow.__version__)


@pytest.mark.single
class TestFeather(object):
    """Round-trip and error-path tests for ``to_feather``/``read_feather``."""

    def check_error_on_write(self, df, exc):
        """Assert that writing ``df`` with ``to_feather`` raises ``exc``.

        Parameters
        ----------
        df : object
            The object handed to ``to_feather``.
        exc : Exception subclass
            The exception type expected while writing.
        """
        # check that we are raising the exception
        # on writing

        with pytest.raises(exc):
            with ensure_clean() as path:
                to_feather(df, path)

    def check_round_trip(self, df, expected=None, **kwargs):
        """Write ``df`` to a temporary path, read it back and compare.

        Parameters
        ----------
        df : DataFrame
            The frame to write.
        expected : DataFrame, optional
            The frame the read result is compared against; defaults to
            ``df`` itself.
        **kwargs
            Forwarded to ``read_feather``.
        """
        if expected is None:
            expected = df

        with ensure_clean() as path:
            to_feather(df, path)

            result = read_feather(path, **kwargs)
            assert_frame_equal(result, expected)

    def test_error(self):
        """Writing anything that is not a DataFrame raises ValueError."""
        for obj in [pd.Series([1, 2, 3]), 1, 'foo', pd.Timestamp('20130101'),
                    np.array([1, 2, 3])]:
            self.check_error_on_write(obj, ValueError)

    def test_basic(self):
        """A frame mixing the common column dtypes survives a round trip."""
        df = pd.DataFrame({'string': list('abc'),
                           'int': list(range(1, 4)),
                           'uint': np.arange(3, 6).astype('u1'),
                           'float': np.arange(4.0, 7.0, dtype='float64'),
                           'float_with_null': [1., np.nan, 3],
                           'bool': [True, False, True],
                           'bool_with_null': [True, np.nan, False],
                           'cat': pd.Categorical(list('abc')),
                           'dt': pd.date_range('20130101', periods=3),
                           'dttz': pd.date_range('20130101', periods=3,
                                                 tz='US/Eastern'),
                           'dt_with_null': [pd.Timestamp('20130101'), pd.NaT,
                                            pd.Timestamp('20130103')],
                           'dtns': pd.date_range('20130101', periods=3,
                                                 freq='ns')})

        # sanity check on the input before it is written
        assert df.dttz.dtype.tz.zone == 'US/Eastern'
        self.check_round_trip(df)

    def test_duplicate_columns(self):
        """Duplicate column labels are rejected on write."""
        # https://github.com/wesm/feather/issues/53
        # not currently able to handle duplicate columns
        df = pd.DataFrame(np.arange(12).reshape(4, 3),
                          columns=list('aaa')).copy()
        self.check_error_on_write(df, ValueError)

    def test_stringify_columns(self):
        """Default (integer) column labels are rejected on write."""
        df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy()
        self.check_error_on_write(df, ValueError)

    def test_read_columns(self):
        """``columns=`` on read restricts the result to those columns."""
        # GH 24025
        df = pd.DataFrame({'col1': list('abc'),
                           'col2': list(range(1, 4)),
                           'col3': list('xyz'),
                           'col4': list(range(4, 7))})
        columns = ['col1', 'col3']
        self.check_round_trip(df, expected=df[columns],
                              columns=columns)

    def test_unsupported_other(self):
        """A period column cannot be written."""
        # period
        df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
        # Some versions raise ValueError, others raise ArrowInvalid.
        self.check_error_on_write(df, Exception)

    def test_rw_nthreads(self):
        """The deprecated ``nthreads`` keyword still works but warns."""
        df = pd.DataFrame({'A': np.arange(100000)})
        expected_warning = (
            "the 'nthreads' keyword is deprecated, "
            "use 'use_threads' instead"
        )
        # the same check is run once per nthreads value
        for nthreads in (2, 1):
            # TODO: make the warning work with check_stacklevel=True
            with tm.assert_produces_warning(
                    FutureWarning, check_stacklevel=False) as w:
                self.check_round_trip(df, nthreads=nthreads)
            # we have an extra FutureWarning because of #GH23752
            assert any(expected_warning in str(x) for x in w)

    def test_rw_use_threads(self):
        """Round trip works with ``use_threads`` both on and off."""
        df = pd.DataFrame({'A': np.arange(100000)})
        self.check_round_trip(df, use_threads=True)
        self.check_round_trip(df, use_threads=False)

    def test_write_with_index(self):
        """Only a default, unnamed index and flat columns can be written."""
        df = pd.DataFrame({'A': [1, 2, 3]})
        self.check_round_trip(df)

        # non-default index
        for index in [[2, 3, 4],
                      pd.date_range('20130101', periods=3),
                      list('abc'),
                      [1, 3, 4],
                      pd.MultiIndex.from_tuples([('a', 1), ('a', 2),
                                                 ('b', 1)]),
                      ]:

            df.index = index
            self.check_error_on_write(df, ValueError)

        # index with meta-data
        df.index = [0, 1, 2]
        df.index.name = 'foo'
        self.check_error_on_write(df, ValueError)

        # column multi-index
        df.index = [0, 1, 2]
        # The frame has a single column, so the MultiIndex must have exactly
        # one entry.  No trailing comma: that would assign a 1-tuple wrapping
        # the MultiIndex instead of the MultiIndex itself.
        df.columns = pd.MultiIndex.from_tuples([('a', 1)])
        self.check_error_on_write(df, ValueError)

    def test_path_pathlib(self):
        """Round trip through the ``tm.round_trip_pathlib`` helper."""
        df = tm.makeDataFrame().reset_index()
        result = tm.round_trip_pathlib(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, result)

    def test_path_localpath(self):
        """Round trip through the ``tm.round_trip_localpath`` helper."""
        df = tm.makeDataFrame().reset_index()
        result = tm.round_trip_localpath(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, result)