Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / frame / test_api.py

# -*- coding: utf-8 -*-

from __future__ import print_function

# pylint: disable-msg=W0612,E1101
from copy import deepcopy
import pydoc

import numpy as np
import pytest

from pandas.compat import long, lrange, range

import pandas as pd
from pandas import (
    Categorical, DataFrame, Series, SparseDataFrame, compat, date_range,
    timedelta_range)
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_almost_equal, assert_frame_equal, assert_series_equal)


class SharedWithSparse(object):
    """
    A collection of tests DataFrame and SparseDataFrame can share.

    In generic tests on this class, use ``self._assert_frame_equal()`` and
    ``self._assert_series_equal()`` which are implemented in sub-classes
    and dispatch correctly.
    """
    def _assert_frame_equal(self, left, right):
        """Dispatch to frame class dependent assertion"""
        raise NotImplementedError

    def _assert_series_equal(self, left, right):
        """Dispatch to series class dependent assertion"""
        raise NotImplementedError

    def test_copy_index_name_checking(self, float_frame):
        # don't want to be able to modify the index stored elsewhere after
        # making a copy
        for attr in ('index', 'columns'):
            ind = getattr(float_frame, attr)
            ind.name = None
            cp = float_frame.copy()
            getattr(cp, attr).name = 'foo'
            assert getattr(float_frame, attr).name is None

    def test_getitem_pop_assign_name(self, float_frame):
        s = float_frame['A']
        assert s.name == 'A'

        s = float_frame.pop('A')
        assert s.name == 'A'

        s = float_frame.loc[:, 'B']
        assert s.name == 'B'

        s2 = s.loc[:]
        assert s2.name == 'B'

    def test_get_value(self, float_frame):
        """
        ``get_value`` returns the same scalar as ``frame[col][idx]``.

        Every call is expected to emit a ``FutureWarning``.
        """
        columns = float_frame.columns
        for row_label in float_frame.index:
            for col_label in columns:
                with tm.assert_produces_warning(FutureWarning,
                                                check_stacklevel=False):
                    actual = float_frame.get_value(row_label, col_label)
                wanted = float_frame[col_label][row_label]
                tm.assert_almost_equal(actual, wanted)

    def test_add_prefix_suffix(self, float_frame):
        """
        ``add_prefix`` / ``add_suffix`` decorate every column label.

        The ``%`` cases check that a percent sign is handled as a plain
        character in the prefix or suffix.
        """
        labels = float_frame.columns

        prefixed = float_frame.add_prefix('foo#').columns
        tm.assert_index_equal(
            prefixed, pd.Index(['foo#%s' % label for label in labels]))

        suffixed = float_frame.add_suffix('#foo').columns
        tm.assert_index_equal(
            suffixed, pd.Index(['%s#foo' % label for label in labels]))

        pct_prefixed = float_frame.add_prefix('%').columns
        tm.assert_index_equal(
            pct_prefixed, pd.Index(['%{}'.format(label) for label in labels]))

        pct_suffixed = float_frame.add_suffix('%').columns
        tm.assert_index_equal(
            pct_suffixed, pd.Index(['{}%'.format(label) for label in labels]))

    def test_get_axis(self, float_frame):
        """Check alias resolution done by the private ``_get_axis*`` helpers."""
        frame = float_frame

        # alias -> axis number
        number_cases = [(0, 0), (1, 1), ('index', 0), ('rows', 0),
                        ('columns', 1)]
        for alias, number in number_cases:
            assert frame._get_axis_number(alias) == number

        # alias -> canonical axis name
        name_cases = [(0, 'index'), (1, 'columns'), ('index', 'index'),
                      ('rows', 'index'), ('columns', 'columns')]
        for alias, label in name_cases:
            assert frame._get_axis_name(alias) == label

        # ``_get_axis`` hands back the stored axis objects themselves.
        assert frame._get_axis(0) is frame.index
        assert frame._get_axis(1) is frame.columns

        # Unknown aliases, ``None`` included, are rejected with ValueError.
        with pytest.raises(ValueError, match='No axis named'):
            frame._get_axis_number(2)

        with pytest.raises(ValueError, match='No axis.*foo'):
            frame._get_axis_name('foo')

        with pytest.raises(ValueError, match='No axis.*None'):
            frame._get_axis_name(None)

        with pytest.raises(ValueError, match='No axis named'):
            frame._get_axis_number(None)

    def test_keys(self, float_frame):
        getkeys = float_frame.keys
        assert getkeys() is float_frame.columns

    def test_column_contains_typeerror(self, float_frame):
        try:
            float_frame.columns in float_frame
        except TypeError:
            pass

    def test_tab_completion(self):
        # DataFrame whose columns are identifiers shall have them in __dir__.
        df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD'))
        for key in list('ABCD'):
            assert key in dir(df)
        assert isinstance(df.__getitem__('A'), pd.Series)

        # DataFrame whose first-level columns are identifiers shall have
        # them in __dir__.
        df = pd.DataFrame(
            [list('abcd'), list('efgh')],
            columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH'))))
        for key in list('ABCD'):
            assert key in dir(df)
        for key in list('EFGH'):
            assert key not in dir(df)
        assert isinstance(df.__getitem__('A'), pd.DataFrame)

    def test_not_hashable(self, empty_frame):
        """``hash`` raises ``TypeError`` for populated and empty frames."""
        populated = self.klass([1])
        for frame in (populated, empty_frame):
            with pytest.raises(TypeError):
                hash(frame)

    def test_new_empty_index(self):
        df1 = self.klass(np.random.randn(0, 3))
        df2 = self.klass(np.random.randn(0, 3))
        df1.index.name = 'foo'
        assert df2.index.name is None

    def test_array_interface(self, float_frame):
        """
        ``np.sqrt(frame)`` returns a frame of the same type on the same axes.

        The result is also compared with ``frame.apply(np.sqrt)``.
        """
        # Floating-point warnings raised by the ufunc are silenced here.
        with np.errstate(all='ignore'):
            rooted = np.sqrt(float_frame)

        assert isinstance(rooted, type(float_frame))
        assert rooted.index is float_frame.index
        assert rooted.columns is float_frame.columns

        wanted = float_frame.apply(np.sqrt)
        self._assert_frame_equal(rooted, wanted)

    def test_get_agg_axis(self, float_frame):
        """
        ``_get_agg_axis(0)`` is ``columns`` and ``_get_agg_axis(1)`` is
        ``index``; axis number 2 raises ``ValueError``.
        """
        assert float_frame._get_agg_axis(0) is float_frame.columns
        assert float_frame._get_agg_axis(1) is float_frame.index

        with pytest.raises(ValueError):
            float_frame._get_agg_axis(2)

    def test_nonzero(self, float_frame, float_string_frame, empty_frame):
        assert empty_frame.empty

        assert not float_frame.empty
        assert not float_string_frame.empty

        # corner case
        df = DataFrame({'A': [1., 2., 3.],
                        'B': ['a', 'b', 'c']},
                       index=np.arange(3))
        del df['A']
        assert not df.empty

    def test_iteritems(self):
        """
        Each value yielded by ``compat.iteritems(frame)`` is an instance of
        ``klass._constructor_sliced``, even with duplicate column labels.
        """
        frame = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
        sliced_type = self.klass._constructor_sliced
        for _, column in compat.iteritems(frame):
            assert isinstance(column, sliced_type)

    def test_items(self):
        # GH 17213, GH 13918
        cols = ['a', 'b', 'c']
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
        for c, (k, v) in zip(cols, df.items()):
            assert c == k
            assert isinstance(v, Series)
            assert (df[k] == v).all()

    def test_iter(self, float_frame):
        """Iterating a frame gives the same contents as its ``columns``."""
        iterated = list(float_frame)
        assert tm.equalContents(iterated, float_frame.columns)

    def test_iterrows(self, float_frame, float_string_frame):
        for k, v in float_frame.iterrows():
            exp = float_frame.loc[k]
            self._assert_series_equal(v, exp)

        for k, v in float_string_frame.iterrows():
            exp = float_string_frame.loc[k]
            self._assert_series_equal(v, exp)

    def test_iterrows_iso8601(self):
        """
        ``iterrows`` matches ``loc`` on a frame that mixes a string column
        with a datetime column (GH 19671).

        Marked as an expected failure when ``klass`` is ``SparseDataFrame``.
        """
        # GH 19671
        if self.klass == SparseDataFrame:
            pytest.xfail(reason='SparseBlock datetime type not implemented.')

        data = {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'],
                'iso8601': date_range('2000-01-01', periods=4, freq='M')}
        frame = self.klass(data)
        for label, row in frame.iterrows():
            wanted = frame.loc[label]
            self._assert_series_equal(row, wanted)

    def test_itertuples(self, float_frame):
        """
        Exercise ``itertuples``: row values, element types, duplicate
        columns, tuple naming, and the plain-tuple fallback for wide frames.
        """
        series_type = self.klass._constructor_sliced
        for position, row in enumerate(float_frame.itertuples()):
            # Element 0 becomes the Series name, the rest its values.
            rebuilt = series_type(row[1:])
            rebuilt.name = row[0]
            wanted = float_frame.iloc[position, :].reset_index(drop=True)
            self._assert_series_equal(rebuilt, wanted)

        typed = self.klass({'floats': np.random.randn(5),
                            'ints': lrange(5)}, columns=['floats', 'ints'])

        for row in typed.itertuples(index=False):
            assert isinstance(row[1], (int, long))

        frame = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
        # Selecting the same column twice gives duplicate labels.
        repeated = frame[['a', 'a']]

        wanted_rows = [(0, 1, 1), (1, 2, 2), (2, 3, 3)]
        assert list(repeated.itertuples()) == wanted_rows

        # repr will be int/long on 32-bit/windows
        if (not compat.is_platform_windows() and
                not compat.is_platform_32bit()):
            plain_repr = repr(list(frame.itertuples(name=None)))
            assert plain_repr == '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]'

        named = next(frame.itertuples(name='TestName'))
        assert named._fields == ('Index', 'a', 'b')
        assert (named.Index, named.a, named.b) == named
        assert type(named).__name__ == 'TestName'

        # Column labels that are Python keywords become positional names.
        frame.columns = ['def', 'return']
        renamed = next(frame.itertuples(name='TestName'))
        assert renamed == (0, 1, 4)
        assert renamed._fields == ('Index', '_1', '_2')

        wide = DataFrame({'f' + str(i): [i] for i in range(1024)})
        # will raise SyntaxError if trying to create namedtuple
        wide_row = next(wide.itertuples())
        assert not hasattr(wide_row, '_fields')
        assert isinstance(wide_row, tuple)

    def test_sequence_like_with_categorical(self):
        """
        A frame holding a Categorical column can be iterated (GH 7839).

        The iteration loops only stringify what they yield; they pass as
        long as nothing raises.
        """
        # GH 7839
        # make sure can iterate
        df = DataFrame({"id": [1, 2, 3, 4, 5, 6],
                        "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
        df['grade'] = Categorical(df['raw_grade'])

        # basic sequencing testing
        result = list(df.grade.values)
        expected = np.array(df.grade.values).tolist()
        tm.assert_almost_equal(result, expected)

        # iteration
        for t in df.itertuples(index=False):
            str(t)

        for _, row in df.iterrows():
            str(row)

        # Stringify the column just yielded, not a row left over from the
        # ``iterrows`` loop above.
        for _, col in df.iteritems():
            str(col)

    def test_len(self, float_frame):
        assert len(float_frame) == len(float_frame.index)

    def test_values(self, float_frame, float_string_frame):
        """
        ``.values`` mirrors the frame cell by cell; also covers mixed-type,
        complex and column-subset frames.
        """
        matrix = float_frame.values
        labels = float_frame.columns

        for row_pos, row in enumerate(matrix):
            for col_pos, cell in enumerate(row):
                stored = float_frame[labels[col_pos]][row_pos]
                # NaN never compares equal, so it needs its own check.
                if np.isnan(cell):
                    assert np.isnan(stored)
                else:
                    assert cell == stored

        # mixed type
        mixed = float_string_frame[['foo', 'A']].values
        assert mixed[0, 0] == 'bar'

        with_complex = self.klass({'complex': [1j, 2j, 3j],
                                   'real': [1, 2, 3]})
        assert with_complex.values[0, 0] == 1j

        # single block corner case
        subset = float_frame[['A', 'B']].values
        wanted = float_frame.reindex(columns=['A', 'B']).values
        assert_almost_equal(subset, wanted)

    def test_to_numpy(self):
        """``to_numpy()`` on an int/float frame matches the 2-D array."""
        frame = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]})
        wanted = np.array([[1, 3], [2, 4.5]])
        tm.assert_numpy_array_equal(frame.to_numpy(), wanted)

    def test_to_numpy_dtype(self):
        """``to_numpy(dtype="int64")`` casts the values (4.5 becomes 4)."""
        frame = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]})
        wanted = np.array([[1, 3], [2, 4]], dtype="int64")
        tm.assert_numpy_array_equal(frame.to_numpy(dtype="int64"), wanted)

    def test_to_numpy_copy(self):
        arr = np.random.randn(4, 3)
        df = pd.DataFrame(arr)
        assert df.values.base is arr
        assert df.to_numpy(copy=False).base is arr
        assert df.to_numpy(copy=True).base is None

    def test_transpose(self, float_frame):
        """Every cell of ``frame.T`` equals the mirrored cell of ``frame``."""
        transposed = float_frame.T
        for row_label, column in compat.iteritems(transposed):
            for col_label, cell in compat.iteritems(column):
                source = float_frame[col_label][row_label]
                # NaN never compares equal, so it needs its own check.
                if np.isnan(cell):
                    assert np.isnan(source)
                else:
                    assert cell == source
Loading ...