Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / frame / test_combine_concat.py

# -*- coding: utf-8 -*-

from __future__ import print_function

from datetime import datetime

import numpy as np
import pytest

from pandas.compat import lrange

import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range
from pandas.tests.frame.common import TestData
import pandas.util.testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestDataFrameConcatCommon(TestData):

    def test_concat_multiple_frames_dtypes(self):
        """Concat along axis=1 preserves per-frame float dtypes (GH 2759)."""
        wide = DataFrame(data=np.ones((10, 2)),
                         columns=['foo', 'bar'],
                         dtype=np.float64)
        narrow = DataFrame(data=np.ones((10, 2)), dtype=np.float32)

        combined = pd.concat((wide, narrow), axis=1)
        dtype_counts = combined.get_dtype_counts()

        # Two float64 columns from the first frame, two float32 from the
        # second.
        assert_series_equal(dtype_counts,
                            Series({'float64': 2, 'float32': 2}))

    @pytest.mark.parametrize('data', [
        pd.date_range('2000', periods=4),
        pd.date_range('2000', periods=4, tz="US/Central"),
        pd.period_range('2000', periods=4),
        pd.timedelta_range(0, periods=4),
    ])
    def test_combine_datetlike_udf(self, data):
        """``combine`` with a UDF returning ``b`` reproduces ``other``.

        Regression test for
        https://github.com/pandas-dev/pandas/issues/23079
        """
        frame = pd.DataFrame({"A": data})
        untouched = frame.copy()
        # Blank out one cell so the two operands are not identical.
        frame.iloc[1, 0] = None

        # The combiner ignores the caller's column and keeps the other's.
        result = frame.combine(untouched, lambda a, b: b)
        tm.assert_frame_equal(result, untouched)

    def test_concat_multiple_tzs(self):
        """Row-concat of frames whose datetimes differ in tz (GH 12467).

        Combining a naive column with a tz-aware one is expected to give
        an object column; the UTC/EST expectation is built without an
        explicit dtype.
        """
        naive = Timestamp('2015-01-01', tz=None)
        utc = Timestamp('2015-01-01', tz='UTC')
        est = Timestamp('2015-01-01', tz='EST')

        def single(stamp):
            # One-row frame holding ``stamp`` in a "time" column.
            return DataFrame({'time': [stamp]})

        def stacked(top, bottom):
            # Concatenate row-wise and renumber the index from zero.
            return pd.concat([top, bottom]).reset_index(drop=True)

        naive_df = single(naive)
        utc_df = single(utc)
        est_df = single(est)

        assert_frame_equal(stacked(naive_df, utc_df),
                           DataFrame({'time': [naive, utc]}, dtype=object))

        assert_frame_equal(stacked(naive_df, est_df),
                           DataFrame({'time': [naive, est]}, dtype=object))

        assert_frame_equal(stacked(utc_df, est_df),
                           DataFrame({'time': [utc, est]}))

    @pytest.mark.parametrize(
        't1',
        [
            '2015-01-01',
            pytest.param(pd.NaT, marks=pytest.mark.xfail(
                reason='GH23037 incorrect dtype when concatenating'))])
    def test_concat_tz_NaT(self, t1):
        """Concat tz-aware frames with differing column counts (GH 22796).

        The cell missing from the narrower frame is expected to be
        ``NaT``; the ``NaT`` parametrization itself is marked xfail.
        """
        first = Timestamp(t1, tz='UTC')
        second = Timestamp('2015-01-01', tz='UTC')
        third = Timestamp('2015-01-01', tz='UTC')

        two_cols = DataFrame([[first, second]])
        one_col = DataFrame([[third]])
        result = pd.concat([two_cols, one_col])

        # Both inputs carry row label 0, so the label repeats.
        expected_rows = [[first, second], [third, pd.NaT]]
        expected = DataFrame(expected_rows, index=[0, 0])

        assert_frame_equal(result, expected)

    def test_concat_tz_not_aligned(self):
        """Concat tz-aware frames whose columns do not match (GH 22796)."""
        stamps = pd.to_datetime([1, 2]).tz_localize("UTC")
        only_a = pd.DataFrame({"A": stamps})
        a_and_b = pd.DataFrame({"A": stamps, "B": stamps})

        result = pd.concat([only_a, a_and_b], sort=True, ignore_index=True)

        # Column "B" is absent from the first frame, hence the leading NaT.
        stamp_list = list(stamps)
        expected = pd.DataFrame({
            "A": stamp_list + stamp_list,
            "B": [pd.NaT, pd.NaT] + stamp_list,
        })
        assert_frame_equal(result, expected)

    def test_concat_tuple_keys(self):
        """Concat with tuple ``keys`` nests them in the index (GH 14438)."""
        ones = pd.DataFrame(np.ones((2, 2)), columns=list('AB'))
        twos = pd.DataFrame(np.ones((3, 2)) * 2, columns=list('AB'))
        keys = [('bee', 'bah'), ('bee', 'boo')]
        results = pd.concat((ones, twos), keys=keys)

        # Expected index entries: key tuple extended by the source row label.
        by_row = [
            (('bee', 'bah', 0), 1.0),
            (('bee', 'bah', 1), 1.0),
            (('bee', 'boo', 0), 2.0),
            (('bee', 'boo', 1), 2.0),
            (('bee', 'boo', 2), 2.0),
        ]
        expected = pd.DataFrame({'A': dict(by_row), 'B': dict(by_row)})
        assert_frame_equal(results, expected)

    def test_append_series_dict(self):
        """Append a single row supplied as a Series or as a dict."""
        frame = DataFrame(np.random.randn(5, 4),
                          columns=['foo', 'bar', 'baz', 'qux'])

        last_row = frame.loc[4]
        # Appending the row as taken from the frame must fail the
        # integrity check.
        with pytest.raises(ValueError,
                           match='Indexes have overlapping values'):
            frame.append(last_row, verify_integrity=True)

        # Without a name the Series is rejected unless ignore_index=True.
        last_row.name = None
        with pytest.raises(
                TypeError,
                match='Can only append a Series if ignore_index=True'):
            frame.append(last_row, verify_integrity=True)

        # Element order inside the Series differs from the column order.
        flipped = last_row[::-1]
        result = frame.append(flipped, ignore_index=True)
        flipped_frame = DataFrame({0: flipped}, index=frame.columns).T
        expected = frame.append(flipped_frame, ignore_index=True)
        assert_frame_equal(result, expected)

        # dict
        result = frame.append(last_row.to_dict(), ignore_index=True)
        assert_frame_equal(result, expected)

        # Series covering only three of the four columns.
        partial = last_row[::-1][:3]
        result = frame.append(partial, ignore_index=True)
        partial_frame = DataFrame({0: partial}).T
        expected = frame.append(partial_frame, ignore_index=True, sort=True)
        assert_frame_equal(result, expected.loc[:, result.columns])

        # can append when name set
        named_row = frame.loc[4]
        named_row.name = 5
        result = frame.append(named_row)
        expected = frame.append(frame[-1:], ignore_index=True)
        assert_frame_equal(result, expected)

    def test_append_list_of_series_dicts(self):
        """Appending a list of dicts matches appending a DataFrame."""
        frame = DataFrame(np.random.randn(5, 4),
                          columns=['foo', 'bar', 'baz', 'qux'])

        # One dict per existing row: same as appending the frame to itself.
        row_dicts = [row.to_dict() for _, row in frame.iterrows()]
        assert_frame_equal(frame.append(row_dicts, ignore_index=True),
                           frame.append(frame, ignore_index=True))

        # different columns
        extra_rows = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4},
                      {'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}]
        result = frame.append(extra_rows, ignore_index=True, sort=True)
        expected = frame.append(DataFrame(extra_rows),
                                ignore_index=True,
                                sort=True)
        assert_frame_equal(result, expected)

    def test_append_empty_dataframe(self):
        """Appending an empty DataFrame leaves the caller's data unchanged."""

        def check_noop(left, right):
            # The appended result must equal a plain copy of ``left``.
            appended = left.append(right)
            assert_frame_equal(appended, left.copy())

        # Empty df append empty df
        check_noop(DataFrame([]), DataFrame([]))

        # Non-empty df append empty df
        check_noop(DataFrame(np.random.randn(5, 2)), DataFrame())

        # Empty df with columns append empty df
        check_noop(DataFrame(columns=['bar', 'foo']), DataFrame())

        # Non-Empty df with columns append empty df
        check_noop(DataFrame(np.random.randn(5, 2), columns=['bar', 'foo']),
                   DataFrame())

    def test_append_dtypes(self):
        """Result dtypes when appending frames of differing dtype (GH 5754).

        Row appends of different dtypes need to be done by-item, and the
        correct type can sometimes be inferred.
        """
        stamp = Timestamp('20130101')

        def stamped(index):
            # Frame with a single datetime column "bar" over ``index``.
            return DataFrame({'bar': stamp}, index=index)

        # datetime column + completely empty frame
        left = stamped(lrange(5))
        right = DataFrame()
        result = left.append(right)
        assert_frame_equal(result, left.copy())

        # datetime + string
        left = stamped(lrange(1))
        right = DataFrame({'bar': 'foo'}, index=lrange(1, 2))
        result = left.append(right)
        expected = DataFrame({'bar': [stamp, 'foo']})
        assert_frame_equal(result, expected)

        # datetime + NaN
        left = stamped(lrange(1))
        right = DataFrame({'bar': np.nan}, index=lrange(1, 2))
        result = left.append(right)
        expected = DataFrame(
            {'bar': Series([stamp, np.nan], dtype='M8[ns]')})
        assert_frame_equal(result, expected)

        # datetime + NaN held in an object-dtype frame
        left = stamped(lrange(1))
        right = DataFrame({'bar': np.nan}, index=lrange(1, 2), dtype=object)
        result = left.append(right)
        expected = DataFrame(
            {'bar': Series([stamp, np.nan], dtype='M8[ns]')})
        assert_frame_equal(result, expected)

        # NaN + datetime (operands swapped)
        left = DataFrame({'bar': np.nan}, index=lrange(1))
        right = stamped(lrange(1, 2))
        result = left.append(right)
        expected = DataFrame(
            {'bar': Series([np.nan, stamp], dtype='M8[ns]')})
        assert_frame_equal(result, expected)

        # datetime + integer held in an object-dtype frame
        left = stamped(lrange(1))
        right = DataFrame({'bar': 1}, index=lrange(1, 2), dtype=object)
        result = left.append(right)
        expected = DataFrame({'bar': Series([stamp, 1])})
        assert_frame_equal(result, expected)

    def test_update(self):
        """``update`` copies the non-NaN cells of ``other`` into the frame."""
        nan = np.nan
        target = DataFrame([[1.5, nan, 3.],
                            [1.5, nan, 3.],
                            [1.5, nan, 3],
                            [1.5, nan, 3]])

        # Only rows 1 and 3 of the target have a counterpart here.
        patch = DataFrame([[3.6, 2., nan],
                           [nan, nan, 7]], index=[1, 3])

        target.update(patch)

        assert_frame_equal(target, DataFrame([[1.5, nan, 3],
                                              [3.6, 2, 3],
                                              [1.5, nan, 3],
                                              [1.5, nan, 7.]]))

    def test_update_dtypes(self):
        """Updating float columns leaves the boolean columns as-is (gh 3016)."""
        names = ['A', 'B', 'bool1', 'bool2']
        target = DataFrame([[1., 2., False, True],
                            [4., 5., True, False]],
                           columns=names)

        # Integer replacements for the two float columns of row 0.
        patch = DataFrame([[45, 45]], index=[0], columns=['A', 'B'])
        target.update(patch)

        expected = DataFrame([[45., 45., False, True],
                              [4., 5., True, False]],
                             columns=names)
        assert_frame_equal(target, expected)

    def test_update_nooverwrite(self):
        """With ``overwrite=False`` only NaN cells of the frame are filled."""
        nan = np.nan
        target = DataFrame([[1.5, nan, 3.],
                            [1.5, nan, 3.],
                            [1.5, nan, 3],
                            [1.5, nan, 3]])

        patch = DataFrame([[3.6, 2., nan],
                           [nan, nan, 7]], index=[1, 3])

        target.update(patch, overwrite=False)

        # Existing 1.5 and 3 survive; only the NaN at (1, 1) is replaced.
        assert_frame_equal(target, DataFrame([[1.5, nan, 3],
                                              [1.5, 2, 3],
                                              [1.5, nan, 3],
                                              [1.5, nan, 3.]]))

    def test_update_filtered(self):
        """``filter_func`` limits which existing cells may be replaced."""
        nan = np.nan
        target = DataFrame([[1.5, nan, 3.],
                            [1.5, nan, 3.],
                            [1.5, nan, 3],
                            [1.5, nan, 3]])

        patch = DataFrame([[3.6, 2., nan],
                           [nan, nan, 7]], index=[1, 3])

        def above_two(values):
            return values > 2

        target.update(patch, filter_func=above_two)

        # Only the 3 at (3, 2) passes the filter and becomes 7.
        assert_frame_equal(target, DataFrame([[1.5, nan, 3],
                                              [1.5, nan, 3],
                                              [1.5, nan, 3],
                                              [1.5, nan, 7.]]))

    @pytest.mark.parametrize('bad_kwarg, exception, msg', [
        # errors must be 'ignore' or 'raise'
        ({'errors': 'something'}, ValueError, 'The parameter errors must.*'),
        ({'join': 'inner'}, NotImplementedError, 'Only left join is supported')
    ])
    def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg):
        """``update`` rejects unsupported ``errors`` / ``join`` arguments."""
        frame = DataFrame([[1.5, 1, 3.]])
        expect_failure = pytest.raises(exception, match=msg)
        with expect_failure:
            # The frame is updated with itself; only the kwarg matters.
            frame.update(frame, **bad_kwarg)

    def test_update_raise_on_overlap(self):
        """``errors='raise'`` gives ValueError matching "Data overlaps"."""
        nan = np.nan
        target = DataFrame([[1.5, 1, 3.],
                            [1.5, nan, 3.],
                            [1.5, nan, 3],
                            [1.5, nan, 3]])

        # Both frames hold a non-NaN value at row 3, column 2.
        patch = DataFrame([[2., nan],
                           [nan, 7]], index=[1, 3], columns=[1, 2])

        expect_failure = pytest.raises(ValueError, match="Data overlaps")
        with expect_failure:
            target.update(patch, errors='raise')

    @pytest.mark.parametrize('raise_conflict', [True, False])
    def test_update_deprecation(self, raise_conflict):
        """Passing ``raise_conflict`` to ``update`` emits a FutureWarning."""
        frame = DataFrame([[1.5, 1, 3.]])
        empty = DataFrame()
        expect_warning = tm.assert_produces_warning(FutureWarning)
        with expect_warning:
            frame.update(empty, raise_conflict=raise_conflict)

    def test_update_from_non_df(self):
        d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])}
        df = DataFrame(d)

        d['a'] = Series([5, 6, 7, 8])
        df.update(d)
Loading ...