Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / reshape / test_melt.py

# -*- coding: utf-8 -*-
# pylint: disable-msg=W0612,E1101

import numpy as np
from numpy import nan
import pytest

from pandas.compat import range

import pandas as pd
from pandas import DataFrame, lreshape, melt, wide_to_long
import pandas.util.testing as tm


class TestMelt(object):
    """Tests for ``DataFrame.melt`` and the top-level ``melt`` function.

    ``setup_method`` provides two fixtures for every test:

    * ``self.df`` -- ten rows of float columns 'A'..'D' plus the 0/1
      integer columns 'id1' and 'id2', which flag where 'A' and 'B' are
      positive;
    * ``self.df1`` -- a 3x3 frame whose columns are a two-level
      MultiIndex with level names 'CAP' and 'low'.
    """

    def setup_method(self, method):
        """Build fresh fixtures before each test."""
        # Seeded, explicit fixture: the tests below hard-code the column
        # names 'A' and 'B' and a length of 10, so the shape is spelled
        # out here and the values are reproducible between runs.
        values = np.random.RandomState(1234).randn(10, 4)
        self.df = DataFrame(values, columns=list('ABCD'),
                            index=pd.bdate_range('2000-01-01', periods=10))
        self.df['id1'] = (self.df['A'] > 0).astype(np.int64)
        self.df['id2'] = (self.df['B'] > 0).astype(np.int64)

        self.var_name = 'var'
        self.value_name = 'val'

        self.df1 = pd.DataFrame([[1.067683, -1.110463, 0.20867],
                                 [-1.321405, 0.368915, -1.055342],
                                 [-0.807333, 0.08298, -0.873361]])
        self.df1.columns = [list('ABC'), list('abc')]
        self.df1.columns.names = ['CAP', 'low']

    def _expected_melted_ab(self, var_name='variable', value_name='value'):
        """Return the frame expected from melting 'A' and 'B' of ``self.df``.

        Mirrors ``self.df.melt(id_vars=['id1', 'id2'],
        value_vars=['A', 'B'])``: the id columns are repeated once per
        melted column, followed by the variable and value columns named
        by ``var_name`` and ``value_name``.
        """
        nrows = len(self.df)
        return DataFrame({'id1': self.df['id1'].tolist() * 2,
                          'id2': self.df['id2'].tolist() * 2,
                          var_name: ['A'] * nrows + ['B'] * nrows,
                          value_name: (self.df['A'].tolist() +
                                       self.df['B'].tolist())},
                         columns=['id1', 'id2', var_name, value_name])

    def test_top_level_method(self):
        """The module-level ``melt`` uses the default output column names."""
        result = melt(self.df)
        assert result.columns.tolist() == ['variable', 'value']

    def test_method_signatures(self):
        """``DataFrame.melt`` and ``melt(frame, ...)`` agree for each kwarg."""
        tm.assert_frame_equal(self.df.melt(),
                              melt(self.df))

        tm.assert_frame_equal(self.df.melt(id_vars=['id1', 'id2'],
                                           value_vars=['A', 'B']),
                              melt(self.df,
                                   id_vars=['id1', 'id2'],
                                   value_vars=['A', 'B']))

        tm.assert_frame_equal(self.df.melt(var_name=self.var_name,
                                           value_name=self.value_name),
                              melt(self.df,
                                   var_name=self.var_name,
                                   value_name=self.value_name))

        tm.assert_frame_equal(self.df1.melt(col_level=0),
                              melt(self.df1, col_level=0))

    def test_default_col_names(self):
        """Output columns are the id_vars followed by variable/value."""
        result = self.df.melt()
        assert result.columns.tolist() == ['variable', 'value']

        result1 = self.df.melt(id_vars=['id1'])
        assert result1.columns.tolist() == ['id1', 'variable', 'value']

        result2 = self.df.melt(id_vars=['id1', 'id2'])
        assert result2.columns.tolist() == ['id1', 'id2', 'variable', 'value']

    def test_value_vars(self):
        """``value_vars`` accepts a scalar label or a list of labels."""
        result3 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A')
        assert len(result3) == 10

        result4 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'])
        tm.assert_frame_equal(result4, self._expected_melted_ab())

    @pytest.mark.parametrize("type_", (tuple, list, np.array))
    def test_value_vars_types(self, type_):
        """``value_vars`` may be given as a tuple, list or ndarray."""
        # GH 15348
        result = self.df.melt(id_vars=['id1', 'id2'],
                              value_vars=type_(('A', 'B')))
        tm.assert_frame_equal(result, self._expected_melted_ab())

    def test_vars_work_with_multiindex(self):
        """Lists of tuples select columns of a MultiIndex frame."""
        expected = DataFrame({
            ('A', 'a'): self.df1[('A', 'a')],
            'CAP': ['B'] * len(self.df1),
            'low': ['b'] * len(self.df1),
            'value': self.df1[('B', 'b')],
        }, columns=[('A', 'a'), 'CAP', 'low', 'value'])

        result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')])
        tm.assert_frame_equal(result, expected)

    def test_single_vars_work_with_multiindex(self):
        """With ``col_level`` set, plain labels from that level are used."""
        expected = DataFrame({
            'A': {0: 1.067683, 1: -1.321405, 2: -0.807333},
            'CAP': {0: 'B', 1: 'B', 2: 'B'},
            'value': {0: -1.110463, 1: 0.368915, 2: 0.08298}})
        result = self.df1.melt(['A'], ['B'], col_level=0)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("id_vars, value_vars", [
        (('A', 'a'), [('B', 'b')]),
        ([('A', 'a')], ('B', 'b')),
        (('A', 'a'), ('B', 'b'))])
    def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars):
        """A bare tuple for id_vars/value_vars raises on MultiIndex columns."""
        # melt should fail with an informative error message if
        # the columns have a MultiIndex and a tuple is passed
        # for id_vars or value_vars.
        msg = (r"(id|value)_vars must be a list of tuples when columns are"
               " a MultiIndex")
        with pytest.raises(ValueError, match=msg):
            self.df1.melt(id_vars=id_vars, value_vars=value_vars)

    def test_custom_var_name(self):
        """``var_name`` renames only the variable column."""
        result5 = self.df.melt(var_name=self.var_name)
        assert result5.columns.tolist() == ['var', 'value']

        result6 = self.df.melt(id_vars=['id1'], var_name=self.var_name)
        assert result6.columns.tolist() == ['id1', 'var', 'value']

        result7 = self.df.melt(id_vars=['id1', 'id2'], var_name=self.var_name)
        assert result7.columns.tolist() == ['id1', 'id2', 'var', 'value']

        result8 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A',
                               var_name=self.var_name)
        assert result8.columns.tolist() == ['id1', 'id2', 'var', 'value']

        result9 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                               var_name=self.var_name)
        expected9 = self._expected_melted_ab(var_name=self.var_name)
        tm.assert_frame_equal(result9, expected9)

    def test_custom_value_name(self):
        """``value_name`` renames only the value column."""
        result10 = self.df.melt(value_name=self.value_name)
        assert result10.columns.tolist() == ['variable', 'val']

        result11 = self.df.melt(id_vars=['id1'], value_name=self.value_name)
        assert result11.columns.tolist() == ['id1', 'variable', 'val']

        result12 = self.df.melt(id_vars=['id1', 'id2'],
                                value_name=self.value_name)
        assert result12.columns.tolist() == ['id1', 'id2', 'variable', 'val']

        result13 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A',
                                value_name=self.value_name)
        assert result13.columns.tolist() == ['id1', 'id2', 'variable', 'val']

        result14 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                                value_name=self.value_name)
        expected14 = self._expected_melted_ab(value_name=self.value_name)
        tm.assert_frame_equal(result14, expected14)

    def test_custom_var_and_value_name(self):
        """``var_name`` and ``value_name`` can be combined.

        Also checks that a named columns index supplies the default
        variable column name.
        """
        result15 = self.df.melt(var_name=self.var_name,
                                value_name=self.value_name)
        assert result15.columns.tolist() == ['var', 'val']

        result16 = self.df.melt(id_vars=['id1'], var_name=self.var_name,
                                value_name=self.value_name)
        assert result16.columns.tolist() == ['id1', 'var', 'val']

        result17 = self.df.melt(id_vars=['id1', 'id2'],
                                var_name=self.var_name,
                                value_name=self.value_name)
        assert result17.columns.tolist() == ['id1', 'id2', 'var', 'val']

        result18 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A',
                                var_name=self.var_name,
                                value_name=self.value_name)
        assert result18.columns.tolist() == ['id1', 'id2', 'var', 'val']

        result19 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'],
                                var_name=self.var_name,
                                value_name=self.value_name)
        expected19 = self._expected_melted_ab(var_name=self.var_name,
                                              value_name=self.value_name)
        tm.assert_frame_equal(result19, expected19)

        df20 = self.df.copy()
        df20.columns.name = 'foo'
        result20 = df20.melt()
        assert result20.columns.tolist() == ['foo', 'value']

    def test_col_level(self):
        """``col_level`` accepts a level position or a level name."""
        res1 = self.df1.melt(col_level=0)
        res2 = self.df1.melt(col_level='CAP')
        assert res1.columns.tolist() == ['CAP', 'value']
        assert res2.columns.tolist() == ['CAP', 'value']

    def test_multiindex(self):
        """Each column level becomes its own variable column."""
        res = self.df1.melt()
        assert res.columns.tolist() == ['CAP', 'low', 'value']

    @pytest.mark.parametrize("col", [
        pd.Series(pd.date_range('2010', periods=5, tz='US/Pacific')),
        pd.Series(["a", "b", "c", "a", "d"], dtype="category"),
        pd.Series([0, 1, 0, 0, 0])])
    def test_pandas_dtypes(self, col):
        """Melting a column of dtype ``col.dtype`` gives the concat result."""
        # GH 15785
        df = DataFrame({'klass': range(5),
                        'col': col,
                        'attr1': [1, 0, 0, 0, 0],
                        'attr2': col})
        expected_value = pd.concat([pd.Series([1, 0, 0, 0, 0]), col],
                                   ignore_index=True)
        result = melt(df, id_vars=['klass', 'col'], var_name='attribute',
                      value_name='value')
        expected = DataFrame({0: list(range(5)) * 2,
                              1: pd.concat([col] * 2, ignore_index=True),
                              2: ['attr1'] * 5 + ['attr2'] * 5,
                              3: expected_value})
        expected.columns = ['klass', 'col', 'attribute', 'value']
        tm.assert_frame_equal(result, expected)

    def test_melt_missing_columns_raises(self):
        """Labels absent from the frame raise ``KeyError`` naming them."""
        # GH-23575
        # This test is to ensure that pandas raises an error if melting is
        # attempted with column names absent from the dataframe

        # Generate data
        df = pd.DataFrame(np.random.randn(5, 4), columns=list('abcd'))

        # Try to melt with missing `value_vars` column name
        msg = "The following '{Var}' are not present in the DataFrame: {Col}"
        with pytest.raises(
                KeyError,
                match=msg.format(Var='value_vars', Col="\\['C'\\]")):
            df.melt(['a', 'b'], ['C', 'd'])

        # Try to melt with missing `id_vars` column name
        with pytest.raises(
                KeyError,
                match=msg.format(Var='id_vars', Col="\\['A'\\]")):
            df.melt(['A', 'b'], ['c', 'd'])

        # Multiple missing
        with pytest.raises(
                KeyError,
                match=msg.format(Var='id_vars',
                                 Col="\\['not_here', 'or_there'\\]")):
            df.melt(['a', 'b', 'not_here', 'or_there'], ['c', 'd'])

        # Multiindex melt fails if column is missing from multilevel melt
        multi = df.copy()
        multi.columns = [list('ABCD'), list('abcd')]
        with pytest.raises(
                KeyError,
                match=msg.format(Var='id_vars',
                                 Col="\\['E'\\]")):
            multi.melt([('E', 'a')], [('B', 'b')])
        # Multiindex fails if column is missing from single level melt
        with pytest.raises(
                KeyError,
                match=msg.format(Var='value_vars',
                                 Col="\\['F'\\]")):
            multi.melt(['A'], ['F'], col_level=0)


class TestLreshape(object):

    def test_pairs(self):
        data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', '21dec2008',
                            '11jan2009'],
                'birthwt': [1766, 3301, 1454, 3139, 4133],
                'id': [101, 102, 103, 104, 105],
                'sex': ['Male', 'Female', 'Female', 'Female', 'Female'],
                'visitdt1': ['11jan2009', '22dec2008', '04jan2009',
                             '29dec2008', '20jan2009'],
                'visitdt2':
                ['21jan2009', nan, '22jan2009', '31dec2008', '03feb2009'],
                'visitdt3': ['05feb2009', nan, nan, '02jan2009', '15feb2009'],
                'wt1': [1823, 3338, 1549, 3298, 4306],
                'wt2': [2011.0, nan, 1892.0, 3338.0, 4575.0],
                'wt3': [2293.0, nan, nan, 3377.0, 4805.0]}

        df = DataFrame(data)

        spec = {'visitdt': ['visitdt%d' % i for i in range(1, 4)],
                'wt': ['wt%d' % i for i in range(1, 4)]}
        result = lreshape(df, spec)

        exp_data = {'birthdt':
                    ['08jan2009', '20dec2008', '30dec2008', '21dec2008',
                     '11jan2009', '08jan2009', '30dec2008', '21dec2008',
                     '11jan2009', '08jan2009', '21dec2008', '11jan2009'],
                    'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 1454, 3139,
                                4133, 1766, 3139, 4133],
                    'id': [101, 102, 103, 104, 105, 101, 103, 104, 105, 101,
                           104, 105],
                    'sex': ['Male', 'Female', 'Female', 'Female', 'Female',
                            'Male', 'Female', 'Female', 'Female', 'Male',
                            'Female', 'Female'],
                    'visitdt': ['11jan2009', '22dec2008', '04jan2009',
                                '29dec2008', '20jan2009', '21jan2009',
                                '22jan2009', '31dec2008', '03feb2009',
                                '05feb2009', '02jan2009', '15feb2009'],
                    'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0,
                           1892.0, 3338.0, 4575.0, 2293.0, 3377.0, 4805.0]}
        exp = DataFrame(exp_data, columns=result.columns)
        tm.assert_frame_equal(result, exp)

        result = lreshape(df, spec, dropna=False)
        exp_data = {'birthdt':
                    ['08jan2009', '20dec2008', '30dec2008', '21dec2008',
                     '11jan2009', '08jan2009', '20dec2008', '30dec2008',
                     '21dec2008', '11jan2009', '08jan2009', '20dec2008',
                     '30dec2008', '21dec2008', '11jan2009'],
                    'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 3301, 1454,
                                3139, 4133, 1766, 3301, 1454, 3139, 4133],
                    'id': [101, 102, 103, 104, 105, 101, 102, 103, 104, 105,
                           101, 102, 103, 104, 105],
                    'sex': ['Male', 'Female', 'Female', 'Female', 'Female',
                            'Male', 'Female', 'Female', 'Female', 'Female',
                            'Male', 'Female', 'Female', 'Female', 'Female'],
                    'visitdt': ['11jan2009', '22dec2008', '04jan2009',
                                '29dec2008', '20jan2009', '21jan2009', nan,
Loading ...