Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
pandas / tests / io / parser / index_col.py
Size: Mime:
# -*- coding: utf-8 -*-

"""
Tests that the specified index column (a.k.a 'index_col')
is properly handled or inferred during parsing for all of
the parsers defined in parsers.py
"""

import pytest

import pandas.util.testing as tm

from pandas import DataFrame, Index, MultiIndex
from pandas.compat import StringIO


class IndexColTests(object):

    def test_index_col_named(self):
        no_header = """\
KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000"""  # noqa

        h = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n"  # noqa
        data = h + no_header
        rs = self.read_csv(StringIO(data), index_col='ID')
        xp = self.read_csv(StringIO(data), header=0).set_index('ID')
        tm.assert_frame_equal(rs, xp)

        pytest.raises(ValueError, self.read_csv, StringIO(no_header),
                      index_col='ID')

        data = """\
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
"""
        names = ['a', 'b', 'c', 'd', 'message']
        xp = DataFrame({'a': [1, 5, 9], 'b': [2, 6, 10], 'c': [3, 7, 11],
                        'd': [4, 8, 12]},
                       index=Index(['hello', 'world', 'foo'], name='message'))
        rs = self.read_csv(StringIO(data), names=names, index_col=['message'])
        tm.assert_frame_equal(xp, rs)
        assert xp.index.name == rs.index.name

        rs = self.read_csv(StringIO(data), names=names, index_col='message')
        tm.assert_frame_equal(xp, rs)
        assert xp.index.name == rs.index.name

    def test_index_col_is_true(self):
        # see gh-9798
        pytest.raises(ValueError, self.read_csv,
                      StringIO(self.ts_data), index_col=True)

    def test_infer_index_col(self):
        data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
        data = self.read_csv(StringIO(data))
        assert data.index.equals(Index(['foo', 'bar', 'baz']))

    def test_empty_index_col_scenarios(self):
        data = 'x,y,z'

        # None, no index
        index_col, expected = None, DataFrame([], columns=list('xyz')),
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col), expected)

        # False, no index
        index_col, expected = False, DataFrame([], columns=list('xyz')),
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col), expected)

        # int, first column
        index_col, expected = 0, DataFrame(
            [], columns=['y', 'z'], index=Index([], name='x'))
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col), expected)

        # int, not first column
        index_col, expected = 1, DataFrame(
            [], columns=['x', 'z'], index=Index([], name='y'))
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col), expected)

        # str, first column
        index_col, expected = 'x', DataFrame(
            [], columns=['y', 'z'], index=Index([], name='x'))
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col), expected)

        # str, not the first column
        index_col, expected = 'y', DataFrame(
            [], columns=['x', 'z'], index=Index([], name='y'))
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col), expected)

        # list of int
        index_col, expected = [0, 1], DataFrame(
            [], columns=['z'], index=MultiIndex.from_arrays(
                [[]] * 2, names=['x', 'y']))
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col),
            expected, check_index_type=False)

        # list of str
        index_col = ['x', 'y']
        expected = DataFrame([], columns=['z'],
                             index=MultiIndex.from_arrays(
                                 [[]] * 2, names=['x', 'y']))
        tm.assert_frame_equal(self.read_csv(StringIO(
            data), index_col=index_col),
            expected, check_index_type=False)

        # list of int, reversed sequence
        index_col = [1, 0]
        expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays(
            [[]] * 2, names=['y', 'x']))
        tm.assert_frame_equal(self.read_csv(
            StringIO(data), index_col=index_col),
            expected, check_index_type=False)

        # list of str, reversed sequence
        index_col = ['y', 'x']
        expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays(
            [[]] * 2, names=['y', 'x']))
        tm.assert_frame_equal(self.read_csv(StringIO(
            data), index_col=index_col),
            expected, check_index_type=False)

    def test_empty_with_index_col_false(self):
        # see gh-10413
        data = 'x,y'
        result = self.read_csv(StringIO(data), index_col=False)
        expected = DataFrame([], columns=['x', 'y'])
        tm.assert_frame_equal(result, expected)