Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
pandas / tests / io / parser / test_converters.py
Size: Mime:
# -*- coding: utf-8 -*-

"""
Tests column conversion functionality during parsing
for all of the parsers defined in parsers.py
"""

import numpy as np
import pytest

from pandas.compat import StringIO, lmap, parse_date

import pandas as pd
from pandas import DataFrame, Index
import pandas.util.testing as tm


def test_converters_type_must_be_dict(all_parsers):
    parser = all_parsers
    data = """index,A,B,C,D
foo,2,3,4,5
"""

    with pytest.raises(TypeError, match="Type converters.+"):
        parser.read_csv(StringIO(data), converters=0)


@pytest.mark.parametrize("column", [3, "D"])
@pytest.mark.parametrize("converter", [
    parse_date,
    lambda x: int(x.split("/")[2])  # Produce integer.
])
def test_converters(all_parsers, column, converter):
    parser = all_parsers
    data = """A,B,C,D
a,1,2,01/01/2009
b,3,4,01/02/2009
c,4,5,01/03/2009
"""
    result = parser.read_csv(StringIO(data), converters={column: converter})

    expected = parser.read_csv(StringIO(data))
    expected["D"] = expected["D"].map(converter)

    tm.assert_frame_equal(result, expected)


def test_converters_no_implicit_conv(all_parsers):
    # see gh-2184
    parser = all_parsers
    data = """000102,1.2,A\n001245,2,B"""

    converters = {0: lambda x: x.strip()}
    result = parser.read_csv(StringIO(data), header=None,
                             converters=converters)

    # Column 0 should not be casted to numeric and should remain as object.
    expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
    tm.assert_frame_equal(result, expected)


def test_converters_euro_decimal_format(all_parsers):
    # see gh-583
    converters = dict()
    parser = all_parsers

    data = """Id;Number1;Number2;Text1;Text2;Number3
1;1521,1541;187101,9543;ABC;poi;4,7387
2;121,12;14897,76;DEF;uyt;0,3773
3;878,158;108013,434;GHI;rez;2,7356"""
    converters["Number1"] = converters["Number2"] =\
        converters["Number3"] = lambda x: float(x.replace(",", "."))

    result = parser.read_csv(StringIO(data), sep=";", converters=converters)
    expected = DataFrame([[1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387],
                          [2, 121.12, 14897.76, "DEF", "uyt", 0.3773],
                          [3, 878.158, 108013.434, "GHI", "rez", 2.7356]],
                         columns=["Id", "Number1", "Number2",
                                  "Text1", "Text2", "Number3"])
    tm.assert_frame_equal(result, expected)


def test_converters_corner_with_nans(all_parsers):
    parser = all_parsers
    data = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""

    # Example converters.
    def convert_days(x):
        x = x.strip()

        if not x:
            return np.nan

        is_plus = x.endswith("+")

        if is_plus:
            x = int(x[:-1]) + 1
        else:
            x = int(x)

        return x

    def convert_days_sentinel(x):
        x = x.strip()

        if not x:
            return np.nan

        is_plus = x.endswith("+")

        if is_plus:
            x = int(x[:-1]) + 1
        else:
            x = int(x)

        return x

    def convert_score(x):
        x = x.strip()

        if not x:
            return np.nan

        if x.find("-") > 0:
            val_min, val_max = lmap(int, x.split("-"))
            val = 0.5 * (val_min + val_max)
        else:
            val = float(x)

        return val

    results = []

    for day_converter in [convert_days, convert_days_sentinel]:
        result = parser.read_csv(StringIO(data),
                                 converters={"score": convert_score,
                                             "days": day_converter},
                                 na_values=["", None])
        assert pd.isna(result["days"][1])
        results.append(result)

    tm.assert_frame_equal(results[0], results[1])


def test_converter_index_col_bug(all_parsers):
    # see gh-1835
    parser = all_parsers
    data = "A;B\n1;2\n3;4"

    rs = parser.read_csv(StringIO(data), sep=";", index_col="A",
                         converters={"A": lambda x: x})

    xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
    tm.assert_frame_equal(rs, xp)