Repository URL to install this package:
Version:
0.24.2 ▾
|
# -*- coding: utf-8 -*-
"""
Tests column conversion functionality during parsing
for all of the parsers defined in parsers.py
"""
import numpy as np
import pytest
from pandas.compat import StringIO, lmap, parse_date
import pandas as pd
from pandas import DataFrame, Index
import pandas.util.testing as tm
def test_converters_type_must_be_dict(all_parsers):
parser = all_parsers
data = """index,A,B,C,D
foo,2,3,4,5
"""
with pytest.raises(TypeError, match="Type converters.+"):
parser.read_csv(StringIO(data), converters=0)
@pytest.mark.parametrize("column", [3, "D"])
@pytest.mark.parametrize("converter", [
parse_date,
lambda x: int(x.split("/")[2]) # Produce integer.
])
def test_converters(all_parsers, column, converter):
parser = all_parsers
data = """A,B,C,D
a,1,2,01/01/2009
b,3,4,01/02/2009
c,4,5,01/03/2009
"""
result = parser.read_csv(StringIO(data), converters={column: converter})
expected = parser.read_csv(StringIO(data))
expected["D"] = expected["D"].map(converter)
tm.assert_frame_equal(result, expected)
def test_converters_no_implicit_conv(all_parsers):
# see gh-2184
parser = all_parsers
data = """000102,1.2,A\n001245,2,B"""
converters = {0: lambda x: x.strip()}
result = parser.read_csv(StringIO(data), header=None,
converters=converters)
# Column 0 should not be casted to numeric and should remain as object.
expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
tm.assert_frame_equal(result, expected)
def test_converters_euro_decimal_format(all_parsers):
# see gh-583
converters = dict()
parser = all_parsers
data = """Id;Number1;Number2;Text1;Text2;Number3
1;1521,1541;187101,9543;ABC;poi;4,7387
2;121,12;14897,76;DEF;uyt;0,3773
3;878,158;108013,434;GHI;rez;2,7356"""
converters["Number1"] = converters["Number2"] =\
converters["Number3"] = lambda x: float(x.replace(",", "."))
result = parser.read_csv(StringIO(data), sep=";", converters=converters)
expected = DataFrame([[1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387],
[2, 121.12, 14897.76, "DEF", "uyt", 0.3773],
[3, 878.158, 108013.434, "GHI", "rez", 2.7356]],
columns=["Id", "Number1", "Number2",
"Text1", "Text2", "Number3"])
tm.assert_frame_equal(result, expected)
def test_converters_corner_with_nans(all_parsers):
parser = all_parsers
data = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""
# Example converters.
def convert_days(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_days_sentinel(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_score(x):
x = x.strip()
if not x:
return np.nan
if x.find("-") > 0:
val_min, val_max = lmap(int, x.split("-"))
val = 0.5 * (val_min + val_max)
else:
val = float(x)
return val
results = []
for day_converter in [convert_days, convert_days_sentinel]:
result = parser.read_csv(StringIO(data),
converters={"score": convert_score,
"days": day_converter},
na_values=["", None])
assert pd.isna(result["days"][1])
results.append(result)
tm.assert_frame_equal(results[0], results[1])
def test_converter_index_col_bug(all_parsers):
# see gh-1835
parser = all_parsers
data = "A;B\n1;2\n3;4"
rs = parser.read_csv(StringIO(data), sep=";", index_col="A",
converters={"A": lambda x: x})
xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
tm.assert_frame_equal(rs, xp)