# Repository URL to install this package:
# Version: 1.0.5
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame, Series
import pandas._testing as tm
class TestSeriesCombine:
def test_combine_scalar(self):
# GH 21248
# Note - combine() with another Series is tested elsewhere because
# it is used when testing operators
s = pd.Series([i * 10 for i in range(5)])
result = s.combine(3, lambda x, y: x + y)
expected = pd.Series([i * 10 + 3 for i in range(5)])
tm.assert_series_equal(result, expected)
result = s.combine(22, lambda x, y: min(x, y))
expected = pd.Series([min(i * 10, 22) for i in range(5)])
tm.assert_series_equal(result, expected)
def test_combine_first(self):
values = tm.makeIntIndex(20).values.astype(float)
series = Series(values, index=tm.makeIntIndex(20))
series_copy = series * 2
series_copy[::2] = np.NaN
# nothing used from the input
combined = series.combine_first(series_copy)
tm.assert_series_equal(combined, series)
# Holes filled from input
combined = series_copy.combine_first(series)
assert np.isfinite(combined).all()
tm.assert_series_equal(combined[::2], series[::2])
tm.assert_series_equal(combined[1::2], series_copy[1::2])
# mixed types
index = tm.makeStringIndex(20)
floats = Series(tm.randn(20), index=index)
strings = Series(tm.makeStringIndex(10), index=index[::2])
combined = strings.combine_first(floats)
tm.assert_series_equal(strings, combined.loc[index[::2]])
tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])
# corner case
s = Series([1.0, 2, 3], index=[0, 1, 2])
empty = Series([], index=[], dtype=object)
result = s.combine_first(empty)
s.index = s.index.astype("O")
tm.assert_series_equal(s, result)
def test_update(self):
s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
s2 = Series([np.nan, 3.5, np.nan, 5.0])
s.update(s2)
expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
tm.assert_series_equal(s, expected)
# GH 3217
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
df["c"] = np.nan
df["c"].update(Series(["foo"], index=[0]))
expected = DataFrame(
[[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"other, dtype, expected",
[
# other is int
([61, 63], "int32", pd.Series([10, 61, 12], dtype="int32")),
([61, 63], "int64", pd.Series([10, 61, 12])),
([61, 63], float, pd.Series([10.0, 61.0, 12.0])),
([61, 63], object, pd.Series([10, 61, 12], dtype=object)),
# other is float, but can be cast to int
([61.0, 63.0], "int32", pd.Series([10, 61, 12], dtype="int32")),
([61.0, 63.0], "int64", pd.Series([10, 61, 12])),
([61.0, 63.0], float, pd.Series([10.0, 61.0, 12.0])),
([61.0, 63.0], object, pd.Series([10, 61.0, 12], dtype=object)),
# others is float, cannot be cast to int
([61.1, 63.1], "int32", pd.Series([10.0, 61.1, 12.0])),
([61.1, 63.1], "int64", pd.Series([10.0, 61.1, 12.0])),
([61.1, 63.1], float, pd.Series([10.0, 61.1, 12.0])),
([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)),
# other is object, cannot be cast
([(61,), (63,)], "int32", pd.Series([10, (61,), 12])),
([(61,), (63,)], "int64", pd.Series([10, (61,), 12])),
([(61,), (63,)], float, pd.Series([10.0, (61,), 12.0])),
([(61,), (63,)], object, pd.Series([10, (61,), 12])),
],
)
def test_update_dtypes(self, other, dtype, expected):
s = Series([10, 11, 12], dtype=dtype)
other = Series(other, index=[1, 3])
s.update(other)
tm.assert_series_equal(s, expected)
def test_concat_empty_series_dtypes_roundtrips(self):
# round-tripping with self & like self
dtypes = map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"])
for dtype in dtypes:
assert pd.concat([Series(dtype=dtype)]).dtype == dtype
assert pd.concat([Series(dtype=dtype), Series(dtype=dtype)]).dtype == dtype
def int_result_type(dtype, dtype2):
typs = {dtype.kind, dtype2.kind}
if not len(typs - {"i", "u", "b"}) and (
dtype.kind == "i" or dtype2.kind == "i"
):
return "i"
elif not len(typs - {"u", "b"}) and (
dtype.kind == "u" or dtype2.kind == "u"
):
return "u"
return None
def float_result_type(dtype, dtype2):
typs = {dtype.kind, dtype2.kind}
if not len(typs - {"f", "i", "u"}) and (
dtype.kind == "f" or dtype2.kind == "f"
):
return "f"
return None
def get_result_type(dtype, dtype2):
result = float_result_type(dtype, dtype2)
if result is not None:
return result
result = int_result_type(dtype, dtype2)
if result is not None:
return result
return "O"
for dtype in dtypes:
for dtype2 in dtypes:
if dtype == dtype2:
continue
expected = get_result_type(dtype, dtype2)
result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
assert result.kind == expected
def test_combine_first_dt_tz_values(self, tz_naive_fixture):
ser1 = pd.Series(
pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
name="ser1",
)
ser2 = pd.Series(
pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
index=[2, 3, 4],
name="ser2",
)
result = ser1.combine_first(ser2)
exp_vals = pd.DatetimeIndex(
["20150101", "20150102", "20150103", "20160515", "20160516"],
tz=tz_naive_fixture,
)
exp = pd.Series(exp_vals, name="ser1")
tm.assert_series_equal(exp, result)
def test_concat_empty_series_dtypes(self):
# booleans
assert (
pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype
== np.int32
)
assert (
pd.concat([Series(dtype=np.bool_), Series(dtype=np.float32)]).dtype
== np.object_
)
# datetime-like
assert (
pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.bool)]).dtype
== np.object_
)
assert (
pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.int64)]).dtype
== np.object_
)
assert (
pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.bool)]).dtype
== np.object_
)
assert (
pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.int64)]).dtype
== np.object_
)
assert (
pd.concat(
[Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
).dtype
== np.object_
)
# categorical
assert (
pd.concat([Series(dtype="category"), Series(dtype="category")]).dtype
== "category"
)
# GH 18515
assert (
pd.concat(
[Series(np.array([]), dtype="category"), Series(dtype="float64")]
).dtype
== "float64"
)
assert (
pd.concat([Series(dtype="category"), Series(dtype="object")]).dtype
== "object"
)
# sparse
# TODO: move?
result = pd.concat(
[
Series(dtype="float64").astype("Sparse"),
Series(dtype="float64").astype("Sparse"),
]
)
assert result.dtype == "Sparse[float64]"
result = pd.concat(
[Series(dtype="float64").astype("Sparse"), Series(dtype="float64")]
)
# TODO: release-note: concat sparse dtype
expected = pd.SparseDtype(np.float64)
assert result.dtype == expected
result = pd.concat(
[Series(dtype="float64").astype("Sparse"), Series(dtype="object")]
)
# TODO: release-note: concat sparse dtype
expected = pd.SparseDtype("object")
assert result.dtype == expected
def test_combine_first_dt64(self):
from pandas.core.tools.datetimes import to_datetime
s0 = to_datetime(Series(["2010", np.NaN]))
s1 = to_datetime(Series([np.NaN, "2011"]))
rs = s0.combine_first(s1)
xp = to_datetime(Series(["2010", "2011"]))
tm.assert_series_equal(rs, xp)
s0 = to_datetime(Series(["2010", np.NaN]))
s1 = Series([np.NaN, "2011"])
rs = s0.combine_first(s1)
xp = Series([datetime(2010, 1, 1), "2011"])
tm.assert_series_equal(rs, xp)