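# NOTE: the header below is page text captured from the Gemfury package
# browser. It is not part of the module and is kept only as a comment so that
# the file parses as Python.
#
# Why Gemfury? Push, build, and install: RubyGems, npm packages, Python
# packages, Maven artifacts, PHP packages, Go Modules, Bower components,
# Debian packages, RPM packages, NuGet packages
#
# Package: alkaline-ml / pandas (python)
#
# Repository URL to install this package: (URL not captured)
#
# Version: 1.1.1
#
# File: / tests / series / test_internals.py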

from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import NaT, Series, Timestamp
import pandas._testing as tm
from pandas.core.internals.blocks import IntBlock


class TestSeriesInternals:
    # Tests for Series internals: the private ``Series._convert`` helper and
    # the kind of block (``_mgr.blocks[0]``) that backs a constructed Series.

    # GH 10265
    def test_convert(self):
        # Walks ``Series._convert`` through object-dtype inputs (strings, mixed
        # objects, datetimes, timedeltas, numbers) with and without
        # ``coerce=True``.  ``s``, ``expected``, ``dt`` and ``td`` are reused
        # across sections, so the statement order matters.

        # Tests: All to nans, coerce, true
        # Test coercion returns correct type
        s = Series(["a", "b", "c"])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([NaT] * 3)
        tm.assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([np.nan] * 3)
        tm.assert_series_equal(results, expected)

        # For timedelta the all-NaT result is expected with dtype m8[ns].
        expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]"))
        results = s._convert(timedelta=True, coerce=True)
        tm.assert_series_equal(results, expected)

        dt = datetime(2001, 1, 1, 0, 0)
        td = dt - datetime(2000, 1, 1, 0, 0)

        # Test coercion with mixed types
        # Each coerced call is expected to keep only the element that matches
        # the requested kind and blank out the rest (NaT / NaN).
        s = Series(["a", "3.1415", dt, td])
        results = s._convert(datetime=True, coerce=True)
        expected = Series([NaT, NaT, dt, NaT])
        tm.assert_series_equal(results, expected)

        results = s._convert(numeric=True, coerce=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        tm.assert_series_equal(results, expected)

        results = s._convert(timedelta=True, coerce=True)
        expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]"))
        tm.assert_series_equal(results, expected)

        # Test standard conversion returns original
        # (still the mixed Series from above; no ``coerce`` this time)
        results = s._convert(datetime=True)
        tm.assert_series_equal(results, s)
        # numeric=True is the exception: the expected value here is the same
        # as in the coerced numeric case above, not the original Series.
        results = s._convert(numeric=True)
        expected = Series([np.nan, 3.1415, np.nan, np.nan])
        tm.assert_series_equal(results, expected)
        results = s._convert(timedelta=True)
        tm.assert_series_equal(results, s)

        # test pass-through and non-conversion when other types selected
        s = Series(["1.0", "2.0", "3.0"])
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([1.0, 2.0, 3.0])
        tm.assert_series_equal(results, expected)
        # Positional flags; presumably (datetime, numeric, timedelta), i.e.
        # numeric is off here -- confirm against the _convert signature.
        results = s._convert(True, False, True)
        tm.assert_series_equal(results, s)

        # Object-dtype datetimes: converted when datetime=True, returned as-is
        # when datetime=False.
        s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O")
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
        tm.assert_series_equal(results, expected)
        results = s._convert(datetime=False, numeric=True, timedelta=True)
        tm.assert_series_equal(results, s)

        # Same check for object-dtype timedeltas.
        td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
        s = Series([td, td], dtype="O")
        results = s._convert(datetime=True, numeric=True, timedelta=True)
        expected = Series([td, td])
        tm.assert_series_equal(results, expected)
        # Positional flags; presumably timedelta is the one switched off --
        # confirm against the _convert signature.
        results = s._convert(True, True, False)
        tm.assert_series_equal(results, s)

        # A Series that is already numeric is expected to come back unchanged.
        s = Series([1.0, 2, 3], index=["a", "b", "c"])
        result = s._convert(numeric=True)
        tm.assert_series_equal(result, s)

        # force numeric conversion
        # Each case swaps one element of an object-dtype copy for a string and
        # compares the converted result against the numeric ``s``.
        r = s.copy().astype("O")
        r["a"] = "1"
        result = r._convert(numeric=True)
        tm.assert_series_equal(result, s)

        r = s.copy().astype("O")
        r["a"] = "1."
        result = r._convert(numeric=True)
        tm.assert_series_equal(result, s)

        # An unparseable string is expected to become NaN.
        r = s.copy().astype("O")
        r["a"] = "garbled"
        result = r._convert(numeric=True)
        expected = s.copy()
        expected["a"] = np.nan
        tm.assert_series_equal(result, expected)

        # GH 4119, not converting a mixed type (e.g. floats and object)
        s = Series([1, "na", 3, 4])
        result = s._convert(datetime=True, numeric=True)
        expected = Series([1, np.nan, 3, 4])
        tm.assert_series_equal(result, expected)

        # ``expected`` is reused from the "na" case: an empty string is
        # expected to give the same result.
        s = Series([1, "", 3, 4])
        result = s._convert(datetime=True, numeric=True)
        tm.assert_series_equal(result, expected)

        # dates
        # ``s`` holds only datetimes; ``s2`` is an object Series mixing
        # datetimes with a string, numbers, a Timestamp and a date-like string.
        s = Series(
            [
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),
            ]
        )
        s2 = Series(
            [
                datetime(2001, 1, 1, 0, 0),
                datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0),
                "foo",
                1.0,
                1,
                Timestamp("20010104"),
                "20010105",
            ],
            dtype="O",
        )

        result = s._convert(datetime=True)
        expected = Series(
            [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
            dtype="M8[ns]",
        )
        tm.assert_series_equal(result, expected)

        # Adding coerce=True is expected to give the same result for clean
        # datetime input.
        result = s._convert(datetime=True, coerce=True)
        tm.assert_series_equal(result, expected)

        expected = Series(
            [
                Timestamp("20010101"),
                Timestamp("20010102"),
                Timestamp("20010103"),
                NaT,
                NaT,
                NaT,
                Timestamp("20010104"),
                Timestamp("20010105"),
            ],
            dtype="M8[ns]",
        )
        # Spelling out numeric=False/timedelta=False and omitting them are
        # expected to agree.
        result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True)
        tm.assert_series_equal(result, expected)
        result = s2._convert(datetime=True, coerce=True)
        tm.assert_series_equal(result, expected)

        # NOTE(review): here the numeric entries are expected as Timestamp(1),
        # whereas the 1.0 / 1 entries of ``s2`` above are expected as NaT.
        s = Series(["foo", "bar", 1, 1.0], dtype="O")
        result = s._convert(datetime=True, coerce=True)
        expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
        tm.assert_series_equal(result, expected)

        # preserve if non-object
        s = Series([1], dtype="float32")
        result = s._convert(datetime=True, coerce=True)
        tm.assert_series_equal(result, s)

        # FIXME: dont leave commented-out
        # NOTE(review): the disabled call below passes ``convert_dates`` /
        # ``convert_numeric``, keyword names that no live call in this test
        # uses; presumably stale -- confirm before re-enabling or deleting.
        # r = s.copy()
        # r[0] = np.nan
        # result = r._convert(convert_dates=True,convert_numeric=False)
        # assert result.dtype == 'M8[ns]'

        # dateutil parses some single letters into today's value as a date
        # so every single letter, lower and upper case, is checked to coerce
        # to NaT instead.
        expected = Series([NaT])
        for x in "abcdefghijklmnopqrstuvwxyz":
            s = Series([x])
            result = s._convert(datetime=True, coerce=True)
            tm.assert_series_equal(result, expected)
            s = Series([x.upper()])
            result = s._convert(datetime=True, coerce=True)
            tm.assert_series_equal(result, expected)

    def test_convert_no_arg_error(self):
        # Calling ``_convert`` with no conversion flag at all must raise a
        # ValueError whose message matches ``msg``.
        s = Series(["1.0", "2"])
        msg = r"At least one of datetime, numeric or timedelta must be True\."
        with pytest.raises(ValueError, match=msg):
            s._convert()

    def test_convert_preserve_bool(self):
        # An object Series mixing ints with a single bool is expected to come
        # back as int64 ("i8"), with True represented as 1.
        s = Series([1, True, 3, 5], dtype=object)
        r = s._convert(datetime=True, numeric=True)
        e = Series([1, 1, 3, 5], dtype="i8")
        tm.assert_series_equal(r, e)

    def test_convert_preserve_all_bool(self):
        # An object Series holding only bools is expected to come back with
        # bool dtype rather than a numeric one.
        s = Series([False, True, False, False], dtype=object)
        r = s._convert(datetime=True, numeric=True)
        e = Series([False, True, False, False], dtype=bool)
        tm.assert_series_equal(r, e)

    def test_constructor_no_pandas_array(self):
        # Building a Series from another Series' ``.array`` must round-trip to
        # an equal Series whose first block is an IntBlock.
        ser = pd.Series([1, 2, 3])
        result = pd.Series(ser.array)
        tm.assert_series_equal(ser, result)
        assert isinstance(result._mgr.blocks[0], IntBlock)

    def test_astype_no_pandas_dtype(self):
        # https://github.com/pandas-dev/pandas/pull/24866
        ser = pd.Series([1, 2], dtype="int64")
        # Don't have PandasDtype in the public API, so we use `.array.dtype`,
        # which is a PandasDtype.
        result = ser.astype(ser.array.dtype)
        # The result is expected to equal the original int64 Series.
        tm.assert_series_equal(result, ser)

    def test_from_array(self):
        # A Series built from ``pd.array`` with a timedelta64[ns] or
        # datetime64[ns] dtype must not be backed by an extension block.
        result = pd.Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]"))
        assert result._mgr.blocks[0].is_extension is False

        result = pd.Series(pd.array(["2015"], dtype="datetime64[ns]"))
        assert result._mgr.blocks[0].is_extension is False

    def test_from_list_dtype(self):
        # Same check as ``test_from_array``, but passing a plain list together
        # with the dtype straight to the Series constructor.
        result = pd.Series(["1H", "2H"], dtype="timedelta64[ns]")
        assert result._mgr.blocks[0].is_extension is False

        result = pd.Series(["2015"], dtype="datetime64[ns]")
        assert result._mgr.blocks[0].is_extension is False


def test_hasnans_uncached_for_series():
    # GH#19700
    # ``hasnans`` may be cached on an Index, but a Series derived from that
    # Index must recompute it, because a Series can be modified in place.
    index = pd.Index([0, 1])
    index_has_nans = index.hasnans
    assert index_has_nans is False
    # Reading the property is expected to leave an entry in the Index cache.
    index_cache = index._cache
    assert "hasnans" in index_cache

    series = index.to_series()
    series_has_nans = series.hasnans
    assert series_has_nans is False
    # The Series is expected to carry no ``_cache`` attribute at all.
    assert not hasattr(series, "_cache")

    # Write a NaN into the last position; the next read must report it.
    series.iloc[-1] = np.nan
    assert series.hasnans is True

    # Both properties are expected to expose the same docstring.
    index_doc = pd.Index.hasnans.__doc__
    assert Series.hasnans.__doc__ == index_doc