Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
modin / pandas / series_utils.py
Size: Mime:
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""
Implement Series's accessors public API as pandas does.

Accessors: `Series.cat`, `Series.str`, `Series.dt`
"""

import sys
import numpy as np
import pandas
from modin.logging import LoggerMetaClass
from modin.utils import _inherit_docstrings
from .series import Series

# Pick the compiled-regex type used by the `isinstance` checks further down.
# The tuple comparison also covers any major version after 3, which the former
# "major == 3 and minor >= 7" test would have sent to the legacy branch.
if sys.version_info >= (3, 7):
    # Python >= 3.7
    from re import Pattern as _pattern_type
else:
    # Python <= 3.6
    from re import _pattern_type


@_inherit_docstrings(pandas.core.arrays.categorical.CategoricalAccessor)
class CategoryMethods(object, metaclass=LoggerMetaClass):
    # Modin counterpart of the pandas ``Series.cat`` accessor.
    #
    # Apart from `codes`, every member hands the work to pandas through the
    # wrapped Series' `_default_to_pandas`.
    #
    # NOTE(review): public members carry no docstrings of their own; presumably
    # `_inherit_docstrings` supplies them from the pandas class named in the
    # decorator, so notes are kept as `#` comments -- confirm before adding any.

    def __init__(self, series):
        # Keep the wrapped Series and its query compiler side by side.
        self._series = series
        self._query_compiler = series._query_compiler

    @property
    def categories(self):
        # `pandas.Series.cat` is passed as the operation; `categories` is then
        # read from whatever that call returns.
        pandas_accessor = self._series._default_to_pandas(pandas.Series.cat)
        return pandas_accessor.categories

    @categories.setter
    def categories(self, categories):
        # The helper keeps its name and its `categories` keyword because both
        # are visible to `Series._default_to_pandas`.
        def set_categories(series, categories):
            series.cat.categories = categories

        wrapped = self._series
        wrapped._default_to_pandas(set_categories, categories=categories)

    @property
    def ordered(self):
        pandas_accessor = self._series._default_to_pandas(pandas.Series.cat)
        return pandas_accessor.ordered

    @property
    def codes(self):
        # The only member served directly by the query compiler.
        codes_qc = self._query_compiler.cat_codes()
        return Series(query_compiler=codes_qc)

    def rename_categories(self, new_categories, inplace=False):
        pandas_op = pandas.Series.cat.rename_categories
        return self._default_to_pandas(pandas_op, new_categories, inplace=inplace)

    def reorder_categories(self, new_categories, ordered=None, inplace=False):
        pandas_op = pandas.Series.cat.reorder_categories
        return self._default_to_pandas(
            pandas_op, new_categories, ordered=ordered, inplace=inplace
        )

    def add_categories(self, new_categories, inplace=False):
        pandas_op = pandas.Series.cat.add_categories
        return self._default_to_pandas(pandas_op, new_categories, inplace=inplace)

    def remove_categories(self, removals, inplace=False):
        pandas_op = pandas.Series.cat.remove_categories
        return self._default_to_pandas(pandas_op, removals, inplace=inplace)

    def remove_unused_categories(self, inplace=False):
        pandas_op = pandas.Series.cat.remove_unused_categories
        return self._default_to_pandas(pandas_op, inplace=inplace)

    def set_categories(self, new_categories, ordered=None, rename=False, inplace=False):
        pandas_op = pandas.Series.cat.set_categories
        return self._default_to_pandas(
            pandas_op, new_categories, ordered=ordered, rename=rename, inplace=inplace
        )

    def as_ordered(self, inplace=False):
        pandas_op = pandas.Series.cat.as_ordered
        return self._default_to_pandas(pandas_op, inplace=inplace)

    def as_unordered(self, inplace=False):
        pandas_op = pandas.Series.cat.as_unordered
        return self._default_to_pandas(pandas_op, inplace=inplace)

    def _default_to_pandas(self, op, *args, **kwargs):
        """
        Run a pandas ``cat`` accessor operation through the wrapped Series.

        Parameters
        ----------
        op : callable
            Function invoked as ``op(series.cat, *args, **kwargs)``.
        *args : list
            Positional arguments forwarded to `op`.
        **kwargs : dict
            Keyword arguments forwarded to `op`.

        Returns
        -------
        object
            Whatever ``self._series._default_to_pandas`` returns for the call.
        """
        wrapped = self._series
        return wrapped._default_to_pandas(
            lambda pandas_series: op(pandas_series.cat, *args, **kwargs)
        )


@_inherit_docstrings(pandas.core.strings.StringMethods)
class StringMethods(object, metaclass=LoggerMetaClass):
    # Modin counterpart of the pandas ``Series.str`` accessor.
    #
    # Each method either wraps the result of a ``str_*`` query-compiler call in
    # a new Series or hands the work to pandas through `_default_to_pandas`.
    # Several methods validate their arguments up front and raise before any
    # query-compiler work is requested.
    #
    # NOTE(review): public methods carry no docstrings of their own; presumably
    # `_inherit_docstrings` supplies them from the pandas class named in the
    # decorator, so notes are kept as `#` comments -- confirm before adding any.

    def __init__(self, series):
        # No dtype validation happens here; the accessor only keeps the Series
        # and its query compiler.
        self._series = series
        self._query_compiler = series._query_compiler

    def casefold(self):
        return self._default_to_pandas(pandas.Series.str.casefold)

    def cat(self, others=None, sep=None, na_rep=None, join=None):
        # A Modin Series is converted first so pandas receives a pandas object.
        if isinstance(others, Series):
            others = others._to_pandas()
        return self._default_to_pandas(
            pandas.Series.str.cat, others=others, sep=sep, na_rep=na_rep, join=join
        )

    def decode(self, encoding, errors="strict"):
        return self._default_to_pandas(
            pandas.Series.str.decode, encoding, errors=errors
        )

    def split(self, pat=None, n=-1, expand=False):
        # `None` (the default) is allowed; any other falsy pattern is rejected.
        if not pat and pat is not None:
            raise ValueError("split() requires a non-empty pattern match.")

        # The expanding form is delegated to pandas; only the non-expanding
        # form goes through the query compiler.
        if expand:
            return self._default_to_pandas(
                pandas.Series.str.split, pat=pat, n=n, expand=expand
            )
        else:
            return Series(
                query_compiler=self._query_compiler.str_split(
                    pat=pat, n=n, expand=expand
                )
            )

    def rsplit(self, pat=None, n=-1, expand=False):
        # Same validation and dispatch as `split`.
        if not pat and pat is not None:
            raise ValueError("rsplit() requires a non-empty pattern match.")

        if expand:
            return self._default_to_pandas(
                pandas.Series.str.rsplit, pat=pat, n=n, expand=expand
            )
        else:
            return Series(
                query_compiler=self._query_compiler.str_rsplit(
                    pat=pat, n=n, expand=expand
                )
            )

    def get(self, i):
        return Series(query_compiler=self._query_compiler.str_get(i))

    def join(self, sep):
        # Raised eagerly, with the message `None.join` itself would produce.
        if sep is None:
            raise AttributeError("'NoneType' object has no attribute 'join'")
        return Series(query_compiler=self._query_compiler.str_join(sep))

    def get_dummies(self, sep="|"):
        return self._default_to_pandas(pandas.Series.str.get_dummies, sep=sep)

    def contains(self, pat, case=True, flags=0, na=np.nan, regex=True):
        # `np.nan` replaces the `np.NaN` alias, which NumPy 2.0 removed.
        if pat is None and not case:
            raise AttributeError("'NoneType' object has no attribute 'upper'")
        return Series(
            query_compiler=self._query_compiler.str_contains(
                pat, case=case, flags=flags, na=na, regex=regex
            )
        )

    def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
        if not (isinstance(repl, str) or callable(repl)):
            raise TypeError("repl must be a string or callable")
        return Series(
            query_compiler=self._query_compiler.str_replace(
                pat, repl, n=n, case=case, flags=flags, regex=regex
            )
        )

    def pad(self, width, side="left", fillchar=" "):
        # Only single-character fill strings are accepted.
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        return Series(
            query_compiler=self._query_compiler.str_pad(
                width, side=side, fillchar=fillchar
            )
        )

    def center(self, width, fillchar=" "):
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        return Series(
            query_compiler=self._query_compiler.str_center(width, fillchar=fillchar)
        )

    def ljust(self, width, fillchar=" "):
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        return Series(
            query_compiler=self._query_compiler.str_ljust(width, fillchar=fillchar)
        )

    def rjust(self, width, fillchar=" "):
        if len(fillchar) != 1:
            raise TypeError("fillchar must be a character, not str")
        return Series(
            query_compiler=self._query_compiler.str_rjust(width, fillchar=fillchar)
        )

    def zfill(self, width):
        return Series(query_compiler=self._query_compiler.str_zfill(width))

    def wrap(self, width, **kwargs):
        if width <= 0:
            raise ValueError("invalid width {} (must be > 0)".format(width))
        return Series(query_compiler=self._query_compiler.str_wrap(width, **kwargs))

    def slice(self, start=None, stop=None, step=None):
        if step == 0:
            raise ValueError("slice step cannot be zero")
        return Series(
            query_compiler=self._query_compiler.str_slice(
                start=start, stop=stop, step=step
            )
        )

    def slice_replace(self, start=None, stop=None, repl=None):
        return Series(
            query_compiler=self._query_compiler.str_slice_replace(
                start=start, stop=stop, repl=repl
            )
        )

    def count(self, pat, flags=0, **kwargs):
        if not isinstance(pat, (str, _pattern_type)):
            raise TypeError("first argument must be string or compiled pattern")
        return Series(
            query_compiler=self._query_compiler.str_count(pat, flags=flags, **kwargs)
        )

    def startswith(self, pat, na=np.nan):
        return Series(query_compiler=self._query_compiler.str_startswith(pat, na=na))

    def encode(self, encoding, errors="strict"):
        return self._default_to_pandas(
            pandas.Series.str.encode, encoding, errors=errors
        )

    def endswith(self, pat, na=np.nan):
        return Series(query_compiler=self._query_compiler.str_endswith(pat, na=na))

    def findall(self, pat, flags=0, **kwargs):
        if not isinstance(pat, (str, _pattern_type)):
            raise TypeError("first argument must be string or compiled pattern")
        return Series(
            query_compiler=self._query_compiler.str_findall(pat, flags=flags, **kwargs)
        )

    def match(self, pat, case=True, flags=0, na=np.nan):
        if not isinstance(pat, (str, _pattern_type)):
            raise TypeError("first argument must be string or compiled pattern")
        if not case:
            # `case` used to be accepted and then dropped, so `case=False`
            # matched case-sensitively.  The query-compiler call below takes
            # regex flags only, so case-insensitivity is expressed through them.
            import re

            flags |= re.IGNORECASE
        return Series(
            query_compiler=self._query_compiler.str_match(pat, flags=flags, na=na)
        )

    def extract(self, pat, flags=0, expand=True):
        return self._default_to_pandas(
            pandas.Series.str.extract, pat, flags=flags, expand=expand
        )

    def extractall(self, pat, flags=0):
        return self._default_to_pandas(pandas.Series.str.extractall, pat, flags=flags)

    def len(self):
        return Series(query_compiler=self._query_compiler.str_len())

    def strip(self, to_strip=None):
        return Series(query_compiler=self._query_compiler.str_strip(to_strip=to_strip))

    def rstrip(self, to_strip=None):
        return Series(query_compiler=self._query_compiler.str_rstrip(to_strip=to_strip))

    def lstrip(self, to_strip=None):
        return Series(query_compiler=self._query_compiler.str_lstrip(to_strip=to_strip))

    def partition(self, sep=" ", expand=True):
        if sep is not None and len(sep) == 0:
            raise ValueError("empty separator")

        # As with `split`, the expanding form is delegated to pandas.
        if expand:
            return self._default_to_pandas(
                pandas.Series.str.partition, sep=sep, expand=expand
            )
        else:
            return Series(
                query_compiler=self._query_compiler.str_partition(
                    sep=sep, expand=expand
                )
            )

    def repeat(self, repeats):
        return self._default_to_pandas(pandas.Series.str.repeat, repeats)

    def rpartition(self, sep=" ", expand=True):
        if sep is not None and len(sep) == 0:
            raise ValueError("empty separator")

        if expand:
            return self._default_to_pandas(
                pandas.Series.str.rpartition, sep=sep, expand=expand
            )
        else:
            return Series(
                query_compiler=self._query_compiler.str_rpartition(
                    sep=sep, expand=expand
                )
            )

    def lower(self):
        return Series(query_compiler=self._query_compiler.str_lower())

    def upper(self):
        return Series(query_compiler=self._query_compiler.str_upper())

    def title(self):
        return Series(query_compiler=self._query_compiler.str_title())

    def find(self, sub, start=0, end=None):
        if not isinstance(sub, str):
            raise TypeError(
                "expected a string object, not {0}".format(type(sub).__name__)
            )
        return Series(
            query_compiler=self._query_compiler.str_find(sub, start=start, end=end)
        )

    def rfind(self, sub, start=0, end=None):
        if not isinstance(sub, str):
            raise TypeError(
                "expected a string object, not {0}".format(type(sub).__name__)
            )
        return Series(
            query_compiler=self._query_compiler.str_rfind(sub, start=start, end=end)
        )

    def index(self, sub, start=0, end=None):
        if not isinstance(sub, str):
            raise TypeError(
                "expected a string object, not {0}".format(type(sub).__name__)
            )
        return Series(
            query_compiler=self._query_compiler.str_index(sub, start=start, end=end)
        )

    def rindex(self, sub, start=0, end=None):
        if not isinstance(sub, str):
            raise TypeError(
                "expected a string object, not {0}".format(type(sub).__name__)
            )
        return Series(
            query_compiler=self._query_compiler.str_rindex(sub, start=start, end=end)
        )

    def capitalize(self):
        return Series(query_compiler=self._query_compiler.str_capitalize())

    def swapcase(self):
        return Series(query_compiler=self._query_compiler.str_swapcase())

    def normalize(self, form):
        return Series(query_compiler=self._query_compiler.str_normalize(form))

    def translate(self, table):
        return Series(query_compiler=self._query_compiler.str_translate(table))

    def isalnum(self):
        return Series(query_compiler=self._query_compiler.str_isalnum())

    def isalpha(self):
        return Series(query_compiler=self._query_compiler.str_isalpha())

    def isdigit(self):
        return Series(query_compiler=self._query_compiler.str_isdigit())

    def isspace(self):
        return Series(query_compiler=self._query_compiler.str_isspace())

    def islower(self):
        return Series(query_compiler=self._query_compiler.str_islower())

    def isupper(self):
        return Series(query_compiler=self._query_compiler.str_isupper())

    def istitle(self):
        return Series(query_compiler=self._query_compiler.str_istitle())

    def isnumeric(self):
        return Series(query_compiler=self._query_compiler.str_isnumeric())

    def isdecimal(self):
        return Series(query_compiler=self._query_compiler.str_isdecimal())

    def __getitem__(self, key):
        return Series(query_compiler=self._query_compiler.str___getitem__(key))

    def _default_to_pandas(self, op, *args, **kwargs):
        """
        Run a pandas ``str`` accessor operation through the wrapped Series.

        Parameters
        ----------
        op : callable
            Function invoked as ``op(series.str, *args, **kwargs)``.
        *args : list
            Additional positional arguments to be passed in `op`.
        **kwargs : dict
            Additional keywords arguments to be passed in `op`.

        Returns
        -------
        object
            Result of operation.
        """
        return self._series._default_to_pandas(
            lambda series: op(series.str, *args, **kwargs)
        )


@_inherit_docstrings(pandas.core.indexes.accessors.CombinedDatetimelikeProperties)
class DatetimeProperties(object, metaclass=LoggerMetaClass):
    # Modin counterpart of the pandas ``Series.dt`` accessor.
    #
    # Nearly every member asks the query compiler for the matching ``dt_*``
    # result and wraps it in a new Series; the exceptions are commented inline.
    #
    # NOTE(review): public members carry no docstrings of their own; presumably
    # `_inherit_docstrings` supplies them from the pandas class named in the
    # decorator, so notes are kept as `#` comments -- confirm before adding any.

    def __init__(self, series):
        self._series = series
        self._query_compiler = series._query_compiler

    @property
    def date(self):
        result_qc = self._query_compiler.dt_date()
        return Series(query_compiler=result_qc)

    @property
    def time(self):
        result_qc = self._query_compiler.dt_time()
        return Series(query_compiler=result_qc)

    @property
    def timetz(self):
        result_qc = self._query_compiler.dt_timetz()
        return Series(query_compiler=result_qc)

    @property
    def year(self):
        result_qc = self._query_compiler.dt_year()
        return Series(query_compiler=result_qc)

    @property
    def month(self):
        result_qc = self._query_compiler.dt_month()
        return Series(query_compiler=result_qc)

    @property
    def day(self):
        result_qc = self._query_compiler.dt_day()
        return Series(query_compiler=result_qc)

    @property
    def hour(self):
        result_qc = self._query_compiler.dt_hour()
        return Series(query_compiler=result_qc)

    @property
    def minute(self):
        result_qc = self._query_compiler.dt_minute()
        return Series(query_compiler=result_qc)

    @property
    def second(self):
        result_qc = self._query_compiler.dt_second()
        return Series(query_compiler=result_qc)

    @property
    def microsecond(self):
        result_qc = self._query_compiler.dt_microsecond()
        return Series(query_compiler=result_qc)

    @property
    def nanosecond(self):
        result_qc = self._query_compiler.dt_nanosecond()
        return Series(query_compiler=result_qc)

    @property
    def week(self):
        result_qc = self._query_compiler.dt_week()
        return Series(query_compiler=result_qc)

    @property
    def weekofyear(self):
        result_qc = self._query_compiler.dt_weekofyear()
        return Series(query_compiler=result_qc)

    @property
    def dayofweek(self):
        result_qc = self._query_compiler.dt_dayofweek()
        return Series(query_compiler=result_qc)

    @property
    def weekday(self):
        result_qc = self._query_compiler.dt_weekday()
        return Series(query_compiler=result_qc)

    @property
    def dayofyear(self):
        result_qc = self._query_compiler.dt_dayofyear()
        return Series(query_compiler=result_qc)

    @property
    def quarter(self):
        result_qc = self._query_compiler.dt_quarter()
        return Series(query_compiler=result_qc)

    @property
    def is_month_start(self):
        result_qc = self._query_compiler.dt_is_month_start()
        return Series(query_compiler=result_qc)

    @property
    def is_month_end(self):
        result_qc = self._query_compiler.dt_is_month_end()
        return Series(query_compiler=result_qc)

    @property
    def is_quarter_start(self):
        result_qc = self._query_compiler.dt_is_quarter_start()
        return Series(query_compiler=result_qc)

    @property
    def is_quarter_end(self):
        result_qc = self._query_compiler.dt_is_quarter_end()
        return Series(query_compiler=result_qc)

    @property
    def is_year_start(self):
        result_qc = self._query_compiler.dt_is_year_start()
        return Series(query_compiler=result_qc)

    @property
    def is_year_end(self):
        result_qc = self._query_compiler.dt_is_year_end()
        return Series(query_compiler=result_qc)

    @property
    def is_leap_year(self):
        result_qc = self._query_compiler.dt_is_leap_year()
        return Series(query_compiler=result_qc)

    @property
    def daysinmonth(self):
        result_qc = self._query_compiler.dt_daysinmonth()
        return Series(query_compiler=result_qc)

    @property
    def days_in_month(self):
        result_qc = self._query_compiler.dt_days_in_month()
        return Series(query_compiler=result_qc)

    @property
    def tz(self):
        # Not wrapped in a Series: the pandas result is squeezed and returned.
        as_pandas = self._query_compiler.dt_tz().to_pandas()
        return as_pandas.squeeze()

    @property
    def freq(self):
        # Same shape as `tz`: convert to pandas, then squeeze.
        as_pandas = self._query_compiler.dt_freq().to_pandas()
        return as_pandas.squeeze()

    def to_period(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_to_period(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def to_pydatetime(self):
        # The intermediate Series is only a vehicle for `to_numpy()`.
        wrapped = Series(query_compiler=self._query_compiler.dt_to_pydatetime())
        return wrapped.to_numpy()

    def tz_localize(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_tz_localize(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def tz_convert(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_tz_convert(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def normalize(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_normalize(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def strftime(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_strftime(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def round(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_round(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def floor(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_floor(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def ceil(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_ceil(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def month_name(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_month_name(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def day_name(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_day_name(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def total_seconds(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_total_seconds(*args, **kwargs)
        return Series(query_compiler=result_qc)

    def to_pytimedelta(self):
        # Delegated through the query compiler's `default_to_pandas`; the frame
        # it passes in is squeezed along axis 1 before `.dt` is taken.
        compiler = self._query_compiler
        return compiler.default_to_pandas(
            lambda frame: pandas.Series.dt.to_pytimedelta(frame.squeeze(axis=1).dt)
        )

    @property
    def seconds(self):
        result_qc = self._query_compiler.dt_seconds()
        return Series(query_compiler=result_qc)

    @property
    def days(self):
        result_qc = self._query_compiler.dt_days()
        return Series(query_compiler=result_qc)

    @property
    def microseconds(self):
        result_qc = self._query_compiler.dt_microseconds()
        return Series(query_compiler=result_qc)

    @property
    def nanoseconds(self):
        result_qc = self._query_compiler.dt_nanoseconds()
        return Series(query_compiler=result_qc)

    @property
    def components(self):
        # Imported at call time rather than at module level.
        from .dataframe import DataFrame

        components_qc = self._query_compiler.dt_components()
        return DataFrame(query_compiler=components_qc)

    @property
    def qyear(self):
        result_qc = self._query_compiler.dt_qyear()
        return Series(query_compiler=result_qc)

    @property
    def start_time(self):
        result_qc = self._query_compiler.dt_start_time()
        return Series(query_compiler=result_qc)

    @property
    def end_time(self):
        result_qc = self._query_compiler.dt_end_time()
        return Series(query_compiler=result_qc)

    def to_timestamp(self, *args, **kwargs):
        result_qc = self._query_compiler.dt_to_timestamp(*args, **kwargs)
        return Series(query_compiler=result_qc)