Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

/ core / tools / datetimes.py

from datetime import datetime, time
from functools import partial

import numpy as np

from pandas._libs import tslib, tslibs
from pandas._libs.tslibs import Timestamp, conversion, parsing
from pandas._libs.tslibs.parsing import (  # noqa
    DateParseError, _format_is_iso, _guess_datetime_format, parse_time_string)
from pandas._libs.tslibs.strptime import array_strptime
from pandas.compat import zip

from pandas.core.dtypes.common import (
    ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype,
    is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype,
    is_list_like, is_numeric_dtype, is_object_dtype, is_scalar)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import notna

from pandas import compat
from pandas.core import algorithms


def _guess_datetime_format_for_array(arr, **kwargs):
    # Try to guess the format based on the first non-NaN element
    non_nan_elements = notna(arr).nonzero()[0]
    if len(non_nan_elements):
        return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs)


def _maybe_cache(arg, format, cache, convert_listlike):
    """
    Create a cache of unique dates from an array of dates

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
    format : string
        Strftime format to parse time
    cache : boolean
        True attempts to create a cache of converted values
    convert_listlike : function
        Conversion function to apply on dates

    Returns
    -------
    cache_array : Series
        Cache of converted, unique dates. Can be empty
    """
    from pandas import Series
    cache_array = Series()
    if cache:
        # Perform a quicker unique check
        from pandas import Index
        if not Index(arg).is_unique:
            unique_dates = algorithms.unique(arg)
            cache_dates = convert_listlike(unique_dates, True, format)
            cache_array = Series(cache_dates, index=unique_dates)
    return cache_array


def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
    """
    Convert array of dates with a cache and box the result

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
    cache_array : Series
        Cache of converted, unique dates
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    errors : string
        'ignore' plus box=True will convert result to Index
    name : string, default None
        Name for a DatetimeIndex

    Returns
    -------
    result : datetime of converted dates
        Returns:

        - Index-like if box=True
        - ndarray if box=False
    """
    from pandas import Series, DatetimeIndex, Index
    result = Series(arg).map(cache_array)
    if box:
        if errors == 'ignore':
            return Index(result, name=name)
        else:
            return DatetimeIndex(result, name=name)
    return result.values


def _return_parsed_timezone_results(result, timezones, box, tz, name):
    """
    Return results from array_strptime if a %z or %Z directive was passed.

    Parameters
    ----------
    result : ndarray
        int64 date representations of the dates
    timezones : ndarray
        pytz timezone objects
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    tz : object
        None or pytz timezone object
    name : string, default None
        Name for a DatetimeIndex

    Returns
    -------
    tz_result : ndarray of parsed dates with timezone
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False

    """
    if tz is not None:
        raise ValueError("Cannot pass a tz argument when "
                         "parsing strings with timezone "
                         "information.")
    tz_results = np.array([Timestamp(res).tz_localize(zone) for res, zone
                           in zip(result, timezones)])
    if box:
        from pandas import Index
        return Index(tz_results, name=name)
    return tz_results


def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
                                unit=None, errors=None,
                                infer_datetime_format=None, dayfirst=None,
                                yearfirst=None, exact=None):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parced
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    ndarray of parsed dates
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype, objects_to_datetime64ns)

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == 'utc':
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, 'values', arg)
        result = tslib.array_with_unit_to_datetime(arg, unit,
                                                   errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                result = Index(result, name=name)
                # GH 23758: We may still need to localize the result with tz
                try:
                    return result.tz_localize(tz)
                except AttributeError:
                    return result

            return DatetimeIndex(result, tz=tz, name=name)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors)
                    if '%Z' in format or '%z' in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    elif errors == 'coerce':
                        result = np.empty(arg.shape, dtype='M8[ns]')
                        iresult = result.view('i8')
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        elif errors == 'coerce':
                            result = np.empty(arg.shape, dtype='M8[ns]')
                            iresult = result.view('i8')
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e

    if result is None:
        assert format is None or infer_datetime_format
        utc = tz == 'utc'
        result, tz_parsed = objects_to_datetime64ns(
            arg, dayfirst=dayfirst, yearfirst=yearfirst,
            utc=utc, errors=errors, require_iso8601=require_iso8601,
            allow_object=True)

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name,
                                             tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to an numpy array
            # of datetime objects
            result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
                      for ts in result]
            return np.array(result, dtype=object)

    if box:
        # Ensure we return an Index in all cases where box=True
        if is_datetime64_dtype(result):
            return DatetimeIndex(result, tz=tz, name=name)
        elif is_object_dtype(result):
            # e.g. an Index of datetime objects
            from pandas import Index
            return Index(result, name=name)
    return result


def _adjust_to_origin(arg, origin, unit):
    """
    Helper function for to_datetime.
    Adjust input argument to the specified origin

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be adjusted
    origin : 'julian' or Timestamp
        origin offset for the arg
    unit : string
        passed unit from to_datetime, must be 'D'

    Returns
    -------
    ndarray or scalar of adjusted date(s)
    """
Loading ...