from datetime import datetime, time
from functools import partial
import numpy as np
from pandas._libs import tslib, tslibs
from pandas._libs.tslibs import Timestamp, conversion, parsing
from pandas._libs.tslibs.parsing import ( # noqa
DateParseError, _format_is_iso, _guess_datetime_format, parse_time_string)
from pandas._libs.tslibs.strptime import array_strptime
from pandas.compat import zip
from pandas.core.dtypes.common import (
ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype,
is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype,
is_list_like, is_numeric_dtype, is_object_dtype, is_scalar)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import notna
from pandas import compat
from pandas.core import algorithms
def _guess_datetime_format_for_array(arr, **kwargs):
# Try to guess the format based on the first non-NaN element
non_nan_elements = notna(arr).nonzero()[0]
if len(non_nan_elements):
return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
def _maybe_cache(arg, format, cache, convert_listlike):
"""
Create a cache of unique dates from an array of dates
Parameters
----------
arg : integer, float, string, datetime, list, tuple, 1-d array, Series
format : string
Strftime format to parse time
cache : boolean
True attempts to create a cache of converted values
convert_listlike : function
Conversion function to apply on dates
Returns
-------
cache_array : Series
Cache of converted, unique dates. Can be empty
"""
from pandas import Series
cache_array = Series()
if cache:
# Perform a quicker unique check
from pandas import Index
if not Index(arg).is_unique:
unique_dates = algorithms.unique(arg)
cache_dates = convert_listlike(unique_dates, True, format)
cache_array = Series(cache_dates, index=unique_dates)
return cache_array
def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
"""
Convert array of dates with a cache and box the result
Parameters
----------
arg : integer, float, string, datetime, list, tuple, 1-d array, Series
cache_array : Series
Cache of converted, unique dates
box : boolean
True boxes result as an Index-like, False returns an ndarray
errors : string
'ignore' plus box=True will convert result to Index
name : string, default None
Name for a DatetimeIndex
Returns
-------
result : datetime of converted dates
Returns:
- Index-like if box=True
- ndarray if box=False
"""
from pandas import Series, DatetimeIndex, Index
result = Series(arg).map(cache_array)
if box:
if errors == 'ignore':
return Index(result, name=name)
else:
return DatetimeIndex(result, name=name)
return result.values
def _return_parsed_timezone_results(result, timezones, box, tz, name):
"""
Return results from array_strptime if a %z or %Z directive was passed.
Parameters
----------
result : ndarray
int64 date representations of the dates
timezones : ndarray
pytz timezone objects
box : boolean
True boxes result as an Index-like, False returns an ndarray
tz : object
None or pytz timezone object
name : string, default None
Name for a DatetimeIndex
Returns
-------
tz_result : ndarray of parsed dates with timezone
Returns:
- Index-like if box=True
- ndarray of Timestamps if box=False
"""
if tz is not None:
raise ValueError("Cannot pass a tz argument when "
"parsing strings with timezone "
"information.")
tz_results = np.array([Timestamp(res).tz_localize(zone) for res, zone
in zip(result, timezones)])
if box:
from pandas import Index
return Index(tz_results, name=name)
return tz_results
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
unit=None, errors=None,
infer_datetime_format=None, dayfirst=None,
yearfirst=None, exact=None):
"""
Helper function for to_datetime. Performs the conversions of 1D listlike
of dates
Parameters
----------
arg : list, tuple, ndarray, Series, Index
date to be parced
box : boolean
True boxes result as an Index-like, False returns an ndarray
name : object
None or string for the Index name
tz : object
None or 'utc'
unit : string
None or string of the frequency of the passed data
errors : string
error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore'
infer_datetime_format : boolean
inferring format behavior from to_datetime
dayfirst : boolean
dayfirst parsing behavior from to_datetime
yearfirst : boolean
yearfirst parsing behavior from to_datetime
exact : boolean
exact format matching behavior from to_datetime
Returns
-------
ndarray of parsed dates
Returns:
- Index-like if box=True
- ndarray of Timestamps if box=False
"""
from pandas import DatetimeIndex
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays.datetimes import (
maybe_convert_dtype, objects_to_datetime64ns)
if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')
# these are shortcutable
if is_datetime64tz_dtype(arg):
if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
return DatetimeIndex(arg, tz=tz, name=name)
if tz == 'utc':
arg = arg.tz_convert(None).tz_localize(tz)
return arg
elif is_datetime64_ns_dtype(arg):
if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
try:
return DatetimeIndex(arg, tz=tz, name=name)
except ValueError:
pass
return arg
elif unit is not None:
if format is not None:
raise ValueError("cannot specify both format and unit")
arg = getattr(arg, 'values', arg)
result = tslib.array_with_unit_to_datetime(arg, unit,
errors=errors)
if box:
if errors == 'ignore':
from pandas import Index
result = Index(result, name=name)
# GH 23758: We may still need to localize the result with tz
try:
return result.tz_localize(tz)
except AttributeError:
return result
return DatetimeIndex(result, tz=tz, name=name)
return result
elif getattr(arg, 'ndim', 1) > 1:
raise TypeError('arg must be a string, datetime, list, tuple, '
'1-d array, or Series')
# warn if passing timedelta64, raise for PeriodDtype
# NB: this must come after unit transformation
orig_arg = arg
arg, _ = maybe_convert_dtype(arg, copy=False)
arg = ensure_object(arg)
require_iso8601 = False
if infer_datetime_format and format is None:
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
if format is not None:
# There is a special fast-path for iso8601 formatted
# datetime strings, so in those cases don't use the inferred
# format because this path makes process slower in this
# special case
format_is_iso8601 = _format_is_iso(format)
if format_is_iso8601:
require_iso8601 = not infer_datetime_format
format = None
tz_parsed = None
result = None
if format is not None:
try:
# shortcut formatting here
if format == '%Y%m%d':
try:
# pass orig_arg as float-dtype may have been converted to
# datetime64[ns]
orig_arg = ensure_object(orig_arg)
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
raise ValueError("cannot convert the input to "
"'%Y%m%d' date format")
# fallback
if result is None:
try:
result, timezones = array_strptime(
arg, format, exact=exact, errors=errors)
if '%Z' in format or '%z' in format:
return _return_parsed_timezone_results(
result, timezones, box, tz, name)
except tslibs.OutOfBoundsDatetime:
if errors == 'raise':
raise
elif errors == 'coerce':
result = np.empty(arg.shape, dtype='M8[ns]')
iresult = result.view('i8')
iresult.fill(tslibs.iNaT)
else:
result = arg
except ValueError:
# if format was inferred, try falling back
# to array_to_datetime - terminate here
# for specified formats
if not infer_datetime_format:
if errors == 'raise':
raise
elif errors == 'coerce':
result = np.empty(arg.shape, dtype='M8[ns]')
iresult = result.view('i8')
iresult.fill(tslibs.iNaT)
else:
result = arg
except ValueError as e:
# Fallback to try to convert datetime objects if timezone-aware
# datetime objects are found without passing `utc=True`
try:
values, tz = conversion.datetime_to_datetime64(arg)
return DatetimeIndex._simple_new(values, name=name, tz=tz)
except (ValueError, TypeError):
raise e
if result is None:
assert format is None or infer_datetime_format
utc = tz == 'utc'
result, tz_parsed = objects_to_datetime64ns(
arg, dayfirst=dayfirst, yearfirst=yearfirst,
utc=utc, errors=errors, require_iso8601=require_iso8601,
allow_object=True)
if tz_parsed is not None:
if box:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name,
tz=tz_parsed)
else:
# Convert the datetime64 numpy array to an numpy array
# of datetime objects
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
for ts in result]
return np.array(result, dtype=object)
if box:
# Ensure we return an Index in all cases where box=True
if is_datetime64_dtype(result):
return DatetimeIndex(result, tz=tz, name=name)
elif is_object_dtype(result):
# e.g. an Index of datetime objects
from pandas import Index
return Index(result, name=name)
return result
def _adjust_to_origin(arg, origin, unit):
"""
Helper function for to_datetime.
Adjust input argument to the specified origin
Parameters
----------
arg : list, tuple, ndarray, Series, Index
date to be adjusted
origin : 'julian' or Timestamp
origin offset for the arg
unit : string
passed unit from to_datetime, must be 'D'
Returns
-------
ndarray or scalar of adjusted date(s)
"""
Loading ...