Repository URL to install this package:
|
Version:
0.3.2 ▾
|
"""Format helpers"""
import math
import pandas as pd
import pandas.lib as lib
import numpy as np
pd_is_datetime_arraylike = None
try:
from pandas.core.common import is_datetime_arraylike as pd_is_datetime_arraylike
except:
pass
from functools import partial
def is_datetime_arraylike(arr):
if isinstance(arr, pd.DataFrame):
return arr.apply(pd_is_datetime_arraylike).all()
elif pd_is_datetime_arraylike is not None:
return pd_is_datetime_arraylike(arr)
elif isinstance(arr, pd.DatetimeIndex):
return True
else:
inferred = lib.infer_dtype(arr)
return "datetime" in inferred
class DateTimeFormat(object):
def __init__(self, fmtstr, coerce=True):
self.fmtstr = fmtstr
self.coerce = coerce
def __call__(self, value):
if isinstance(value, pd.Series):
return value.apply(self.__call__)
else:
if not hasattr(value, "strftime"):
if self.coerce:
value = pd.to_datetime(value)
if not hasattr(value, "strftime"):
raise ValueError(
"failed to coerce %s type=%s to datetime"
% (value, type(value))
)
else: #
raise ValueError(
"%s type(%s) has not method strftime" % (value, type(value))
)
return (value == value and value.strftime(self.fmtstr)) or str(value)
class NumberFormat(object):
def __init__(
self,
precision=2,
commas=True,
parens=True,
suffix=None,
kind="f",
coerce=True,
transform=None,
nan="nan",
prefix=None,
lpad_zero=1,
do_raise=0,
trunc_dot_zeros=0,
):
"""
Parameters
----------
precision : int, defaults to 2
Number of decimals places to show
commas : bool, default to True
If true then show commas, else do not
parens : bool, default to True
If True then use parenthesis for showing negative numbers
suffix:
kind:
coerce:
transform:
nan:
prefix:
lpad_zero:
do_raise:
trunc_dot_zeros: bool, default to false
if True and precision is greater than 0, a number such as 3.0 will be returned as just 3
"""
self.transform = transform
self.coerce = coerce
# build format string
self.precision = precision
self.commas = commas
self.parens = parens
self.suffix = suffix or ""
self.prefix = prefix or ""
self.kind = kind
self.nan = nan
self.lpad_zero = lpad_zero
self.do_raise = do_raise
self.trunc_dot_zeros = trunc_dot_zeros
def __call__(self, value, **kwargs):
# apply any overrides
for k, v in kwargs.items():
if hasattr(self, k):
setattr(self, k, v)
self_with_args = partial(self.__call__, **kwargs)
if isinstance(value, pd.Series):
return value.apply(self_with_args)
elif isinstance(value, pd.DataFrame):
return value.applymap(self_with_args)
elif isinstance(value, (list, tuple)):
return list(map(self_with_args, value))
elif isinstance(value, np.ndarray):
if value.ndim == 2:
return self_with_args(pd.DataFrame(value)).values
elif value.ndim == 1:
return self_with_args(pd.Series(value)).values
elif not issubclass(type(value), (float, int)):
if not self.coerce:
raise ValueError(
"NumberFormat expected number type not %s" % (type(value))
)
else:
if self.coerce and not issubclass(type(value), (float, int)):
try:
value = float(value)
except ValueError:
if self.do_raise:
raise
else:
# return the value without doing anything
return value
if np.isnan(value):
return self.nan
# apply transform
value = value if self.transform is None else self.transform(value)
# Build format string
fmt = (
"{:"
+ (self.lpad_zero and "0" or "")
+ (self.commas and "," or "")
+ "."
+ str(self.precision)
+ self.kind
+ "}"
)
txt = fmt.format(value)
if self.precision > 0 and self.trunc_dot_zeros:
txt = txt.replace("." + "0" * self.precision, "")
if self.parens:
isneg = txt[0] == "-"
lp, rp = isneg and ("(", ")") or ("", "")
txt = isneg and txt[1:] or txt
return "{prefix}{lp}{txt}{suffix}{rp}".format(
prefix=self.prefix, txt=txt, suffix=self.suffix, lp=lp, rp=rp
)
else:
return "{prefix}{txt}{suffix}".format(
prefix=self.prefix, txt=txt, suffix=self.suffix
)
def new_int_formatter(
commas=True,
parens=True,
prefix=None,
suffix=None,
coerce=True,
nan="nan",
trunc_dot_zeros=0,
):
precision = 0
return NumberFormat(**locals())
def new_float_formatter(
precision=2,
commas=True,
parens=True,
prefix=None,
suffix=None,
coerce=True,
nan="nan",
trunc_dot_zeros=0,
):
return NumberFormat(**locals())
def new_thousands_formatter(
precision=1,
commas=True,
parens=True,
nan="nan",
prefix=None,
trunc_dot_zeros=0,
suffix="k",
):
transform = lambda v: v * 1e-3
return NumberFormat(**locals())
def new_millions_formatter(
precision=1,
commas=True,
parens=True,
nan="nan",
prefix=None,
trunc_dot_zeros=0,
suffix="M",
):
transform = lambda v: v * 1e-6
return NumberFormat(**locals())
def new_billions_formatter(
precision=1,
commas=True,
parens=True,
nan="nan",
prefix=None,
trunc_dot_zeros=0,
suffix="B",
):
transform = lambda v: v * 1e-9
return NumberFormat(**locals())
def new_trillions_formatter(
precision=1, commas=True, parens=True, nan="nan", prefix=None, trunc_dot_zeros=0
):
transform = lambda v: v * 1e-12
suffix = "T"
return NumberFormat(**locals())
def new_percent_formatter(
precision=2,
commas=True,
parens=True,
prefix=None,
suffix=None,
coerce=True,
transform=lambda v: v,
nan="nan",
trunc_dot_zeros=0,
):
kind = "%"
return NumberFormat(**locals())
def new_datetime_formatter(fmtstr="%d-%b-%y", coerce=True):
return DateTimeFormat(**locals())
def guess_formatter(
values,
precision=1,
commas=True,
parens=True,
nan="nan",
prefix=None,
pcts=0,
trunc_dot_zeros=0,
):
"""Based on the values, return the most suitable formatter
Parameters
----------
values : Series, DataFrame, scalar, list, tuple, or ndarray
Values used to determine which formatter is the best fit
"""
formatter_args = dict(
precision=precision,
commas=commas,
parens=parens,
nan=nan,
prefix=prefix,
trunc_dot_zeros=trunc_dot_zeros,
)
try:
if isinstance(values, pd.datetime) and values.hour == 0 and values.minute == 0:
return new_datetime_formatter()
elif is_datetime_arraylike(values):
# basic date formatter if no hours or minutes
if hasattr(values, "dt"):
if (values.dt.hour == 0).all() and (values.dt.minute == 0).all():
return new_datetime_formatter()
elif isinstance(values, pd.Series):
if (
values.dropna().apply(lambda d: d.hour == 0).all()
and values.apply(lambda d: d.minute == 0).all()
):
return new_datetime_formatter()
elif isinstance(values, pd.DataFrame):
if (
values.dropna()
.applymap(lambda d: d != d or (d.hour == 0 and d.minute == 0))
.all()
.all()
):
return new_datetime_formatter()
elif isinstance(values, pd.Series):
aval = values.abs()
vmax, vmin = aval.max(), aval.min()
elif isinstance(values, np.ndarray):
if values.ndim == 2:
avalues = pd.DataFrame(values).abs()
vmax = avalues.max().max()
vmin = avalues.min().min()
elif values.ndim == 1:
aval = pd.Series(values).abs()
vmax, vmin = aval.max(), aval.min()
else:
raise ValueError("cannot accept frame with more than 2-dimensions")
elif isinstance(values, pd.DataFrame):
avalues = values.abs()
vmax = avalues.max().max()
vmin = avalues.min().min()
elif isinstance(values, (list, tuple)):
vmax = max(values)
vmin = min(values)
else:
vmax = vmin = abs(values)
if np.isnan(vmin):
return new_float_formatter(**formatter_args)
else:
min_digits = 0 if vmin == 0 else math.floor(math.log10(vmin))
# max_digits = math.floor(math.log10(vmax))
if min_digits >= 12:
return new_trillions_formatter(**formatter_args)
elif min_digits >= 9:
return new_billions_formatter(**formatter_args)
elif min_digits >= 6:
return new_millions_formatter(**formatter_args)
elif min_digits >= 3:
return new_thousands_formatter(**formatter_args)
elif pcts and min_digits < 0 and vmax < 1:
return new_percent_formatter(**formatter_args)
else:
if isinstance(vmax, int):
formatter_args.pop("precision")
return new_int_formatter(**formatter_args)
else:
return new_float_formatter(**formatter_args)
except:
# import sys
# e = sys.exc_info()[0]
return lambda x: x
class DynamicNumberFormat(object):
def __init__(self, method=None, **formatter_args):
"""
:param method: None, cell, col
:param formatter_args:
:return:
"""
if method and method not in ("cell", "col", "row"):
raise ValueError("method must be None, cell, row, or col")
self.formatter_args = formatter_args
self.method = method
def __call__(self, value, **kwargs):
for k in list(kwargs.keys()):
if hasattr(self, k):
setattr(self, k, kwargs[k])
kwargs.pop(k)
method = self.method
self_with_args = partial(self.__call__, **kwargs)
if method is not None and isinstance(value, pd.DataFrame):
if method == "cell":
return value.applymap(self_with_args)
elif method == "row":
return value.T.apply(self_with_args).T
else:
return value.apply(self_with_args)
elif method == "cell" and isinstance(value, pd.Series):
return value.apply(self_with_args)
else:
return guess_formatter(value, **self.formatter_args)(value, **kwargs)
def new_dynamic_formatter(
method=None,
precision=1,
commas=True,
parens=True,
nan="nan",
prefix=None,
pcts=0,
trunc_dot_zeros=0,
):
return DynamicNumberFormat(**locals())
# Common Formats
IntFormatter = new_int_formatter()
FloatFormatter = new_float_formatter()
PercentFormatter = new_percent_formatter()
ThousandsFormatter = new_thousands_formatter()
MillionsFormatter = new_millions_formatter()
BillionsFormatter = new_billions_formatter()
TrillionsFormatter = new_trillions_formatter()
DollarCentsFormatter = new_float_formatter(prefix="$")
DollarFormatter = new_int_formatter(prefix="$")
ThousandDollarsFormatter = new_thousands_formatter(prefix="$")
MillionDollarsFormatter = new_millions_formatter(prefix="$")
BillionDollarsFormatter = new_billions_formatter(prefix="$")
TrillionDollarsFormatter = new_trillions_formatter(prefix="$")
YmdFormatter = new_datetime_formatter("%Y%m%d", True)
Y_m_dFormatter = new_datetime_formatter("%Y_%m_%d", True)
DynamicNumberFormatter = DynamicNumberFormat(method="col", pcts=1, trunc_dot_zeros=1)
DynamicRowFormatter = DynamicNumberFormat(method="row", pcts=1, trunc_dot_zeros=1)
DynamicColumnFormatter = DynamicNumberFormat(method="col", pcts=1, trunc_dot_zeros=1)
DynamicCellFormatter = DynamicNumberFormat(method="cell", pcts=1, trunc_dot_zeros=1)