# -*- coding: utf-8 -*-
"""
Internal module for formatting output data in csv, html,
and latex files. This module also applies to display formatting.
"""
from __future__ import print_function
from functools import partial
import numpy as np
from pandas._libs import lib
from pandas._libs.tslib import format_array_from_datetime
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
from pandas.compat import StringIO, lzip, map, u, zip
from pandas.core.dtypes.common import (
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
is_extension_array_dtype, is_float, is_float_dtype, is_integer,
is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar,
is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCIndexClass, ABCMultiIndex, ABCSeries, ABCSparseArray)
from pandas.core.dtypes.missing import isna, notna
from pandas import compat
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.config import get_option, set_option
from pandas.core.index import Index, ensure_index
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.io.common import _expand_user, _stringify_path
from pandas.io.formats.printing import adjoin, justify, pprint_thing
from pandas.io.formats.terminal import get_terminal_size
# pylint: disable=W0141
# Shared "Parameters" section text for formatting-method docstrings.
# The ``%(header)s`` placeholder on the ``header`` entry means this template
# is meant for %-style interpolation with a ``header`` key before it is used
# (the call sites are not visible in this part of the file -- confirm there).
common_docstring = """
Parameters
----------
buf : StringIO-like, optional
Buffer to write to.
columns : sequence, optional, default None
The subset of columns to write. Writes all columns by default.
col_space : int, optional
The minimum width of each column.
header : bool, optional
%(header)s.
index : bool, optional, default True
Whether to print index (row) labels.
na_rep : str, optional, default 'NaN'
String representation of NAN to use.
formatters : list or dict of one-param. functions, optional
Formatter functions to apply to columns' elements by position or
name.
The result of each function must be a unicode string.
List must be of length equal to the number of columns.
float_format : one-parameter function, optional, default None
Formatter function to apply to columns' elements if they are
floats. The result of this function must be a unicode string.
sparsify : bool, optional, default True
Set to False for a DataFrame with a hierarchical index to print
every multiindex key at each row.
index_names : bool, optional, default True
Prints the names of the indexes.
justify : str, default None
How to justify the column labels. If None uses the option from
the print configuration (controlled by set_option), 'right' out
of the box. Valid values are
* left
* right
* center
* justify
* justify-all
* start
* end
* inherit
* match-parent
* initial
* unset.
max_rows : int, optional
Maximum number of rows to display in the console.
max_cols : int, optional
Maximum number of columns to display in the console.
show_dimensions : bool, default False
Display DataFrame dimensions (number of rows by number of columns).
decimal : str, default '.'
Character recognized as decimal separator, e.g. ',' in Europe.
.. versionadded:: 0.18.0
"""
# Accepted values for the ``justify`` argument; this is the same set that is
# listed under ``justify`` in ``common_docstring``.
_VALID_JUSTIFY_PARAMETERS = (
    "left",
    "right",
    "center",
    "justify",
    "justify-all",
    "start",
    "end",
    "inherit",
    "match-parent",
    "initial",
    "unset",
)
# Shared "Returns" section text for formatting-method docstrings
# (companion to ``common_docstring``).
return_docstring = """
Returns
-------
str (or unicode, depending on data and options)
String representation of the dataframe.
"""
class CategoricalFormatter(object):
    """
    Render a categorical as a single bracketed line plus an optional footer.

    Parameters
    ----------
    categorical : object
        The categorical to render. It must support ``len()``,
        ``get_values()`` and ``_repr_categories_info()``.
    buf : file-like, optional
        Stored on the instance; a fresh ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with a ``Length: N`` entry.
    na_rep : str, default 'NaN'
        Passed through to ``format_array`` for missing values.
    footer : bool, default True
        Whether ``to_string`` appends the footer at all.
    """

    def __init__(self, categorical, buf=None, length=True, na_rep='NaN',
                 footer=True):
        self.categorical = categorical
        if buf is None:
            buf = StringIO(u(""))
        self.buf = buf
        self.na_rep = na_rep
        self.length = length
        self.footer = footer

    def _get_footer(self):
        """Return the footer text: optional length line, then category info."""
        pieces = []
        if self.length:
            pieces.append(
                "Length: {length}".format(length=len(self.categorical)))
        # Category info always comes last, on its own line when a length
        # entry precedes it.
        pieces.append(self.categorical._repr_categories_info())
        return compat.text_type('\n'.join(pieces))

    def _get_formatted_values(self):
        """Return the categorical's values formatted by ``format_array``."""
        raw_values = self.categorical.get_values()
        return format_array(raw_values, None,
                            float_format=None, na_rep=self.na_rep)

    def to_string(self):
        """Return the text representation of the categorical."""
        # Empty categorical: only the footer (if requested) is shown.
        if len(self.categorical) == 0:
            return self._get_footer() if self.footer else u('')

        cells = [u('{i}').format(i=value).strip()
                 for value in self._get_formatted_values()]
        lines = [u('[') + u(', ').join(cells) + u(']')]

        if self.footer:
            footer_text = self._get_footer()
            if footer_text:
                lines.append(footer_text)

        return compat.text_type(u('\n').join(lines))
class SeriesFormatter(object):
    """
    Build the text representation of a Series.

    Parameters
    ----------
    series : Series
        The series to render.
    buf : file-like, optional
        Stored on the instance; a fresh ``StringIO`` is created when omitted.
    length : bool or 'truncate', default True
        ``True`` always adds ``Length: N`` to the footer; ``'truncate'`` adds
        it only when the output was truncated.
    header : bool, default True
        Whether to emit the index-name header line (when the index has one).
    index : bool, default True
        Whether to render the index labels next to the values.
    na_rep : str, default 'NaN'
        Passed through to ``format_array`` for missing values.
    name : object, default False
        Anything other than ``False`` enables the ``Name: ...`` footer entry
        (shown only if the series actually has a name).
    float_format : callable, optional
        Falls back to the ``display.float_format`` option when ``None``.
    dtype : object, default True
        The ``dtype: ...`` footer entry is shown unless this is ``False`` or
        ``None``.
    max_rows : int, optional
        When set and smaller than ``len(series)``, the output is truncated.

    Attributes
    ----------
    tr_series : Series
        The (possibly truncated) series that is actually rendered.
    truncate_v : object
        Truthy when rows were dropped.
    tr_row_num : int or None
        Number of leading rows kept before the ``...`` marker, or ``None``
        when no truncation took place.
    """

    def __init__(self, series, buf=None, length=True, header=True, index=True,
                 na_rep='NaN', name=False, float_format=None, dtype=True,
                 max_rows=None):
        self.series = series
        self.buf = buf if buf is not None else StringIO()
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows

        if float_format is None:
            float_format = get_option("display.float_format")
        self.float_format = float_format
        self.dtype = dtype
        self.adj = _get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self):
        """
        Decide whether to truncate and compute the series to render.

        Sets ``tr_series``, ``truncate_v`` and ``tr_row_num``. The latter is
        always assigned (``None`` when nothing is truncated) so that reading
        it never raises ``AttributeError``.
        """
        # Imported here rather than at module level, as in the original.
        from pandas.core.reshape.concat import concat

        max_rows = self.max_rows
        # Left as the raw ``and`` result (may be None/0/bool); consumers only
        # use it in boolean context.
        truncate_v = max_rows and (len(self.series) > max_rows)
        series = self.series
        if truncate_v:
            if max_rows == 1:
                row_num = max_rows
                series = series.iloc[:max_rows]
            else:
                # Keep the first and last ``max_rows // 2`` rows.
                row_num = max_rows // 2
                series = concat((series.iloc[:row_num],
                                 series.iloc[-row_num:]))
            self.tr_row_num = row_num
        else:
            self.tr_row_num = None
        self.tr_series = series
        self.truncate_v = truncate_v

    def _get_footer(self):
        """
        Return the footer line(s): Freq, Name, Length, dtype and, for
        categorical data, the category info on a new line.
        """
        name = self.series.name
        footer = u('')

        if getattr(self.series.index, 'freq', None) is not None:
            footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr)

        if self.name is not False and name is not None:
            if footer:
                footer += ', '

            series_name = pprint_thing(name,
                                       escape_chars=('\t', '\r', '\n'))
            footer += u"Name: {sname}".format(sname=series_name)

        if (self.length is True or
                (self.length == 'truncate' and self.truncate_v)):
            if footer:
                footer += ', '
            # Report the length of the full series, not the truncated one.
            footer += 'Length: {length}'.format(length=len(self.series))

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, 'name', None)
            if dtype_name:
                if footer:
                    footer += ', '
                footer += u'dtype: {typ}'.format(typ=pprint_thing(dtype_name))

        # level infos are added to the end and in a new line, like it is done
        # for Categoricals
        if is_categorical_dtype(self.tr_series.dtype):
            level_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += level_info

        return compat.text_type(footer)

    def _get_formatted_index(self):
        """
        Return ``(fmt_index, have_header)``.

        ``fmt_index`` is the formatted index with the name header as its
        first element; ``have_header`` tells whether the index is named.
        """
        index = self.tr_series.index
        is_multi = isinstance(index, ABCMultiIndex)

        if is_multi:
            have_header = any(name for name in index.names)
            fmt_index = index.format(names=True)
        else:
            have_header = index.name is not None
            fmt_index = index.format(name=True)
        return fmt_index, have_header

    def _get_formatted_values(self):
        """Return the rendered series' values formatted by ``format_array``."""
        values_to_format = self.tr_series._formatting_values()
        return format_array(values_to_format, None,
                            float_format=self.float_format, na_rep=self.na_rep)

    def to_string(self):
        """Return the full text representation of the series."""
        series = self.tr_series
        footer = self._get_footer()

        if len(series) == 0:
            return 'Series([], ' + footer + ')'

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.truncate_v:
            n_header_rows = 0
            row_num = self.tr_row_num
            # Size the marker from the last value shown before the gap.
            width = self.adj.len(fmt_values[row_num - 1])
            if width > 3:
                dot_str = '...'
            else:
                dot_str = '..'
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            dot_str = self.adj.justify([dot_str], width, mode='center')[0]
            fmt_values.insert(row_num + n_header_rows, dot_str)
            # ``fmt_index[0]`` is the header, hence the ``+ 1`` offset.
            fmt_index.insert(row_num + 1, '')

        if self.index:
            result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
        else:
            result = self.adj.adjoin(3, fmt_values)

        if self.header and have_header:
            result = fmt_index[0] + '\n' + result

        if footer:
            result += '\n' + footer

        return compat.text_type(u('').join(result))
class TextAdjustment(object):
    """
    Text measuring and alignment helpers that use the display encoding.

    The encoding is read once, at construction time, from the
    ``display.encoding`` option.
    """

    def __init__(self):
        encoding = get_option("display.encoding")
        self.encoding = encoding

    def len(self, text):
        """Return the length of ``text`` as computed by ``compat.strlen``."""
        encoding = self.encoding
        return compat.strlen(text, encoding=encoding)

    def justify(self, texts, max_len, mode='right'):
        """Delegate to the module-level ``justify`` with the given mode."""
        justified = justify(texts, max_len, mode=mode)
        return justified

    def adjoin(self, space, *lists, **kwargs):
        """
        Delegate to the module-level ``adjoin``, supplying this object's
        ``len`` and ``justify`` as the ``strlen`` and ``justfunc`` hooks.
        """
        joined = adjoin(space, *lists,
                        strlen=self.len,
                        justfunc=self.justify,
                        **kwargs)
        return joined
class EastAsianTextAdjustment(TextAdjustment):
    """
    ``TextAdjustment`` variant that measures text with East Asian widths.

    Ambiguous-width characters count as 2 columns when the
    ``display.unicode.ambiguous_as_wide`` option is set, otherwise as 1.
    """

    def __init__(self):
        super(EastAsianTextAdjustment, self).__init__()
        treat_as_wide = get_option("display.unicode.ambiguous_as_wide")
        self.ambiguous_width = 2 if treat_as_wide else 1

    def len(self, text):
        """Return the width of ``text`` per ``compat.east_asian_len``."""
        return compat.east_asian_len(
            text,
            encoding=self.encoding,
            ambiguous_width=self.ambiguous_width,
        )

    def justify(self, texts, max_len, mode='right'):
        """
        Pad each string in ``texts`` to a display width of ``max_len``.

        ``mode`` is 'left' or 'center'; any other value right-justifies.
        """
        if mode == 'left':
            pad_method = 'ljust'
        elif mode == 'center':
            pad_method = 'center'
        else:
            pad_method = 'rjust'

        # re-calculate padding space per str considering East Asian Width:
        # the str padding methods count characters, so the target is shifted
        # by the difference between character count and measured width.
        return [getattr(item, pad_method)(max_len - self.len(item) + len(item))
                for item in texts]
def _get_adjustment():
    """
    Return the text-adjustment helper selected by the
    ``display.unicode.east_asian_width`` option.
    """
    if get_option("display.unicode.east_asian_width"):
        return EastAsianTextAdjustment()
    return TextAdjustment()
# Loading ...