Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ io / formats / format.py

# -*- coding: utf-8 -*-
"""
Internal module for formatting output data in csv, html,
and latex files. This module also applies to display formatting.
"""

from __future__ import print_function

from functools import partial

import numpy as np

from pandas._libs import lib
from pandas._libs.tslib import format_array_from_datetime
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
from pandas.compat import StringIO, lzip, map, u, zip

from pandas.core.dtypes.common import (
    is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
    is_extension_array_dtype, is_float, is_float_dtype, is_integer,
    is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar,
    is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
    ABCIndexClass, ABCMultiIndex, ABCSeries, ABCSparseArray)
from pandas.core.dtypes.missing import isna, notna

from pandas import compat
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.config import get_option, set_option
from pandas.core.index import Index, ensure_index
from pandas.core.indexes.datetimes import DatetimeIndex

from pandas.io.common import _expand_user, _stringify_path
from pandas.io.formats.printing import adjoin, justify, pprint_thing
from pandas.io.formats.terminal import get_terminal_size

# pylint: disable=W0141


# Template for the "Parameters" section of a docstring. The ``%(header)s``
# placeholder has to be filled in with %-formatting by whoever reuses this
# text (the consumers are not visible in this part of the file).
# NOTE: this is a runtime string, so its indentation and wording are part of
# the rendered documentation.
common_docstring = """
        Parameters
        ----------
        buf : StringIO-like, optional
            Buffer to write to.
        columns : sequence, optional, default None
            The subset of columns to write. Writes all columns by default.
        col_space : int, optional
            The minimum width of each column.
        header : bool, optional
            %(header)s.
        index : bool, optional, default True
            Whether to print index (row) labels.
        na_rep : str, optional, default 'NaN'
            String representation of NAN to use.
        formatters : list or dict of one-param. functions, optional
            Formatter functions to apply to columns' elements by position or
            name.
            The result of each function must be a unicode string.
            List must be of length equal to the number of columns.
        float_format : one-parameter function, optional, default None
            Formatter function to apply to columns' elements if they are
            floats. The result of this function must be a unicode string.
        sparsify : bool, optional, default True
            Set to False for a DataFrame with a hierarchical index to print
            every multiindex key at each row.
        index_names : bool, optional, default True
            Prints the names of the indexes.
        justify : str, default None
            How to justify the column labels. If None uses the option from
            the print configuration (controlled by set_option), 'right' out
            of the box. Valid values are

            * left
            * right
            * center
            * justify
            * justify-all
            * start
            * end
            * inherit
            * match-parent
            * initial
            * unset.
        max_rows : int, optional
            Maximum number of rows to display in the console.
        max_cols : int, optional
            Maximum number of columns to display in the console.
        show_dimensions : bool, default False
            Display DataFrame dimensions (number of rows by number of columns).
        decimal : str, default '.'
            Character recognized as decimal separator, e.g. ',' in Europe.

            .. versionadded:: 0.18.0
    """

# Accepted values for the ``justify`` parameter; same entries, in the same
# order, as the list documented in ``common_docstring``.
# NOTE(review): the code that validates against this tuple is not visible in
# this part of the file.
_VALID_JUSTIFY_PARAMETERS = ("left", "right", "center", "justify",
                             "justify-all", "start", "end", "inherit",
                             "match-parent", "initial", "unset")

# "Returns" section of a docstring, kept as a reusable runtime string
# (the consumers are not visible in this part of the file).
return_docstring = """
        Returns
        -------
        str (or unicode, depending on data and options)
            String representation of the dataframe.
    """


class CategoricalFormatter(object):
    """
    Render a categorical as ``[v1, v2, ...]`` followed by an optional footer.

    Parameters
    ----------
    categorical : categorical array-like
        Object to render; must provide ``get_values()`` and
        ``_repr_categories_info()``.
    buf : StringIO-like, optional
        Stored on the formatter; a new ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with a "Length: N" entry.
    na_rep : str, default 'NaN'
        Missing-value representation handed to ``format_array``.
    footer : bool, default True
        Whether ``to_string`` emits the footer at all.
    """

    def __init__(self, categorical, buf=None, length=True, na_rep='NaN',
                 footer=True):
        self.categorical = categorical
        if buf is None:
            buf = StringIO(u(""))
        self.buf = buf
        self.na_rep = na_rep
        self.length = length
        self.footer = footer

    def _get_footer(self):
        """Return the footer: optional length, then the categories info."""
        length_part = ''
        if self.length:
            length_part = "Length: {length}".format(
                length=len(self.categorical))

        level_info = self.categorical._repr_categories_info()

        # The categories info goes on its own line whenever something
        # precedes it.
        separator = '\n' if length_part else ''
        return compat.text_type(length_part + separator + level_info)

    def _get_formatted_values(self):
        """Format the underlying values with ``format_array``."""
        values = self.categorical.get_values()
        return format_array(values, None,
                            float_format=None, na_rep=self.na_rep)

    def to_string(self):
        """Return the text representation of the categorical."""
        if len(self.categorical) == 0:
            # Nothing to list: only the footer (if requested) is shown.
            return self._get_footer() if self.footer else u('')

        stripped = [u('{i}').format(i=value).strip()
                    for value in self._get_formatted_values()]
        lines = [u('[') + u(', ').join(stripped) + u(']')]

        footer = self._get_footer() if self.footer else None
        if footer:
            lines.append(footer)

        return compat.text_type(u('\n').join(lines))


class SeriesFormatter(object):
    """
    Build the string representation of a Series.

    Parameters
    ----------
    series : Series
        The Series to render.
    buf : StringIO-like, optional
        Stored on the formatter; a new ``StringIO`` is created when omitted.
    length : bool or 'truncate', default True
        ``True`` always adds "Length: N" to the footer; ``'truncate'`` adds
        it only when rows were truncated.
    header : bool, default True
        Whether to print the index name line above the values (only done
        when the index actually has a name).
    index : bool, default True
        Whether to print the index labels next to the values.
    na_rep : str, default 'NaN'
        Missing-value representation handed to ``format_array``.
    name : bool, default False
        ``False`` suppresses the "Name: ..." footer entry.
    float_format : one-parameter function, optional
        Falls back to the ``display.float_format`` option when None.
    dtype : bool, default True
        ``False`` or ``None`` suppresses the "dtype: ..." footer entry.
    max_rows : int, optional
        Rows are truncated when the Series is longer than this.

    Attributes
    ----------
    tr_series : Series
        The rows that are actually rendered (head and tail when truncated).
    truncate_v : bool-like
        Truthy when rows were truncated.
    tr_row_num : int or None
        Number of leading rows kept before the ellipsis row; None when
        nothing was truncated.
    """

    def __init__(self, series, buf=None, length=True, header=True, index=True,
                 na_rep='NaN', name=False, float_format=None, dtype=True,
                 max_rows=None):
        self.series = series
        self.buf = buf if buf is not None else StringIO()
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows

        # Use the globally configured float formatter unless one was given.
        if float_format is None:
            float_format = get_option("display.float_format")
        self.float_format = float_format
        self.dtype = dtype
        self.adj = _get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self):
        """
        Decide whether rows must be truncated and select the rows to show.

        Sets ``truncate_v``, ``tr_series`` and ``tr_row_num`` on the
        formatter. ``tr_row_num`` is always assigned (None when nothing is
        truncated) so that the attribute exists in every case.
        """
        from pandas.core.reshape.concat import concat
        max_rows = self.max_rows
        # Falsy (None / 0 / False) when max_rows is unset or not exceeded.
        truncate_v = max_rows and (len(self.series) > max_rows)
        series = self.series
        row_num = None
        if truncate_v:
            if max_rows == 1:
                row_num = max_rows
                series = series.iloc[:max_rows]
            else:
                # Keep the first and the last ``max_rows // 2`` rows.
                row_num = max_rows // 2
                series = concat((series.iloc[:row_num],
                                 series.iloc[-row_num:]))
        self.tr_row_num = row_num
        self.tr_series = series
        self.truncate_v = truncate_v

    def _get_footer(self):
        """
        Build the footer shown below the values.

        Entries are joined with ', ' in this order: index frequency, Series
        name, length and dtype. For categorical data the categories info is
        appended on a new line.

        Returns
        -------
        text (``compat.text_type``)
        """
        name = self.series.name
        footer = u('')

        if getattr(self.series.index, 'freq', None) is not None:
            footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr)

        if self.name is not False and name is not None:
            if footer:
                footer += ', '

            series_name = pprint_thing(name,
                                       escape_chars=('\t', '\r', '\n'))
            footer += u"Name: {sname}".format(sname=series_name)

        if (self.length is True or
                (self.length == 'truncate' and self.truncate_v)):
            if footer:
                footer += ', '
            footer += 'Length: {length}'.format(length=len(self.series))

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, 'name', None)
            if dtype_name:
                if footer:
                    footer += ', '
                footer += u'dtype: {typ}'.format(
                    typ=pprint_thing(dtype_name))

        # level infos are added to the end and in a new line, like it is done
        # for Categoricals
        if is_categorical_dtype(self.tr_series.dtype):
            level_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += level_info

        return compat.text_type(footer)

    def _get_formatted_index(self):
        """
        Format the index of the rows to display.

        Returns
        -------
        fmt_index : list
            Result of ``index.format`` with names included; ``to_string``
            treats the first entry as the header line and the rest as the
            row labels.
        have_header : bool
            Whether the index has a name (any truthy name for a MultiIndex).
        """
        index = self.tr_series.index
        is_multi = isinstance(index, ABCMultiIndex)

        if is_multi:
            have_header = any(name for name in index.names)
            fmt_index = index.format(names=True)
        else:
            have_header = index.name is not None
            fmt_index = index.format(name=True)
        return fmt_index, have_header

    def _get_formatted_values(self):
        """Format the values of the rows to display with ``format_array``."""
        values_to_format = self.tr_series._formatting_values()
        return format_array(values_to_format, None,
                            float_format=self.float_format, na_rep=self.na_rep)

    def to_string(self):
        """
        Render the Series.

        Returns
        -------
        text (``compat.text_type``)
            ``'Series([], <footer>)'`` for an empty Series; otherwise the
            optional index-name line, the index/value rows (with an ellipsis
            row when truncated) and the optional footer.
        """
        series = self.tr_series
        footer = self._get_footer()

        if len(series) == 0:
            return 'Series([], ' + footer + ')'

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.truncate_v:
            n_header_rows = 0
            row_num = self.tr_row_num
            # Size the ellipsis after the last value of the head section.
            width = self.adj.len(fmt_values[row_num - 1])
            if width > 3:
                dot_str = '...'
            else:
                dot_str = '..'
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            dot_str = self.adj.justify([dot_str], width, mode='center')[0]
            fmt_values.insert(row_num + n_header_rows, dot_str)
            # fmt_index[0] is the header line, hence the extra offset of 1.
            fmt_index.insert(row_num + 1, '')

        if self.index:
            result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
        else:
            result = self.adj.adjoin(3, fmt_values)

        if self.header and have_header:
            result = fmt_index[0] + '\n' + result

        if footer:
            result += '\n' + footer

        # ``result`` is already one string; the join leaves its text as is.
        return compat.text_type(u('').join(result))


class TextAdjustment(object):
    """
    Helpers for measuring, justifying and adjoining text.

    The encoding is read from the ``display.encoding`` option when the
    object is created.
    """

    def __init__(self):
        self.encoding = get_option("display.encoding")

    def len(self, text):
        """Measure ``text`` with ``compat.strlen`` using ``self.encoding``."""
        encoding = self.encoding
        return compat.strlen(text, encoding=encoding)

    def justify(self, texts, max_len, mode='right'):
        """Delegate to the module-level ``justify`` helper."""
        justified = justify(texts, max_len, mode=mode)
        return justified

    def adjoin(self, space, *lists, **kwargs):
        """
        Delegate to the module-level ``adjoin`` helper, supplying this
        object's ``len`` and ``justify`` as ``strlen`` and ``justfunc``.
        """
        measure = self.len
        pad = self.justify
        return adjoin(space, *lists, strlen=measure, justfunc=pad, **kwargs)


class EastAsianTextAdjustment(TextAdjustment):
    """
    Text adjustment that measures strings with ``compat.east_asian_len``.

    ``ambiguous_width`` is 2 when the ``display.unicode.ambiguous_as_wide``
    option is set and 1 otherwise.
    """

    def __init__(self):
        super(EastAsianTextAdjustment, self).__init__()
        ambiguous_as_wide = get_option("display.unicode.ambiguous_as_wide")
        self.ambiguous_width = 2 if ambiguous_as_wide else 1

    def len(self, text):
        """Measure ``text`` with ``compat.east_asian_len``."""
        measured = compat.east_asian_len(
            text, encoding=self.encoding,
            ambiguous_width=self.ambiguous_width)
        return measured

    def justify(self, texts, max_len, mode='right'):
        """
        Pad every string in ``texts`` so that its measured width (as given
        by ``self.len``) reaches ``max_len``.

        ``mode`` is 'left' or 'center'; any other value right-justifies.
        """
        # The built-in padding methods count characters, so the target
        # width is corrected per string by the difference between its
        # character count and its measured width.
        def padded_width(text):
            return max_len - self.len(text) + len(text)

        if mode == 'left':
            method_name = 'ljust'
        elif mode == 'center':
            method_name = 'center'
        else:
            method_name = 'rjust'

        return [getattr(text, method_name)(padded_width(text))
                for text in texts]


def _get_adjustment():
    """
    Return a new text-adjustment helper.

    An ``EastAsianTextAdjustment`` is created when the
    ``display.unicode.east_asian_width`` option is set, a plain
    ``TextAdjustment`` otherwise.
    """
    if get_option("display.unicode.east_asian_width"):
        adjustment_cls = EastAsianTextAdjustment
    else:
        adjustment_cls = TextAdjustment
    return adjustment_cls()

Loading ...