Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
chaco / scales / formatters.py
Size: Mime:
"""
Classes for formatting labels for values or times.
"""

from math import ceil, floor, fmod, log10

import six
import six.moves as sm

from numpy import abs, all, array, asarray, amax, amin
from .safetime import strftime, time, safe_fromtimestamp, localtime
import warnings


# Names exported by a star-import of this module.
__all__ = ['NullFormatter', 'BasicFormatter', 'IntegerFormatter',
           'OffsetFormatter', 'TimeFormatter', 'strftimeEx']

class NullFormatter(object):
    """ Formatter for empty labels.

    Neither method uses instance state, so both are static methods: they can
    be called on the class or on an instance with the same arguments.
    """
    @staticmethod
    def format(ticks, numlabels=None, char_width=None):
        """ Returns a list containing an empty label for each item in *ticks*.

        *numlabels* and *char_width* are accepted for interface
        compatibility with the other formatters and are ignored.
        """
        return [""] * len(ticks)

    @staticmethod
    def estimate_width(start, end, numlabels=None, char_width=None):
        """ Returns 0 for width and 0 for number of labels.

        All arguments are ignored; empty labels never take up any space.
        """
        return 0, 0


class BasicFormatter(object):
    """ Formatter for numeric labels.

    The class-level attributes below are defaults; any of them can be
    overridden per instance by passing keyword arguments to the constructor.
    """
    # This is a class-level default that is related to the algorithm in format()
    avg_label_width = 7.0

    # Toggles whether or not format() may switch to scientific notation
    # (see scientific_limits).
    use_scientific = True

    # format() uses scientific notation only when *every* tick is either an
    # exact multiple of 10 ** scientific_limits[1] or has a magnitude of at
    # most 10 ** scientific_limits[0].
    scientific_limits = (-3, 5)

    def __init__(self, **kwds):
        # Allow the user to override the class-level defaults.
        self.__dict__.update(kwds)

    def oldformat(self, ticks, numlabels=None, char_width=None):
        """ This function is adapted from matplotlib's "OldScalarFormatter".

        Parameters
        ----------
        ticks : array of numbers
            The tick values to be formatted.
        numlabels
            Not used.
        char_width
            Not used.

        Returns
        -------
        List of formatted labels.
        """
        if len(ticks) == 0:
            return []

        # The printf-style format depends only on the overall span of the
        # ticks, so it is chosen once, before the loop.
        d = abs(ticks[-1] - ticks[0])
        if d < 1e-2:
            fmt = '%1.3e'
        elif d < 1e-1:
            fmt = '%1.3f'
        elif d > 1e5:
            fmt = '%1.1e'
        elif d > 10:
            fmt = '%1.1f'
        elif d > 1:
            fmt = '%1.2f'
        else:
            fmt = '%1.3f'

        labels = []
        for x in ticks:
            # Small whole numbers are always shown as plain integers.
            if abs(x) < 1e4 and x == int(x):
                labels.append('%d' % x)
                continue

            s = fmt % x
            tup = s.split('e')
            if len(tup) == 2:
                # Exponential form: trim trailing zeros from the mantissa,
                # drop a "+" sign and leading zeros from the exponent.
                mantissa = tup[0].rstrip('0').rstrip('.')
                sign = tup[1][0].replace('+', '')
                exponent = tup[1][1:].lstrip('0')
                if sign or exponent:
                    s = '%se%s%s' % (mantissa, sign, exponent)
                else:
                    s = mantissa
            else:
                s = s.rstrip('0').rstrip('.')
            labels.append(s)
        return labels

    def format(self, ticks, numlabels=None, char_width=None, fill_ratio=0.3):
        """ Does "nice" formatting of floating-point numbers.  *numlabels* is
        ignored in this method.

        Parameters
        ----------
        ticks : array of numbers
            The tick values to be formatted.
        numlabels
            Not used.
        char_width : number
            The total character width available; in scientific mode it limits
            the length of each mantissa.
        fill_ratio : float
            Fraction of *char_width* that the labels may occupy.

        Returns
        -------
        List of formatted labels.
        """
        if len(ticks) == 0:
            return []

        ticks = asarray(ticks)
        if self.use_scientific:
            scientific = (((ticks % 10 ** self.scientific_limits[1]) == 0) |
                          (abs(ticks) <= 10 ** self.scientific_limits[0])).all()
        else:
            scientific = False

        if scientific:
            if char_width is not None:
                # We need to determine how many digits we can use in the
                # mantissa based on the order of magnitude of the exponent.
                chars_per_label = int(char_width * fill_ratio / len(ticks))
                maxtick = amax(abs(ticks))
                if maxtick > 0:
                    exp_oom = str(int(floor(log10(maxtick))))
                else:
                    exp_oom = "0"
                emax = len(exp_oom)

                if chars_per_label < emax:
                    # We're sort of hosed.  Use a minimum 3 chars for the mantissa.
                    mmax = 3
                else:
                    # One extra character is reserved for the "e".
                    mmax = chars_per_label - emax - 1
            else:
                # A non-positive value disables mantissa truncation.
                mmax = -1
            return [self._nice_sci(x, mmax) for x in ticks]

        # Decimal mode: show whole-valued ticks without a fractional part.
        if not (ticks % 1).any():
            return [str(tick) for tick in ticks.astype(int)]
        return [str(tick) for tick in ticks]

    def _nice_sci(self, val, mdigits, force_sign=False):
        """ Formats *val* nicely using scientific notation.

        *mdigits* is the maximum number of characters kept from the mantissa
        string (sign and decimal point included); a value <= 0 disables
        truncation.  The mantissa is truncated, not rounded.  If *force_sign*
        is True, then always show the sign of the mantissa, otherwise only
        show the sign if *val* is negative.  The exponent is omitted when it
        is zero.
        """
        if val != 0:
            e = int(floor(log10(abs(val))))
        else:
            e = 0
        m = val / float(10**e)
        m_str = str(m)

        # Safely truncating the mantissa is somewhat tricky.  The minimum
        # length of the mantissa is everything up to (but not including) the
        # period.  If the m_str doesn't have a decimal point, then we have to
        # ignore mdigits.
        if mdigits > 0 and "." in m_str:
            max_len = max(m_str.index("."), mdigits)
            m_str = m_str[:max_len]

        # Always strip off a trailing decimal point.
        if m_str[-1] == ".":
            m_str = m_str[:-1]

        if force_sign and not m_str.startswith("-"):
            m_str = "+" + m_str

        if e != 0:
            # str() of an int never carries a leading "+", so the exponent
            # needs no further clean-up.
            m_str += "e" + str(e)

        return m_str

    def estimate_width(self, start, end, numlabels=None, char_width=None,
                       fill_ratio=0.3, ticker=None):
        """ Returns an estimate of the total number of characters used by the
        the labels for the given set of inputs, as well as the number of labels.

        Parameters
        ----------
        start : number
            The beginning of the interval.
        end : number
            The end of the interval.
        numlabels : number
            The ideal number of labels to generate on the interval.
        char_width : number
            The total character width available for labelling the interval.
        fill_ratio : 0.0 < float <= 1.0
            Ratio of the available width that will be occupied by label text.
        ticker : AbstractScale object
            Object that can calculate the number of labels needed.

        Returns
        -------
        (numlabels, total label width)

        Raises
        ------
        ValueError
            If neither *numlabels* nor *char_width* is given, since there is
            then nothing to base the estimate on.
        """
        if numlabels == 0 or char_width == 0:
            return 0, 0

        if not numlabels and not char_width:
            raise ValueError(
                "num_labels and char_width should not both be None."
            )

        # use the start and end points as ticks and average their label sizes
        labelsizes = [len(label) for label in self.format([start, end])]
        avg_size = sum(labelsizes) / 2.0

        # An explicit label count wins over one derived from the width.
        if numlabels:
            initial_estimate = numlabels
        else:
            initial_estimate = round(fill_ratio * char_width / avg_size)

        if ticker:
            est_ticks = ticker.num_ticks(start, end, initial_estimate)
        else:
            est_ticks = initial_estimate

        return est_ticks, est_ticks * avg_size


class IntegerFormatter(BasicFormatter):
    """ Formatter that renders every tick label as an integer.
    """

    def format(self, ticks, numlabels=None, char_width=None, fill_ratio=0.3):
        """ Converts each tick to ``int`` and returns the string forms as a
        list.  *numlabels*, *char_width* and *fill_ratio* are not used.
        """
        return [str(int(tick)) for tick in ticks]


class OffsetFormatter(BasicFormatter):
    """ This formatter is like BasicFormatter, but it supports formatting
    ticks using an offset.  This is useful for viewing small ranges within
    big numbers.
    """

    # Whether or not to use offsets when labelling the ticks.  Note that
    # even if this is true, offset are only used when the ratio of the data
    # range to the average data value is smaller than a threshold.
    use_offset = False

    # The threshold ratio of the data range to the average data value, below
    # which "offset" display mode will be used if use_offset is True.
    offset_threshold = 1e-3

    # Determines which ticks to display the offset value at.  Can be "all",
    # "firstlast", or "none".
    offset_display = "firstlast"

    # Determines which format to use to display the end labels.  Can be
    # "offset" or "sci".
    end_label_format = "offset"

    # Specifies the threshold values
    offset_limits = (-3, 4)

    # There are two possible formats for the offset.
    #
    # "sci"
    #     uses scientific notation for the offset
    # "decimal"
    #     pads with zeroes left or right until the decimal
    #
    # The following table shows some example ranges and how an intermediate
    # tick will be displayed.  These all assume an offset_display value of
    # "none" or "firstlast".
    #
    #  ============     ==========       =========      =========
    #     start            end             sci          decimal
    #  ============     ==========       =========      =========
    #    90.0004         90.0008         5.0e-4          .0005
    #    90.0004         90.0015         1.2e-3          .0012
    #   -1200015        -1200003           12              12
    #    2300015000     2300015030       1.502e4         15020
    #  ============     ==========       =========      =========
    #
    offset_format = "sci"

    # The offset generated by the last call to format()
    offset = None

    def _compute_offset(self, ticks):
        """ Returns the offset to subtract from *ticks* before labelling.

        The offset is a multiple of the smallest power of ten that is at
        least as large as the span between the first and last tick: the
        largest tick rounded up when every tick is negative, otherwise the
        smallest tick rounded down.

        The first and last ticks must differ; a zero span has no order of
        magnitude (``log10`` raises ``ValueError``).
        """
        # Use the magnitude of the span so descending ticks work as well.
        data_range = abs(ticks[-1] - ticks[0])
        range_oom = int(ceil(log10(data_range)))
        pow_of_ten = 10 ** range_oom
        if all(asarray(ticks) < 0):
            return ceil(amax(ticks) / pow_of_ten) * pow_of_ten
        else:
            return floor(amin(ticks) / pow_of_ten) * pow_of_ten

    def format(self, ticks, numlabels=None, char_width=None, fill_ratio=0.3):
        """ Formats *ticks*, optionally relative to a common offset.

        Offset mode is used only when ``use_offset`` is set, the first and
        last ticks differ, and the ratio of their span to the average tick
        magnitude is below ``offset_threshold``.  Otherwise the ticks are
        formatted exactly as by BasicFormatter.format().

        Parameters
        ----------
        ticks : array of numbers
            The tick values to be formatted.
        numlabels, char_width, fill_ratio
            Passed through to BasicFormatter.format() when offset mode is
            not used; ignored in offset mode.

        Returns
        -------
        List of formatted labels.
        """
        if len(ticks) == 0:
            return []

        data_range = abs(ticks[-1] - ticks[0])
        avg_data = sum(abs(ticks)) / len(ticks)
        # A zero span (e.g. a single tick) cannot be given an offset, and
        # checking it first also guarantees avg_data is non-zero below.
        offset_mode = (self.use_offset and data_range > 0 and
                       data_range / avg_data < self.offset_threshold)
        if not offset_mode:
            return BasicFormatter.format(self, ticks, numlabels, char_width,
                                         fill_ratio)

        offset = self._compute_offset(ticks)
        intermed_ticks = asarray(ticks) - offset

        if self.offset_format == "sci":
            labels = BasicFormatter.format(self, intermed_ticks)
        else:
            # have to decide between %d and %f here.  also have to
            # strip trailing "0"s.. test with %g.
            labels = ["%g" % i for i in intermed_ticks]

        # A positive offset is joined to the remainder with "+"; the
        # remainder of a negative offset already carries its own "-".
        if offset > 0:
            sign = "+"
        else:
            sign = ""
        offset_str = BasicFormatter.format(self, [offset])[0] + sign
        if self.offset_display == "firstlast":
            if self.end_label_format == "offset":
                labels[0] = offset_str + labels[0]
                labels[-1] = offset_str + labels[-1]
            else:
                labels[0] = BasicFormatter.format(self, [ticks[0]])[0]
                labels[-1] = BasicFormatter.format(self, [ticks[-1]])[0]

        elif self.offset_display == "all":
            labels = [offset_str + label for label in labels]

        return labels

    def estimate_width(self, start, end, numlabels=None, char_width=None,
                       fill_ratio=0.3, ticker=None):
        """ Returns an estimate of the number of labels on the interval and
        of the total number of characters they use.

        Parameters
        ----------
        start : number
            The beginning of the interval.
        end : number
            The end of the interval.
        numlabels : number
            The ideal number of labels to generate on the interval.
        char_width : number
            The total character width available for labelling the interval.
        fill_ratio : 0.0 < float <= 1.0
            Ratio of the available width that will be occupied by label text.
        ticker : AbstractScale object
            Object that can calculate the number of labels needed.

        Returns
        -------
        (numlabels, total label width)

        Raises
        ------
        ValueError
            If neither *numlabels* nor *char_width* is given.
        """
        if numlabels == 0 or char_width == 0:
            return (0, 0)

        if not numlabels and not char_width:
            raise ValueError(
                "num_labels and char_width should not both be None."
            )

        # An explicit label count wins; otherwise derive one from the
        # available width and a rough average label size.
        if numlabels:
            initial_estimate = numlabels
        else:
            avg_size = len("%g%g" % (start, end)) / 2.0
            initial_estimate = round(fill_ratio * char_width / avg_size)

        if ticker:
            est_ticks = int(ticker.num_ticks(start, end, initial_estimate))
        else:
            est_ticks = initial_estimate

        # Measure the two end labels and a midpoint label; the midpoint
        # stands in for every interior tick.
        sample_labels = self.format([start, (start + end) / 2.0, end])
        start_len, mid_len, end_len = [len(label) for label in sample_labels]
        if est_ticks > 2:
            size = start_len + end_len + (est_ticks - 2) * mid_len
        else:
            size = start_len + end_len

        return est_ticks, size


def strftimeEx(fmt, t, timetuple=None):
    """
    Variant of time.strftime() that also understands sub-second fields.

    *t* is a floating-point number of seconds since the epoch.  On top of
    the usual strftime directives, these fields are substituted first:

    - ``%(ms)``:  milliseconds (uses round())
    - ``%(ms_)``: milliseconds (uses floor())
    - ``%(us)``:  microseconds (uses round())

    ``%(ms)`` is not meant to be combined with the other two; when it is
    present only ``%(ms)`` is substituted.

    If *fmt* is callable it is simply called with *t* and its result is
    returned, bypassing time.strftime() entirely.  If *timetuple* is not
    given, it is derived from the whole-second part of *t* via localtime().
    """
    if callable(fmt):
        return fmt(t)

    if "%(ms)" not in fmt:
        # Work at microsecond precision and split the microsecond count
        # into floored milliseconds plus the remaining microseconds.
        whole_secs, fraction = divmod(round(t, 6), 1)
        total_us = int(round(1e6 * fraction))
        floored_ms, remaining_us = divmod(total_us, 1000)
        fmt = fmt.replace("%(ms_)", "%03d" % floored_ms)
        fmt = fmt.replace("%(us)", "%03d" % remaining_us)
    else:
        # Work at millisecond precision with rounded milliseconds.
        whole_secs, fraction = divmod(round(t, 3), 1)
        rounded_ms = int(round(1e3 * fraction))
        fmt = fmt.replace("%(ms)", "%03d" % rounded_ms)

    if not timetuple:
        timetuple = localtime(whole_secs)

    return strftime(fmt, timetuple)


def _two_digit_year(t):
    """ Formats timestamp *t* as an apostrophe plus the two-digit year of
    (roughly) the nearest Jan 1.
    """
    moment = safe_fromtimestamp(t)
    # From July onwards the following Jan 1 is the closer one.
    nearest_year = moment.year + 1 if moment.month >= 7 else moment.year
    return "'%02d" % (nearest_year % 100)

def _four_digit_year(t):
    """ Formats timestamp *t* as the full year of (roughly) the nearest
    Jan 1.
    """
    moment = safe_fromtimestamp(t)
    # From July onwards the following Jan 1 is the closer one.
    nearest_year = moment.year + 1 if moment.month >= 7 else moment.year
    return str(nearest_year)


class TimeFormatter(object):
    """ Formatter for time values.

    Class-level attributes are defaults that can be overridden per instance
    by passing keyword arguments to the constructor.
    """
    # This table of formats is converted into the 'formats' dict.  Each tuple
    # of formats must be ordered from shortest to longest.
    _formats = {
        'microseconds': ('%(us)us', '%(ms_).%(us)ms'),
        'milliseconds': ('%(ms)ms', '%S.%(ms)s'),
        'seconds': (':%S', '%Ss'),
        'minsec': ('%M:%S',), # '%Mm%S', '%Mm%Ss'),
        'minutes': ('%Mm',),
        'hourmin': ('%H:%M',), #'%Hh%M', '%Hh%Mm', '%H:%M:%S','%Hh %Mm %Ss'),
        'hours': ('%Hh', '%H:%M'),
        'days': ('%m/%d', '%a%d',),
        'months': ('%m/%Y', '%b%y'),
        'years': (_two_digit_year, _four_digit_year),
        }

    # Labels of time units, from finest to coarsest.
    format_order = ['microseconds', 'milliseconds', 'seconds', 'minsec', 'minutes',
                    'hourmin', 'hours', 'days', 'months', 'years']

    # A dict whose keys are the strings in **format_order**; each value is
    # a pair (array of label widths, tuple of format strings/functions).
    # NOTE: this dict lives on the class and is filled in place by
    # _compute_format_weights(), so it is shared by every instance that does
    # not receive its own ``formats`` keyword argument.
    formats = {}

    # Whether or not to strip the leading zeros on tick labels.
    strip_leading_zeros = True

    def __init__(self, **kwds):
        # Allow the user to override the class-level defaults.
        self.__dict__.update(kwds)
        self._compute_format_weights()

    def _compute_format_weights(self):
        """ Fills ``self.formats`` with the label width of every format in
        ``_formats``, measured by formatting the current time.  Does nothing
        if ``self.formats`` is already populated.
        """
        if self.formats:
            return

        for fmt_name, fmt_strings in self._formats.items():
            sizes = []
            tmptime = time()
            for s in fmt_strings:
                size = len(strftimeEx(s, tmptime))
                sizes.append(size)
            self.formats[fmt_name] = (array(sizes), fmt_strings)
        return

    def _get_resolution(self, resolution, interval):
        """ Returns the name (a key of ``formats``) of the time unit suited
        to ticks spaced *resolution* seconds apart over a total span of
        *interval* seconds.
        """
        r = resolution
        span = interval
        if r < 5e-4:
            resol = "microseconds"
        elif r < 0.5:
            resol = "milliseconds"
        elif r < 60:
            # Hybrid "minsec" labels are needed once the span exceeds a minute.
            if span > 60:
                resol = "minsec"
            else:
                resol = "seconds"
        elif r < 3600:
            # Likewise "hourmin" once the span exceeds an hour.
            if span > 3600:
                resol = "hourmin"
            else:
                resol = "minutes"
        elif r < 24*3600:
            resol = "hours"
        elif r < 30*24*3600:
            resol = "days"
        elif r < 365*24*3600:
            resol = "months"
        else:
            resol = "years"
        return resol

    def format(self, ticks, numlabels=None, char_width=None, fill_ratio = 0.3,
               ticker=None):
        """ Formats a set of time values.

        Parameters
        ----------
        ticks : array of numbers
            The tick values to be formatted
        numlabels
            Not used.
        char_width : number
            The total character width available for labelling the interval.
        fill_ratio : 0.0 < float <= 1.0
            Ratio of the available width that will be occupied by label text.
        ticker : AbstractScale object
            Object that can calculate the number of labels needed.

        Returns
        -------
        List of formatted labels.  A tick that cannot be converted to a
        local time is labelled "ERR" and a warning is issued.

        """
        # In order to pick the right set of labels, we need to determine
        # the resolution of the ticks.  We can do this using a ticker if
        # it's provided, or by computing the resolution from the actual
        # ticks we've been given.
        if len(ticks) == 0:
            return []

        span = abs(ticks[-1] - ticks[0])
        if ticker:
            r = ticker.resolution
        elif len(ticks) > 1:
            r = span / (len(ticks) - 1)
        else:
            # A lone tick has no spacing to measure.  Avoid dividing by zero
            # and treat it as the coarsest resolution ("years").
            r = float("inf")
        resol = self._get_resolution(r, span)

        widths, formats = self.formats[resol]
        fmt = formats[0]
        if char_width:
            # If a width is provided, pick the longest format whose labels
            # still fit; otherwise keep the shortest one.
            good_formats = array(formats)[widths * len(ticks) < fill_ratio * char_width]
            if len(good_formats) > 0:
                fmt = good_formats[-1]

        # Apply the format to the tick values
        labels = []
        resol_ndx = self.format_order.index(resol)

        # This dictionary maps the name of a time resolution (in self.format_order)
        # to its index in a time.localtime() timetuple.  The default is to map
        # everything to index 0, which is year.  This is not ideal; it might cause
        # a problem with the tick at midnight, january 1st, 0 a.d. being incorrectly
        # promoted at certain tick resolutions.
        time_tuple_ndx_for_resol = dict.fromkeys(self.format_order, 0)
        time_tuple_ndx_for_resol.update( {
                "seconds" : 5,
                "minsec" : 4,
                "minutes" : 4,
                "hourmin" : 3,
                "hours" : 3,
                })

        # As we format each tick, check to see if we are at a boundary of the
        # next higher unit of time.  If so, replace the current format with one
        # from that resolution.  This is not the best heuristic in the world,
        # but it works!  There is some trickiness here due to having to deal
        # with hybrid formats in a reasonable manner.
        for t in ticks:
            try:
                tm = localtime(t)
                s = strftimeEx(fmt, t, tm)
            except ValueError:
                warnings.warn("Unable to convert tick for timestamp " + str(t))
                labels.append("ERR")
                continue

            hybrid_handled = False
            next_ndx = resol_ndx

            # The way to check that we are at the boundary of the next unit of
            # time is by checking that we have 0 units of the resolution, i.e.
            # we are at zero minutes, so display hours, or we are at zero seconds,
            # so display minutes (and if that is zero as well, then display hours).
            while tm[ time_tuple_ndx_for_resol[self.format_order[next_ndx]] ] == 0:
                next_ndx += 1
                if next_ndx == len(self.format_order):
                    break
                if resol in ("minsec", "hourmin") and not hybrid_handled:
                    if (resol == "minsec" and tm.tm_min == 0 and tm.tm_sec != 0) or \
                        (resol == "hourmin" and tm.tm_hour == 0 and tm.tm_min != 0):
                        # Only the coarser half of the hybrid is zero: fall
                        # back to the next finer unit's shortest format.
                        next_format = self.formats[self.format_order[resol_ndx-1]][1][0]
                        s = strftimeEx(next_format, t, tm)
                        break
                    else:
                        hybrid_handled = True

                next_format = self.formats[self.format_order[next_ndx]][1][0]
                s = strftimeEx(next_format, t, tm)

            if self.strip_leading_zeros:
                ss = s.lstrip('0')
                if ss != s and (ss == '' or not ss[0].isdigit()):
                    # A label such as '000ms' should leave one zero.
                    ss = '0' + ss
                labels.append(ss)
            else:
                labels.append(s)

        return labels

    def estimate_width(self, start, end, numlabels=None, char_width=None,
                       fill_ratio = 0.2, ticker=None):
        """ Returns an estimate of the total number of characters used by the
        the labels for the given set of inputs, as well as the number of labels.

        Parameters
        ----------
        start : number
            The beginning of the interval.
        end : number
            The end of the interval.
        numlabels : number
            The ideal number of labels to generate on the interval.
        char_width : number
            The total character width available for labelling the interval.
        fill_ratio : 0.0 < float <= 1.0
            Ratio of the available width that will be occupied by label text.
        ticker : AbstractScale object
            Object that can calculate the number of labels needed.

        Returns
        -------
        (numlabels, total label width)

        Raises
        ------
        ValueError
            If *ticker* is None or has no ``unit`` attribute.
        """
        if numlabels == 0 or char_width == 0:
            return 0, 0

        if ticker is None or not hasattr(ticker, "unit"):
            raise ValueError("TimeFormatter requires a scale.")

        if not numlabels:
            numlabels = ticker.num_ticks(start, end)

        span = abs(end - start)
        if ticker:
            r = ticker.resolution
        else:
            r = span / numlabels
        unit = self._get_resolution(r, span)

        # Millisecond labels are assumed to be 6 characters wide.
        if unit == "milliseconds":
            return numlabels, numlabels * 6

        widths, strings = self.formats[unit]

        if char_width:
            # Find an appropriate resolution in self.formats and pick between
            # the various format strings
            good_widths = widths[widths * numlabels < fill_ratio * char_width]
            if len(good_widths) == 0:
                # All too big, pick the first label
                width = widths[0]
            else:
                # Pick the largest label that fits
                width = good_widths[-1]
            width *= numlabels
        else:
            # Just pick the middle of the pack of format widths
            width = widths[ int(len(widths) / 2) ] * numlabels

        return numlabels, width