Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / numpy   python

Repository URL to install this package:

/ lib / tests / test_histograms.py

from __future__ import division, absolute_import, print_function

import numpy as np

from numpy.lib.histograms import histogram, histogramdd, histogram_bin_edges
from numpy.testing import (
    assert_, assert_equal, assert_array_equal, assert_almost_equal,
    assert_array_almost_equal, assert_raises, assert_allclose,
    assert_array_max_ulp, assert_raises_regex, suppress_warnings,
    )


class TestHistogram(object):

    def setup(self):
        pass

    def teardown(self):
        pass

    def test_simple(self):
        n = 100
        v = np.random.rand(n)
        (a, b) = histogram(v)
        # check if the sum of the bins equals the number of samples
        assert_equal(np.sum(a, axis=0), n)
        # check that the bin counts are evenly spaced when the data is from
        # a linear function
        (a, b) = histogram(np.linspace(0, 10, 100))
        assert_array_equal(a, 10)

    def test_one_bin(self):
        # Ticket 632
        hist, edges = histogram([1, 2, 3, 4], [1, 2])
        assert_array_equal(hist, [2, ])
        assert_array_equal(edges, [1, 2])
        assert_raises(ValueError, histogram, [1, 2], bins=0)
        h, e = histogram([1, 2], bins=1)
        assert_equal(h, np.array([2]))
        assert_allclose(e, np.array([1., 2.]))

    def test_normed(self):
        sup = suppress_warnings()
        with sup:
            rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*')
            # Check that the integral of the density equals 1.
            n = 100
            v = np.random.rand(n)
            a, b = histogram(v, normed=True)
            area = np.sum(a * np.diff(b))
            assert_almost_equal(area, 1)
            assert_equal(len(rec), 1)

        sup = suppress_warnings()
        with sup:
            rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*')
            # Check with non-constant bin widths (buggy but backwards
            # compatible)
            v = np.arange(10)
            bins = [0, 1, 5, 9, 10]
            a, b = histogram(v, bins, normed=True)
            area = np.sum(a * np.diff(b))
            assert_almost_equal(area, 1)
            assert_equal(len(rec), 1)

    def test_density(self):
        # Check that the integral of the density equals 1.
        n = 100
        v = np.random.rand(n)
        a, b = histogram(v, density=True)
        area = np.sum(a * np.diff(b))
        assert_almost_equal(area, 1)

        # Check with non-constant bin widths
        v = np.arange(10)
        bins = [0, 1, 3, 6, 10]
        a, b = histogram(v, bins, density=True)
        assert_array_equal(a, .1)
        assert_equal(np.sum(a * np.diff(b)), 1)

        # Test that passing False works too
        a, b = histogram(v, bins, density=False)
        assert_array_equal(a, [1, 2, 3, 4])

        # Variale bin widths are especially useful to deal with
        # infinities.
        v = np.arange(10)
        bins = [0, 1, 3, 6, np.inf]
        a, b = histogram(v, bins, density=True)
        assert_array_equal(a, [.1, .1, .1, 0.])

        # Taken from a bug report from N. Becker on the numpy-discussion
        # mailing list Aug. 6, 2010.
        counts, dmy = np.histogram(
            [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True)
        assert_equal(counts, [.25, 0])

    def test_outliers(self):
        # Check that outliers are not tallied
        a = np.arange(10) + .5

        # Lower outliers
        h, b = histogram(a, range=[0, 9])
        assert_equal(h.sum(), 9)

        # Upper outliers
        h, b = histogram(a, range=[1, 10])
        assert_equal(h.sum(), 9)

        # Normalization
        h, b = histogram(a, range=[1, 9], density=True)
        assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15)

        # Weights
        w = np.arange(10) + .5
        h, b = histogram(a, range=[1, 9], weights=w, density=True)
        assert_equal((h * np.diff(b)).sum(), 1)

        h, b = histogram(a, bins=8, range=[1, 9], weights=w)
        assert_equal(h, w[1:-1])

    def test_arr_weights_mismatch(self):
        a = np.arange(10) + .5
        w = np.arange(11) + .5
        with assert_raises_regex(ValueError, "same shape as"):
            h, b = histogram(a, range=[1, 9], weights=w, density=True)


    def test_type(self):
        # Check the type of the returned histogram
        a = np.arange(10) + .5
        h, b = histogram(a)
        assert_(np.issubdtype(h.dtype, np.integer))

        h, b = histogram(a, density=True)
        assert_(np.issubdtype(h.dtype, np.floating))

        h, b = histogram(a, weights=np.ones(10, int))
        assert_(np.issubdtype(h.dtype, np.integer))

        h, b = histogram(a, weights=np.ones(10, float))
        assert_(np.issubdtype(h.dtype, np.floating))

    def test_f32_rounding(self):
        # gh-4799, check that the rounding of the edges works with float32
        x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32)
        y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32)
        counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
        assert_equal(counts_hist.sum(), 3.)

    def test_bool_conversion(self):
        # gh-12107
        # Reference integer histogram
        a = np.array([1, 1, 0], dtype=np.uint8)
        int_hist, int_edges = np.histogram(a)

        # Should raise an warning on booleans
        # Ensure that the histograms are equivalent, need to suppress
        # the warnings to get the actual outputs
        with suppress_warnings() as sup:
            rec = sup.record(RuntimeWarning, 'Converting input from .*')
            hist, edges = np.histogram([True, True, False])
            # A warning should be issued
            assert_equal(len(rec), 1)
            assert_array_equal(hist, int_hist)
            assert_array_equal(edges, int_edges)

    def test_weights(self):
        v = np.random.rand(100)
        w = np.ones(100) * 5
        a, b = histogram(v)
        na, nb = histogram(v, density=True)
        wa, wb = histogram(v, weights=w)
        nwa, nwb = histogram(v, weights=w, density=True)
        assert_array_almost_equal(a * 5, wa)
        assert_array_almost_equal(na, nwa)

        # Check weights are properly applied.
        v = np.linspace(0, 10, 10)
        w = np.concatenate((np.zeros(5), np.ones(5)))
        wa, wb = histogram(v, bins=np.arange(11), weights=w)
        assert_array_almost_equal(wa, w)

        # Check with integer weights
        wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1])
        assert_array_equal(wa, [4, 5, 0, 1])
        wa, wb = histogram(
            [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], density=True)
        assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)

        # Check weights with non-uniform bin widths
        a, b = histogram(
            np.arange(9), [0, 1, 3, 6, 10],
            weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True)
        assert_almost_equal(a, [.2, .1, .1, .075])

    def test_exotic_weights(self):

        # Test the use of weights that are not integer or floats, but e.g.
        # complex numbers or object types.

        # Complex weights
        values = np.array([1.3, 2.5, 2.3])
        weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2])

        # Check with custom bins
        wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
        assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))

        # Check with even bins
        wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
        assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))

        # Decimal weights
        from decimal import Decimal
        values = np.array([1.3, 2.5, 2.3])
        weights = np.array([Decimal(1), Decimal(2), Decimal(3)])

        # Check with custom bins
        wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
        assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])

        # Check with even bins
        wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
        assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])

    def test_no_side_effects(self):
        # This is a regression test that ensures that values passed to
        # ``histogram`` are unchanged.
        values = np.array([1.3, 2.5, 2.3])
        np.histogram(values, range=[-10, 10], bins=100)
        assert_array_almost_equal(values, [1.3, 2.5, 2.3])

    def test_empty(self):
        a, b = histogram([], bins=([0, 1]))
        assert_array_equal(a, np.array([0]))
        assert_array_equal(b, np.array([0, 1]))

    def test_error_binnum_type (self):
        # Tests if right Error is raised if bins argument is float
        vals = np.linspace(0.0, 1.0, num=100)
        histogram(vals, 5)
        assert_raises(TypeError, histogram, vals, 2.4)

    def test_finite_range(self):
        # Normal ranges should be fine
        vals = np.linspace(0.0, 1.0, num=100)
        histogram(vals, range=[0.25,0.75])
        assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
        assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])

    def test_invalid_range(self):
        # start of range must be < end of range
        vals = np.linspace(0.0, 1.0, num=100)
        with assert_raises_regex(ValueError, "max must be larger than"):
            np.histogram(vals, range=[0.1, 0.01])

    def test_bin_edge_cases(self):
        # Ensure that floating-point computations correctly place edge cases.
        arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
        hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
        mask = hist > 0
        left_edges = edges[:-1][mask]
        right_edges = edges[1:][mask]
        for x, left, right in zip(arr, left_edges, right_edges):
            assert_(x >= left)
            assert_(x < right)

    def test_last_bin_inclusive_range(self):
        arr = np.array([0.,  0.,  0.,  1.,  2.,  3.,  3.,  4.,  5.])
        hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
        assert_equal(hist[-1], 1)

    def test_bin_array_dims(self):
        # gracefully handle bins object > 1 dimension
        vals = np.linspace(0.0, 1.0, num=100)
        bins = np.array([[0, 0.5], [0.6, 1.0]])
        with assert_raises_regex(ValueError, "must be 1d"):
            np.histogram(vals, bins=bins)

    def test_unsigned_monotonicity_check(self):
        # Ensures ValueError is raised if bins not increasing monotonically
        # when bins contain unsigned values (see #9222)
        arr = np.array([2])
        bins = np.array([1, 3, 1], dtype='uint64')
        with assert_raises(ValueError):
            hist, edges = np.histogram(arr, bins=bins)

    def test_object_array_of_0d(self):
        # gh-7864
        assert_raises(ValueError,
            histogram, [np.array(0.4) for i in range(10)] + [-np.inf])
        assert_raises(ValueError,
            histogram, [np.array(0.4) for i in range(10)] + [np.inf])

        # these should not crash
        np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001])
        np.histogram([np.array(0.5) for i in range(10)] + [.5])

    def test_some_nan_values(self):
        # gh-7503
        one_nan = np.array([0, 1, np.nan])
        all_nan = np.array([np.nan, np.nan])

        # the internal comparisons with NaN give warnings
        sup = suppress_warnings()
        sup.filter(RuntimeWarning)
        with sup:
            # can't infer range with nan
            assert_raises(ValueError, histogram, one_nan, bins='auto')
            assert_raises(ValueError, histogram, all_nan, bins='auto')

            # explicit range solves the problem
            h, b = histogram(one_nan, bins='auto', range=(0, 1))
            assert_equal(h.sum(), 2)  # nan is not counted
            h, b = histogram(all_nan, bins='auto', range=(0, 1))
            assert_equal(h.sum(), 0)  # nan is not counted

            # as does an explicit set of bins
            h, b = histogram(one_nan, bins=[0, 1])
            assert_equal(h.sum(), 2)  # nan is not counted
            h, b = histogram(all_nan, bins=[0, 1])
            assert_equal(h.sum(), 0)  # nan is not counted

    def test_datetime(self):
        begin = np.datetime64('2000-01-01', 'D')
        offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20])
        bins = np.array([0, 2, 7, 20])
        dates = begin + offsets
        date_bins = begin + bins

        td = np.dtype('timedelta64[D]')

        # Results should be the same for integer offsets or datetime values.
        # For now, only explicit bins are supported, since linspace does not
        # work on datetimes or timedeltas
        d_count, d_edge = histogram(dates, bins=date_bins)
        t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td))
        i_count, i_edge = histogram(offsets, bins=bins)

        assert_equal(d_count, i_count)
        assert_equal(t_count, i_count)

        assert_equal((d_edge - begin).astype(int), i_edge)
        assert_equal(t_edge.astype(int), i_edge)
Loading ...