from __future__ import division, absolute_import, print_function
import numpy as np
from numpy.lib.histograms import histogram, histogramdd, histogram_bin_edges
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_almost_equal,
assert_array_almost_equal, assert_raises, assert_allclose,
assert_array_max_ulp, assert_raises_regex, suppress_warnings,
)
class TestHistogram(object):
def setup(self):
pass
def teardown(self):
pass
def test_simple(self):
n = 100
v = np.random.rand(n)
(a, b) = histogram(v)
# check if the sum of the bins equals the number of samples
assert_equal(np.sum(a, axis=0), n)
# check that the bin counts are evenly spaced when the data is from
# a linear function
(a, b) = histogram(np.linspace(0, 10, 100))
assert_array_equal(a, 10)
def test_one_bin(self):
# Ticket 632
hist, edges = histogram([1, 2, 3, 4], [1, 2])
assert_array_equal(hist, [2, ])
assert_array_equal(edges, [1, 2])
assert_raises(ValueError, histogram, [1, 2], bins=0)
h, e = histogram([1, 2], bins=1)
assert_equal(h, np.array([2]))
assert_allclose(e, np.array([1., 2.]))
def test_normed(self):
sup = suppress_warnings()
with sup:
rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*')
# Check that the integral of the density equals 1.
n = 100
v = np.random.rand(n)
a, b = histogram(v, normed=True)
area = np.sum(a * np.diff(b))
assert_almost_equal(area, 1)
assert_equal(len(rec), 1)
sup = suppress_warnings()
with sup:
rec = sup.record(np.VisibleDeprecationWarning, '.*normed.*')
# Check with non-constant bin widths (buggy but backwards
# compatible)
v = np.arange(10)
bins = [0, 1, 5, 9, 10]
a, b = histogram(v, bins, normed=True)
area = np.sum(a * np.diff(b))
assert_almost_equal(area, 1)
assert_equal(len(rec), 1)
def test_density(self):
# Check that the integral of the density equals 1.
n = 100
v = np.random.rand(n)
a, b = histogram(v, density=True)
area = np.sum(a * np.diff(b))
assert_almost_equal(area, 1)
# Check with non-constant bin widths
v = np.arange(10)
bins = [0, 1, 3, 6, 10]
a, b = histogram(v, bins, density=True)
assert_array_equal(a, .1)
assert_equal(np.sum(a * np.diff(b)), 1)
# Test that passing False works too
a, b = histogram(v, bins, density=False)
assert_array_equal(a, [1, 2, 3, 4])
# Variale bin widths are especially useful to deal with
# infinities.
v = np.arange(10)
bins = [0, 1, 3, 6, np.inf]
a, b = histogram(v, bins, density=True)
assert_array_equal(a, [.1, .1, .1, 0.])
# Taken from a bug report from N. Becker on the numpy-discussion
# mailing list Aug. 6, 2010.
counts, dmy = np.histogram(
[1, 2, 3, 4], [0.5, 1.5, np.inf], density=True)
assert_equal(counts, [.25, 0])
def test_outliers(self):
# Check that outliers are not tallied
a = np.arange(10) + .5
# Lower outliers
h, b = histogram(a, range=[0, 9])
assert_equal(h.sum(), 9)
# Upper outliers
h, b = histogram(a, range=[1, 10])
assert_equal(h.sum(), 9)
# Normalization
h, b = histogram(a, range=[1, 9], density=True)
assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15)
# Weights
w = np.arange(10) + .5
h, b = histogram(a, range=[1, 9], weights=w, density=True)
assert_equal((h * np.diff(b)).sum(), 1)
h, b = histogram(a, bins=8, range=[1, 9], weights=w)
assert_equal(h, w[1:-1])
def test_arr_weights_mismatch(self):
a = np.arange(10) + .5
w = np.arange(11) + .5
with assert_raises_regex(ValueError, "same shape as"):
h, b = histogram(a, range=[1, 9], weights=w, density=True)
def test_type(self):
# Check the type of the returned histogram
a = np.arange(10) + .5
h, b = histogram(a)
assert_(np.issubdtype(h.dtype, np.integer))
h, b = histogram(a, density=True)
assert_(np.issubdtype(h.dtype, np.floating))
h, b = histogram(a, weights=np.ones(10, int))
assert_(np.issubdtype(h.dtype, np.integer))
h, b = histogram(a, weights=np.ones(10, float))
assert_(np.issubdtype(h.dtype, np.floating))
def test_f32_rounding(self):
# gh-4799, check that the rounding of the edges works with float32
x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32)
y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32)
counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
assert_equal(counts_hist.sum(), 3.)
def test_bool_conversion(self):
# gh-12107
# Reference integer histogram
a = np.array([1, 1, 0], dtype=np.uint8)
int_hist, int_edges = np.histogram(a)
# Should raise an warning on booleans
# Ensure that the histograms are equivalent, need to suppress
# the warnings to get the actual outputs
with suppress_warnings() as sup:
rec = sup.record(RuntimeWarning, 'Converting input from .*')
hist, edges = np.histogram([True, True, False])
# A warning should be issued
assert_equal(len(rec), 1)
assert_array_equal(hist, int_hist)
assert_array_equal(edges, int_edges)
def test_weights(self):
v = np.random.rand(100)
w = np.ones(100) * 5
a, b = histogram(v)
na, nb = histogram(v, density=True)
wa, wb = histogram(v, weights=w)
nwa, nwb = histogram(v, weights=w, density=True)
assert_array_almost_equal(a * 5, wa)
assert_array_almost_equal(na, nwa)
# Check weights are properly applied.
v = np.linspace(0, 10, 10)
w = np.concatenate((np.zeros(5), np.ones(5)))
wa, wb = histogram(v, bins=np.arange(11), weights=w)
assert_array_almost_equal(wa, w)
# Check with integer weights
wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1])
assert_array_equal(wa, [4, 5, 0, 1])
wa, wb = histogram(
[1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], density=True)
assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)
# Check weights with non-uniform bin widths
a, b = histogram(
np.arange(9), [0, 1, 3, 6, 10],
weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True)
assert_almost_equal(a, [.2, .1, .1, .075])
def test_exotic_weights(self):
# Test the use of weights that are not integer or floats, but e.g.
# complex numbers or object types.
# Complex weights
values = np.array([1.3, 2.5, 2.3])
weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2])
# Check with custom bins
wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
# Check with even bins
wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
# Decimal weights
from decimal import Decimal
values = np.array([1.3, 2.5, 2.3])
weights = np.array([Decimal(1), Decimal(2), Decimal(3)])
# Check with custom bins
wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
# Check with even bins
wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
def test_no_side_effects(self):
# This is a regression test that ensures that values passed to
# ``histogram`` are unchanged.
values = np.array([1.3, 2.5, 2.3])
np.histogram(values, range=[-10, 10], bins=100)
assert_array_almost_equal(values, [1.3, 2.5, 2.3])
def test_empty(self):
a, b = histogram([], bins=([0, 1]))
assert_array_equal(a, np.array([0]))
assert_array_equal(b, np.array([0, 1]))
def test_error_binnum_type (self):
# Tests if right Error is raised if bins argument is float
vals = np.linspace(0.0, 1.0, num=100)
histogram(vals, 5)
assert_raises(TypeError, histogram, vals, 2.4)
def test_finite_range(self):
# Normal ranges should be fine
vals = np.linspace(0.0, 1.0, num=100)
histogram(vals, range=[0.25,0.75])
assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
def test_invalid_range(self):
# start of range must be < end of range
vals = np.linspace(0.0, 1.0, num=100)
with assert_raises_regex(ValueError, "max must be larger than"):
np.histogram(vals, range=[0.1, 0.01])
def test_bin_edge_cases(self):
# Ensure that floating-point computations correctly place edge cases.
arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
mask = hist > 0
left_edges = edges[:-1][mask]
right_edges = edges[1:][mask]
for x, left, right in zip(arr, left_edges, right_edges):
assert_(x >= left)
assert_(x < right)
def test_last_bin_inclusive_range(self):
arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.])
hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
assert_equal(hist[-1], 1)
def test_bin_array_dims(self):
# gracefully handle bins object > 1 dimension
vals = np.linspace(0.0, 1.0, num=100)
bins = np.array([[0, 0.5], [0.6, 1.0]])
with assert_raises_regex(ValueError, "must be 1d"):
np.histogram(vals, bins=bins)
def test_unsigned_monotonicity_check(self):
# Ensures ValueError is raised if bins not increasing monotonically
# when bins contain unsigned values (see #9222)
arr = np.array([2])
bins = np.array([1, 3, 1], dtype='uint64')
with assert_raises(ValueError):
hist, edges = np.histogram(arr, bins=bins)
def test_object_array_of_0d(self):
# gh-7864
assert_raises(ValueError,
histogram, [np.array(0.4) for i in range(10)] + [-np.inf])
assert_raises(ValueError,
histogram, [np.array(0.4) for i in range(10)] + [np.inf])
# these should not crash
np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001])
np.histogram([np.array(0.5) for i in range(10)] + [.5])
def test_some_nan_values(self):
# gh-7503
one_nan = np.array([0, 1, np.nan])
all_nan = np.array([np.nan, np.nan])
# the internal comparisons with NaN give warnings
sup = suppress_warnings()
sup.filter(RuntimeWarning)
with sup:
# can't infer range with nan
assert_raises(ValueError, histogram, one_nan, bins='auto')
assert_raises(ValueError, histogram, all_nan, bins='auto')
# explicit range solves the problem
h, b = histogram(one_nan, bins='auto', range=(0, 1))
assert_equal(h.sum(), 2) # nan is not counted
h, b = histogram(all_nan, bins='auto', range=(0, 1))
assert_equal(h.sum(), 0) # nan is not counted
# as does an explicit set of bins
h, b = histogram(one_nan, bins=[0, 1])
assert_equal(h.sum(), 2) # nan is not counted
h, b = histogram(all_nan, bins=[0, 1])
assert_equal(h.sum(), 0) # nan is not counted
def test_datetime(self):
begin = np.datetime64('2000-01-01', 'D')
offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20])
bins = np.array([0, 2, 7, 20])
dates = begin + offsets
date_bins = begin + bins
td = np.dtype('timedelta64[D]')
# Results should be the same for integer offsets or datetime values.
# For now, only explicit bins are supported, since linspace does not
# work on datetimes or timedeltas
d_count, d_edge = histogram(dates, bins=date_bins)
t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td))
i_count, i_edge = histogram(offsets, bins=bins)
assert_equal(d_count, i_count)
assert_equal(t_count, i_count)
assert_equal((d_edge - begin).astype(int), i_edge)
assert_equal(t_edge.astype(int), i_edge)
# Loading ...