Repository URL to install this package:
|
Version:
2022.10.0 ▾
|
import pytest
pytest.importorskip("numpy")
import numpy as np
import dask.array as da
from dask.array.utils import assert_eq, same_keys
try:
import crick
except ImportError:
crick = None
# Skip mark for cases that need the optional ``crick`` dependency; ``crick``
# is ``None`` when the import at the top of the file failed.
_requires_crick = pytest.mark.skipif(not crick, reason="Requires crick")

# Shared parametrization: runs a test once per ``internal_method`` value
# ("tdigest", skipped without crick, and "dask").
percentile_internal_methods = pytest.mark.parametrize(
    "internal_method",
    [pytest.param("tdigest", marks=_requires_crick), "dask"],
)
@percentile_internal_methods
def test_percentile(internal_method):
    """Check ``da.percentile`` values and graph-key determinism on 1-d input."""
    qs = [0, 50, 100]

    def pct(arr, q):
        # Forward the parametrized method to every call.
        return da.percentile(arr, q, internal_method=internal_method)

    # A constant array gives that constant at every requested percentile.
    d = da.ones((16,), chunks=(4,))
    assert_eq(pct(d, qs), np.array([1, 1, 1], dtype=d.dtype))

    # Integer data split into uneven chunks (3, 3, 2).
    x = np.array([0, 0, 5, 5, 5, 5, 20, 20])
    d = da.from_array(x, chunks=(3,))
    result = pct(d, qs)
    assert_eq(result, np.array([0, 5, 20], dtype=result.dtype))

    # Identical requests share keys; a different ``q`` must not.
    assert same_keys(pct(d, qs), pct(d, qs))
    assert not same_keys(pct(d, qs), pct(d, [0, 50]))

    # The string case below is only exercised for the non-tdigest method.
    if internal_method == "tdigest":
        return

    x = np.array(["a", "a", "d", "d", "d", "e"])
    d = da.from_array(x, chunks=(3,))
    # NOTE: ``internal_method`` is not forwarded here; this call relies on
    # the default of ``da.percentile``.
    expected = np.array(["a", "d", "e"], dtype=x.dtype)
    assert_eq(da.percentile(d, [0, 50, 100]), expected)
@pytest.mark.skip
def test_percentile_with_categoricals():
    """Check ``da.percentile`` on chunks holding ``pandas.Categorical`` data.

    The test is unconditionally skipped via ``pytest.mark.skip``. If it is
    re-enabled and pandas is missing, it is reported as skipped rather than
    silently passing.
    """
    # Use the same optional-dependency guard as the numpy import at the top
    # of the file, instead of returning early (which would count as a pass).
    pd = pytest.importorskip("pandas")

    names = ["Alice", "Bob", "Charlie", "Dennis", "Alice", "Alice"]
    x0 = pd.Categorical(names)
    x1 = pd.Categorical(names)

    # Build the array by hand from a two-chunk graph of categoricals.
    dsk = {("x", 0): x0, ("x", 1): x1}
    x = da.Array(dsk, "x", chunks=((6, 6),))

    p = da.percentile(x, [50])
    # Compute once and reuse the result for both checks.
    computed = p.compute()
    assert (computed.categories == x0.categories).all()
    assert (computed.codes == [0]).all()

    # Identical requests must produce identical graph keys.
    assert same_keys(da.percentile(x, [50]), da.percentile(x, [50]))
@percentile_internal_methods
def test_percentiles_with_empty_arrays(internal_method):
    """Percentiles of an all-ones array that contains a zero-length chunk."""
    # The middle chunk has size 0.
    x = da.ones(10, chunks=((5, 0, 5),))
    actual = da.percentile(x, [10, 50, 90], internal_method=internal_method)
    expected = np.array([1, 1, 1], dtype=x.dtype)
    assert_eq(actual, expected)
@percentile_internal_methods
def test_percentiles_with_empty_q(internal_method):
    """An empty percentile list gives an empty result with the input dtype."""
    x = da.ones(10, chunks=((5, 0, 5),))
    actual = da.percentile(x, [], internal_method=internal_method)
    expected = np.array([], dtype=x.dtype)
    assert_eq(actual, expected)
@percentile_internal_methods
@pytest.mark.parametrize("q", [5, 5.0, np.int64(5), np.float64(5)])
def test_percentiles_with_scaler_percentile(internal_method, q):
    """Regression test: ``da.percentile`` accepts a scalar percentile.

    ``q`` is given as a Python int, a Python float and the numpy int64 and
    float64 scalars. See #3020.
    """
    d = da.ones((16,), chunks=(4,))
    actual = da.percentile(d, q, internal_method=internal_method)
    # A scalar ``q`` still produces a length-1 result.
    expected = np.array([1], dtype=d.dtype)
    assert_eq(actual, expected)
@percentile_internal_methods
def test_unknown_chunk_sizes(internal_method):
    """``da.percentile`` still computes when the chunk sizes are NaN."""
    x = da.random.random(1000, chunks=(100,))
    # Overwrite the chunk metadata so all ten chunk sizes are NaN.
    x._chunks = ((np.nan,) * 10,)

    # Scalar percentile: only a loose range is checked, since the data is random.
    median = da.percentile(x, 50, internal_method=internal_method).compute()
    assert 0.1 < median < 0.9

    # Two percentiles: each in the same loose range, and correctly ordered.
    lower, upper = da.percentile(
        x, [40, 60], internal_method=internal_method
    ).compute()
    for value in (lower, upper):
        assert 0.1 < value < 0.9
    assert lower < upper