Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
sarus_statistics / tests / unit / test_ops / test_bounds.py
Size: Mime:
import numpy as np
import pandas as pd
import pytest

from sarus_statistics.ops.bounds.local import automatic_column_range_pandas

# Seed NumPy's global RNG once at import time so the random draws made by
# this module start from a fixed state.
np.random.seed(0)

# Interval used to scale the synthetic uniform samples.
LOWER_BOUND, UPPER_BOUND = 10, 100
# Noise level forwarded to the bounds estimator in the parametrized test.
NOISE = 0.01
# Number of synthetic rows generated by the fixture.
N_SAMPLES = 10_000
# NOTE(review): not referenced in the visible part of this module.
N_QUERIES = [4092, 508, 128, 64, 32, 8]
# Pool of user ids the synthetic rows are attributed to.
USERS = [*range(1000)]


@pytest.fixture()
def bounds_data():
    """Build a synthetic frame of ``N_SAMPLES`` rows for the bounds tests.

    Columns:
        data: uniform random draws rescaled from the unit interval to the
            ``LOWER_BOUND``..``UPPER_BOUND`` range.
        user: ids sampled (with NumPy's default replacement) from ``USERS``.
        multiplicity: the constant ``1`` on every row.
    """
    span = UPPER_BOUND - LOWER_BOUND
    # Keep the draw order (values first, then users): both consume the
    # global NumPy RNG, so swapping them would change the generated data.
    values = np.random.uniform(size=N_SAMPLES) * span + LOWER_BOUND
    owners = np.random.choice(USERS, size=N_SAMPLES)
    unit_weights = [1] * N_SAMPLES
    columns = {
        "data": values,
        "user": owners,
        "multiplicity": unit_weights,
    }
    return pd.DataFrame(columns)


@pytest.mark.parametrize(
    "dtype",
    ["float64", "float32", "int64", "int32", "int16", "int8"],
)
def test_automatic_column_range_pandas(bounds_data, dtype):
    """Check the estimated range of the synthetic data for several dtypes.

    The fixture frame is cast to ``dtype``, an all-``False`` ``is_public``
    column is appended, and the estimator is expected to return ``(8, 128)``.
    """
    # NOTE(review): astype() converts every column, not just "data"; for
    # "int8" the user ids (up to 999) cannot be represented -- confirm that
    # casting the "user" column as well is intended.
    typed_frame = bounds_data.astype(dtype=dtype)
    typed_frame["is_public"] = [False] * N_SAMPLES

    estimated = automatic_column_range_pandas(
        data=typed_frame,
        data_col="data",
        user_col="user",
        private_col="is_public",
        weight_col="multiplicity",
        dtype=dtype,
        noise=NOISE,
    )
    low, high = estimated

    assert low == 8
    assert high == 128


def test_bounds(ops_data, admin_cols):
    """Check the estimated range of the ``integer`` column of ``ops_data``.

    ``ops_data`` and ``admin_cols`` are fixtures defined outside this module
    (presumably in a conftest -- verify). ``admin_cols`` unpacks into the
    names of the public-flag, user and weight columns, in that order.
    """
    public, user_col, weights = admin_cols
    min_value, max_value = automatic_column_range_pandas(
        ops_data,
        data_col='integer',
        user_col=user_col,
        private_col=public,
        weight_col=weights,
        dtype='int64',
        noise=1e-9,
        max_multiplicity=10,
    )

    # pytest.approx must wrap the *expected* values: its relative tolerance
    # is computed from the wrapped numbers, so wrapping the measured values
    # would make the allowed error depend on the result under test.
    assert [min_value, max_value] == pytest.approx([0, 1024], rel=1e-2)