# Repository URL to install this package: (URL not present in the extracted page)
# Version: 4.0.1
import numpy as np
from sarus_statistics.ops.tau_thresholding.local import (
dataset_above_tau_threshold,
)
# Seed NumPy's global random state once, at import time, so that any
# randomness drawn from it is reproducible from one test run to the next.
# NOTE(review): presumably the thresholding code under test draws its noise
# from NumPy's global RNG -- confirm, otherwise this seed has no effect on it.
np.random.seed(0)
def test_tau(ops_data, admin_cols):
    """Check how tau-thresholding reacts to epsilon, delta and grouping keys.

    ``dataset_above_tau_threshold`` is run four times on the same fixture
    data; only ``keys``, ``epsilon`` and ``delta`` change between runs. Each
    run asserts whether the returned object reports itself as ``empty``.

    Args:
        ops_data: fixture passed through unchanged as ``data``.
        admin_cols: fixture unpacked into three values that are passed as
            ``private_col``, ``user_col`` and ``weight_col`` respectively.
    """
    private_column, user_column, weight_column = admin_cols
    ds_multiplicity = 10

    def groups_above_threshold(keys, epsilon, delta):
        # Every scenario shares the data, column and multiplicity arguments;
        # only the grouping keys and the (epsilon, delta) budget vary.
        return dataset_above_tau_threshold(
            data=ops_data,
            user_col=user_column,
            private_col=private_column,
            weight_col=weight_column,
            keys=keys,
            epsilon=epsilon,
            delta=delta,
            max_multiplicity=ds_multiplicity,
        )

    # Per the original author, the fixture has 3 groups in 'enum'.
    # With a large epsilon at least one group is expected to be returned.
    large_epsilon_groups = groups_above_threshold(
        ['enum'], epsilon=10.0, delta=1e-6
    )
    assert not large_epsilon_groups.empty

    # Lowering epsilon (10.0 -> 1.0) with the same delta: the groups are
    # expected to disappear.
    small_epsilon_groups = groups_above_threshold(
        ['enum'], epsilon=1.0, delta=1e-6
    )
    assert small_epsilon_groups.empty

    # Keeping epsilon small but raising delta (1e-6 -> 0.1): groups are
    # expected to appear again.
    large_delta_groups = groups_above_threshold(
        ['enum'], epsilon=1.0, delta=0.1
    )
    assert not large_delta_groups.empty

    # Grouping by the user column itself: the result is expected to stay
    # empty even with a very large epsilon.
    per_user_groups = groups_above_threshold(
        [user_column], epsilon=1000.0, delta=1e-6
    )
    assert per_user_groups.empty