# Repository URL to install this package:
# Version: 4.0.1
from typing import Optional, Tuple
import numpy as np
import pandas as pd
from sarus_statistics.ops.utils import generator_from_seed, rescale_weights
def sum_op(
    data: pd.DataFrame,
    data_col: str,
    user_col: str,
    private_col: str,
    weight_col: str,
    noise: float,
    bounds: Tuple[float, float],
    max_multiplicity: float,
    random_generator: Optional[np.random.Generator] = None,
    is_data_already_scaled: bool = False,
) -> float:
    """Compute a differentially private (Laplace-noised) weighted sum.

    The values of ``data_col`` are clipped to ``bounds``, multiplied by the
    weights in ``weight_col``, summed, and perturbed with Laplace noise whose
    scale is ``noise * max_multiplicity * max(|bounds[0]|, |bounds[1]|)``.

    Args:
        data: Frame holding the columns named below. It is not modified;
            clipping is applied to a derived frame.
        data_col: Name of the column to clip and sum.
        user_col: Forwarded to ``rescale_weights`` (only used when
            ``is_data_already_scaled`` is false).
        private_col: Forwarded to ``rescale_weights`` (only used when
            ``is_data_already_scaled`` is false).
        weight_col: Name of the column of per-row weights.
        noise: Multiplier applied to the sensitivity to obtain the Laplace
            scale. ``0`` yields the exact clipped weighted sum.
        bounds: Clipping bounds; the two values may be given in either order.
        max_multiplicity: Factor used in the sensitivity, and forwarded to
            ``rescale_weights``.
        random_generator: Source of randomness for the Laplace draw. When
            ``None``, one is obtained from ``generator_from_seed(None)``.
        is_data_already_scaled: When true, the weights in ``data`` are used
            as-is and ``rescale_weights`` is not called.

    Returns:
        The noisy sum. When the clipped column has an integer dtype, the
        result is truncated to a Python ``int``.
    """
    if random_generator is None:
        random_generator = generator_from_seed(None)

    lower = min(bounds[0], bounds[1])
    upper = max(bounds[0], bounds[1])
    sensitivity = max_multiplicity * max(abs(bounds[0]), abs(bounds[1]))

    # Clip into a new frame so the caller's DataFrame keeps its raw values.
    clipped_values = data[data_col].clip(lower=lower, upper=upper)
    clipped_data = data.assign(**{data_col: clipped_values})

    if is_data_already_scaled:
        private_data = clipped_data
    else:
        # NOTE(review): presumably caps each user's total weight according to
        # max_multiplicity -- confirm against rescale_weights.
        private_data = rescale_weights(
            data=clipped_data,
            user_col=user_col,
            private_col=private_col,
            weight_col=weight_col,
            max_multiplicity=max_multiplicity,
        )

    sum_result = (private_data[weight_col] * private_data[data_col]).sum()
    noisy_result: float = sum_result + random_generator.laplace(
        loc=0, scale=noise * sensitivity
    )

    # `dtype == int` only matches the platform default integer dtype;
    # is_integer_dtype covers every integer width, signed or unsigned.
    if pd.api.types.is_integer_dtype(clipped_values.dtype):
        noisy_result = int(noisy_result)
    return noisy_result