Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
from typing import Optional, Tuple

import numpy as np
import pandas as pd

from sarus_statistics.ops.utils import generator_from_seed, rescale_weights


def sum_op(
    data: pd.DataFrame,
    data_col: str,
    user_col: str,
    private_col: str,
    weight_col: str,
    noise: float,
    bounds: Tuple[float, float],
    max_multiplicity: float,
    random_generator: Optional[np.random.Generator] = None,
    is_data_already_scaled: bool = False,
) -> float:
    """Compute DP sum of column according to max_multiplicity of user.

    The values of ``data_col`` are clipped to ``bounds``, multiplied by the
    weights in ``weight_col``, summed, and perturbed with Laplace noise.
    The caller's ``data`` frame is left untouched: clipping is applied to a
    private copy.

    Args:
        data: Input frame holding at least ``data_col`` and ``weight_col``.
        data_col: Name of the column to sum.
        user_col: Forwarded to ``rescale_weights``
            (presumably the user identifier column - confirm in that helper).
        private_col: Forwarded to ``rescale_weights``.
        weight_col: Name of the column holding the per-row weights.
        noise: Multiplier applied to the sensitivity to obtain the Laplace
            scale (``scale = noise * sensitivity``).
        bounds: Clipping bounds for ``data_col``; the two values may be
            given in either order.
        max_multiplicity: Factor used in the sensitivity and forwarded to
            ``rescale_weights``.
        random_generator: Source of randomness for the noise. When ``None``
            a generator is obtained from ``generator_from_seed(None)``.
        is_data_already_scaled: When ``True``, ``rescale_weights`` is
            skipped and the weights are used as they are.

    Returns:
        The noisy weighted sum. It is truncated with ``int()`` when the
        clipped column has the default integer dtype, otherwise it is
        returned as a float.
    """
    if random_generator is None:
        # Same fallback as before, spelled out: the helper receives None.
        random_generator = generator_from_seed(None)

    lower_bound = min(bounds[0], bounds[1])
    upper_bound = max(bounds[0], bounds[1])
    sensitivity = max_multiplicity * max(abs(bounds[0]), abs(bounds[1]))

    # Clip into a copy so the caller's frame keeps its raw values; writing
    # back into `data` would leak the clipping into any later use of it.
    clipped_data = data.copy()
    clipped_data[data_col] = np.clip(
        data[data_col],
        a_min=lower_bound,
        a_max=upper_bound,
    )

    if is_data_already_scaled:
        private_data = clipped_data
    else:
        private_data = rescale_weights(
            data=clipped_data,
            user_col=user_col,
            private_col=private_col,
            weight_col=weight_col,
            max_multiplicity=max_multiplicity,
        )

    sum_result = (private_data[weight_col] * private_data[data_col]).sum()
    noisy_result: float = sum_result + random_generator.laplace(
        loc=0, scale=noise * sensitivity
    )

    # NOTE: `== int` only matches NumPy's default integer dtype; columns of
    # another integer width fall through and yield a float result.
    if clipped_data[data_col].dtype == int:
        noisy_result = int(noisy_result)

    return noisy_result