Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
from typing import Optional, Tuple, cast

import numpy as np
import pandas as pd

from sarus_statistics.ops.histograms.local import dataset_length
from sarus_statistics.ops.mean.local import mean
from sarus_statistics.ops.sum.local import sum_op


def std(
    data: pd.DataFrame,
    data_col: str,
    user_col: str,
    private_col: str,
    weight_col: str,
    noise_mean: float,
    noise_square: float,
    noise_count: float,
    bounds: Tuple[float, float],
    max_multiplicity: float,
    random_generator: Optional[np.random.Generator] = None,
    is_data_already_scaled: bool = False,
) -> float:
    """Compute a noisy standard deviation of ``data[data_col]``.

    This is a very naive implementation. It combines three helper results
    as ``sqrt(sum((x - mean) ** 2) / count)``:

    1. ``mean`` of the column (called with ``noise_mean``),
    2. ``sum_op`` of the squared, mean-centered column (called with
       ``noise_square``),
    3. ``dataset_length`` (called with ``noise_count``).

    NOTE(review): the helpers are presumably the differentially private
    mean / sum / count primitives of this package; their exact noise and
    clipping semantics are not visible from this function -- confirm there.

    Args:
        data: Input frame. It is not modified; all work happens on a copy.
        data_col: Name of the column whose standard deviation is computed.
        user_col: Forwarded unchanged to the helpers.
        private_col: Forwarded unchanged to the helpers.
        weight_col: Forwarded unchanged to the helpers.
        noise_mean: Noise parameter forwarded to ``mean``.
        noise_square: Noise parameter forwarded to ``sum_op``.
        noise_count: Noise parameter forwarded to ``dataset_length``.
        bounds: Pair of clipping bounds for ``data_col``; the two values may
            be given in either order.
        max_multiplicity: Forwarded unchanged to the helpers.
        random_generator: Optional generator forwarded to the helpers.
        is_data_already_scaled: Forwarded unchanged to ``mean`` and
            ``sum_op``.

    Returns:
        ``np.sqrt(sum_result / length)``. Negative sums or non-positive
        counts returned by the helpers are not guarded against here, so the
        result can be NaN or infinite in that case.
    """
    lower, upper = min(bounds), max(bounds)

    # Work on a private copy so the caller's frame is never mutated, and
    # clip *that copy*: the bound used for the squared values below is only
    # valid when every value lies within [lower, upper].
    working = data.copy()
    working[data_col] = working[data_col].clip(lower=lower, upper=upper)

    # mean
    mean_result = mean(
        working,
        data_col,
        user_col,
        private_col,
        weight_col,
        noise_mean,
        bounds,
        max_multiplicity,
        random_generator,
        is_data_already_scaled,
    )

    # sum of squares: a value in [lower, upper] is at most
    # max(|lower - mean|, |upper - mean|) away from the mean, which bounds
    # the squared deviation from above.
    squared_centered_bounds = (
        0,
        max((lower - mean_result) ** 2, (upper - mean_result) ** 2),
    )
    working[data_col] = np.square(working[data_col] - mean_result)
    sum_result = sum_op(
        working,
        data_col,
        user_col,
        private_col,
        weight_col,
        noise_square,
        squared_centered_bounds,
        max_multiplicity,
        random_generator,
        is_data_already_scaled,
    )

    # count
    length = dataset_length(
        working,
        user_col,
        private_col,
        weight_col,
        noise_count,
        max_multiplicity,
        random_generator,
    )

    return cast(float, np.sqrt(sum_result / length))