Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
from typing import Tuple

import sqlalchemy as sa

from sarus_statistics.ops.exp_quantile.exp_quantile_sql import exp_quantile


def get_quantile(  # pylint:disable=too-many-arguments,too-many-locals
    session: sa.orm.Session,
    table: sa.Table,
    data_col: str,
    user_col: str,
    private_col: str,
    multiplicity_col: str,
    sampling_ratio: float,
    noise: float,
    quantile: float,
    swap: bool,
    bounds: Tuple[float, float],
    rho: float = 1e-3,
    max_multiplicity: float = 1,
) -> sa.sql.ClauseElement:
    """Build the quantile expression for a column of a randomly sampled table

    A random subset of the rows of ``table`` is selected in SQL and handed to
    ``exp_quantile`` together with the bounds and the privacy budget.

    Parameters
    ----------
    session: sa.orm.Session
        session used to build the row-count and the sampling queries
    table: sa.Table
        table holding the data.
        NOTE(review): rows are counted on ``table.c.value`` whatever
        ``data_col`` is, so the table must expose a ``value`` column
    data_col: str
        name of the column forwarded to ``exp_quantile``
    user_col: str
        name of the users' column (currently unused, see TODO in the body)
    private_col: str
        name of the column indicating the privacy status (currently unused)
    multiplicity_col: str
        name of the multiplicity's column (currently unused)
    sampling_ratio: float
        fraction of the counted rows kept for the computation, in ]0,1]
    noise: float
        target epsilon = 1 / noise, must be strictly positive
    quantile: float
        quantile forwarded to ``exp_quantile``
    swap: bool
        if True: DP is considered in the "replace regime", else only
        "add/remove" (currently unused)
    bounds: Tuple[float, float]
        (low, high) bounds forwarded as ``data_low`` and ``data_high``
    rho: float
        forwarded to ``exp_quantile``; meant as a small uniform noise to fix
        degenerate cases with a lot of equal values
    max_multiplicity: float
        max number of identical users (currently unused)

    Returns
    -------
    sa.sql.ClauseElement
        whatever ``exp_quantile`` returns for the sampled rows

    Raises
    ------
    ValueError
        if sampling_ratio is not in ]0,1] or if noise is not strictly
        positive
    """
    if sampling_ratio > 1 or sampling_ratio <= 0:
        raise ValueError("Sampling ratio should be in ]0,1]")
    # Fail early and explicitly: epsilon is computed as 1 / noise below, so a
    # zero noise would otherwise surface as a bare ZeroDivisionError once the
    # subqueries are already built, and a negative one as a negative epsilon.
    if noise <= 0:
        raise ValueError("Noise should be strictly positive")
    epsilon = 1 / noise

    # Number of rows to keep: COUNT(value) * sampling_ratio cast to an
    # integer, embedded as a scalar subquery so it is evaluated by the
    # database.
    # NOTE(review): the counted column is hard-coded to ``value`` rather
    # than ``data_col`` -- confirm this is intended.
    num_samples = session.query(
        sa.cast(sa.func.count(table.c.value) * sampling_ratio, sa.Integer)
    ).scalar_subquery()

    # TODO: rescale weights. The intended step, when both ``user_col`` and
    # ``multiplicity_col`` are given, is to keep at most ``max_multiplicity``
    # randomly chosen rows per user (without replacement) and then drop the
    # user and multiplicity columns. It is not implemented for SQL yet.

    # Random sample: shuffle the rows and keep the first ``num_samples``.
    # NOTE(review): this renders a SQL ``rand()`` call, whose availability
    # depends on the database dialect -- confirm against the target backend.
    data = (
        session.query(table)
        .order_by(sa.func.rand())
        .limit(num_samples)
        .subquery()
    )

    return exp_quantile(
        session,
        data,
        data_col,
        data_low=bounds[0],
        data_high=bounds[1],
        quantile=quantile,
        eps=epsilon,
        rho=rho,
    )