Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
from typing import Collection, Sequence, Tuple, cast

from sarus_data_spec.protobuf.utilities import wrap
from sarus_data_spec.typing import Dataset
from sarus_differential_privacy.accountant_local import LocalPrivacyAccountant

from sarus_query_builder.builders.bounds_builder import bounds_builder
from sarus_query_builder.builders.links_builder import links_builder
from sarus_query_builder.builders.marginals_builder import marginals_builder
from sarus_query_builder.builders.max_multiplicity_builder import (
    max_multiplicity_builder,
)
from sarus_query_builder.builders.size_builder import size_builder
from sarus_query_builder.builders.synthetic_builder import (
    synthetic_dpsgd_builder,
)
from sarus_query_builder.core.core import OptimizableQueryBuilder
from sarus_query_builder.core.typing import Task
from sarus_query_builder.protobuf.query_pb2 import ComposedTask, Query


class ComposedBuilder(OptimizableQueryBuilder):
    """Query builder composed of several weighted sub-builders.

    The input parameter given to this builder is multiplied by each
    sub-builder's weight before being handed to that sub-builder, and the
    resulting sub-queries are gathered in a single `ComposedTask`.
    """

    def __init__(
        self,
        dataset: Dataset,
        builders: Sequence[OptimizableQueryBuilder],
        weights: Sequence[float],
    ):
        """Store the dataset, the sub-builders and their weights.

        Args:
            dataset: dataset this builder is attached to.
            builders: sub-builders, in the order their tasks are composed.
            weights: one multiplicative weight per sub-builder, same order.

        Raises:
            ValueError: if `builders` and `weights` differ in length.
        """
        # Explicit check rather than `assert`: asserts are stripped under
        # `python -O`, and `zip` below would then silently ignore the
        # unmatched builders or weights.
        if len(builders) != len(weights):
            raise ValueError(
                "`builders` and `weights` must have the same length, got "
                f"{len(builders)} builders and {len(weights)} weights"
            )
        self._dataset = dataset
        self._builders = builders
        self._weights = weights

    @property
    def builders(self) -> Sequence[OptimizableQueryBuilder]:
        """Sub-builders, in the order given at construction."""
        return cast(Sequence[OptimizableQueryBuilder], self._builders)

    # NOTE: unlike `builders`, this is a plain method and not a property;
    # the rest of the class calls it as `self.weights()`.
    def weights(self) -> Sequence[float]:
        """Return the per-builder weights given at construction."""
        return self._weights

    def build_query(self, input_parameter: float) -> Task:
        """Build one sub-query per sub-builder and wrap them together.

        Args:
            input_parameter: value multiplied by each sub-builder's weight
                before being passed to that sub-builder's `build_query`.

        Returns:
            A `ComposedTask` whose `subtasks` are the wrapped sub-queries.
        """
        return ComposedTask(
            subtasks=[
                wrap(builder.build_query(input_parameter * weight))
                for builder, weight in zip(self.builders, self.weights())
            ]
        )

    def set_input_parameter(self, input_parameter: float) -> None:
        """Record the input parameter and forward its weighted value.

        Each sub-builder receives `input_parameter * weight`.
        """
        self._input_parameter = input_parameter
        for builder, weight in zip(self.builders, self.weights()):
            builder.set_input_parameter(input_parameter * weight)

    def set_epsilon_deltas_budget(
        self, epsilon_deltas_budget: Collection[Tuple[float, float]]
    ) -> None:
        """Record the budget and derive a budget for every sub-builder.

        With exactly one sub-builder the budget is forwarded unchanged.
        Otherwise, for each sub-builder, the query built from its current
        input parameter is turned into a private query, and for every
        delta of `epsilon_deltas_budget` the matching epsilon is obtained
        from a `LocalPrivacyAccountant`; the resulting `(epsilon, delta)`
        pairs become that sub-builder's budget.

        Args:
            epsilon_deltas_budget: `(epsilon, delta)` pairs. With several
                sub-builders only the deltas are read.

        Raises:
            ValueError: with several sub-builders, if any of them has no
                input parameter set. No sub-builder budget is modified in
                that case.
        """
        self._epsilon_deltas_budget = epsilon_deltas_budget

        if len(self.builders) == 1:
            self.builders[0].set_epsilon_deltas_budget(epsilon_deltas_budget)
            return

        # Check every sub-builder before changing any of them, so a missing
        # input parameter cannot leave only some sub-builders updated.
        input_parameters = []
        for builder in self.builders:
            parameter = builder.input_parameter
            if parameter is None:
                raise ValueError(
                    "No input parameter is set, please fit the query builder before setting budget"
                )
            input_parameters.append(parameter)

        for builder, parameter in zip(self.builders, input_parameters):
            tasks = builder.build_query(parameter)
            private_query = builder.private_query(tasks)
            accountant = LocalPrivacyAccountant()
            # NOTE: an epsilon of 0 is passed through as-is; a guard
            # rejecting it used to sit here, commented out.
            builder_budget = [
                (accountant.epsilon_query(delta, private_query), delta)
                for _, delta in epsilon_deltas_budget
            ]
            builder.set_epsilon_deltas_budget(builder_budget)


# Compatibility alias: `SimpleComposedBuilder` is bound to the very same
# class object as `ComposedBuilder`.
SimpleComposedBuilder = ComposedBuilder


def simple_composed_builder(
    dataset: Dataset,
    builders: Sequence[OptimizableQueryBuilder],
) -> ComposedBuilder:
    """Compose `builders` into a `ComposedBuilder` with every weight 1."""
    unit_weights = [1 for _ in builders]
    return ComposedBuilder(dataset, builders, unit_weights)


def weighted_composed_builder(
    dataset: Dataset,
    builders: Sequence[OptimizableQueryBuilder],
    weights: Sequence[float],
) -> ComposedBuilder:
    """Compose `builders` into a `ComposedBuilder` using `weights`."""
    composed = ComposedBuilder(
        dataset=dataset,
        builders=builders,
        weights=weights,
    )
    return composed


def bounds_marginals_builder(
    dataset: Dataset,
    bound_query: Query,
    marginal_query: Query,
) -> ComposedBuilder:
    """Compose a bounds builder and a marginals builder on `dataset`.

    Both sub-builders are combined through `simple_composed_builder`,
    bounds first, marginals second.
    """
    bounds = bounds_builder(dataset, bound_query)
    marginals = marginals_builder(dataset, marginal_query)
    return simple_composed_builder(dataset, [bounds, marginals])


def max_mult_size_builder(
    dataset: Dataset,
    max_multiplicity_query: Query,
    size_query: Query,
) -> ComposedBuilder:
    """Compose a max-multiplicity builder and a size builder on `dataset`.

    Both sub-builders are combined through `simple_composed_builder`,
    max multiplicity first, size second.
    """
    max_multiplicity = max_multiplicity_builder(
        dataset, max_multiplicity_query
    )
    size = size_builder(dataset, size_query)
    return simple_composed_builder(dataset, [max_multiplicity, size])


def synthetic_data_builder(
    dataset: Dataset,
    links_query: Query,
    dpsgd_query: Query,
) -> ComposedBuilder:
    """Compose the links and `synthetic_dpsgd_builder` builders.

    The links builder is created on the single parent of `dataset` and
    weighted 0.2; the `synthetic_dpsgd_builder` one is created on
    `dataset` itself and weighted 0.8.
    """
    # Only the positional parents are used; the keyword ones are ignored.
    parents_list, _ = dataset.parents()
    assert len(parents_list) == 1
    links = links_builder(parents_list[0], links_query)
    dpsgd = synthetic_dpsgd_builder(dataset, dpsgd_query)
    return weighted_composed_builder(dataset, [links, dpsgd], [0.2, 0.8])