# Repository URL to install this package:
# Version: 2.7.2
from typing import Collection, Sequence, Tuple, cast
from sarus_data_spec.protobuf.utilities import wrap
from sarus_data_spec.typing import Dataset
from sarus_differential_privacy.accountant_local import LocalPrivacyAccountant
from sarus_query_builder.builders.bounds_builder import bounds_builder
from sarus_query_builder.builders.links_builder import links_builder
from sarus_query_builder.builders.marginals_builder import marginals_builder
from sarus_query_builder.builders.max_multiplicity_builder import (
max_multiplicity_builder,
)
from sarus_query_builder.builders.size_builder import size_builder
from sarus_query_builder.builders.synthetic_builder import (
synthetic_dpsgd_builder,
)
from sarus_query_builder.core.core import OptimizableQueryBuilder
from sarus_query_builder.core.typing import Task
from sarus_query_builder.protobuf.query_pb2 import ComposedTask, Query
class ComposedBuilder(OptimizableQueryBuilder):
    """Query builder made of several weighted sub-builders.

    Every sub-builder is paired with a weight. Whenever an input parameter
    is given to the composed builder, each sub-builder receives that
    parameter multiplied by its own weight.
    """

    def __init__(
        self,
        dataset: Dataset,
        builders: Sequence[OptimizableQueryBuilder],
        weights: Sequence[float],
    ):
        """Store the dataset, the sub-builders and their weights.

        Args:
            dataset: Dataset the composed builder is attached to.
            builders: Sub-builders to compose.
            weights: One multiplicative weight per sub-builder, in the same
                order as ``builders``.
        """
        # Exactly one weight is expected per sub-builder.
        assert len(builders) == len(weights)
        self._dataset = dataset
        self._builders = builders
        self._weights = weights

    @property
    def builders(self) -> Sequence[OptimizableQueryBuilder]:
        """Sub-builders, in the order they were given at construction."""
        sub_builders = self._builders
        return cast(Sequence[OptimizableQueryBuilder], sub_builders)

    def weights(self) -> Sequence[float]:
        """Return the weights, aligned with ``builders``.

        Note that, unlike ``builders``, this is a plain method and must be
        called.
        """
        return self._weights

    def build_query(self, input_parameter: float) -> Task:
        """Build a ``ComposedTask`` holding one wrapped subtask per builder.

        Each sub-builder builds its query with ``input_parameter`` scaled by
        its weight; the result is passed through ``wrap`` before being
        stored in the composed task.
        """
        subtasks = []
        for sub_builder, weight in zip(self.builders, self.weights()):
            sub_query = sub_builder.build_query(input_parameter * weight)
            subtasks.append(wrap(sub_query))
        return ComposedTask(subtasks=subtasks)

    def set_input_parameter(self, input_parameter: float) -> None:
        """Record ``input_parameter`` and push the weighted value down.

        The unscaled value is kept on this builder; each sub-builder gets
        ``input_parameter`` multiplied by its weight.
        """
        self._input_parameter = input_parameter
        for sub_builder, weight in zip(self.builders, self.weights()):
            scaled_parameter = input_parameter * weight
            sub_builder.set_input_parameter(scaled_parameter)

    def set_epsilon_deltas_budget(
        self, epsilon_deltas_budget: Collection[Tuple[float, float]]
    ) -> None:
        """Record the budget and derive a budget for every sub-builder.

        With a single sub-builder the budget is forwarded unchanged.
        Otherwise, for each sub-builder, the query is built from the
        sub-builder's own input parameter, turned into a private query, and
        a ``LocalPrivacyAccountant`` is asked for the epsilon of that query
        at each delta of the budget. The resulting ``(epsilon, delta)``
        pairs become the sub-builder's budget; the epsilons of the incoming
        budget are not used in this branch.

        Raises:
            ValueError: If a sub-builder has no input parameter set
                (``input_parameter is None``).
        """
        self._epsilon_deltas_budget = epsilon_deltas_budget
        sub_builders = self.builders

        if len(sub_builders) == 1:
            # Lone sub-builder: it simply inherits the whole budget.
            sub_builders[0].set_epsilon_deltas_budget(epsilon_deltas_budget)
            return

        for sub_builder in sub_builders:
            parameter = sub_builder.input_parameter
            if parameter is None:
                raise ValueError(
                    "No input parameter is set, please fit the query builder before setting budget"
                )
            tasks = sub_builder.build_query(parameter)
            private_query = sub_builder.private_query(tasks)
            # A new accountant is created for each sub-builder.
            accountant = LocalPrivacyAccountant()
            # NOTE(review): a guard rejecting `epsilon_query == 0` was
            # commented out in the original code, so zero epsilons are
            # passed through as-is -- confirm this is intended.
            builder_budget = [
                (accountant.epsilon_query(delta, private_query), delta)
                for _, delta in epsilon_deltas_budget
            ]
            sub_builder.set_epsilon_deltas_budget(builder_budget)
# Alias kept for compatibility: `SimpleComposedBuilder` is the very same
# class object as `ComposedBuilder`, just reachable under a second name.
SimpleComposedBuilder = ComposedBuilder
def simple_composed_builder(
    dataset: Dataset,
    builders: Sequence[OptimizableQueryBuilder],
) -> ComposedBuilder:
    """Compose ``builders`` giving every sub-builder a weight of 1.

    Args:
        dataset: Dataset passed to the ``ComposedBuilder``.
        builders: Sub-builders to compose.

    Returns:
        A ``ComposedBuilder`` whose weights are all 1.
    """
    unit_weights = [1] * len(builders)
    return ComposedBuilder(
        dataset=dataset,
        builders=builders,
        weights=unit_weights,
    )
def weighted_composed_builder(
    dataset: Dataset,
    builders: Sequence[OptimizableQueryBuilder],
    weights: Sequence[float],
) -> ComposedBuilder:
    """Compose ``builders`` using caller-supplied ``weights``.

    Args:
        dataset: Dataset passed to the ``ComposedBuilder``.
        builders: Sub-builders to compose.
        weights: One weight per sub-builder, in the same order.

    Returns:
        The ``ComposedBuilder`` built from the three arguments.
    """
    composed = ComposedBuilder(
        dataset=dataset,
        builders=builders,
        weights=weights,
    )
    return composed
def bounds_marginals_builder(
    dataset: Dataset,
    bound_query: Query,
    marginal_query: Query,
) -> ComposedBuilder:
    """Compose a bounds builder and a marginals builder on ``dataset``.

    The two sub-builders are created from ``bound_query`` and
    ``marginal_query`` respectively and combined, in that order, through
    ``simple_composed_builder``.
    """
    bounds = bounds_builder(dataset, bound_query)
    marginals = marginals_builder(dataset, marginal_query)
    return simple_composed_builder(dataset, [bounds, marginals])
def max_mult_size_builder(
    dataset: Dataset,
    max_multiplicity_query: Query,
    size_query: Query,
) -> ComposedBuilder:
    """Compose a max-multiplicity builder and a size builder on ``dataset``.

    The two sub-builders are created from ``max_multiplicity_query`` and
    ``size_query`` respectively and combined, in that order, through
    ``simple_composed_builder``.
    """
    max_multiplicity = max_multiplicity_builder(
        dataset, max_multiplicity_query
    )
    size = size_builder(dataset, size_query)
    return simple_composed_builder(dataset, [max_multiplicity, size])
def synthetic_data_builder(
    dataset: Dataset,
    links_query: Query,
    dpsgd_query: Query,
) -> ComposedBuilder:
    """Compose a links builder and a synthetic DP-SGD builder.

    The links builder is created on the single parent of ``dataset`` (the
    dataset is asserted to have exactly one positional parent), while the
    DP-SGD builder is created on ``dataset`` itself. They are combined with
    weights 0.2 (links) and 0.8 (DP-SGD).
    """
    # Only the positional parents are needed; the second element returned
    # by `parents()` is not used.
    parents_list, _unused_kwargs = dataset.parents()
    assert len(parents_list) == 1
    parent_dataset = parents_list[0]

    links = links_builder(parent_dataset, links_query)
    dpsgd = synthetic_dpsgd_builder(dataset, dpsgd_query)
    sub_builders = [links, dpsgd]
    sub_weights = [0.2, 0.8]
    return weighted_composed_builder(dataset, sub_builders, sub_weights)