# Package-index page residue (no repository URL was captured with it).
# Version: 2.7.2
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
from sarus_differential_privacy.query import (
PrivateQuery,
SampledGaussianMechanismQuery,
)
from sarus_query_builder.core.core import OptimizableQueryBuilder, QueryBuilder
from sarus_query_builder.protobuf.query_pb2 import DPSGD, Query
if TYPE_CHECKING:
from sarus_data_spec.typing import Dataset
from sarus_query_builder.core.typing import Task
class DPSGDBuilder(QueryBuilder):
    """Fill in DP-SGD hyperparameters and describe the resulting private query.

    ``build_query`` turns a (possibly partially filled) DPSGD request into a
    complete ``DPSGD`` message; ``private_query`` converts such a message into
    a ``SampledGaussianMechanismQuery``.
    """

    def __init__(self, dataset: Dataset):
        # The methods below read the dataset back through ``self.dataset``,
        # which is not defined in this class (presumably a property supplied
        # by ``QueryBuilder`` -- confirm).
        self._dataset = dataset

    def build_query(self, input_parameter: Query.DPSGD) -> Task:
        """Return a ``DPSGD`` message with every hyperparameter filled in.

        Falsy fields of ``input_parameter`` (e.g. ``0``) are replaced by
        defaults; ``model``, ``optimizer``, ``loss`` and ``noise_multiplier``
        are copied through as given.

        Args:
            input_parameter: the requested DPSGD settings.

        Returns:
            A new ``DPSGD`` message; ``input_parameter`` is not modified.
        """
        # NOTE(review): the row count here comes from ``to_pandas()`` whereas
        # ``private_query`` reads it from the size statistics -- presumably
        # both agree; confirm, and consider whether converting the dataset is
        # needed just to count rows.
        row_count = len(self.dataset.to_pandas())
        requested = input_parameter

        task = DPSGD()

        # Default batch size: 512 rows, or the whole dataset when smaller.
        # TODO: as much as fit in memory ?
        if requested.batch_size:
            task.batch_size = requested.batch_size
        else:
            task.batch_size = min(512, row_count)

        task.l2_clipping_bound = (
            requested.l2_clipping_bound
            if requested.l2_clipping_bound
            else 1
        )

        # Default: as many microbatches as there are examples in a batch.
        task.microbatches = (
            requested.microbatches
            if requested.microbatches
            else task.batch_size
        )

        task.model.CopyFrom(requested.model)  # pylint: disable=no-member
        task.optimizer = requested.optimizer
        task.loss = requested.loss

        # Default learning rate grows linearly with the batch size.
        # TODO: change if adam
        if requested.learning_rate:
            task.learning_rate = requested.learning_rate
        else:
            task.learning_rate = 1e-5 * task.batch_size

        # TODO: can we infer this from eps/model/input_shape/nb weights ?
        task.epochs = requested.epochs if requested.epochs else 16

        task.noise_multiplier = requested.noise_multiplier
        return task

    def private_query(self, out: Task) -> PrivateQuery:
        """Describe the task ``out`` as a sampled Gaussian mechanism.

        Args:
            out: a task exposing ``batch_size``, ``noise_multiplier`` and
                ``epochs`` (as produced by ``build_query``). Its type is not
                checked here.

        Returns:
            A ``SampledGaussianMechanismQuery`` built from, in order: the
            ratio ``batch_size / dataset size``, the noise multiplier, and
            ``epochs * (dataset size // batch_size)``.
        """
        row_count = self.dataset.size().statistics().size()

        # NOTE(review): a batch_size larger than the dataset yields a ratio
        # above 1 and zero steps per epoch -- confirm callers never do that.
        sampling_ratio = out.batch_size / row_count
        total_steps = out.epochs * (row_count // out.batch_size)

        return SampledGaussianMechanismQuery(
            sampling_ratio,
            out.noise_multiplier,
            total_steps,
        )
class OptimizableDPSGDBuilder(OptimizableQueryBuilder):
    """Build DPSGD tasks from a query template and a single float parameter.

    The float parameter is the inverse of the noise multiplier: each call to
    ``build_query`` sets ``noise_multiplier = 1 / input_parameter`` on a copy
    of the template and delegates to a ``DPSGDBuilder``.
    """

    def __init__(self, dataset: Dataset, query: Query):
        """Store the dataset, the query template and the delegate builder.

        Args:
            dataset: dataset the DPSGD task will run on.
            query: template whose ``dpsgd`` field carries the requested
                hyperparameters. It is kept by reference and never modified
                by ``build_query``.
        """
        self._dataset = dataset
        self.query = query
        self._builders = [DPSGDBuilder(dataset)]

    def build_query(self, input_parameter: float) -> Task:
        """Build the DPSGD task for the given inverse noise multiplier.

        Args:
            input_parameter: inverse of the noise multiplier. A falsy value
                (e.g. ``0``) maps to an infinite noise multiplier.

        Returns:
            The task produced by ``DPSGDBuilder.build_query`` for the
            template with ``noise_multiplier`` overridden.
        """
        # Work on a private copy: writing the noise multiplier straight into
        # ``self.query`` would mutate the object the caller handed to the
        # constructor, leaking state between calls and into caller code.
        query = Query()
        query.CopyFrom(self.query)

        if input_parameter:
            query.dpsgd.noise_multiplier = 1 / input_parameter
        else:
            query.dpsgd.noise_multiplier = np.inf

        # ``self.builders`` is not defined in this class (presumably exposed
        # by ``OptimizableQueryBuilder`` over ``_builders`` -- confirm).
        return self.builders[0].build_query(query.dpsgd)
def dpsgd_builder(dataset: Dataset, query: Query) -> OptimizableDPSGDBuilder:
    """Factory returning an ``OptimizableDPSGDBuilder`` for ``dataset`` and ``query``."""
    builder = OptimizableDPSGDBuilder(dataset, query)
    return builder