Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
from sarus_differential_privacy.query import (
    PrivateQuery,
    SampledGaussianMechanismQuery,
)

from sarus_query_builder.core.core import OptimizableQueryBuilder, QueryBuilder
from sarus_query_builder.protobuf.query_pb2 import DPSGD, Query

if TYPE_CHECKING:
    from sarus_data_spec.typing import Dataset

    from sarus_query_builder.core.typing import Task


class DPSGDBuilder(QueryBuilder):
    """Generate DPSGD hyperparameters.

    Completes a requested DPSGD configuration with default values and
    builds the privacy query describing the resulting training run.

    NOTE(review): only ``self._dataset`` is assigned here while the methods
    read ``self.dataset``; that attribute is presumably a property provided
    by ``QueryBuilder`` -- confirm.
    """

    def __init__(self, dataset: Dataset):
        self._dataset = dataset

    def build_query(self, input_parameter: Query.DPSGD) -> Task:
        """Return a ``DPSGD`` task with unset hyperparameters defaulted.

        Any of the following fields that is falsy (e.g. ``0``) in
        ``input_parameter`` is replaced by a default:

        * ``batch_size``: ``min(512, dataset size)``
        * ``l2_clipping_bound``: ``1``
        * ``microbatches``: the resulting batch size
        * ``learning_rate``: ``1e-5 * batch_size``
        * ``epochs``: ``16``

        ``model``, ``optimizer``, ``loss`` and ``noise_multiplier`` are
        copied from ``input_parameter`` unchanged.
        """
        # Read the size from the dataset statistics, exactly as
        # `private_query` does, instead of materialising the whole dataset
        # as a DataFrame only to count its rows.
        size = int(self.dataset.size().statistics().size())

        dpsgd = DPSGD()
        dpsgd.batch_size = input_parameter.batch_size or min(
            512, size
        )  # TODO: as much as fit in memory ?
        dpsgd.l2_clipping_bound = input_parameter.l2_clipping_bound or 1
        dpsgd.microbatches = input_parameter.microbatches or dpsgd.batch_size
        dpsgd.model.CopyFrom(
            input_parameter.model
        )  # pylint: disable=no-member
        dpsgd.optimizer = input_parameter.optimizer
        dpsgd.loss = input_parameter.loss
        dpsgd.learning_rate = (
            input_parameter.learning_rate or 1e-5 * dpsgd.batch_size
        )  # change if adam

        dpsgd.epochs = (
            input_parameter.epochs or 16
        )  # can we infer this from eps/model/input_shape/nb weights ?

        dpsgd.noise_multiplier = input_parameter.noise_multiplier
        return dpsgd

    def private_query(self, out: Task) -> PrivateQuery:
        """Return the privacy query matching the DPSGD task ``out``.

        ``out`` is not type-checked; it must expose ``batch_size``,
        ``noise_multiplier`` and ``epochs``.

        Raises:
            ZeroDivisionError: if the dataset size or ``out.batch_size``
                is zero.
        """
        size = self.dataset.size().statistics().size()
        sampling_probability = out.batch_size / size
        # Number of full batches in one pass over the dataset.
        # NOTE(review): when ``out.batch_size > size`` this is 0 and the
        # sampling probability exceeds 1 -- confirm callers never request
        # a batch larger than the dataset.
        batches_per_epoch = size // out.batch_size
        return SampledGaussianMechanismQuery(
            sampling_probability,
            out.noise_multiplier,
            out.epochs * batches_per_epoch,
        )


class OptimizableDPSGDBuilder(OptimizableQueryBuilder):
    """Build DPSGD tasks whose noise multiplier comes from one parameter.

    Wraps a single ``DPSGDBuilder`` and feeds it the DPSGD part of the
    stored query with ``noise_multiplier`` set from the scalar passed to
    ``build_query``.

    NOTE(review): ``build_query`` reads ``self.builders`` while only
    ``self._builders`` is assigned here; that attribute is presumably a
    property provided by ``OptimizableQueryBuilder`` -- confirm.
    """

    def __init__(self, dataset: Dataset, query: Query):
        self._dataset = dataset
        self.query = query
        self._builders = [DPSGDBuilder(dataset)]

    def build_query(self, input_parameter: float) -> Task:
        """Return the DPSGD task for the given scalar parameter.

        The noise multiplier is ``1 / input_parameter``; a falsy parameter
        (e.g. ``0``) yields an infinite noise multiplier.
        """
        # Work on a copy of the DPSGD sub-message so the query supplied to
        # the constructor is not modified as a side effect of this call.
        template = self.query.dpsgd
        dpsgd_parameters = type(template)()
        dpsgd_parameters.CopyFrom(template)

        if input_parameter:
            dpsgd_parameters.noise_multiplier = 1 / input_parameter
        else:
            dpsgd_parameters.noise_multiplier = np.inf
        return self.builders[0].build_query(dpsgd_parameters)


def dpsgd_builder(dataset: Dataset, query: Query) -> OptimizableDPSGDBuilder:
    """Create the optimizable DPSGD builder for ``dataset`` and ``query``."""
    builder = OptimizableDPSGDBuilder(dataset, query)
    return builder