# Why Gemfury? Push, build, and install RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Debian packages, RPM packages, NuGet packages
#
# Repository URL to install this package:
#
# Details
# Size: Mime:
from __future__ import annotations

import warnings
from math import exp, log

from sarus_data_spec.typing import Dataset
from sarus_differential_privacy.query import EpsilonQuery, PrivateQuery

# Optional dependency: the module must stay importable when sarus_xgboost
# is missing, so a failed import only produces a warning.
try:
    from sarus_xgboost.protobuf.xgboost_pb2 import XgboostParameters
except Exception:
    # Best-effort for any ordinary import-time failure, but no longer a
    # bare ``except``: KeyboardInterrupt and SystemExit now propagate.
    warnings.warn('XgBoost Not available')

from sarus_query_builder.core.core import OptimizableQueryBuilder, QueryBuilder
from sarus_query_builder.core.typing import Task
from sarus_query_builder.protobuf.query_pb2 import Query


class XGBoostBuilder(QueryBuilder):
    """Build differentially private XGBoost training parameters.

    ``build_query`` turns a ``Query.XGBoost`` message into an
    ``XgboostParameters`` task, replacing falsy fields with defaults, and
    ``private_query`` computes the epsilon spent by such a task.
    """

    def __init__(self, dataset: Dataset):
        self._dataset = dataset

    def build_query(self, input_parameter: Query.XGBoost) -> Task:
        """Return the ``XgboostParameters`` described by ``input_parameter``.

        Each field other than ``booster`` and ``dp_epsilon_per_tree`` falls
        back to a default when the requested value is falsy (``0``, ``0.0``
        or an empty string). ``tree_method`` is always ``'approxDP'``.

        Args:
            input_parameter: The requested XGBoost settings.

        Returns:
            The populated ``XgboostParameters`` message.
        """
        xgb = XgboostParameters(
            objective=input_parameter.objective or 'reg:squarederror',
            tree_method='approxDP',
            max_depth=input_parameter.max_depth or 6,
            learning_rate=input_parameter.learning_rate or 0.2,
            lambd=input_parameter.lambd or 0.1,
            base_score=input_parameter.base_score or 0.5,
            subsample=input_parameter.subsample or 1,
            # Default: a tenth of the dataset size. The size lookup only
            # runs when no min_child_weight was requested (``or``
            # short-circuits).
            # NOTE(review): ``self.dataset`` is not defined in this class;
            # presumably QueryBuilder exposes ``_dataset`` under that name
            # -- confirm.
            min_child_weight=input_parameter.min_child_weight
            or self.dataset.size().statistics().protobuf().union.size / 10,
            nthread=input_parameter.nthread or 4,
            num_boost_rounds=input_parameter.n_estimators or 20,
            verbose=input_parameter.verbose or 0,
            booster=input_parameter.booster,
            dp_epsilon_per_tree=input_parameter.dp_epsilon_per_tree,
        )  # default params to optimize

        return xgb

    def private_query(self, out: Task) -> PrivateQuery:
        """Return the privacy cost of training with the parameters ``out``.

        The cost is
        ``num_boost_rounds * log(1 + subsample * (exp(eps) - 1))`` with
        ``eps = out.dp_epsilon_per_tree``.

        Args:
            out: A task exposing ``num_boost_rounds``, ``subsample`` and
                ``dp_epsilon_per_tree``, such as the value returned by
                ``build_query``.

        Returns:
            An ``EpsilonQuery`` carrying the total epsilon.
        """
        # Imported locally: the module header only brings in exp and log.
        from math import expm1, log1p

        # Same formula as in the docstring, evaluated with log1p/expm1.
        # The naive form cancels to exactly 0 for a tiny per-tree epsilon
        # (exp(eps) - 1 rounds to 0), which would under-report the cost.
        epsilon_per_round = log1p(
            out.subsample * expm1(out.dp_epsilon_per_tree)
        )
        return EpsilonQuery(epsilon=out.num_boost_rounds * epsilon_per_round)


class OptimizableXGBoostBuilder(OptimizableQueryBuilder):
    """Optimizable builder that delegates to a single ``XGBoostBuilder``.

    The parameter handed to ``build_query`` is written into the stored
    query as ``xgboost.dp_epsilon_per_tree`` before delegating.
    """

    def __init__(self, dataset: Dataset, query: Query):
        self._dataset = dataset
        self.query = query
        # Exactly one underlying builder, created for the same dataset.
        self._builders = [XGBoostBuilder(dataset)]

    def build_query(self, input_parameter: float) -> Task:
        """Build the XGBoost task for a given per-tree epsilon.

        Args:
            input_parameter: Value stored as ``dp_epsilon_per_tree``.

        Returns:
            The task produced by the first underlying builder.
        """
        # The stored query is updated in place, not copied.
        xgboost_request = self.query.xgboost
        xgboost_request.dp_epsilon_per_tree = input_parameter
        # ``self.builders`` is presumably the base-class view of
        # ``_builders`` -- it is not defined in this class.
        delegate = self.builders[0]
        return delegate.build_query(xgboost_request)


def xgboost_builder(
    dataset: Dataset, query: Query
) -> OptimizableXGBoostBuilder:
    """Create an ``OptimizableXGBoostBuilder`` for ``dataset`` and ``query``.

    Args:
        dataset: Dataset handed to the builder.
        query: Query handed to the builder.

    Returns:
        A new ``OptimizableXGBoostBuilder`` instance.
    """
    builder = OptimizableXGBoostBuilder(dataset, query)
    return builder