# @package layer_model_helper
# Module caffe2.python.layer_model_helper
from caffe2.python import core, model_helper, schema, scope, utils, muji
from caffe2.python.modeling.parameter_info import (
ParameterInfo,
)
from caffe2.python.modeling.parameter_sharing import (
parameter_sharing_context,
)
from caffe2.python.modeling.net_modifier import NetModifier
from caffe2.python.optimizer import get_param_device, Optimizer
from caffe2.python.regularizer import Regularizer, RegularizationBy
from caffe2.python.layers import layers
from future.utils import viewitems, viewvalues
import logging
import numpy as np
import copy
# Module-level logger used by LayerModelHelper for informational messages.
logger = logging.getLogger(__name__)
class LayerModelHelper(model_helper.ModelHelper):
"""
Model helper for building models on top of layers abstractions.
Each layer is an abstraction that is higher level than Operator. A layer
is responsible for ownership of its own parameters and can easily be
instantiated in multiple nets, possibly with different sets of ops.
As an example: one can easily instantiate predict and train nets from
the same set of layers, where predict net will have subset of the
operators from train net.
"""
def __init__(self, name, input_feature_schema, trainer_extra_schema,
             keep_blobs=False,
             use_attribution=True):
    """Initialize bookkeeping state and attach the schemas to the model.

    TODO(amalevich): more documentation on input args

    Args:
        name: forwarded to the parent ModelHelper constructor.
        input_feature_schema: stored as ``_input_feature_schema``; see
            ``keep_blobs`` for how it is bound.
        trainer_extra_schema: stored as ``_trainer_extra_schema``; see
            ``keep_blobs`` for how it is bound.
        keep_blobs: when True both schemas are cloned as they are;
            otherwise new records are created for them on ``self.net``.
        use_attribution: if True, will generate the attribution net for
            feature importance calculation; needs to be turned to False
            when FC is quantized as FP16. This attribute access will be
            consistent with MTML model.
    """
    super(LayerModelHelper, self).__init__(name=name)

    # layer registry
    self._layer_names = set()
    self._layers = []
    self._param_to_shape = {}

    # seed default
    self._seed = None
    self._sequence_seed = True

    # optimizer bookkeeping
    self.param_to_optim = {}
    self.param_to_reg = {}
    self._default_optimizer = None

    self._loss = None
    self._prediction = []
    self._output_schema = None

    self._post_grad_net_modifiers = []
    self._final_net_modifiers = []

    # breakdown map; breakdown features are categorical (like dense) but not
    # necessarily used to represent data for training
    self._breakdown_map = None

    # Connect the schemas to self.net. These particular schema instances
    # will be used for generation of the Layers across the network and for
    # connection with Readers.
    if keep_blobs:
        self._input_feature_schema = input_feature_schema.clone()
        self._trainer_extra_schema = trainer_extra_schema.clone()
    else:
        self._input_feature_schema = schema.NewRecord(
            self.net, input_feature_schema)
        self._trainer_extra_schema = schema.NewRecord(
            self.net, trainer_extra_schema)
    self._metrics_schema = schema.Struct()

    self._preproc_output_schema = None

    # Global constants have to be registered before the init net is built,
    # because create_init_net copies their initializer ops into the net.
    self._init_global_constants()
    self.param_init_net = self.create_init_net('param_init_net')
    self._initialize_params = True

    self._transfer_learning_blob_name_mappings = None

    # additional (hard-coded) diagnose_options to report based on the model
    # TODO(xlwang): it's hack!
    self.ad_hoc_diagnose_blobs_and_operations = []
    self.ad_hoc_plot_blobs = []
    self.use_attribution = use_attribution
def clear_output_schema(self):
self._output_schema = None
def set_initialize_params(self, initialize_params):
self._initialize_params = initialize_params
def add_metric_field(self, name, value):
assert name not in self._metrics_schema.fields, (
"Try to add metric field twice: {}".format(name))
self._metrics_schema = self._metrics_schema + schema.Struct(
(name, value)
)
def filter_metrics_schema(self, white_set):
    """Remove every metric field whose name is not in ``white_set``.

    An empty ``white_set`` therefore removes all fields from the metrics
    schema.
    """
    logger.info("Filter metric schema with white_set {}".format(white_set))
    rejected = [
        field_name
        for field_name in self._metrics_schema.field_names()
        if field_name not in white_set
    ]
    for field_name in rejected:
        # A struct holding just the unwanted field is subtracted from the
        # metrics schema.
        placeholder = schema.Struct((field_name, schema.Scalar()))
        self._metrics_schema = self._metrics_schema - placeholder
def add_ad_hoc_plot_blob(self, blob, dtype=None):
    """Register ``blob`` as a metric field and as an ad hoc plot blob.

    Args:
        blob: a ``str`` or ``core.BlobReference`` naming the blob.
        dtype: dtype passed to ``schema.Scalar``; a falsy value falls back
            to ``(float, (1, ))``.

    Raises:
        AssertionError: if ``blob`` has an unsupported type, or if a metric
            field named ``str(blob)`` already exists.
    """
    assert isinstance(
        blob, (str, core.BlobReference)
    ), "expect type str or BlobReference, but got {}".format(type(blob))
    # `np.float` was merely an alias of the builtin `float` and has been
    # removed from NumPy (1.24); the builtin yields the same dtype.
    dtype = dtype or (float, (1, ))
    self.add_metric_field(str(blob), schema.Scalar(dtype, blob))
    self.ad_hoc_plot_blobs.append(blob)
@staticmethod
def _get_global_constant_initializer_op(
blob_name, array=None, dtype=None, initializer=None
):
# to add a global constant to model, one first need to get the
# initializer
if array is not None:
assert initializer is None,\
"Only one from array and initializer should be specified"
if dtype is None:
array = np.array(array)
else:
array = np.array(array, dtype=dtype)
# TODO: make GivenTensor generic
op_name = None
if array.dtype == np.int32:
op_name = 'GivenTensorIntFill'
elif array.dtype == np.int64:
op_name = 'GivenTensorInt64Fill'
elif array.dtype == np.str:
op_name = 'GivenTensorStringFill'
elif array.dtype == np.bool:
op_name = 'GivenTensorBoolFill'
else:
op_name = 'GivenTensorFill'
def initializer(blob_name):
return core.CreateOperator(
op_name, [],
blob_name,
shape=array.shape,
values=array.flatten().tolist()
)
else:
assert initializer is not None
initializer_op = initializer(blob_name)
return initializer_op
def add_global_constant(
self, name, array=None, dtype=None, initializer=None
):
assert isinstance(name, str), (
'name should be a string as we are using it as map key')
# This is global namescope for constants. They will be created in all
# init_nets and there should be very few of them.
assert name not in self.global_constants, \
"%s already added in global_constants" % name
blob_name = self.net.NextBlob(name)
self.global_constants[name] = blob_name
initializer_op = LayerModelHelper._get_global_constant_initializer_op(
blob_name, array, dtype, initializer
)
assert blob_name not in self.global_constant_initializers, \
"there is already a initializer op associated with blob %s" % \
blob_name
self.global_constant_initializers[blob_name] = initializer_op
return blob_name
def maybe_add_global_constant(self, name, *args, **kwargs):
# To ad hoc add new global constants without duplication
# if the name was already registered in global_constants, it will not be
# added even if the intended value is different from its original value
if name in self.global_constants:
blob_name = self.global_constants[name]
initializer_op = \
LayerModelHelper._get_global_constant_initializer_op(
blob_name, *args, **kwargs
)
# check if the original initializer is the same as the one intended
# now
assert utils.OpAlmostEqual(
initializer_op,
self.global_constant_initializers[blob_name],
'debug_info'
), \
"conflict initializers for global constant %s, " \
"previous %s, now %s" % (
blob_name, str(initializer_op),
str(self.global_constant_initializers[blob_name]))
return blob_name
return self.add_global_constant(name, *args, **kwargs)
def _init_global_constants(self):
self.global_constants = {}
self.global_constant_initializers = {}
self.add_global_constant('ONE', 1.0)
self.add_global_constant('NAN', float("NaN"))
self.add_global_constant('ZERO', 0.0)
self.add_global_constant('ZERO_RANGE', [0, 0], dtype='int32')
def _add_global_constants(self, init_net):
    """Append every registered global-constant initializer to ``init_net``.

    The operators are added to ``init_net._net.op`` one at a time, in the
    iteration order of ``global_constant_initializers``.
    """
    for const_init_op in viewvalues(self.global_constant_initializers):
        single_op = [const_init_op]
        init_net._net.op.extend(single_op)
def create_init_net(self, name):
    """Create a ``core.Net`` called ``name`` pre-filled with the
    initializer operators of all global constants, and return it."""
    fresh_net = core.Net(name)
    self._add_global_constants(fresh_net)
    return fresh_net
def _validate_param_shape(self, param_name, shape):
if param_name not in self._param_to_shape:
return
ref_shape = self._param_to_shape[param_name]
if shape != ref_shape:
raise ValueError(
"Got inconsistent shapes between shared parameters "
"when trying to map a blob in scope {0} to {1}. ref_shape : "
" {2}, shape : {3}".format(
scope.CurrentNameScope(), param_name, ref_shape, shape)
)
def _validate_param_optim(self, param_name, optim):
# there are three possible values for optim:
# 1) None (which will use self._default_optimizer after this layer is instantiated)
# 2) self.NoOptim
# 3) an instance of Optimizer class such as AdagradOptimizer
# this implies this parameter is not shared with any other parameter so far
if param_name not in self.param_to_optim:
return
logger.info("{} shares the same parameter with another parameter. "
"Validating if the same optimizer has been specified for them.".format(
param_name,
))
ref_optim = self.param_to_optim[param_name]
if optim is None:
assert ref_optim == self._default_optimizer, (
"Optim for {} is None which will fall back to use default_optimizer. "
"However, the optimizer that has been specified for this shared parameter "
"is {} which is different from default_optimizer {}. "
"Please check the optimizers specified for parameters shared "
"with {} and the default_optimizer to ensure the consistency.".format(
param_name, ref_optim, self._default_optimizer, param_name
)
)
elif optim == self.NoOptim:
assert ref_optim == self.NoOptim, (
"Optim for {} is NoOptim. However, the optimizer for the parameters "
"shared with {} is {} which is different from NoOptim. "
"Please check the optimizer specified for other parameters in the "
"shared group to ensure consistency.".format(
param_name, param_name, ref_optim
)
)
elif isinstance(optim, Optimizer):
assert isinstance(ref_optim, Optimizer), (
"Optim for {} is an instance of Optimizer. However, the optimizer "
"for the parameters shared with {} is {} which is not an instance "
"of Optimizer. Please check the optimizer specified for other "
" parameters in the shared group to ensure consistency.".format(
param_name, param_name, ref_optim, optim
)
)
assert type(optim) is type(ref_optim) and optim.attributes == ref_optim.attributes, (
"Optim for {} is an instance of Optimizer. However, the optimizer "
"for the parameters shared with {} is {}. "
"This optimizer either doesn't have the same type as the current optimizer: "
"{} vs {}, or its attributes such as learning rate are different from "
"that of current optimizer which is {} vs {}. "
"Please check the optimizer specified for other parameters in the "
"shared group to ensure consistency.".format(
param_name, param_name, ref_optim, type(optim), type(ref_optim), optim.attributes, ref_optim.attributes
)
)
else:
raise ValueError("optim should be either None, NoOptim, or an instance of Optimizer, Got {} ".format(optim))
def create_param(self, param_name, shape, initializer, optimizer=None,
ps_param=None, regularizer=None):
if isinstance(param_name, core.BlobReference):
param_name = str(param_name)
elif isinstance(param_name, str):
# Parameter name will be equal to current Namescope that got
# resolved with the respect of parameter sharing of the scopes.
param_name = parameter_sharing_context.get_parameter_name(
param_name)
else:
raise ValueError("Unsupported type for param_name")
param_blob = core.BlobReference(param_name)
if len(initializer) == 1:
init_op_args = {}
else:
assert len(initializer) == 2
init_op_args = copy.deepcopy(initializer[1])
if shape is not None:
assert 'shape' not in init_op_args
init_op_args.update({'shape': shape})
initializer_op = None
if self._initialize_params:
initializer_op = core.CreateOperator(
initializer[0],
[],
param_blob,
**init_op_args
)
param = layers.LayerParameter(
parameter=param_blob,
initializer=initializer_op,
optimizer=optimizer,
ps_param=ps_param,
regularizer=regularizer
)
Loading ...