## @package fc_with_bootstrap
# Module caffe2.python.layers.fc_with_bootstrap
import math
import numpy as np
from caffe2.python import core, schema
from caffe2.python.helpers.arg_scope import get_current_scope
from caffe2.python.layers.layers import ModelLayer
from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin


def get_fc_predictor_version(fc_version):
    assert fc_version in ["fp32"], (
        "Only fp32 is supported for the fully connected layer "
        "in the predictor net; the provided FC precision is {}".format(fc_version)
    )
    return fc_version
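

# Usage sketch (hedged): add_ops() below reads the predictor FC precision from
# the current arg_scope, keyed by this function's __name__. Assuming caffe2's
# arg_scope helper stores kwargs under the wrapped function's name (which is
# what the lookup in add_ops() expects), a caller could pin the precision like
# this (`layer` and `predict_net` are hypothetical names):
#
#     from caffe2.python.helpers.arg_scope import arg_scope
#     with arg_scope(get_fc_predictor_version, fc_version="fp32"):
#         layer.add_ops(predict_net)
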
class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
def __init__(
self,
model,
input_record,
output_dims,
num_bootstrap,
weight_init=None,
bias_init=None,
weight_optim=None,
bias_optim=None,
name="fc_with_bootstrap",
weight_reg=None,
bias_reg=None,
clip_param=None,
axis=1,
**kwargs
):
super(FCWithBootstrap, self).__init__(model, name, input_record, **kwargs)
assert isinstance(
input_record, schema.Scalar
), "Incorrect input type {}".format(input_record)
assert (
len(input_record.field_types()[0].shape) > 0
), "FC expects limited dimensions of the input tensor"
assert axis >= 1, "axis {} should >= 1.".format(axis)
self.axis = axis
input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])
assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
input_dims
)
self.clip_args = None
# attributes for bootstrapping below
self.num_bootstrap = num_bootstrap
# input dim shape
self.input_dims = input_dims
# bootstrapped fully-connected layers to be used in eval time
self.bootstrapped_FCs = []
# scalar containing batch_size blob so that we don't need to recompute
self.batch_size = None
        # this is intended to be the last FC, so output_dim should be 1; set to None for now
self.output_dim_vec = None
# lower bound when creating random indices
self.lower_bound = None
# upper bound when creating random indices
self.upper_bound = None
if clip_param is not None:
assert len(clip_param) == 2, (
"clip_param must be a tuple / list "
"of length 2 and in the form of (clip_min, clip max)"
)
clip_min, clip_max = clip_param
assert (
clip_min is not None or clip_max is not None
), "clip_min, and clip_max in clip_param cannot both be None"
assert (
clip_min is None or clip_max is None
) or clip_min < clip_max, (
"clip_param = [clip_min, clip_max] must have clip_min < clip_max"
)
self.clip_args = {}
if clip_min is not None:
self.clip_args["min"] = clip_min
if clip_max is not None:
self.clip_args["max"] = clip_max
        # default init: uniform over [-1/sqrt(input_dims), 1/sqrt(input_dims)] (fan-in scaling)
        scale = math.sqrt(1.0 / input_dims)
weight_init = (
weight_init
if weight_init
else ("UniformFill", {"min": -scale, "max": scale})
)
bias_init = (
bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
)
"""
bootstrapped FCs:
Ex: [
bootstrapped_weights_blob_1, bootstrapped_bias_blob_1,
...,
...,
bootstrapped_weights_blob_b, bootstrapped_bias_blob_b
]
output_schema:
Note: indices will always be on even indices.
Ex: Struct(
indices_0_blob,
preds_0_blob,
...
...
indices_b_blob,
preds_b_blob
)
"""
output_schema = schema.Struct()
for i in range(num_bootstrap):
output_schema += schema.Struct(
(
"bootstrap_iteration_{}/indices".format(i),
self.get_next_blob_reference(
"bootstrap_iteration_{}/indices".format(i)
),
),
(
"bootstrap_iteration_{}/preds".format(i),
self.get_next_blob_reference(
"bootstrap_iteration_{}/preds".format(i)
),
),
)
self.bootstrapped_FCs.extend(
[
self.create_param(
param_name="bootstrap_iteration_{}/w".format(i),
shape=[output_dims, input_dims],
initializer=weight_init,
optimizer=weight_optim,
regularizer=weight_reg,
),
self.create_param(
param_name="bootstrap_iteration_{}/b".format(i),
shape=[output_dims],
initializer=bias_init,
optimizer=bias_optim,
regularizer=bias_reg,
),
]
)
self.output_schema = output_schema
        # NOTE: output_shape is computed for reference but not used further here.
        if axis == 1:
            output_shape = (output_dims,)
        else:
            output_shape = list(input_record.field_types()[0].shape)[0 : axis - 1]
            output_shape = tuple(output_shape + [output_dims])

    def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
        """
        Args:
            net: the caffe2 net to insert operators into
            copied_cur_layer: blob of the bootstrapped features (make sure this
                blob has a stop_gradient on it)
            iteration: the bootstrap iteration to generate indices for. Used to
                correctly populate the output_schema
        Returns:
            A blob containing the generated indices of shape: (batch_size,)
        """
with core.NameScope("bootstrap_iteration_{}".format(iteration)):
if iteration == 0:
# capture batch_size once for efficiency
input_shape = net.Shape(copied_cur_layer, "input_shape")
batch_size_index = net.Const(np.array([0]), "batch_size_index")
batch_size = net.Gather([input_shape, batch_size_index], "batch_size")
self.batch_size = batch_size
lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32)
offset = net.Const(np.array([1]), "offset", dtype=np.int32)
int_batch_size = net.Cast(
[self.batch_size], "int_batch_size", to=core.DataType.INT32
)
upper_bound = net.Sub([int_batch_size, offset], "upper_bound")
self.lower_bound = lower_bound
self.upper_bound = upper_bound
indices = net.UniformIntFill(
[self.batch_size, self.lower_bound, self.upper_bound],
self.output_schema[iteration * 2].field_blobs()[0],
input_as_shape=1,
)
return indices

    def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
        """
        This method contains all the bootstrapping logic used to bootstrap
        the features. Only used by the train_net.
        Args:
            net: the caffe2 net to insert bootstrapping operators into
            copied_cur_layer: the blob representing the current features.
                Note, this blob should have a stop_gradient on it.
            indices: blob of bootstrapped indices produced by
                _generate_bootstrapped_indices
            iteration: the bootstrap iteration these features belong to
        Returns:
            bootstrapped_features: blob of the bootstrapped version of
                cur_layer with the same dimensions
        """
# draw features based upon the bootstrapped indices
bootstrapped_features = net.Gather(
[copied_cur_layer, indices],
net.NextScopedBlob("bootstrapped_features_{}".format(iteration)),
)
bootstrapped_features = schema.Scalar(
(np.float32, self.input_dims), bootstrapped_features
)
return bootstrapped_features

    def _insert_fc_ops(self, net, features, params, outputs, version):
        """
        Args:
            net: the caffe2 net to insert operators into
            features: Scalar containing the blob of the bootstrapped features
                or the actual cur_layer features
            params: weight and bias blobs for the FC
            outputs: the output blobs
            version: precision of the FC; only fp32 is supported for now
        """
if version == "fp32":
pred_blob = net.FC(
features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs
)
return pred_blob
else:
raise Exception("unsupported FC type version {}".format(version))

    def _add_ops(self, net, features, iteration, params, version):
        """
        Args:
            net: the caffe2 net to insert operators into
            features: feature blobs to predict on. Can be the actual cur_layer
                or the bootstrapped_feature blobs.
            iteration: the bootstrap iteration whose output blobs are written to
            params: the weight and bias, passed by either the add_ops or the
                add_train_ops function
            version: currently fp32 support only
        """
if self.clip_args is not None:
clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params]
for p, cp in zip(params, clipped_params):
net.Clip([p], [cp], **self.clip_args)
params = clipped_params
if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
self._insert_fc_ops(
net=net,
features=features,
params=params,
outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]],
version=version,
)

    def add_ops(self, net):
        """
        Both the predict net and the eval net will call this function.
        For the bootstrapping approach, the goal is to pass the cur_layer
        feature inputs through all the bootstrapped FCs that are stored under
        self.bootstrapped_FCs. The preds are returned in the same output_schema,
        with dummy indices (because they are not needed).
        """
version_info = get_current_scope().get(
get_fc_predictor_version.__name__, {"fc_version": "fp32"}
)
predictor_fc_fp_version = version_info["fc_version"]
for i in range(self.num_bootstrap):
# these are dummy indices, not to be used anywhere
indices = self._generate_bootstrapped_indices(
net=net,
copied_cur_layer=self.input_record.field_blobs()[0],
iteration=i,
)
params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2]
self._add_ops(
net=net,
features=self.input_record,
params=params,
iteration=i,
version=predictor_fc_fp_version,
)

    def add_train_ops(self, net):
        # use the train_param_blobs to be consistent with the SamplingTrain unittest
        # for each bootstrap iteration: draw indices, gather the bootstrapped
        # features, then run the FC with that iteration's train params
for i in range(self.num_bootstrap):
indices = self._generate_bootstrapped_indices(
net=net,
copied_cur_layer=self.input_record.field_blobs()[0],
iteration=i,
)
bootstrapped_features = self._bootstrap_ops(
net=net,
copied_cur_layer=self.input_record.field_blobs()[0],
indices=indices,
iteration=i,
)
self._add_ops(
net,
features=bootstrapped_features,
iteration=i,
params=self.train_param_blobs[i * 2 : (i * 2) + 2],
version="fp32",
)
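
    @property
    def param_blobs(self):
        # Assumed implementation of the property SamplingTrainableMixin
        # requires (it feeds train_param_blobs, used in add_train_ops above):
        # the bootstrapped FC weight/bias blobs created in __init__ are this
        # layer's trainable parameters.
        return self.bootstrapped_FCs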