## @package train
# Module caffe2.python.helpers.train
from caffe2.python import core, scope
from caffe2.proto import caffe2_pb2
def _get_weights(model, namescope=None):
    """Return the weights of ``model`` that live in a given name scope.

    Args:
        model: object exposing a ``weights`` sequence whose items provide
            a ``GetNameScope()`` method.
        namescope: name scope to filter on. When ``None``, the value of
            ``scope.CurrentNameScope()`` is used instead.

    Returns:
        A shallow copy (``model.weights[:]``) of all weights when the scope
        is the empty string; otherwise a new list holding only the weights
        whose ``GetNameScope()`` equals the scope exactly.
    """
    target_scope = scope.CurrentNameScope() if namescope is None else namescope

    # A non-empty scope selects only the weights registered under it.
    if target_scope != '':
        return [
            weight for weight in model.weights
            if weight.GetNameScope() == target_scope
        ]

    # Empty scope: hand back a copy so callers cannot mutate model.weights.
    return model.weights[:]
def iter(model, blob_out, **kwargs):
    """Create an int64 counter blob and add an ``Iter`` operator on it.

    The blob is produced in ``model.param_init_net`` by a ``ConstantFill``
    with shape ``[1]`` and value 0, pinned to CPU device 0. A
    ``device_option`` supplied by the caller is discarded and reaches
    neither operator; every other keyword argument is forwarded to both.

    Args:
        model: object exposing ``param_init_net`` and ``net``.
        blob_out: name/reference of the counter blob; it is passed to
            ``Iter`` as both positional arguments.
        **kwargs: extra arguments forwarded to ``ConstantFill`` and ``Iter``
            (minus ``device_option``).

    Returns:
        Whatever ``model.net.Iter`` returns.
    """
    # The counter fill is always placed on CPU, so a caller-provided
    # device_option must not be forwarded (it would also clash with the
    # explicit keyword below).
    kwargs.pop('device_option', None)

    cpu_option = core.DeviceOption(caffe2_pb2.CPU, 0)
    model.param_init_net.ConstantFill(
        [], blob_out,
        shape=[1], value=0, dtype=core.DataType.INT64,
        device_option=cpu_option,
        **kwargs
    )
    return model.net.Iter(blob_out, blob_out, **kwargs)
def accuracy(model, blob_in, blob_out, **kwargs):
    """Add an ``Accuracy`` operator to ``model.net``.

    The target device is taken from ``kwargs['device_option']`` when that
    key is present, otherwise from ``scope.CurrentDeviceScope()``. A device
    of ``None`` or of type ``caffe2_pb2.CPU`` is treated as CPU.

    When the device is not CPU and ``top_k > 1`` is requested, both inputs
    are first passed through ``CopyGPUToCPU`` into ``<name>_host`` blobs and
    the ``Accuracy`` operator is created with a CPU device option. In every
    other case a plain ``Accuracy(blob_in, blob_out)`` is added.

    Args:
        model: object exposing ``net``.
        blob_in: indexable pair; element 0 and element 1 are the two inputs
            (named prediction and label in the host-copy branch).
        blob_out: output blob for the operator.
        **kwargs: optional ``device_option`` and ``top_k``; in the host-copy
            branch all entries except ``device_option`` are forwarded to
            ``Accuracy``.

    Returns:
        None.
    """
    if 'device_option' in kwargs:
        dev = kwargs['device_option']
    else:
        dev = scope.CurrentDeviceScope()
    is_cpu = dev is None or dev.device_type == caffe2_pb2.CPU

    # We support top_k > 1 only on CPU
    if not is_cpu and 'top_k' in kwargs and kwargs['top_k'] > 1:
        pred_host = model.net.CopyGPUToCPU(blob_in[0], blob_in[0] + "_host")
        label_host = model.net.CopyGPUToCPU(blob_in[1], blob_in[1] + "_host")

        # The op below is forced onto CPU, so the caller's device_option must
        # not be forwarded: passing it alongside the explicit keyword would
        # raise "TypeError: got multiple values for keyword argument".
        host_kwargs = {
            key: value for key, value in kwargs.items()
            if key != 'device_option'
        }

        # Now use the Host version of the accuracy op
        model.net.Accuracy(
            [pred_host, label_host],
            blob_out,
            device_option=core.DeviceOption(caffe2_pb2.CPU, 0),
            **host_kwargs
        )
    else:
        # NOTE(review): kwargs (including top_k and device_option) are not
        # forwarded on this path, so the op is created without them --
        # confirm this is intended before relying on top_k here.
        model.net.Accuracy(blob_in, blob_out)
def add_weight_decay(model, weight_decay):
    """Add a weight-decay term to the gradient of every weight in the model.

    This is a form of L2 regularization. Nothing is added when
    ``weight_decay`` is less than or equal to zero.

    Two shape-``[1]`` constant blobs, ``wd`` (holding ``weight_decay``) and
    ``ONE`` (holding 1.0), are created in ``model.param_init_net``. Then,
    for each weight returned by ``_get_weights(model)``, a ``WeightedSum``
    operator is added to ``model.net`` that overwrites the weight's gradient
    (looked up in ``model.param_to_grad``).

    Args:
        model: object exposing ``param_init_net``, ``net``, ``weights`` and
            ``param_to_grad``.
        weight_decay: strength of the regularization.
    """
    if weight_decay <= 0.0:
        return

    init_net = model.param_init_net
    decay_blob = init_net.ConstantFill([], 'wd', shape=[1], value=weight_decay)
    unit_blob = init_net.ConstantFill([], "ONE", shape=[1], value=1.0)

    for weight in _get_weights(model):
        grad_blob = model.param_to_grad[weight]
        # Equivalent to: grad += wd * param
        model.net.WeightedSum(
            [grad_blob, unit_blob, weight, decay_blob], grad_blob
        )