## @package control_ops_grad
# Module caffe2.python.control_ops_grad
from caffe2.proto import caffe2_pb2
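# Note: these gradient generators are hooked into the gradient construction
# machinery in caffe2.python.core for the control-flow ops (Do, If, While);
# they are typically invoked as part of gradient construction (e.g. during
# Net.AddGradientOperators) rather than called directly.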
def gen_do_gradient(op, g_output):
"""
Generates gradient Do operator, given forward Do op and a list
of gradient blobs corresponding to forward op's outputs
Returns a gradient op and a list of blobs corresponding to input gradients
"""
from caffe2.python.core import BlobReference
subnet, outer_to_inner_map, inner_to_outer_map, workspace_blob_name = \
_do_op_sanity_check_and_process(op)
assert len(g_output) == len(op.output), \
"Different number of gradient blobs and Do op outputs"
grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
g_output = deduped_g_output
    # From the outer net point of view:
    # Do is an operator that has some number of inputs and outputs;
    # we have to generate a gradient operator that writes into the
    # corresponding input gradient blobs and has access to the inputs,
    # outputs and gradient output blobs.
    # From the inner net point of view:
    # Do is an operator with a subnet and blob bindings;
    # we need to forward Do's output blob gradients into the inner workspace,
    # use them to run backward pass generation, and forward Do's input blob
    # gradients back into the outer workspace.
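    # The last input and the last output of a Do op hold the workspace
    # pointer blob; it carries no gradient, so it is stripped before building
    # the outer-to-inner name mapping below.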
op_output = [str(o) for o in op.output]
op_output = op_output[:-1] # remove workspace pointer blob
op_input = [str(i) for i in op.input]
op_input = op_input[:-1] # remove workspace pointer blob
ordered_inner_output_blob_names = [outer_to_inner_map[o] for o in op_output]
backward_pass_initial_grad_map = {}
initial_grad_map = {}
for inner_output_name, outer_grad_output_name in \
zip(ordered_inner_output_blob_names, g_output):
# link inner_output_name to corresponding inner_grad_output_name for
# backward pass generation;
if outer_grad_output_name:
inner_grad_output_name = inner_output_name + "/_DO_OPERATOR_INNER_GRAD_"
backward_pass_initial_grad_map[BlobReference(inner_output_name)] = \
BlobReference(inner_grad_output_name)
initial_grad_map[inner_grad_output_name] = str(outer_grad_output_name)
assert len(initial_grad_map) > 0, "Empty initial gradient map for Do op"
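    # Run gradient generation over the inner subnet, seeded with the inner
    # gradient blob names linked above; this yields the subnet's gradient
    # operators and a map from inner forward blob names to the names of their
    # computed gradients.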
inner_grad_ops, inner_grad_names_map = _gen_subgradient_pass(
subnet, backward_pass_initial_grad_map)
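    # If the inner net produced no gradient operators, the Do op contributes
    # nothing to the backward pass and none of its inputs receive gradients.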
if len(inner_grad_ops) == 0:
return [], []
grad_copy_ops = []
g_input = []
new_op_outputs = []
new_blob_bindings = {}
for outer_input_name in op_input:
inner_input_name = outer_to_inner_map[outer_input_name]
if inner_input_name in inner_grad_names_map:
inner_grad_input_name = inner_grad_names_map[inner_input_name]
outer_grad_input_name = outer_input_name + "_grad"
# It is possible that inner_grad_input_name will need to be
# linked to another outer blob. For example:
#
# // y - param initialized in init_net
# x = ...
# z = ...
# with ops.IfNet(...):
# ops.Add([z, x], y) # inner Do block
# loss = f(..., y, ...)
#
            # In this case x, y and z are external to the inner Do block;
            # the inputs of the Do block are z and x, and the output is y.
            # When computing the gradient of input x given the gradient
            # of output y, it's easy to see that they are equal.
# During the generation of gradient Do operator, we link
# external gradient y (y_grad) to the internal name
# (y/_DO_OPERATOR_INNER_GRAD_) and generate the backward pass
# for the internal Do net. As a result we get gradient operators
# for the gradient Do and gradient map that maps internal Do
# blobs to their computed gradients.
            # In this example, the gradient map may have blob x linked to
            # gradient blob y/_DO_OPERATOR_INNER_GRAD_.
            # We should export the gradient for x outside of Do, so
            # we add a blob mapping from the inner gradient blob
            # (y/_DO_OPERATOR_INNER_GRAD_) to a new outer name (x_grad).
#
            # (Note: since we use transparent blob mapping between the outer
            # and the inner (Do's) workspace, these operations do not involve
            # copying; they merely expose blobs from the outer workspace
            # inside the Do operator's workspace under (possibly) different
            # names.)
            #
            # At the same time, we need to add a blob mapping from the inner
            # name y/_DO_OPERATOR_INNER_GRAD_ to the outer blob y_grad.
            # Hence, in this case, we cannot use the existing blob mapping
            # scheme, which requires a bijection between a subset of inner
            # blob names and the set of all (Do's input and output) outer
            # blob names.
# TODO(iliacher): Remove unnecessary blob copying
new_inner_grad_input_name = \
inner_input_name + "/_DO_OPERATOR_INNER_GRAD_COPY_"
grad_copy_ops.append(_prepare_blob_copy_op(
inner_grad_input_name, new_inner_grad_input_name))
new_blob_bindings[new_inner_grad_input_name] = outer_grad_input_name
new_op_outputs.append(outer_grad_input_name)
g_input.append(outer_grad_input_name)
else:
g_input.append(None)
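    # Collect the outer blobs the gradient Do op needs as inputs: for every
    # blob read by an inner gradient op (and not produced earlier inside the
    # gradient subnet), map it to the corresponding outer blob or outer
    # gradient blob; blobs with no outer counterpart are local to the forward
    # subnet and their values will be taken from the saved forward workspace.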
new_op_inputs = []
overwritten_names = set()
saved_local_blob_names = set()
for grad_op in inner_grad_ops:
grad_op_input = [str(i) for i in grad_op.input]
grad_op_output = [str(o) for o in grad_op.output]
for grad_op_input_name in grad_op_input:
if grad_op_input_name in overwritten_names:
continue
# check if this is an external blob
outer_name = inner_to_outer_map.get(grad_op_input_name, None)
if not outer_name:
# check if this is an external gradient blob
outer_name = initial_grad_map.get(grad_op_input_name, None)
if outer_name:
outer_name = str(outer_name)
if outer_name not in new_op_inputs:
new_op_inputs.append(outer_name)
new_blob_bindings[grad_op_input_name] = outer_name
else:
                # this is a local blob, we'll get its value from
                # a saved forward op workspace
saved_local_blob_names.add(grad_op_input_name)
overwritten_names.update(grad_op_output)
# add inner gradient copy ops
inner_grad_ops += grad_copy_ops
gradient_do_def = _prepare_gradient_do_op(
fwd_op=op,
fwd_net=subnet,
grad_ops=inner_grad_ops,
inputs=new_op_inputs,
outputs=new_op_outputs,
blob_bindings=new_blob_bindings,
saved_fwd_blobs=saved_local_blob_names,
workspace_blob_name=workspace_blob_name)
grad_ops.append(gradient_do_def)
_do_op_sanity_check_and_process(gradient_do_def)
return grad_ops, g_input
def dedupe_g_output(op, g_output):
    # When generating a gradient op it's possible to receive the same gradient
    # blob corresponding to different forward op output blobs. The Do operator
    # requires a bijection between inner and outer names, so make sure the
    # gradient blobs are deduplicated first.
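    # For example (hypothetical blob names): if op.output is ['x', 'y'] and
    # g_output is ['g', 'g'], the duplicate 'g' is copied into a fresh blob
    # 'y_g_DEDUP', so that every forward output maps to a distinct gradient
    # blob.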
grad_ops = []
deduped_g_output = []
init_grad_map = {}
for output_name, grad_name in zip(op.output, g_output):
if not grad_name:
deduped_g_output.append(grad_name)
continue
if output_name in init_grad_map:
deduped_g_output.append(init_grad_map[output_name])
else:
if grad_name not in init_grad_map.values():
init_grad_map[output_name] = grad_name
deduped_g_output.append(grad_name)
else:
deduped_grad_name = output_name + "_" + grad_name + "_DEDUP"
assert deduped_grad_name not in init_grad_map.values()
grad_copy_op = caffe2_pb2.OperatorDef()
grad_copy_op.type = "Copy"
grad_copy_op.input.extend([grad_name])
grad_copy_op.output.extend([deduped_grad_name])
grad_ops.append(grad_copy_op)
deduped_g_output.append(deduped_grad_name)
init_grad_map[output_name] = deduped_grad_name
return grad_ops, deduped_g_output
def gen_while_gradient(op, g_output):
"""
    Generates a gradient While operator
"""
from caffe2.python.core import BlobReference
assert op.type == "While", "Expected While op"
assert len(op.input) > 0, "Expected at least one input in While op"
assert len(op.output) == len(g_output), \
"Different number of gradient blobs and While op outputs"
grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
g_output = deduped_g_output
init_grad_map = {}
op_output = [str(o) for o in op.output]
for output_name, grad_output_name in zip(op_output, g_output):
if grad_output_name:
init_grad_map[BlobReference(output_name)] = \
BlobReference(grad_output_name)
assert len(init_grad_map) > 0, "Empty initial gradient map for While op"
loop_net = _get_net_argument(op, "loop_net")
assert loop_net, "Expected loop subnet in While op"
assert len(loop_net.op) == 1 and loop_net.op[0].type == "Do", \
"Gradient While op requires single Do op as a loop body"
do_op = loop_net.op[0]
do_args = _get_do_arguments(do_op)
assert "reuse_workspace" not in do_args or not do_args["reuse_workspace"], \
"Gradient While op requires Do loop body op without reuse_workspace set"
assert len(do_op.output) > 0, "Expected Do op with at least one output"
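    # The last output of the forward Do op is its workspace pointer blob; for
    # the gradient While op it identifies the stack of saved forward
    # workspaces consumed by the backward iterations.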
workspace_blob = do_op.output[-1]
loop_grad_net, loop_grad_map, loop_input_names, loop_output_names = \
_gen_subnet_gradient(loop_net, init_grad_map)
assert loop_grad_net, "Failed to get gradient net for loop body in While op"
grad_ops += _prepare_gradient_while_ops(
fwd_op=op,
input_names=loop_input_names,
output_names=loop_output_names,
loop_grad_net=loop_grad_net,
workspace_blob=workspace_blob,
init_grad_map=init_grad_map,
loop_grad_map=loop_grad_map)
op_input = [str(i) for i in op.input]
g_input = [loop_grad_map.get(i, None) for i in op_input]
return grad_ops, g_input
# Constructs a gradient While op; arguments:
# fwd_op - forward While op
# input_names - input blob names for a gradient op
# output_names - output blob names for a gradient op
# loop_grad_net - gradient loop body net
# workspace_blob - blob that holds forward workspaces stack
# init_grad_map - initial gradient to forward blob map
# loop_grad_map - gradient blob map for loop's body
def _prepare_gradient_while_ops(
fwd_op, input_names, output_names, loop_grad_net, workspace_blob,
init_grad_map, loop_grad_map):
gradient_while_def = caffe2_pb2.OperatorDef()
gradient_while_def.CopyFrom(fwd_op)
if gradient_while_def.name:
gradient_while_def.name += "_grad"
loop_net_arg = caffe2_pb2.Argument()
loop_net_arg.name = "loop_net"
loop_net_arg.n.CopyFrom(loop_grad_net)
cond_net_arg = caffe2_pb2.Argument()
cond_net_arg.name = "cond_net"
from caffe2.python.core import Net, BlobReference
    # Construct the condition net - check that there are still forward
    # workspaces left, using the HasScope op
cond_net = Net('gradient_loop_cond_net')
cond_init_net = Net('gradient_loop_cond_net_init')
cond_blob = cond_net.NextScopedBlob(cond_net.Name() + '/cond')
cond_init_net.HasScope(workspace_blob, cond_blob)
cond_net.HasScope(workspace_blob, cond_blob)
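    # If the loop body accumulates a gradient under a different (inner) name
    # than the externally visible gradient blob, keep the two in sync:
    # cond_init_net seeds the loop-carried gradient blob from the initial
    # gradient before the loop starts, and cond_net copies the updated value
    # back out each time the loop condition is evaluated.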
for blob, init_grad_blob in init_grad_map.items():
blob_name = str(blob)
init_grad_blob_name = str(init_grad_blob)
if blob_name in loop_grad_map and \
loop_grad_map[blob_name] != init_grad_blob_name:
cond_net.Copy(
BlobReference(loop_grad_map[blob_name]), init_grad_blob)
cond_init_net.Copy(
init_grad_blob, BlobReference(loop_grad_map[blob_name]))
cond_net_arg.n.CopyFrom(cond_net.Proto())
del gradient_while_def.arg[:]
gradient_while_def.arg.extend([loop_net_arg, cond_net_arg])
del gradient_while_def.control_input[:]
del gradient_while_def.input[:]
gradient_while_def.input.extend(
[str(cond_blob).encode('utf-8')] + list(input_names))
del gradient_while_def.output[:]
gradient_while_def.output.extend(output_names)
gradient_while_def.is_gradient_op = True
return [o for o in cond_init_net.Proto().op] + [gradient_while_def]
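# Parses a Do op's arguments (net, reuse_workspace, inner_blobs,
# outer_blobs_idx) into a plain dict for convenient access.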
def _get_do_arguments(do_op):
assert do_op.type == "Do", "Expected Do op"
args = {}
for arg in do_op.arg:
if not arg.name:
continue
if arg.name == "net":
assert arg.n, "Expected non empty net argument"
args["net"] = arg.n
elif arg.name == "reuse_workspace":
assert arg.i, "Expected non empty reuse_workspace argument"
args["reuse_workspace"] = bool(arg.i)
elif arg.name == "inner_blobs":
assert arg.strings, "Expected non empty inner_blobs argument"
args["inner_blobs"] = arg.strings
elif arg.name == "outer_blobs_idx":
assert arg.ints, "Expected non empty outer_blobs_idx argument"
args["outer_blobs_idx"] = arg.ints
return args
def gen_if_gradient(op, g_output):
"""
Generates gradient If operator, given forward If op and a list
of gradient blobs corresponding to forward op's outputs
Returns a gradient op and a list of blobs corresponding to input gradients
"""
from caffe2.python.core import BlobReference
assert op.type == "If", "Expected If op"
# first input is the condition blob
assert len(op.input) > 0, "Expected at least one input in If op"
assert len(op.output) == len(g_output), \
"Different number of gradient blobs and If op outputs"
grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
g_output = deduped_g_output
init_grad_map = {} # map from if's output blob to output gradient blob