import copy
import logging
import os
import re
from tensorboard.compat.proto.graph_pb2 import GraphDef
from tensorboard.compat.proto.node_def_pb2 import NodeDef
from tensorboard.compat.proto.tensor_shape_pb2 import TensorShapeProto
from builtins import bytes
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
from typing import Set, Dict, Tuple, List
def _make_unique_name(seen: Set[str], name: str, min_version: int = 0):
'''
Make the name unique by appending a unique number to the name. Used for SSA.
Args:
seen (set): Set of names that have already been used (with respect to
some context).
name (string): The name to make unique
min_version (number): Starting index. Is incremented continually until
it can make the resulting name unique relative to 'seen'.
Returns:
x (string): A version of name that is not in seen.
'''
assert name is not None
i = min_version
x = '%s_%d' % (name, i) if i else name
while x in seen:
i += 1
x = '%s_%d' % (name, i)
seen.add(x)
return x
def _rename_tensorflow_style(shapes, blob_name_tracker, ops):
    '''
    Rewrite common Caffe2 name fragments into their tensorflow-style
    equivalents (e.g. a trailing '_w' becomes '/weight').

    NOTE: The fragments recognised on both the Caffe2 and the Tensorflow side
    are hardcoded here; if either side changes its conventions, this table
    has to be updated as well.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators

    Returns:
        None. The renaming is delegated to _rename_all(), which is handed
        shapes, blob_name_tracker and ops.
    '''
    # (pattern, replacement) pairs, applied strictly in this order. For each
    # fragment the end-of-name form is handled before the mid-name form.
    substitutions = (
        (re.compile(r"(_w)$"), '/weight'),
        (re.compile(r"(_w_)"), '/weight_'),
        (re.compile(r"(_bn)$"), '/batchnorm'),
        (re.compile(r"(_bn_)"), '/batchnorm_'),
        (re.compile(r"(_b)$"), '/bias'),
        (re.compile(r"(_b_)"), '/bias_'),
        (re.compile(r"(_s)$"), '/scale'),
        (re.compile(r"(_s_)"), '/scale_'),
        (re.compile(r"(_sum)$"), '/sum'),
        (re.compile(r"(_sum_)"), '/sum_'),
        (re.compile(r"(_branch)"), '/branch'),
    )

    def f(name):
        # Feed the output of each substitution into the next one.
        result = name
        for pattern, replacement in substitutions:
            result = pattern.sub(replacement, result)
        return result

    _rename_all(shapes, blob_name_tracker, ops, f)
def _convert_to_ssa(shapes, blob_name_tracker, ops):
    '''
    Convert an operator graph to SSA (i.e. out-of-place).
    i.e. blobs will be renamed so that each blob is produced only once.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context). May be falsy (None or empty), in which case it is
            left untouched.
        ops: List of Caffe2 operators

    Returns:
        None. Modifies shapes, blob_name_tracker and ops in-place: operator
        inputs/outputs are replaced by their versioned names, and both
        dictionaries are re-keyed by those names.

    Raises:
        AssertionError: If the SSA entries built by core.IR do not line up
            with 'ops', or if a blob name has no recorded version.
    '''
    ir = core.IR(ops)
    seen: Set[str] = set()
    # Maps (original blob name, version) -> the unique name chosen for it.
    versioned: Dict[Tuple[str, int], str] = {}
    new_shapes = {}
    new_blob_name_tracker = {}

    def ssa_name(name: str, versions: Dict[str, int]) -> str:
        assert name in versions
        version = versions[name]
        key = (name, version)
        if key in versioned:
            return versioned[key]
        # Always setting name2 = `{name}_{version}` would work, but we also try
        # to avoid a trailing `_0`, so we have to be careful not to introduce
        # name collisions, such as (foo_1, 0) = foo_1 = (foo, 1).
        # Note: operator names (if any) will be handled later.
        new_name = _make_unique_name(seen, name, min_version=version)
        versioned[key] = new_name
        # Transfer shape.
        if name in shapes:
            new_shapes[new_name] = shapes[name]
        # Transfer the tracker entry, if a tracker was supplied at all.
        if blob_name_tracker and name in blob_name_tracker:
            new_blob_name_tracker[new_name] = blob_name_tracker[name]
        return new_name

    for (op, ssa) in zip(ops, ir.ssa):
        assert op is ssa.op
        # Snapshot the current names before the repeated fields are emptied.
        inputs = list(op.input)
        outputs = list(op.output)
        del op.input[:]
        del op.output[:]
        # Inputs are resolved before outputs, against their respective
        # version tables.
        op.input.extend(ssa_name(name, ssa.in_versions) for name in inputs)
        op.output.extend(ssa_name(name, ssa.out_versions) for name in outputs)

    # Replace the contents of the caller's dictionaries rather than rebinding,
    # so the caller observes the change.
    shapes.clear()
    shapes.update(new_shapes)
    if blob_name_tracker:
        blob_name_tracker.clear()
        blob_name_tracker.update(new_blob_name_tracker)
def _get_blob_names(ops):
'''
Get all the operator input and output blobs and perform dedup on their names.
Args:
ops: List of Caffe2 operators to extract inputs and outputs from
Returns:
set containing distinct inputs and outputs from 'ops'
'''
names = set()
for op in ops:
names.update(op.input)
names.update(op.output)
return {name: name for name in names}
def _remap_keys(old_dict, rename_fn):
'''
Rename keys of 'old_dict' according to 'rename_fn'.
Args:
old_dict: Dictionary (i.e. containing blob_name -> blob_name
relationships.)
remap_fn: Function string -> string for renaming.
Returns:
None. Modifies old_dict in-place.
'''
new_dict = {rename_fn(key): value for key,
value in old_dict.items()}
old_dict.clear()
old_dict.update(new_dict)
def _rename_all(shapes, blob_name_tracker, ops, rename_fn):
    '''
    Rename all the names in the operators.

    Blob names (operator inputs/outputs and the keys of 'shapes' and
    'blob_name_tracker') are renamed first; operator names are then renamed
    in a second pass that starts from a fresh collision state.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context). Skipped when falsy (None or empty).
        ops: List of Caffe2 operators
        rename_fn: Function string -> string that specifies how to rename

    Returns:
        None. Modifies shapes, blob_name_tracker and ops in-place using the
        specified 'rename_fn'.
    '''
    seen: Set[str] = set()
    # Cache of original name -> new name, so that a given name is always
    # mapped to the same result within one pass.
    renamed: Dict[str, str] = {}

    def g(name):
        """ Collision-free version of rename_fn.
        """
        if name is None:
            return None
        if name in renamed:
            return renamed[name]
        new_name = _make_unique_name(seen, rename_fn(name))
        renamed[name] = new_name
        return new_name

    for op in ops:
        # Snapshot the current names before the repeated fields are emptied.
        inputs = list(op.input)
        outputs = list(op.output)
        del op.input[:]
        del op.output[:]
        op.input.extend(g(name) for name in inputs)
        op.output.extend(g(name) for name in outputs)

    _remap_keys(shapes, g)
    if blob_name_tracker:
        _remap_keys(blob_name_tracker, g)

    # Rename all operator names (if any) independently so that the
    # unique-fication happens only once in _fill_missing_operator_names().
    seen.clear()
    renamed.clear()
    for op in ops:
        op.name = g(op.name)
def _add_gradient_scope(shapes, blob_name_tracker, ops):
    """
    Put every operator or blob whose name contains "_grad" under a
    "GRADIENTS/" scope.

    Note: breaks graph execution since the blob -> gradient mapping is
    hardcoded.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators

    Returns:
        None. Modifies shapes, blob_name_tracker and ops in-place by renaming.
    """
    def f(name):
        # Names without the gradient marker pass through unchanged.
        return 'GRADIENTS/{}'.format(name) if '_grad' in name else name

    _rename_all(shapes, blob_name_tracker, ops, f)
def _replace_colons(shapes, blob_name_tracker, ops, repl):
    '''
    `:i` has a special meaning in Tensorflow. This function substitutes 'repl'
    for every colon in a name to avoid any possible conflicts.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators
        repl: String representing the text to replace ':' with. Usually this is
            '$'.

    Returns:
        None. The renaming is carried out by _rename_all(), which is handed
        shapes, blob_name_tracker and ops.
    '''
    _rename_all(
        shapes,
        blob_name_tracker,
        ops,
        lambda name: name.replace(':', repl),
    )
def _fill_missing_operator_names(ops):
    '''
    Make sure every operator carries a unique name.

    We expect C2 operators to be generally unnamed. An unnamed operator gets
    a scope (inferred from its outputs, or from its inputs when it has no
    outputs) followed by its type. Duplicates will be postfixed by an index.

    Args:
        ops: List of Caffe2 operators to assign names to.

    Returns:
        None: Modifies 'ops' in-place.
    '''
    # Make sure operator names don't collide with blobs: reserve every blob
    # name up front.
    taken = set()
    for op in ops:
        taken.update(op.input, op.output)

    def base_name(op):
        # An existing name is kept as the starting point.
        if op.name:
            return op.name
        blobs = op.output or op.input
        if not blobs:
            # Nothing to infer a scope from.
            return op.type
        directories = [os.path.dirname(blob) for blob in blobs]
        scope = os.path.commonprefix(directories)
        return os.path.join(scope, op.type)

    for op in ops:
        name = base_name(op)
        assert(name)
        op.name = _make_unique_name(taken, name)
def _tf_device(device_option):
'''
Handle the devices.
Args:
device_option (caffe2_pb2.DeviceOption): DeviceOption protobuf,
associated to an operator, that contains information such as
device_type (optional), cuda_gpu_id (optional), node_name (optional,
tells which node the operator should execute on). See caffe2.proto
in caffe2/proto for the full list.
Returns:
Formatted string representing device information contained in
device_option.
'''
if not device_option.HasField("device_type"):
return ""
if device_option.device_type == caffe2_pb2.CPU or device_option.device_type == caffe2_pb2.MKLDNN:
return "/cpu:*"
if device_option.device_type == caffe2_pb2.CUDA:
return "/gpu:{}".format(device_option.device_id)
raise Exception("Unhandled device", device_option)
def _add_tf_shape(attr_dict, ints):
    '''
    Build a TensorShapeProto with one dimension per entry of 'ints' and append
    it to the '_output_shapes' attribute.

    Args:
        attr_dict: Dictionary to update (usually attributes of a Node)
        ints: List of integers representing dimensions of some object.

    Returns:
        None. Modifies attr_dict in-place.
    '''
    dims = [TensorShapeProto.Dim(size=extent) for extent in ints]
    shape_proto = TensorShapeProto(dim=dims)
    attr_dict['_output_shapes'].list.shape.extend([shape_proto])
def _set_tf_attr(attr_dict, arg):
'''
Add attributes to a node. Key is the arg.name, and values can be shape,
floats, strings, ints or an empty list.
Args:
attr_dict: Dictionary to update (usually attributes of a Node)
Loading ...