# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow layers with added variables for parameter masking.
Branched from tensorflow/contrib/layers/python/layers/layers.py
"""
# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
from tensorflow.contrib.framework.python.ops import add_arg_scope
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.contrib.model_pruning.python.layers import core_layers as core
from tensorflow.python.framework import ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables as tf_variables
def _model_variable_getter(getter,
name,
shape=None,
dtype=None,
initializer=None,
regularizer=None,
trainable=True,
collections=None,
caching_device=None,
partitioner=None,
rename=None,
use_resource=None,
**_):
"""Getter that uses model_variable for compatibility with core layers."""
short_name = name.split('/')[-1]
if rename and short_name in rename:
name_components = name.split('/')
name_components[-1] = rename[short_name]
name = '/'.join(name_components)
return variables.model_variable(
name,
shape=shape,
dtype=dtype,
initializer=initializer,
regularizer=regularizer,
collections=collections,
trainable=trainable,
caching_device=caching_device,
partitioner=partitioner,
custom_getter=getter,
use_resource=use_resource)
def _build_variable_getter(rename=None):
"""Build a model variable getter that respects scope getter and renames."""
# VariableScope will nest the getters
def layer_variable_getter(getter, *args, **kwargs):
kwargs['rename'] = rename
return _model_variable_getter(getter, *args, **kwargs)
return layer_variable_getter
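# The masked layers below are built on core (tf.layers-style) classes whose
# variables are named `kernel` and `bias`. A getter built with, e.g.,
# _build_variable_getter({'kernel': 'weights', 'bias': 'biases'}) stores those
# variables under the contrib-style names `weights` and `biases` instead.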
def _add_variable_to_collections(variable, collections_set, collections_name):
"""Adds variable (or all its parts) to all collections with that name."""
collections = utils.get_variable_collections(collections_set,
collections_name) or []
variables_list = [variable]
if isinstance(variable, tf_variables.PartitionedVariable):
    variables_list = list(variable)
for collection in collections:
for var in variables_list:
if var not in ops.get_collection(collection):
ops.add_to_collection(collection, var)
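# For example, passing variables_collections={'weights': ['my_weights']} to a
# masked layer below adds its kernel variable to the 'my_weights' collection,
# while its bias variable stays in only the default model-variable
# collections.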
@add_arg_scope
def masked_convolution(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
"""Adds an 2D convolution followed by an optional batch_norm layer.
The layer creates a mask variable on top of the weight variable. The input to
the convolution operation is the elementwise multiplication of the mask
variable and the weigh
It is required that 1 <= N <= 3.
`convolution` creates a variable called `weights`, representing the
convolutional kernel, that is convolved (actually cross-correlated) with the
`inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
provided (such as `batch_norm`), it is then applied. Otherwise, if
`normalizer_fn` is None and a `biases_initializer` is provided then a `biases`
  variable is created and added to the activations. Finally, if
`activation_fn` is not `None`, it is applied to the activations as well.
Performs atrous convolution with input stride/dilation rate equal to `rate`
if a value > 1 for any dimension of `rate` is specified. In this case
`stride` values != 1 are not supported.
Args:
inputs: A Tensor of rank N+2 of shape
`[batch_size] + input_spatial_shape + [in_channels]` if data_format does
not start with "NC" (default), or
`[batch_size, in_channels] + input_spatial_shape` if data_format starts
with "NC".
num_outputs: Integer, the number of output filters.
kernel_size: A sequence of N positive integers specifying the spatial
dimensions of the filters. Can be a single integer to specify the same
value for all spatial dimensions.
stride: A sequence of N positive integers specifying the stride at which to
compute output. Can be a single integer to specify the same value for all
spatial dimensions. Specifying any `stride` value != 1 is incompatible
with specifying any `rate` value != 1.
padding: One of `"VALID"` or `"SAME"`.
data_format: A string or None. Specifies whether the channel dimension of
the `input` and output is the last dimension (default, or if `data_format`
does not start with "NC"), or the second dimension (if `data_format`
starts with "NC"). For N=1, the valid values are "NWC" (default) and
"NCW". For N=2, the valid values are "NHWC" (default) and "NCHW".
For N=3, the valid values are "NDHWC" (default) and "NCDHW".
rate: A sequence of N positive integers specifying the dilation rate to use
for atrous convolution. Can be a single integer to specify the same
value for all spatial dimensions. Specifying any `rate` value != 1 is
incompatible with specifying any `stride` value != 1.
activation_fn: Activation function. The default value is a ReLU function.
Explicitly set it to None to skip it and maintain a linear activation.
normalizer_fn: Normalization function to use instead of `biases`. If
`normalizer_fn` is provided then `biases_initializer` and
`biases_regularizer` are ignored and `biases` are not created nor added.
      Default is None, in which case no normalizer function is applied.
normalizer_params: Normalization function parameters.
weights_initializer: An initializer for the weights.
weights_regularizer: Optional regularizer for the weights.
biases_initializer: An initializer for the biases. If None skip biases.
biases_regularizer: Optional regularizer for the biases.
reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, `scope` must be given.
variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
outputs_collections: Collection to add the outputs.
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
scope: Optional scope for `variable_scope`.
Returns:
A tensor representing the output of the operation.
Raises:
ValueError: If `data_format` is invalid.
    ValueError: If both `rate` and `stride` are not uniformly 1.
    ValueError: If `inputs` does not have rank 4.
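  Example:
    A minimal sketch (illustrative names; assumes `images` is a rank-4 NHWC
    float tensor):
      net = masked_conv2d(images, num_outputs=32, kernel_size=[3, 3],
                          scope='conv1')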
"""
if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
raise ValueError('Invalid data_format: %r' % (data_format,))
layer_variable_getter = _build_variable_getter({
'bias': 'biases',
'kernel': 'weights'
})
with variable_scope.variable_scope(
scope, 'Conv', [inputs], reuse=reuse,
custom_getter=layer_variable_getter) as sc:
inputs = ops.convert_to_tensor(inputs)
input_rank = inputs.get_shape().ndims
    # Only 2D (rank-4) inputs are supported; masked 1D/3D convolutions are not
    # implemented.
    if input_rank != 4:
      raise ValueError('Sparse Convolution not supported for input with rank',
                       input_rank)
    layer_class = core.MaskedConv2D
if data_format is None or data_format == 'NHWC':
df = 'channels_last'
elif data_format == 'NCHW':
df = 'channels_first'
else:
raise ValueError('Unsupported data format', data_format)
layer = layer_class(
filters=num_outputs,
kernel_size=kernel_size,
strides=stride,
padding=padding,
data_format=df,
dilation_rate=rate,
activation=None,
use_bias=not normalizer_fn and biases_initializer,
kernel_initializer=weights_initializer,
bias_initializer=biases_initializer,
kernel_regularizer=weights_regularizer,
bias_regularizer=biases_regularizer,
activity_regularizer=None,
trainable=trainable,
name=sc.name,
dtype=inputs.dtype.base_dtype,
_scope=sc,
_reuse=reuse)
outputs = layer.apply(inputs)
# Add variables to collections.
_add_variable_to_collections(layer.kernel, variables_collections, 'weights')
if layer.use_bias:
_add_variable_to_collections(layer.bias, variables_collections, 'biases')
if normalizer_fn is not None:
normalizer_params = normalizer_params or {}
outputs = normalizer_fn(outputs, **normalizer_params)
if activation_fn is not None:
outputs = activation_fn(outputs)
return utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, outputs)
masked_conv2d = masked_convolution
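# `masked_conv2d` is the 2D alias of `masked_convolution`. Since only rank-4
# inputs are supported, the two names are interchangeable.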
@add_arg_scope
def masked_fully_connected(
inputs,
num_outputs,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
"""Adds a sparse fully connected layer. The weight matrix is masked.
`fully_connected` creates a variable called `weights`, representing a fully
connected weight matrix, which is multiplied by the `inputs` to produce a
`Tensor` of hidden units. If a `normalizer_fn` is provided (such as
`batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
None and a `biases_initializer` is provided then a `biases` variable would be
  created and added to the hidden units. Finally, if `activation_fn` is not
  `None`, it is applied to the hidden units as well.
it is applied to the hidden units as well.
  Note: if `inputs` has a rank greater than 2, then `inputs` is flattened
prior to the initial matrix multiply by `weights`.
Args:
inputs: A tensor of at least rank 2 and static value for the last dimension;
      e.g. `[batch_size, depth]`, `[None, None, None, channels]`.
num_outputs: Integer or long, the number of output units in the layer.
activation_fn: Activation function. The default value is a ReLU function.
Explicitly set it to None to skip it and maintain a linear activation.
normalizer_fn: Normalization function to use instead of `biases`. If
`normalizer_fn` is provided then `biases_initializer` and
`biases_regularizer` are ignored and `biases` are not created nor added.
      Default is None, in which case no normalizer function is applied.
normalizer_params: Normalization function parameters.
weights_initializer: An initializer for the weights.
weights_regularizer: Optional regularizer for the weights.
biases_initializer: An initializer for the biases. If None skip biases.
biases_regularizer: Optional regularizer for the biases.
reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, `scope` must be given.
variables_collections: Optional list of collections for all the variables or
a dictionary containing a different list of collections per variable.
outputs_collections: Collection to add the outputs.
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
scope: Optional scope for variable_scope.
Returns:
The tensor variable representing the result of the series of operations.
Raises:
    ValueError: If `inputs` has rank less than 2 or if its last dimension is
      not set.
    ValueError: If `num_outputs` is not an integer.
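  Example:
    A minimal sketch (illustrative names; assumes `net` is a rank-2 float
    tensor):
      net = masked_fully_connected(net, 128, scope='fc1')
      logits = masked_fully_connected(net, 10, activation_fn=None,
                                      scope='logits')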
"""
if not isinstance(num_outputs, six.integer_types):
raise ValueError('num_outputs should be int or long, got %s.' %
(num_outputs,))
layer_variable_getter = _build_variable_getter({
'bias': 'biases',
'kernel': 'weights'
})
with variable_scope.variable_scope(
scope,
'fully_connected', [inputs],
reuse=reuse,
custom_getter=layer_variable_getter) as sc:
inputs = ops.convert_to_tensor(inputs)
layer = core.MaskedFullyConnected(
units=num_outputs,
activation=None,
use_bias=not normalizer_fn and biases_initializer,
kernel_initializer=weights_initializer,
bias_initializer=biases_initializer,
kernel_regularizer=weights_regularizer,
bias_regularizer=biases_regularizer,
activity_regularizer=None,
trainable=trainable,
name=sc.name,
dtype=inputs.dtype.base_dtype,
_scope=sc,
_reuse=reuse)
outputs = layer.apply(inputs)
# Add variables to collections.
_add_variable_to_collections(layer.kernel, variables_collections, 'weights')
if layer.bias is not None:
_add_variable_to_collections(layer.bias, variables_collections, 'biases')
# Apply normalizer function / layer.
if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
outputs = normalizer_fn(outputs, **normalizer_params)
if activation_fn is not None:
outputs = activation_fn(outputs)
return utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, outputs)