from collections import OrderedDict
import functools
from numbers import Number
from typing import Any, Dict, Optional, Tuple, Union
import warnings
import weakref
import torch
import torch._C as _C
from torch._namedtensor_internals import (
update_names, check_serializing_named_tensor, resolve_ellipsis,
unzip_namedshape, single_ellipsis_index, is_ellipsis)
from torch.overrides import (
has_torch_function, has_torch_function_unary, has_torch_function_variadic,
handle_torch_function)
import torch.utils.hooks as hooks
def _wrap_type_error_to_not_implemented(f):
    assigned = functools.WRAPPER_ASSIGNMENTS
@functools.wraps(f, assigned=assigned)
def wrapped(*args, **kwargs):
if has_torch_function(args):
return handle_torch_function(wrapped, args, *args, **kwargs)
try:
return f(*args, **kwargs)
except TypeError:
return NotImplemented
return wrapped
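# Illustrative sketch (the method below is hypothetical, not defined at this
# point in the module): wrapping a binary method this way turns a TypeError
# raised for an unsupported operand into NotImplemented, so Python falls back
# to the other operand's reflected method:
#
#     @_wrap_type_error_to_not_implemented
#     def __rsub__(self, other):
#         return torch.rsub(self, other)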
def _rebuild_from_type(func, type, args, dict):
if type is Tensor:
return func(*args)
ret = func(*args).as_subclass(type)
ret.__dict__ = dict
return ret
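# Illustrative sketch: when unpickling a Tensor subclass, the reduce value
# produced by Tensor.__reduce_ex__ makes pickle call, roughly,
#
#     _rebuild_from_type(torch._utils._rebuild_tensor_v2, MyTensor, args, state)
#
# (MyTensor being a hypothetical subclass): the base tensor is rebuilt,
# re-wrapped via as_subclass, and its instance __dict__ is restored.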
# NB: If you subclass Tensor, and want to share the subclassed class
# across processes, you must also update torch/multiprocessing/reductions.py
# to define a ForkingPickler serialization mode for the class.
#
# NB: If you add a new method to Tensor, you must update
# torch/__init__.py.in to add a type annotation for your method;
# otherwise, it will not show up in autocomplete.
class Tensor(torch._C._TensorBase):
def __deepcopy__(self, memo):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__deepcopy__, (self,), self, memo)
if not self.is_leaf:
raise RuntimeError("Only Tensors created explicitly by the user "
"(graph leaves) support the deepcopy protocol at the moment")
if id(self) in memo:
return memo[id(self)]
with torch.no_grad():
if self.is_sparse or self.device.type == 'xla':
new_tensor = self.clone()
else:
new_storage = self.storage().__deepcopy__(memo)
if self.is_quantized:
                    # quantizer_params takes a different form depending on the qscheme
                    quantizer_params: Union[Tuple[torch.qscheme, float, int], Tuple[torch.qscheme, Tensor, Tensor, int]]
if self.qscheme() == torch.per_tensor_affine:
quantizer_params = self.qscheme(), self.q_scale(), self.q_zero_point()
elif self.qscheme() in (torch.per_channel_affine, torch.per_channel_affine_float_qparams):
quantizer_params = self.qscheme(), \
self.q_per_channel_scales(), \
self.q_per_channel_zero_points(), \
self.q_per_channel_axis()
else:
raise RuntimeError(f"Unsupported qscheme {self.qscheme()} in deepcopy")
new_tensor = torch._utils._rebuild_qtensor(
new_storage,
self.storage_offset(),
self.size(),
self.stride(),
quantizer_params,
self.requires_grad,
self._backward_hooks)
else:
new_tensor = self.new()
new_tensor.set_(new_storage, self.storage_offset(), self.size(), self.stride())
new_tensor.requires_grad = self.requires_grad
if self.grad is not None:
new_tensor.grad = self.grad.__deepcopy__(memo)
memo[id(self)] = new_tensor
return new_tensor
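    # A minimal usage sketch of the protocol above: deepcopying a leaf tensor
    # copies its storage and preserves requires_grad.
    #
    #     import copy
    #     t = torch.ones(3, requires_grad=True)
    #     t2 = copy.deepcopy(t)
    #     assert t2.requires_grad and t2.data_ptr() != t.data_ptr()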
    def __reduce_ex__(self, proto):
        if type(self) is Tensor:
            return self._reduce_ex_internal(proto)
        # For subclasses, give any __torch_function__ override a chance to
        # customize pickling before falling back to the default path.
        if has_torch_function_unary(self):
            return handle_torch_function(Tensor.__reduce_ex__, (self,), self, proto)
        func, args = self._reduce_ex_internal(proto)
        return (_rebuild_from_type, (func, type(self), args, self.__dict__))
def _reduce_ex_internal(self, proto):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__reduce_ex__, (self,), self, proto)
check_serializing_named_tensor(self)
# See Note [Don't serialize hooks]
torch.utils.hooks.warn_if_has_hooks(self)
backward_hooks: Dict[Any, Any] = OrderedDict()
        # Note: a numpy array is chosen as the rebuild component for XLA tensors.
        # We considered a few options:
        # 1. A CPU tensor can't be used here.
        #    Otherwise in torch.load the CPU storage would be reconstructed with
        #    randomly initialized data, moved onto the XLA device, and then updated
        #    to the serialized content. This works for CPU/CUDA but not XLA: an XLA
        #    tensor is disconnected from its storage, so it would never see the update.
        # 2. A Python list is not a good fit for performance reasons:
        #    `tolist()` converts every element of the tensor into a Python object
        #    and serializes them one by one.
if self.device.type == 'xla':
arg_xla = (self.cpu().numpy(),
self.dtype,
str(self.device),
self.requires_grad)
return (torch._utils._rebuild_xla_tensor, arg_xla)
if self.is_quantized:
            # quantizer_params takes a different form depending on the qscheme
            quantizer_params: Union[Tuple[torch.qscheme, float, int], Tuple[torch.qscheme, Tensor, Tensor, int]]
if self.qscheme() == torch.per_tensor_affine:
quantizer_params = (torch.per_tensor_affine,
self.q_scale(),
self.q_zero_point())
            elif self.qscheme() in (torch.per_channel_affine, torch.per_channel_affine_float_qparams):
                # convert scales and zero points to tuple to avoid recursive calls
                # when/if we get multi-axis quantized tensors in the future, the shape
                # is recoverable from the main tensor shape
                quantizer_params = (self.qscheme(),  # preserve the exact scheme, e.g. float_qparams
                                    self.q_per_channel_scales(),
                                    self.q_per_channel_zero_points(),
                                    self.q_per_channel_axis())
else:
raise RuntimeError(f"Serialization is not supported for tensors of type {self.qscheme()}")
args_qtensor = (self.storage(),
self.storage_offset(),
tuple(self.size()),
self.stride(),
quantizer_params,
self.requires_grad,
backward_hooks)
return (torch._utils._rebuild_qtensor, args_qtensor)
elif self.is_sparse:
if self.layout == torch.sparse_coo:
args_sparse = (self.layout,
(self._indices(),
self._values(),
self.size()))
else:
                raise NotImplementedError(
                    f'sparse tensor __reduce_ex__ for layout `{self.layout}`')
return (torch._utils._rebuild_sparse_tensor, args_sparse)
else:
args = (self.storage(),
self.storage_offset(),
tuple(self.size()),
self.stride(),
self.requires_grad,
backward_hooks) # previously was self._backward_hooks
return (torch._utils._rebuild_tensor_v2, args)
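    # A minimal round-trip sketch exercising the reduce path above via the
    # standard serialization API:
    #
    #     import io
    #     buf = io.BytesIO()
    #     torch.save(torch.arange(4.), buf)
    #     buf.seek(0)
    #     t = torch.load(buf)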
def __setstate__(self, state):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__setstate__, (self,), self, state)
# Warning: this method is NOT called when you torch.load() a tensor;
# that is managed by _rebuild_tensor_v2
if not self.is_leaf:
            raise RuntimeError('__setstate__ can only be called on leaf Tensors')
if len(state) == 4:
# legacy serialization of Tensor
self.set_(*state)
return
elif len(state) == 5:
# legacy serialization of Variable
self.data = state[0]
state = (state[3], state[4], state[2])
# The setting of _backward_hooks is expected to be a no-op.
# See Note [Don't serialize hooks]
self.requires_grad, _, self._backward_hooks = state
def __repr__(self):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__repr__, (self,), self)
# All strings are unicode in Python 3.
return torch._tensor_str._str(self)
def backward(self, gradient=None, retain_graph=None, create_graph=False, inputs=None):
r"""Computes the gradient of current tensor w.r.t. graph leaves.
The graph is differentiated using the chain rule. If the tensor is
non-scalar (i.e. its data has more than one element) and requires
gradient, the function additionally requires specifying ``gradient``.
It should be a tensor of matching type and location, that contains
the gradient of the differentiated function w.r.t. ``self``.
This function accumulates gradients in the leaves - you might need to zero
``.grad`` attributes or set them to ``None`` before calling it.
See :ref:`Default gradient layouts<default-grad-layouts>`
for details on the memory layout of accumulated gradients.
.. note::
If you run any forward ops, create ``gradient``, and/or call ``backward``
in a user-specified CUDA stream context, see
:ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.
Args:
gradient (Tensor or None): Gradient w.r.t. the
tensor. If it is a tensor, it will be automatically converted
to a Tensor that does not require grad unless ``create_graph`` is True.
None values can be specified for scalar Tensors or ones that
don't require grad. If a None value would be acceptable then
this argument is optional.
retain_graph (bool, optional): If ``False``, the graph used to compute
the grads will be freed. Note that in nearly all cases setting
this option to True is not needed and often can be worked around
in a much more efficient way. Defaults to the value of
``create_graph``.
create_graph (bool, optional): If ``True``, graph of the derivative will
                be constructed, allowing the computation of higher order derivative
products. Defaults to ``False``.
            inputs (sequence of Tensor, optional): Inputs w.r.t. which the gradient
                will be accumulated into ``.grad``. All other Tensors will be ignored.
                If not provided, the gradient is accumulated into all the leaf Tensors
                that were used to compute the :attr:`tensors`. All the provided inputs
                must be leaf Tensors.
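        Example (a minimal scalar-valued backward pass)::

            >>> x = torch.tensor([2., 3.], requires_grad=True)
            >>> (x * x).sum().backward()
            >>> x.grad
            tensor([4., 6.])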
"""
if has_torch_function_unary(self):
return handle_torch_function(
Tensor.backward,
(self,),
self,
gradient=gradient,
retain_graph=retain_graph,
create_graph=create_graph,
inputs=inputs)
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
def register_hook(self, hook):
r"""Registers a backward hook.
The hook will be called every time a gradient with respect to the
Tensor is computed. The hook should have the following signature::
hook(grad) -> Tensor or None
The hook should not modify its argument, but it can optionally return
a new gradient which will be used in place of :attr:`grad`.
This function returns a handle with a method ``handle.remove()``
        that removes the hook from the tensor.
Example::
>>> v = torch.tensor([0., 0., 0.], requires_grad=True)
>>> h = v.register_hook(lambda grad: grad * 2) # double the gradient
>>> v.backward(torch.tensor([1., 2., 3.]))
            >>> v.grad
            tensor([2., 4., 6.])
>>> h.remove() # removes the hook
"""
if has_torch_function_unary(self):
return handle_torch_function(Tensor.register_hook, (self,), self, hook)
if not self.requires_grad:
raise RuntimeError("cannot register a hook on a tensor that "
"doesn't require gradient")
if self._backward_hooks is None:
self._backward_hooks = OrderedDict()
if self.grad_fn is not None:
self.grad_fn._register_hook_dict(self)
handle = hooks.RemovableHandle(self._backward_hooks)
self._backward_hooks[handle.id] = hook
return handle
def reinforce(self, reward):
        def trim(s):
            return '\n'.join(line.strip() for line in s.split('\n'))
raise RuntimeError(trim(r"""reinforce() was removed.
Use torch.distributions instead.
See https://pytorch.org/docs/master/distributions.html
Instead of:
probs = policy_network(state)
action = probs.multinomial()
next_state, reward = env.step(action)
action.reinforce(reward)
action.backward()
Use:
probs = policy_network(state)
# NOTE: categorical is equivalent to what used to be called multinomial
m = torch.distributions.Categorical(probs)
action = m.sample()
next_state, reward = env.step(action)
loss = -m.log_prob(action) * reward
loss.backward()
"""))
detach = _C._add_docstr(_C._TensorBase.detach, r"""
Returns a new Tensor, detached from the current graph.
The result will never require gradient.
.. note::
Returned Tensor shares the same storage with the original one.
In-place modifications on either of them will be seen, and may trigger
errors in correctness checks.
IMPORTANT NOTE: Previously, in-place size / stride / storage changes
(such as `resize_` / `resize_as_` / `set_` / `transpose_`) to the returned tensor
    also updated the original tensor. Now, these in-place changes will not update the
original tensor anymore, and will instead trigger an error.
For sparse tensors:
In-place indices / values changes (such as `zero_` / `copy_` / `add_`) to the
returned tensor will not update the original tensor anymore, and will instead
trigger an error.
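    Example (a small sketch of the detached result)::

        >>> a = torch.tensor([1., 2.], requires_grad=True)
        >>> b = a.detach()   # shares storage with ``a``
        >>> b.requires_grad
        False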
""")
detach_ = _C._add_docstr(_C._TensorBase.detach_, r"""
Detaches the Tensor from the graph that created it, making it a leaf.
Views cannot be detached in-place.
""")
def retain_grad(self):
r"""Enables .grad attribute for non-leaf Tensors."""
if has_torch_function_unary(self):
return handle_torch_function(Tensor.retain_grad, (self,), self)
if not self.requires_grad:
            raise RuntimeError("can't retain_grad on Tensor that has requires_grad=False")