from collections import OrderedDict
import functools
from numbers import Number
from typing import Any, Dict, Optional, Tuple, Union
import warnings
import weakref
import torch
import torch._C as _C
from torch._namedtensor_internals import (
update_names, check_serializing_named_tensor, resolve_ellipsis,
unzip_namedshape, single_ellipsis_index, is_ellipsis)
from torch.overrides import (
has_torch_function, has_torch_function_unary, has_torch_function_variadic,
handle_torch_function)
import torch.utils.hooks as hooks
def _wrap_type_error_to_not_implemented(f):
    assigned = functools.WRAPPER_ASSIGNMENTS
@functools.wraps(f, assigned=assigned)
def wrapped(*args, **kwargs):
if has_torch_function(args):
return handle_torch_function(wrapped, args, *args, **kwargs)
try:
return f(*args, **kwargs)
except TypeError:
return NotImplemented
return wrapped
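# Illustrative sketch (the method below is hypothetical, not defined at this
# point in the module): wrapping a binary method this way turns a TypeError
# raised for an unsupported operand into NotImplemented, so Python falls back
# to the other operand's reflected method:
#
#     @_wrap_type_error_to_not_implemented
#     def __rsub__(self, other):
#         return torch.rsub(self, other)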
def _rebuild_from_type(func, type, args, dict):
if type is Tensor:
return func(*args)
ret = func(*args).as_subclass(type)
ret.__dict__ = dict
return ret
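# Illustrative sketch: when unpickling a Tensor subclass, the reduce value
# produced by Tensor.__reduce_ex__ makes pickle call, roughly,
#
#     _rebuild_from_type(torch._utils._rebuild_tensor_v2, MyTensor, args, state)
#
# (MyTensor being a hypothetical subclass): the base tensor is rebuilt,
# re-wrapped via as_subclass, and its instance __dict__ is restored.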
# NB: If you subclass Tensor, and want to share the subclassed class
# across processes, you must also update torch/multiprocessing/reductions.py
# to define a ForkingPickler serialization mode for the class.
#
# NB: If you add a new method to Tensor, you must update
# torch/__init__.py.in to add a type annotation for your method;
# otherwise, it will not show up in autocomplete.
class Tensor(torch._C._TensorBase):
def __deepcopy__(self, memo):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__deepcopy__, (self,), self, memo)
if not self.is_leaf:
raise RuntimeError("Only Tensors created explicitly by the user "
"(graph leaves) support the deepcopy protocol at the moment")
if id(self) in memo:
return memo[id(self)]
with torch.no_grad():
if self.is_sparse or self.device.type == 'xla':
new_tensor = self.clone()
else:
new_storage = self.storage().__deepcopy__(memo)
if self.is_quantized:
                    # quantizer_params takes a different form depending on the qscheme
                    quantizer_params: Union[Tuple[torch.qscheme, float, int], Tuple[torch.qscheme, Tensor, Tensor, int]]
if self.qscheme() == torch.per_tensor_affine:
quantizer_params = self.qscheme(), self.q_scale(), self.q_zero_point()
elif self.qscheme() in (torch.per_channel_affine, torch.per_channel_affine_float_qparams):
quantizer_params = self.qscheme(), \
self.q_per_channel_scales(), \
self.q_per_channel_zero_points(), \
self.q_per_channel_axis()
else:
raise RuntimeError(f"Unsupported qscheme {self.qscheme()} in deepcopy")
new_tensor = torch._utils._rebuild_qtensor(
new_storage,
self.storage_offset(),
self.size(),
self.stride(),
quantizer_params,
self.requires_grad,
self._backward_hooks)
else:
new_tensor = self.new()
new_tensor.set_(new_storage, self.storage_offset(), self.size(), self.stride())
new_tensor.requires_grad = self.requires_grad
if self.grad is not None:
new_tensor.grad = self.grad.__deepcopy__(memo)
memo[id(self)] = new_tensor
return new_tensor
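    # A minimal usage sketch of the protocol above: deepcopying a leaf tensor
    # copies its storage and preserves requires_grad.
    #
    #     import copy
    #     t = torch.ones(3, requires_grad=True)
    #     t2 = copy.deepcopy(t)
    #     assert t2.requires_grad and t2.data_ptr() != t.data_ptr()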
    def __reduce_ex__(self, proto):
        if type(self) is Tensor:
            return self._reduce_ex_internal(proto)
        # For subclasses, give any __torch_function__ override a chance to
        # customize pickling before falling back to the default path.
        if has_torch_function_unary(self):
            return handle_torch_function(Tensor.__reduce_ex__, (self,), self, proto)
        func, args = self._reduce_ex_internal(proto)
        return (_rebuild_from_type, (func, type(self), args, self.__dict__))
def _reduce_ex_internal(self, proto):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__reduce_ex__, (self,), self, proto)
check_serializing_named_tensor(self)
# See Note [Don't serialize hooks]
torch.utils.hooks.warn_if_has_hooks(self)
backward_hooks: Dict[Any, Any] = OrderedDict()
        # Note: a numpy array is chosen as the rebuild component for XLA tensors.
        # We considered a few options:
        # 1. A CPU tensor can't be used here.
        #    Otherwise in torch.load the CPU storage would be reconstructed with
        #    randomly initialized data, moved onto the XLA device, and then updated
        #    to the serialized content. This works for CPU/CUDA but not XLA: an XLA
        #    tensor is disconnected from its storage, so it would never see the update.
        # 2. A Python list is not a good fit for performance reasons:
        #    `tolist()` converts every element of the tensor into a Python object
        #    and serializes them one by one.
if self.device.type == 'xla':
arg_xla = (self.cpu().numpy(),
self.dtype,
str(self.device),
self.requires_grad)
return (torch._utils._rebuild_xla_tensor, arg_xla)
if self.is_quantized:
            # quantizer_params takes a different form depending on the qscheme
            quantizer_params: Union[Tuple[torch.qscheme, float, int], Tuple[torch.qscheme, Tensor, Tensor, int]]
if self.qscheme() == torch.per_tensor_affine:
quantizer_params = (torch.per_tensor_affine,
self.q_scale(),
self.q_zero_point())
            elif self.qscheme() in (torch.per_channel_affine, torch.per_channel_affine_float_qparams):
                # convert scales and zero points to tuple to avoid recursive calls
                # when/if we get multi-axis quantized tensors in the future, the shape
                # is recoverable from the main tensor shape
                quantizer_params = (self.qscheme(),  # preserve the exact scheme, e.g. float_qparams
                                    self.q_per_channel_scales(),
                                    self.q_per_channel_zero_points(),
                                    self.q_per_channel_axis())
else:
raise RuntimeError(f"Serialization is not supported for tensors of type {self.qscheme()}")
args_qtensor = (self.storage(),
self.storage_offset(),
tuple(self.size()),
self.stride(),
quantizer_params,
self.requires_grad,
backward_hooks)
return (torch._utils._rebuild_qtensor, args_qtensor)
elif self.is_sparse:
if self.layout == torch.sparse_coo:
args_sparse = (self.layout,
(self._indices(),
self._values(),
self.size()))
else:
                raise NotImplementedError(
                    f'sparse tensor __reduce_ex__ for layout `{self.layout}`')
return (torch._utils._rebuild_sparse_tensor, args_sparse)
else:
args = (self.storage(),
self.storage_offset(),
tuple(self.size()),
self.stride(),
self.requires_grad,
backward_hooks) # previously was self._backward_hooks
return (torch._utils._rebuild_tensor_v2, args)
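    # A minimal round-trip sketch exercising the reduce path above via the
    # standard serialization API:
    #
    #     import io
    #     buf = io.BytesIO()
    #     torch.save(torch.arange(4.), buf)
    #     buf.seek(0)
    #     t = torch.load(buf)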
def __setstate__(self, state):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__setstate__, (self,), self, state)
# Warning: this method is NOT called when you torch.load() a tensor;
# that is managed by _rebuild_tensor_v2
if not self.is_leaf:
            raise RuntimeError('__setstate__ can only be called on leaf Tensors')
if len(state) == 4:
# legacy serialization of Tensor
self.set_(*state)
return
elif len(state) == 5:
# legacy serialization of Variable
self.data = state[0]
state = (state[3], state[4], state[2])
# The setting of _backward_hooks is expected to be a no-op.
# See Note [Don't serialize hooks]
self.requires_grad, _, self._backward_hooks = state
def __repr__(self):
if has_torch_function_unary(self):
return handle_torch_function(Tensor.__repr__, (self,), self)
# All strings are unicode in Python 3.
return torch._tensor_str._str(self)
def backward(self, gradient=None, retain_graph=None, create_graph=False, inputs=None):
r"""Computes the gradient of current tensor w.r.t. graph leaves.
The graph is differentiated using the chain rule. If the tensor is
non-scalar (i.e. its data has more than one element) and requires
gradient, the function additionally requires specifying ``gradient``.
It should be a tensor of matching type and location, that contains
the gradient of the differentiated function w.r.t. ``self``.
This function accumulates gradients in the leaves - you might need to zero
``.grad`` attributes or set them to ``None`` before calling it.
See :ref:`Default gradient layouts<default-grad-layouts>`
for details on the memory layout of accumulated gradients.
.. note::
If you run any forward ops, create ``gradient``, and/or call ``backward``
in a user-specified CUDA stream context, see
:ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.
Args:
gradient (Tensor or None): Gradient w.r.t. the
tensor. If it is a tensor, it will be automatically converted
to a Tensor that does not require grad unless ``create_graph`` is True.
None values can be specified for scalar Tensors or ones that
don't require grad. If a None value would be acceptable then
this argument is optional.
retain_graph (bool, optional): If ``False``, the graph used to compute
the grads will be freed. Note that in nearly all cases setting
this option to True is not needed and often can be worked around
in a much more efficient way. Defaults to the value of
``create_graph``.
create_graph (bool, optional): If ``True``, graph of the derivative will
                be constructed, allowing the computation of higher order derivative
products. Defaults to ``False``.
            inputs (sequence of Tensor, optional): Inputs w.r.t. which the gradient
                will be accumulated into ``.grad``. All other Tensors will be ignored.
                If not provided, the gradient is accumulated into all the leaf Tensors
                that were used to compute the :attr:`tensors`. All the provided inputs
                must be leaf Tensors.
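        Example (a minimal scalar-valued backward pass)::

            >>> x = torch.tensor([2., 3.], requires_grad=True)
            >>> (x * x).sum().backward()
            >>> x.grad
            tensor([4., 6.])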
"""
if has_torch_function_unary(self):
return handle_torch_function(
Tensor.backward,
(self,),
self,
gradient=gradient,
retain_graph=retain_graph,
create_graph=create_graph,
inputs=inputs)
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
def register_hook(self, hook):
r"""Registers a backward hook.
The hook will be called every time a gradient with respect to the
Tensor is computed. The hook should have the following signature::
hook(grad) -> Tensor or None
The hook should not modify its argument, but it can optionally return
a new gradient which will be used in place of :attr:`grad`.
This function returns a handle with a method ``handle.remove()``
        that removes the hook from the tensor.
Example::
>>> v = torch.tensor([0., 0., 0.], requires_grad=True)
>>> h = v.register_hook(lambda grad: grad * 2) # double the gradient
>>> v.backward(torch.tensor([1., 2., 3.]))
            >>> v.grad
            tensor([2., 4., 6.])
>>> h.remove() # removes the hook
"""
if has_torch_function_unary(self):
return handle_torch_function(Tensor.register_hook, (self,), self, hook)
if not self.requires_grad:
raise RuntimeError("cannot register a hook on a tensor that "
"doesn't require gradient")
if self._backward_hooks is None:
self._backward_hooks = OrderedDict()
if self.grad_fn is not None:
self.grad_fn._register_hook_dict(self)
handle = hooks.RemovableHandle(self._backward_hooks)
self._backward_hooks[handle.id] = hook
return handle
def reinforce(self, reward):
        def trim(s):
            return '\n'.join(line.strip() for line in s.split('\n'))
raise RuntimeError(trim(r"""reinforce() was removed.
Use torch.distributions instead.
See https://pytorch.org/docs/master/distributions.html
Instead of:
probs = policy_network(state)
action = probs.multinomial()
next_state, reward = env.step(action)
action.reinforce(reward)
action.backward()
Use:
probs = policy_network(state)
# NOTE: categorical is equivalent to what used to be called multinomial
m = torch.distributions.Categorical(probs)
action = m.sample()
next_state, reward = env.step(action)
loss = -m.log_prob(action) * reward
loss.backward()
"""))
detach = _C._add_docstr(_C._TensorBase.detach, r"""
Returns a new Tensor, detached from the current graph.
The result will never require gradient.
.. note::
Returned Tensor shares the same storage with the original one.
In-place modifications on either of them will be seen, and may trigger
errors in correctness checks.
IMPORTANT NOTE: Previously, in-place size / stride / storage changes
(such as `resize_` / `resize_as_` / `set_` / `transpose_`) to the returned tensor
    also updated the original tensor. Now, these in-place changes will not update the
original tensor anymore, and will instead trigger an error.
For sparse tensors:
In-place indices / values changes (such as `zero_` / `copy_` / `add_`) to the
returned tensor will not update the original tensor anymore, and will instead
trigger an error.
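    Example (a small sketch of the detached result)::

        >>> a = torch.tensor([1., 2.], requires_grad=True)
        >>> b = a.detach()   # shares storage with ``a``
        >>> b.requires_grad
        False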
""")
detach_ = _C._add_docstr(_C._TensorBase.detach_, r"""
Detaches the Tensor from the graph that created it, making it a leaf.
Views cannot be detached in-place.
""")
def retain_grad(self):
r"""Enables .grad attribute for non-leaf Tensors."""
if has_torch_function_unary(self):
return handle_torch_function(Tensor.retain_grad, (self,), self)
if not self.requires_grad:
            raise RuntimeError("can't retain_grad on Tensor that has requires_grad=False")