from dataclasses import dataclass
from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
from torchgen.api import cpp
from torchgen.api.types import Binding, CppSignature, CppSignatureGroup
from torchgen.gen import pythonify_default
from torchgen.model import (
Argument,
BaseTy,
BaseType,
FunctionSchema,
ListType,
NativeFunction,
OptionalType,
Return,
Type,
Variant,
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
#
# Data Models
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
#
# [Notes] python binding codegen
#
# The Python binding codegen produces code that takes the input list of
# PyObjects, finds the matching ATen C++ function using PythonArgParser,
# converts the PyObjects into C++ types and calls the ATen C++ function:
#
# +--------+ parsing +------------------------+ binding +-----------------------+
# | PyObjs | ---------> | PythonArgParser Output | ---------> | Cpp Function Dispatch |
# +--------+ +------------------------+ +-----------------------+
#
# The following examples demonstrate the data models the Python binding
# codegen needs to deal with and the tasks it needs to accomplish. It
# helps understand the purpose of the new data types we introduced below.
#
# - Function Schema (source of truth)
#
# aten::empty.names(int[] size, *, Dimname[]? names,
# ScalarType? dtype=None, Layout? layout=None,
# Device? device=None, bool? pin_memory=None,
# MemoryFormat? memory_format=None) -> Tensor
#
# - Python Signature
#
# It's used to generate input schema string for PythonArgParser.
# Note: TensorOptions fields are reordered and the additional
# 'requires_grad' field is added:
#
# empty(IntArrayRef size, *, DimnameList? names,
# MemoryFormat? memory_format=None, ScalarType dtype=None,
# Layout layout=torch.strided, Device device=None,
# bool pin_memory=False, bool requires_grad=False)
#
# - C++ Signature
#
# It's used to generate C++ lambda formals & dispatch call.
# Note: the scattered TensorOptions fields are packed into 'options'.
#
# auto dispatch_empty =
# [](IntArrayRef size, c10::optional<DimnameList> names,
# const TensorOptions & options,
# c10::optional<MemoryFormat> memory_format) -> Tensor {
# pybind11::gil_scoped_release no_gil;
# return torch::empty(size, names, options, memory_format);
# };
#
# - Binding between Python Arguments and C++ Arguments
#
# Given a set of Python Arguments in scope, we need to produce the
# binding expressions that translate the Python API into C++ API:
#
# Python Args Cpp Args Binding Exprs
# -----------------------------------------------------------------
# 0: size size '_r.intlist(0)'
# 1: names names 'names' [special init]
# 2: memory_format -------+
# 3: dtype -----+-|--> options 'options' [special packing]
# 4: layout / |
# 5: device / +--> memory_format '_r.memoryformatOptional(2)'
# 6: pin_memory /
# 7: requires_grad -+
#
# So the full dispatch expression would look like:
#
# dispatch_empty(_r.intlist(0), names, options,
# _r.memoryformatOptional(2))
#
# Where does 'names' come from? It involves special local init:
#
# auto __names = _r.toDimnameListOptional(1);
# c10::optional<DimnameList> names =
# __names ? c10::make_optional(DimnameList(__names.value()))
# : c10::nullopt;
#
# Where does 'options' come from? It involves special local init
# for TensorOptions. Note that Python side has the additional
# 'requires_grad' field:
#
# const auto options = TensorOptions()
# .dtype(_r.scalartype(3))
# .device(_r.device(5))
# .layout(_r.layoutOptional(4))
# .requires_grad(_r.toBool(7))
# .pinned_memory(_r.toBool(6));
#
# In some other cases one Python Argument can map to multiple C++
# Arguments. For example:
#
# aten::max.names_dim(Tensor self, Dimname dim, bool keepdim=False)
# -> (Tensor values, Tensor indices)
#
# Python Args Cpp Args Binding Exprs
# ---------------------------------------------------------------------
# +----> max 'out[0]'
# /-----> max_values 'out[1]'
# 0: input / self '_r.tensor(0)'
# 1: dim / dim '_r.dimname(1)'
# 2: keepdim / keepdim '_r.toBool(2)'
# 3: out -----+ [local init] out '_r.tensorlist_n<2>(3)'
#
# As demonstrated above, the binding can involve reordering,
# packing, unpacking and special local inits.
#
#
# Let's look at a concrete example:
#
# static PythonArgParser parser({
# "abs(Tensor input, *, Tensor out=None)",
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ^
# +--- Python Schema, represented by PythonSignature and PythonArgument
#
# }, /*traceable=*/true);
#
# ParsedArgs<2> parsed_args;
# auto _r = parser.parse(nullptr, args, kwargs, parsed_args);
#
# ...
#
# if (_r.isNone(1)) {
# ~~~~~~~~~~~~ <--- Scattered PythonArgParser output (arg name = 'out')
# represented by PythonArgParserOutputExpr
#
# // aten::abs(Tensor self) -> Tensor
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ^
# +--- NativeFunction schema, base version
#
# auto dispatch_abs = [](const Tensor & self) -> Tensor {
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ^
# +--- dispatch_lambda_args / dispatch_lambda_return_str
# generated from NativeFunction / CppSignature
# (deprecated PythonSignature is special)
# arguments are represented by DispatchLambdaArgument
#
# pybind11::gil_scoped_release no_gil;
# return self.abs();
# ~~~~~~~~~~~ <--- cpp_dispatch_target / cpp_dispatch_exprs
# generated from NativeFunction / CppSignature
# };
# return wrap(dispatch_abs(_r.tensor(0)));
# ~~~~~~~~~~~~~
# ^
# +--- dispatch_lambda_exprs
# binding PythonArgParserOutputExpr (python args)
# and DispatchLambdaArgument (c++ args)
#
# } else {
# // aten::abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ^
# +--- NativeFunction schema, out-variant
#
# auto dispatch_abs_out = [](Tensor out, const Tensor & self) -> Tensor {
# pybind11::gil_scoped_release no_gil;
# return at::abs_out(out, self);
# };
# return wrap(dispatch_abs_out(_r.tensor(1), _r.tensor(0)));
# }
#
#
# [Notes] python interface codegen
# The python dataclasses below are used to generate both python binding code
# and pyi type hint signatures.
# In theory these two should look very similar, but there are a number of differences
# in how pyi signatures vs. python_arg_parser signatures are generated.
# These differences have been encapsulated in signature_str() vs. signature_str_pyi()
# to display the full signatures, and argument_str() vs argument_str_pyi() to display arguments.
# For example, only pyi signatures include return types.
# Frozen (immutable, hashable) wrapper around a tuple of `Return` entries
# from torchgen.model.
# NOTE(review): presumably these are the returns of a single function schema,
# consumed when rendering return types for pyi signatures -- confirm against
# the callers, which are not visible in this part of the file.
@dataclass(frozen=True)
class PythonReturns:
    returns: Tuple[Return, ...]
# A single argument of a Python signature. It can be rendered either as a
# PythonArgParser formal (argument_str) or as a pyi type-hint parameter
# (argument_str_pyi).
@dataclass(frozen=True)
class PythonArgument:
    name: str
    type: Type
    # Default value as a string, or None when the argument has no default.
    default: Optional[str]

    # Expression used to build the default init for some PythonArgParser
    # outputs, e.g.:
    #
    #   _r.layoutWithDefault(3, layout_from_backend(self.options().backend())))
    #                           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #                           ^
    #                           +--- default_init str
    default_init: Optional[str]

    # Render the argument formal used in the PythonArgParser schema string:
    # "<type> <name>" or "<type> <name>=<default>".
    # Needs to be consistent with torch/csrc/utils/python_arg_parser.h.
    def argument_str(self, *, method: bool = False, symint: bool = True) -> str:
        # The parser schema wants the bare type, so strip the C++ const/ref
        # decorations from the computed type string.
        raw_type = argument_type_str(self.type, symint=symint)
        formal_type = raw_type.replace("const ", "").replace(" &", "")

        # s/self/input/ outside method bindings
        # [old codegen] TODO: remove this? doesn't rename in codegen, it's just
        # for the parse string
        rename_self = (
            not method
            and self.name == "self"
            and formal_type in ("Tensor", "Number")
        )
        formal_name = "input" if rename_self else self.name

        if self.default is None:
            return f"{formal_type} {formal_name}"

        # C++ spellings of "no value" are shown as Python's None; any other
        # default is passed through untouched.
        none_spellings = {
            "nullptr": "None",
            "c10::nullopt": "None",
            "{}": "None",
        }
        shown_default = none_spellings.get(self.default, self.default)
        return f"{formal_type} {formal_name}={shown_default}"

    # Render the argument as a pyi parameter: "<name>: <type>" or
    # "<name>: <type>=<default>".
    def argument_str_pyi(
        self, *, method: bool = False, deprecated: bool = False
    ) -> str:
        hint = argument_type_str_pyi(self.type)
        is_plain_tensor = hint == "Tensor"

        # s/self/input/ outside method bindings
        # [old codegen] TODO: remove this? doesn't rename in codegen, it's just
        # for the parse string
        if self.name == "self" and is_plain_tensor and not (method or deprecated):
            pyi_name = "input"
        elif self.name == "from":
            # 'from' is a Python keyword, so it gets a trailing underscore.
            pyi_name = "from_"
        else:
            pyi_name = self.name

        # pyi merges the _out and functional variants into the same signature,
        # with an optional out arg
        if pyi_name == "out" and is_plain_tensor and not deprecated:
            hint = f"Optional[{hint}]"

        # pyi deprecated signatures don't get defaults for their out arg
        omit_default = self.default is None or (
            deprecated
            and isinstance(self, PythonOutArgument)
            and self.default == "None"
        )
        if omit_default:
            return f"{pyi_name}: {hint}"

        # A brace-delimited default on an int list, "{...}", is rewritten
        # with parentheses, "(...)".
        is_braced_int_list = (
            isinstance(self.type, ListType)
            and self.type.elem == BaseType(BaseTy.int)
            and self.default.startswith("{")
            and self.default.endswith("}")
        )
        if is_braced_int_list:
            shown_default = "(" + self.default[1:-1] + ")"
        else:
            pyi_spellings = {
                "nullptr": "None",
                "c10::nullopt": "None",
                "{}": "None",
                "MemoryFormat::Contiguous": "contiguous_format",
                "QScheme::PER_TENSOR_AFFINE": "per_tensor_affine",
            }
            shown_default = pyi_spellings.get(self.default, self.default)
        return f"{pyi_name}: {hint}={shown_default}"
@dataclass(frozen=True)
class PythonOutArgument(PythonArgument):
    # In the Python signature, multiple output fields are packed into one
    # 'out' argument. When binding to C++, it is first bound to a local 'out'
    # variable:
    #   'auto out = _r.tensorlist_n<2>(2);',
    # and then bound to the scattered C++ output arguments as 'out[0]',
    # 'out[1]', etc.
    # TODO: maybe don't need keep scattered out fields for python signature?
    outputs: Tuple[PythonArgument, ...]

    # Build the packed 'out' argument from the individual output arguments.
    #   - no outputs       -> None
    #   - exactly one      -> keeps that output's own name and type
    #   - more than one    -> a fixed-size Tensor list named 'out'; raises
    #                         RuntimeError if any output is not tensor-like
    # The packed argument always defaults to "None" and has no default_init.
    @staticmethod
    def from_outputs(
        outputs: Tuple[PythonArgument, ...]
    ) -> Optional["PythonOutArgument"]:
        if not outputs:
            return None

        size = len(outputs)

        if size == 1:
            sole = outputs[0]
            return PythonOutArgument(
                name=sole.name,
                type=sole.type,
                default="None",
                default_init=None,
                outputs=outputs,
            )

        if size > 1:
            # Packing into a Tensor list only works for tensor-like outputs.
            if not all(arg.type.is_tensor_like() for arg in outputs):
                raise RuntimeError(f"Unsupported output type: {outputs}")
            # TODO: shouldn't this be OptionalType[ListType[...]], since it defaults to None?
            packed_type = ListType(BaseType(BaseTy.Tensor), size)
            return PythonOutArgument(
                name="out",
                type=packed_type,
                default="None",
                default_init=None,
                outputs=outputs,
            )

        # Defensive: a truthy `outputs` should always have size >= 1.
        raise AssertionError(r"Unexpected PythonOutArgument size")
@dataclass(frozen=True)
class PythonSignature:
# Base operator name, without inplace/outplace suffix.
name: str
# Positional arguments.
# TODO: create a dedicated SelfArgument type for 'self'?
input_args: Tuple[PythonArgument, ...]
# Keyword arguments excluding the 'out' argument and scattered kwargs belonging
# to TensorOptions (dtype, layout, device, pin_memory, requires_grad, etc).
input_kwargs: Tuple[PythonArgument, ...]
output_args: Optional[PythonOutArgument]
Loading ...