#pragma once
namespace at { namespace native {
// ---------------------------------------------------------------------
//
// Math
//
// ---------------------------------------------------------------------
// Positions of the batch and channel axes within input-shaped tensors
// (the same layout applies to grad_input).
constexpr int input_batch_size_dim{0};
constexpr int input_channels_dim{1};

// Positions of the batch and channel axes within output-shaped tensors
// (the same layout applies to grad_output).
constexpr int output_batch_size_dim{0};
constexpr int output_channels_dim{1};

// Positions of the output-channel and input-channel axes within weights.
constexpr int weight_output_channels_dim{0};
constexpr int weight_input_channels_dim{1};

// Number of dims counted on top of batch size and channels; the full rank
// is therefore often written as 2 + max_dim.
constexpr int max_dim{3};
// NB: conv_output_size and conv_input_size are not bijections,
// as conv_output_size loses information; this is why conv_input_size
// takes an extra output_padding argument to resolve the ambiguity.
// Derives the shape of a convolution result.
//
// input_size  : shape of the input; entry 0 is copied to the result's batch
//               entry, entries 2.. are the extents being convolved.
// weight_size : shape of the weight; entry 0 becomes the result's channel
//               entry, entries 2.. are the kernel extents.
// padding, stride, dilation : per-axis parameters, indexed from 0 for the
//               first axis after batch/channels. An empty `dilation` means
//               a dilation of 1 on every axis.
//
// Each remaining axis is computed as
//   (input + 2 * padding - (dilation * (kernel - 1) + 1)) / stride + 1
// using integer division.
static inline std::vector<int64_t> conv_output_size(
    IntArrayRef input_size, IntArrayRef weight_size,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation = IntArrayRef()
) {
  // ASSERT(input_size.size() > 2)
  // ASSERT(input_size.size() == weight_size.size())
  const auto ndim = input_size.size();
  const bool dilated = dilation.size() > 0;

  std::vector<int64_t> result(ndim);
  result[0] = input_size[input_batch_size_dim];
  result[1] = weight_size[weight_output_channels_dim];

  for (size_t axis = 2; axis < ndim; ++axis) {
    // Per-axis parameter arrays skip the batch and channel entries.
    const size_t k = axis - 2;
    const int64_t step = dilated ? dilation[k] : 1;
    // Extent covered by the kernel once dilation gaps are included.
    const int64_t span = step * (weight_size[axis] - 1) + 1;
    const int64_t padded = input_size[axis] + (2 * padding[k]);
    result[axis] = (padded - span) / stride[k] + 1;
  }
  return result;
}
// Reconstructs the input shape that corresponds to a given convolution
// output shape.
//
// output_size    : shape of the output; entry 0 is copied to the result's
//                  batch entry.
// weight_size    : shape of the weight; entry 1 multiplied by `groups`
//                  becomes the result's channel entry, entries 2.. are the
//                  kernel extents.
// padding, output_padding, stride, dilation : per-axis parameters, indexed
//                  from 0 for the first axis after batch/channels.
// groups         : multiplier applied to the weight's input-channel entry.
//
// Each remaining axis is computed as
//   (output - 1) * stride - 2 * padding + (dilation * (kernel - 1) + 1)
//     + output_padding
// `output_padding` is the extra term that selects one input size among the
// several that map to the same output size.
static inline std::vector<int64_t> conv_input_size(
    IntArrayRef output_size, IntArrayRef weight_size,
    IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups
) {
  // ASSERT(output_size.size() > 2)
  // ASSERT(output_size.size() == weight_size.size())
  auto dim = output_size.size();
  std::vector<int64_t> input_size(dim);
  input_size[0] = output_size[output_batch_size_dim];
  input_size[1] = weight_size[weight_input_channels_dim] * groups;
  for (size_t d = 2; d < dim; ++d) {
    // Keep the dilated kernel extent in 64 bits: the operands are int64_t,
    // and storing the product in a plain `int` would silently truncate it.
    const int64_t kernel = dilation[d - 2] * (weight_size[d] - 1) + 1;
    input_size[d] = (output_size[d] - 1) * stride[d - 2] - (2 * padding[d - 2]) +
                    kernel + output_padding[d - 2];
  }
  return input_size;
}
// Reconstructs the weight shape from the input and output shapes of a
// convolution.
//
// input_size  : shape of the input; entry 1 divided by `groups` becomes the
//               weight's entry 1.
// output_size : shape of the output; entry 1 becomes the weight's entry 0.
// padding, output_padding, stride, dilation : per-axis parameters, indexed
//               from 0 for the first axis after batch/channels.
// groups      : divisor applied to the input's channel entry.
//
// For each remaining axis the dilated kernel extent is recovered as
//   input - (output - 1) * stride + 2 * padding - output_padding
// and then converted to a kernel size via (extent - 1) / dilation + 1
// (integer division).
static inline std::vector<int64_t> conv_weight_size(
    IntArrayRef input_size, IntArrayRef output_size,
    IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups
) {
  auto dim = input_size.size();
  std::vector<int64_t> weight_size(dim);
  weight_size[0] = output_size[1];
  weight_size[1] = input_size[1] / groups;
  for (size_t d = 2; d < dim; ++d) {
    // Keep the intermediate in 64 bits: the operands are int64_t, and
    // storing the result in a plain `int` would silently truncate it.
    const int64_t kernel = input_size[d] - (output_size[d] - 1) * stride[d - 2]
                           + 2 * padding[d - 2] - output_padding[d - 2];
    weight_size[d] = (kernel - 1) / dilation[d - 2] + 1;
  }
  return weight_size;
}
// Returns `bias` reshaped to a `dim`-entry shape in which every entry is 1
// except entry 1, which is -1.
// NOTE(review): entry 1 is written unconditionally, so callers are presumably
// expected to pass dim >= 2 — confirm against call sites.
static inline Tensor reshape_bias(int64_t dim, const Tensor& bias) {
  std::vector<int64_t> target_shape(dim, 1);
  target_shape[1] = -1;
  return bias.reshape(target_shape);
}
// Decides whether the channels-last path should be used for a convolution.
//
// Returns false when the build has no cuDNN, when either tensor is float64,
// or when the reported cuDNN version is below 7603. Otherwise returns true
// if either `input` or `weight` suggests the ChannelsLast memory format.
static inline bool cudnn_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
  if (!detail::getCUDAHooks().compiledWithCuDNN()) {
    return false;
  }

  // disable NHWC for float64 input.
  if (input.scalar_type() == at::kDouble || weight.scalar_type() == at::kDouble) {
    return false;
  }

  const long cudnn_version = detail::getCUDAHooks().versionCuDNN();
  if (!(cudnn_version >= 7603)) {
    return false;
  }

  if (input.suggest_memory_format() == at::MemoryFormat::ChannelsLast) {
    return true;
  }
  return weight.suggest_memory_format() == at::MemoryFormat::ChannelsLast;
}
}} // namespace at::native