#ifndef CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
#define CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
#include <algorithm>
#include <functional>
#include <numeric>
#include <vector>
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/proto/caffe2_legacy.pb.h"
#include "caffe2/utils/math.h"
// This constant is here just to allow us to experiment with which side gets
// the one additional pad value when we have an odd number of pads: the head
// side or the tail side. Setting it to false enables the TensorFlow behavior,
// and setting it to true enables a behavior more consistent with Caffe and
// CuDNN.
// This only affects the case when you set legacy pad to VALID or SAME. The
// behavior inherits from the early designs of Google's CNN implementation,
// where padding values are implicitly calculated instead of explicitly
// specified. This is still the case with TensorFlow. Many frameworks have
// followed a slightly different approach of explicitly giving padding values,
// in which case the value of this constant does not matter.
const bool CAFFE2_PAD_HEAD_MORE = false;
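// Illustrative example (not taken from this file): with legacy SAME padding,
// if a spatial dimension needs a total of 3 pad pixels, then
// CAFFE2_PAD_HEAD_MORE == false gives pad_head = 1, pad_tail = 2
// (TensorFlow-style), while true gives pad_head = 2, pad_tail = 1
// (Caffe/CuDNN-style).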
namespace caffe2 {
template <class Context>
class ConvPoolOpBase : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
explicit ConvPoolOpBase(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
legacy_pad_(
static_cast<LegacyPadding>(this->template GetSingleArgument<int>(
"legacy_pad",
LegacyPadding::NOTSET))),
global_pooling_(
this->template GetSingleArgument<int>("global_pooling", 0)),
kernel_(this->template GetRepeatedArgument<int>("kernels")),
dilation_(this->template GetRepeatedArgument<int>("dilations")),
stride_(this->template GetRepeatedArgument<int>("strides")),
pads_(this->template GetRepeatedArgument<int>("pads")),
float16_compute_(
this->template GetSingleArgument<bool>("float16_compute", false)),
group_(this->template GetSingleArgument<int>("group", 1)),
order_(StringToStorageOrder(
this->template GetSingleArgument<string>("order", "NCHW"))),
shared_buffer_(
this->template GetSingleArgument<int>("shared_buffer", 0)),
ws_(ws) {
// For the padding, the values should either come from the legacy padding
// strategy (VALID or SAME), or be explicit, non-negative values.
if (legacy_pad_ == LegacyPadding::VALID ||
legacy_pad_ == LegacyPadding::SAME) {
CAFFE_ENFORCE(
!OperatorBase::HasArgument("pads"),
"If you use legacy padding VALID or SAME, you should not specify "
"any specific padding values.");
}
// Get legacy argument values.
if (OperatorBase::HasArgument("kernel")) {
kernel_.resize(2, this->template GetSingleArgument<int>("kernel", 0));
} else if (
OperatorBase::HasArgument("kernel_h") &&
OperatorBase::HasArgument("kernel_w")) {
kernel_.push_back(this->template GetSingleArgument<int>("kernel_h", 0));
kernel_.push_back(this->template GetSingleArgument<int>("kernel_w", 0));
}
if (OperatorBase::HasArgument("stride")) {
stride_.resize(2, this->template GetSingleArgument<int>("stride", 0));
} else if (
OperatorBase::HasArgument("stride_h") &&
OperatorBase::HasArgument("stride_w")) {
stride_.push_back(this->template GetSingleArgument<int>("stride_h", 0));
stride_.push_back(this->template GetSingleArgument<int>("stride_w", 0));
}
if (OperatorBase::HasArgument("dilation")) {
dilation_.resize(2, this->template GetSingleArgument<int>("dilation", 0));
} else if (
OperatorBase::HasArgument("dilation_h") &&
OperatorBase::HasArgument("dilation_w")) {
dilation_.push_back(
this->template GetSingleArgument<int>("dilation_h", 0));
dilation_.push_back(
this->template GetSingleArgument<int>("dilation_w", 0));
}
if (OperatorBase::HasArgument("pad")) {
CAFFE_ENFORCE(
legacy_pad_ != LegacyPadding::VALID &&
legacy_pad_ != LegacyPadding::SAME,
"If you use legacy padding VALID or SAME, you should not specify "
"any specific padding values.");
pads_.resize(4, this->template GetSingleArgument<int>("pad", 0));
} else if (
OperatorBase::HasArgument("pad_t") &&
OperatorBase::HasArgument("pad_l") &&
OperatorBase::HasArgument("pad_b") &&
OperatorBase::HasArgument("pad_r")) {
CAFFE_ENFORCE(
legacy_pad_ != LegacyPadding::VALID &&
legacy_pad_ != LegacyPadding::SAME,
"If you use legacy padding VALID or SAME, you should not specify "
"any specific padding values.");
pads_.push_back(this->template GetSingleArgument<int>("pad_t", 0));
pads_.push_back(this->template GetSingleArgument<int>("pad_l", 0));
pads_.push_back(this->template GetSingleArgument<int>("pad_b", 0));
pads_.push_back(this->template GetSingleArgument<int>("pad_r", 0));
}
// Fill default values.
if (kernel_.size() == 0) {
kernel_.assign({0, 0});
}
if (stride_.size() == 0) {
stride_.resize(kernel_.size(), 1);
}
if (pads_.size() == 0) {
pads_.resize(kernel_.size() * 2, 0);
}
if (dilation_.size() == 0) {
dilation_.resize(kernel_.size(), 1);
}
CAFFE_ENFORCE_EQ(stride_.size(), kernel_.size());
CAFFE_ENFORCE_EQ(dilation_.size(), kernel_.size());
if (legacy_pad_ != LegacyPadding::VALID &&
legacy_pad_ != LegacyPadding::SAME) {
CAFFE_ENFORCE_EQ(pads_.size(), 2 * kernel_.size());
}
if (global_pooling_) {
for (size_t dim = 0; dim < kernel_.size(); ++dim) {
CAFFE_ENFORCE(
pads_[2 * dim] == 0 && pads_[2 * dim + 1] == 0 &&
dilation_[dim] == 1 && stride_[dim] == 1,
"If global_pooling is set pad, dilation and stride shouldn't be set.");
}
}
// Check kernel only if we are doing conv or pooling. The reason is that a
// few other ops, like PadImage, are also using this base class. We really
// need to clean this up.
if (operator_def.name().find("Conv") == 0 ||
operator_def.name().find("Pool") != std::string::npos) {
for (size_t dim = 0; dim < kernel_.size(); ++dim) {
CAFFE_ENFORCE_GE(pads_[dim], 0);
CAFFE_ENFORCE_GE(pads_[kernel_.size() + dim], 0);
CAFFE_ENFORCE(
kernel_[dim],
"If you are doing convolution or pooling, you will need to set "
"explicitly the kernel size.");
}
}
for (size_t dim = 0; dim < kernel_.size(); ++dim) {
CAFFE_ENFORCE_GE(kernel_[dim], 0);
CAFFE_ENFORCE_GE(dilation_[dim], 0);
CAFFE_ENFORCE_GE(stride_[dim], 0);
}
}
// Returns the input image dimensions for the current storage order type.
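// For example, an NCHW input of shape {N, C, H, W} yields {H, W}, and an
// NHWC input of shape {N, H, W, C} also yields {H, W}.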
vector<int> GetDims(const Tensor& input) {
vector<int> dims;
switch (order_) {
case StorageOrder::NCHW:
dims.assign(input.sizes().begin() + 2, input.sizes().end());
break;
case StorageOrder::NHWC:
dims.assign(input.sizes().begin() + 1, input.sizes().end() - 1);
break;
default:
CAFFE_THROW("Unknown storage order : ", order_);
}
return dims;
}
// Returns the size of the input image for the current storage type.
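// For example, an input of shape {N, C, H, W} (NCHW) or {N, H, W, C} (NHWC)
// yields H * W.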
int GetDimsSize(const Tensor& input) {
int size = 0;
switch (order_) {
case StorageOrder::NCHW:
size = std::accumulate(
input.sizes().begin() + 2,
input.sizes().end(),
1,
std::multiplies<int>());
break;
case StorageOrder::NHWC:
size = std::accumulate(
input.sizes().begin() + 1,
input.sizes().end() - 1,
1,
std::multiplies<int>());
break;
default:
CAFFE_THROW("Unknown storage order : ", order_);
}
return size;
}
// Gets the output size. The output channel is manually provided since
// it may not be identical to the input channels.
// This function can be used in the forward functions to obtain the output
// sizes.
// Note(jiayq): the templatization of this function is mainly to help
// implementations that do not use first-class Tensor objects, such as the
// MKL operator. One can still call this function with dummy
// Tensor objects in order to obtain the sizes.
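// Illustrative example (assumed values, legacy_pad NOTSET): for an NCHW input
// of shape {N, C, H, W} with a 3x3 kernel, stride 1, dilation 1 and a pad of
// 1 on every side, this returns {N, output_channel, H, W}.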
std::vector<int64_t> GetOutputSize(const Tensor& input, int output_channel) {
CAFFE_ENFORCE_GE(input.dim(), 2);
const int inner_size = input.size_from_dim(1);
CAFFE_ENFORCE_GT(inner_size, 0);
std::vector<int64_t> output_dims;
InferOutputSize64(
input.sizes(),
output_channel,
order_,
global_pooling_,
legacy_pad_,
dilation_,
stride_,
&kernel_,
&pads_,
&output_dims);
return output_dims;
}
void SetOutputSize(const Tensor& input, Tensor* output, int output_channel) {
const int inner_size = input.size_from_dim(1);
CAFFE_ENFORCE_GT(inner_size, 0);
std::vector<int> output_dims;
InferOutputSize(
input.sizes(),
output_channel,
order_,
global_pooling_,
legacy_pad_,
dilation_,
stride_,
&kernel_,
&pads_,
&output_dims);
output->Resize(output_dims);
}
// Helper function that is also called from OperatorSchema. May modify the
// kernel and pads parameters and writes the inferred shape to output_dims.
static void InferOutputSize(
const at::IntArrayRef& input_dims,
const int output_channel,
const StorageOrder order,
const bool global_pooling,
const LegacyPadding legacy_pad,
const std::vector<int>& dilation,
const std::vector<int>& stride,
std::vector<int>* kernel,
std::vector<int>* pads,
std::vector<int>* output_dims) {
CAFFE_ENFORCE_NE(order, StorageOrder::UNKNOWN);
const int ndim = input_dims.size() - 2;
output_dims->resize(ndim + 2);
output_dims->front() = input_dims.front();
if (order == StorageOrder::NCHW) {
output_dims->at(1) = output_channel;
} else {
output_dims->back() = output_channel;
}
const int offset = order == StorageOrder::NCHW ? 2 : 1;
if (global_pooling) {
std::copy_n(input_dims.cbegin() + offset, ndim, kernel->begin());
std::fill_n(output_dims->begin() + offset, ndim, 1LL);
} else {
for (int i = 0; i < ndim; ++i) {
ComputeSizeAndPad(
input_dims[i + offset],
stride[i],
kernel->at(i),
dilation[i],
legacy_pad,
&pads->at(i),
&pads->at(i + ndim),
&output_dims->at(i + offset));
}
}
}
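// For reference (illustrative, assuming legacy_pad == NOTSET and explicit
// pads): each spatial output dimension is computed roughly as
//   out = (in + pad_head + pad_tail - (dilation * (kernel - 1) + 1)) / stride + 1
// with floor division.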
static void InferOutputSize64(
const at::IntArrayRef& input_dims,
const int output_channel,
const StorageOrder order,
const bool global_pooling,
const LegacyPadding legacy_pad,
const std::vector<int>& dilation,
const std::vector<int>& stride,
std::vector<int>* kernel,
std::vector<int>* pads,
std::vector<int64_t>* output_dims) {
CAFFE_ENFORCE_NE(order, StorageOrder::UNKNOWN);
const int ndim = input_dims.size() - 2;
output_dims->resize(ndim + 2);
output_dims->front() = input_dims.front();
if (order == StorageOrder::NCHW) {
output_dims->at(1) = output_channel;
} else {
output_dims->back() = output_channel;
}
const int offset = order == StorageOrder::NCHW ? 2 : 1;
if (global_pooling) {
std::copy_n(input_dims.cbegin() + offset, ndim, kernel->begin());
std::fill_n(output_dims->begin() + offset, ndim, 1LL);
} else {
for (int i = 0; i < ndim; ++i) {
ComputeSizeAndPad64(
input_dims[i + offset],
stride[i],
kernel->at(i),
dilation[i],
legacy_pad,
&pads->at(i),
&pads->at(i + ndim),
&output_dims->at(i + offset));
}
}
}
// ComputePads could be used in backward functions to figure out the padding
// values for the given input.
void ComputePads(const vector<int>& dims) {
if (global_pooling_) {
kernel_ = dims;
} else if (legacy_pad_ != LegacyPadding::NOTSET) {
int output_unused;
for (int dim = 0; dim < dims.size(); ++dim) {
ComputeSizeAndPad(
dims[dim],
stride_[dim],
kernel_[dim],
dilation_[dim],
legacy_pad_,
&pads_[dim],
&pads_[dims.size() + dim],
&output_unused);
}
}
}