neilisaac / torch (python), version 1.8.0

include/caffe2/operators/conv_pool_op_base.h

#ifndef CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
#define CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_

#include <algorithm>
#include <vector>

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/proto/caffe2_legacy.pb.h"
#include "caffe2/utils/math.h"

// This constant is here just to allow us to experiment with padding values:
// when we have an odd number of pads, it determines which side gets the one
// additional pad value, the head side or the tail side. Setting it to false
// enables the TensorFlow behavior, and setting it to true enables a behavior
// more consistent with Caffe and CuDNN.
// This only affects the case when legacy_pad is set to VALID or SAME. The
// behavior inherits from the early designs of Google's CNN implementation,
// where padding values are implicitly calculated instead of explicitly
// specified. This is still the case with TensorFlow. Many frameworks have
// since followed a slightly different approach of explicitly giving padding
// values, in which case the value of this constant does not matter.
const bool CAFFE2_PAD_HEAD_MORE = false;
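
// For example (an illustrative sketch, assuming the usual SAME-padding
// arithmetic; this mirrors the intent of the comment above, not the exact
// implementation of ComputeSizeAndPad below): a SAME-padded 1-D convolution
// with in = 5, kernel = 2, stride = 1 needs out = 5, so the total pad is
// (out - 1) * stride + kernel - in = 1, and the odd pixel is assigned as
//
//   int total_pad = std::max((out - 1) * stride + kernel - in, 0);  // == 1
//   int pad_head = CAFFE2_PAD_HEAD_MORE ? (total_pad + 1) / 2       // head = 1
//                                       : total_pad / 2;            // head = 0
//   int pad_tail = total_pad - pad_head;                            // the rest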

namespace caffe2 {

template <class Context>
class ConvPoolOpBase : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  explicit ConvPoolOpBase(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        legacy_pad_(
            static_cast<LegacyPadding>(this->template GetSingleArgument<int>(
                "legacy_pad",
                LegacyPadding::NOTSET))),
        global_pooling_(
            this->template GetSingleArgument<int>("global_pooling", 0)),
        kernel_(this->template GetRepeatedArgument<int>("kernels")),
        dilation_(this->template GetRepeatedArgument<int>("dilations")),
        stride_(this->template GetRepeatedArgument<int>("strides")),
        pads_(this->template GetRepeatedArgument<int>("pads")),
        float16_compute_(
            this->template GetSingleArgument<bool>("float16_compute", false)),
        group_(this->template GetSingleArgument<int>("group", 1)),
        order_(StringToStorageOrder(
            this->template GetSingleArgument<string>("order", "NCHW"))),
        shared_buffer_(
            this->template GetSingleArgument<int>("shared_buffer", 0)),
        ws_(ws) {
    // The padding should either use a legacy padding strategy
    // (VALID or SAME) or explicit, non-negative values.
    if (legacy_pad_ == LegacyPadding::VALID ||
        legacy_pad_ == LegacyPadding::SAME) {
      CAFFE_ENFORCE(
          !OperatorBase::HasArgument("pads"),
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
    }

    // Get old arguments values.
    if (OperatorBase::HasArgument("kernel")) {
      kernel_.resize(2, this->template GetSingleArgument<int>("kernel", 0));
    } else if (
        OperatorBase::HasArgument("kernel_h") &&
        OperatorBase::HasArgument("kernel_w")) {
      kernel_.push_back(this->template GetSingleArgument<int>("kernel_h", 0));
      kernel_.push_back(this->template GetSingleArgument<int>("kernel_w", 0));
    }

    if (OperatorBase::HasArgument("stride")) {
      stride_.resize(2, this->template GetSingleArgument<int>("stride", 0));
    } else if (
        OperatorBase::HasArgument("stride_h") &&
        OperatorBase::HasArgument("stride_w")) {
      stride_.push_back(this->template GetSingleArgument<int>("stride_h", 0));
      stride_.push_back(this->template GetSingleArgument<int>("stride_w", 0));
    }

    if (OperatorBase::HasArgument("dilation")) {
      dilation_.resize(2, this->template GetSingleArgument<int>("dilation", 0));
    } else if (
        OperatorBase::HasArgument("dilation_h") &&
        OperatorBase::HasArgument("dilation_w")) {
      dilation_.push_back(
          this->template GetSingleArgument<int>("dilation_h", 0));
      dilation_.push_back(
          this->template GetSingleArgument<int>("dilation_w", 0));
    }

    if (OperatorBase::HasArgument("pad")) {
      CAFFE_ENFORCE(
          legacy_pad_ != LegacyPadding::VALID &&
              legacy_pad_ != LegacyPadding::SAME,
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
      pads_.resize(4, this->template GetSingleArgument<int>("pad", 0));
    } else if (
        OperatorBase::HasArgument("pad_t") &&
        OperatorBase::HasArgument("pad_l") &&
        OperatorBase::HasArgument("pad_b") &&
        OperatorBase::HasArgument("pad_r")) {
      CAFFE_ENFORCE(
          legacy_pad_ != LegacyPadding::VALID &&
              legacy_pad_ != LegacyPadding::SAME,
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
      pads_.push_back(this->template GetSingleArgument<int>("pad_t", 0));
      pads_.push_back(this->template GetSingleArgument<int>("pad_l", 0));
      pads_.push_back(this->template GetSingleArgument<int>("pad_b", 0));
      pads_.push_back(this->template GetSingleArgument<int>("pad_r", 0));
    }

    // Fill default values.
    if (kernel_.size() == 0) {
      kernel_.assign({0, 0});
    }

    if (stride_.size() == 0) {
      stride_.resize(kernel_.size(), 1);
    }

    if (pads_.size() == 0) {
      pads_.resize(kernel_.size() * 2, 0);
    }

    if (dilation_.size() == 0) {
      dilation_.resize(kernel_.size(), 1);
    }

    CAFFE_ENFORCE_EQ(stride_.size(), kernel_.size());
    CAFFE_ENFORCE_EQ(dilation_.size(), kernel_.size());

    if (legacy_pad_ != LegacyPadding::VALID &&
        legacy_pad_ != LegacyPadding::SAME) {
      CAFFE_ENFORCE_EQ(pads_.size(), 2 * kernel_.size());
    }

    if (global_pooling_) {
      for (size_t dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE(
            pads_[2 * dim] == 0 && pads_[2 * dim + 1] == 0 &&
                dilation_[dim] == 1 && stride_[dim] == 1,
            "If global_pooling is set pad, dilation and stride shouldn't be set.");
      }
    }

    // Check the kernel only if we are doing conv or pooling. The reason is
    // that a few other ops, like PadImage, also use this base class. We really
    // need to clean this up.
    if (operator_def.name().find("Conv") == 0 ||
        operator_def.name().find("Pool") != std::string::npos) {
      for (size_t dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE_GE(pads_[dim], 0);
        CAFFE_ENFORCE_GE(pads_[kernel_.size() + dim], 0);
        CAFFE_ENFORCE(
            kernel_[dim],
            "If you are doing convolution or pooling, you will need to set "
            "explicitly the kernel size.");
      }
    }

    for (size_t dim = 0; dim < kernel_.size(); ++dim) {
      CAFFE_ENFORCE_GE(kernel_[dim], 0);
      CAFFE_ENFORCE_GE(dilation_[dim], 0);
      CAFFE_ENFORCE_GE(stride_[dim], 0);
    }
  }
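
  // A worked example of the argument expansion above (hypothetical values):
  // an OperatorDef carrying only the legacy scalar arguments kernel = 3,
  // stride = 2 and pad = 1 leaves the members as
  //   kernel_   = {3, 3}
  //   stride_   = {2, 2}
  //   pads_     = {1, 1, 1, 1}   // {head_h, head_w, tail_h, tail_w}
  //   dilation_ = {1, 1}         // filled with the default of 1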

  // Returns the input image dimensions for the current storage order type.
  vector<int> GetDims(const Tensor& input) {
    vector<int> dims;
    switch (order_) {
      case StorageOrder::NCHW:
        dims.assign(input.sizes().begin() + 2, input.sizes().end());
        break;
      case StorageOrder::NHWC:
        dims.assign(input.sizes().begin() + 1, input.sizes().end() - 1);
        break;
      default:
        CAFFE_THROW("Unknown storage order : ", order_);
    }
    return dims;
  }

  // Returns the total spatial size of the input image (the product of its
  // image dimensions) for the current storage order type.
  int GetDimsSize(const Tensor& input) {
    int size = 0;
    switch (order_) {
      case StorageOrder::NCHW:
        size = std::accumulate(
            input.sizes().begin() + 2,
            input.sizes().end(),
            1,
            std::multiplies<int>());
        break;
      case StorageOrder::NHWC:
        size = std::accumulate(
            input.sizes().begin() + 1,
            input.sizes().end() - 1,
            1,
            std::multiplies<int>());
        break;
      default:
        CAFFE_THROW("Unknown storage order : ", order_);
    }
    return size;
  }
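
  // For example (hypothetical shapes): an NCHW input of shape {8, 3, 32, 32}
  // gives GetDims() == {32, 32} and GetDimsSize() == 32 * 32 == 1024, and an
  // NHWC input of shape {8, 32, 32, 3} gives the same results; the batch and
  // channel dimensions are skipped in both orders.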

  // Gets the output size. The output channel count is provided manually since
  // it may not be identical to the input channel count.
  // This function can be used in the forward functions to obtain the output
  // sizes.
  // Note(jiayq): the templatization of this function is mainly to help
  // implementations that do not use first-class Tensor objects, such as the
  // MKL operator. One can still call this function with dummy
  // Tensor objects in order to obtain the sizes.
  std::vector<int64_t> GetOutputSize(const Tensor& input, int output_channel) {
    CAFFE_ENFORCE_GE(input.dim(), 2);
    const int inner_size = input.size_from_dim(1);
    CAFFE_ENFORCE_GT(inner_size, 0);
    std::vector<int64_t> output_dims;
    InferOutputSize64(
        input.sizes(),
        output_channel,
        order_,
        global_pooling_,
        legacy_pad_,
        dilation_,
        stride_,
        &kernel_,
        &pads_,
        &output_dims);
    return output_dims;
  }

  void SetOutputSize(const Tensor& input, Tensor* output, int output_channel) {
    const int inner_size = input.size_from_dim(1);
    CAFFE_ENFORCE_GT(inner_size, 0);
    std::vector<int> output_dims;
    InferOutputSize(
        input.sizes(),
        output_channel,
        order_,
        global_pooling_,
        legacy_pad_,
        dilation_,
        stride_,
        &kernel_,
        &pads_,
        &output_dims);
    output->Resize(output_dims);
  }
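
  // A minimal usage sketch for a derived 2-D conv operator (illustrative only;
  // the names X, Y and filter are assumptions, not part of this header):
  //
  //   // inside RunOnDeviceWithOrderNCHW()
  //   const auto& X = Input(0);       // input,  {N, C, H, W}
  //   const auto& filter = Input(1);  // weight, {M, C / group_, kH, kW}
  //   auto* Y = Output(0);
  //   ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));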

  // Helper function that is also called from OperatorSchema. May modify the
  // kernel and pad parameters in place and writes the inferred shape into
  // output_dims.
  static void InferOutputSize(
      const at::IntArrayRef& input_dims,
      const int output_channel,
      const StorageOrder order,
      const bool global_pooling,
      const LegacyPadding legacy_pad,
      const std::vector<int>& dilation,
      const std::vector<int>& stride,
      std::vector<int>* kernel,
      std::vector<int>* pads,
      std::vector<int>* output_dims) {
    CAFFE_ENFORCE_NE(order, StorageOrder::UNKNOWN);
    const int ndim = input_dims.size() - 2;
    output_dims->resize(ndim + 2);
    output_dims->front() = input_dims.front();
    if (order == StorageOrder::NCHW) {
      output_dims->at(1) = output_channel;
    } else {
      output_dims->back() = output_channel;
    }
    const int offset = order == StorageOrder::NCHW ? 2 : 1;
    if (global_pooling) {
      std::copy_n(input_dims.cbegin() + offset, ndim, kernel->begin());
      std::fill_n(output_dims->begin() + offset, ndim, 1LL);
    } else {
      for (int i = 0; i < ndim; ++i) {
        ComputeSizeAndPad(
            input_dims[i + offset],
            stride[i],
            kernel->at(i),
            dilation[i],
            legacy_pad,
            &pads->at(i),
            &pads->at(i + ndim),
            &output_dims->at(i + offset));
      }
    }
  }
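
  // For explicit (NOTSET) padding, ComputeSizeAndPad is expected to follow the
  // standard convolution arithmetic (a sketch of the formula, not the
  // implementation itself):
  //
  //   effective_kernel = dilation * (kernel - 1) + 1;
  //   out = (in + pad_head + pad_tail - effective_kernel) / stride + 1;
  //
  // e.g. in = 32, kernel = 3, dilation = 1, stride = 2, pad_head = pad_tail = 1
  // gives out = (32 + 2 - 3) / 2 + 1 = 16 (integer division).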

  static void InferOutputSize64(
      const at::IntArrayRef& input_dims,
      const int output_channel,
      const StorageOrder order,
      const bool global_pooling,
      const LegacyPadding legacy_pad,
      const std::vector<int>& dilation,
      const std::vector<int>& stride,
      std::vector<int>* kernel,
      std::vector<int>* pads,
      std::vector<int64_t>* output_dims) {
    CAFFE_ENFORCE_NE(order, StorageOrder::UNKNOWN);
    const int ndim = input_dims.size() - 2;
    output_dims->resize(ndim + 2);
    output_dims->front() = input_dims.front();
    if (order == StorageOrder::NCHW) {
      output_dims->at(1) = output_channel;
    } else {
      output_dims->back() = output_channel;
    }
    const int offset = order == StorageOrder::NCHW ? 2 : 1;
    if (global_pooling) {
      std::copy_n(input_dims.cbegin() + offset, ndim, kernel->begin());
      std::fill_n(output_dims->begin() + offset, ndim, 1LL);
    } else {
      for (int i = 0; i < ndim; ++i) {
        ComputeSizeAndPad64(
            input_dims[i + offset],
            stride[i],
            kernel->at(i),
            dilation[i],
            legacy_pad,
            &pads->at(i),
            &pads->at(i + ndim),
            &output_dims->at(i + offset));
      }
    }
  }

  // ComputePads could be used in backward functions to figure out the padding
  // values for the given input.
  void ComputePads(const vector<int>& dims) {
    if (global_pooling_) {
      kernel_ = dims;
    } else if (legacy_pad_ != LegacyPadding::NOTSET) {
      int output_unused;
      for (int dim = 0; dim < dims.size(); ++dim) {
        ComputeSizeAndPad(
            dims[dim],