// Repository URL to install this package:
// Version:
// 1.0.0-3 ▾
// |
#ifndef CAFFE_VISION_LAYERS_HPP_
#define CAFFE_VISION_LAYERS_HPP_
#include <string>
#include <utility>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/common_layers.hpp"
#include "caffe/data_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/loss_layers.hpp"
#include "caffe/neuron_layers.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/modified_permutohedral.hpp"
#include <boost/shared_array.hpp>
namespace caffe {
/**
 * @brief Abstract base class shared by ConvolutionLayer and
 *        DeconvolutionLayer.
 *
 * Holds the kernel/stride/pad geometry and declares the gemm and bias helper
 * routines. Subclasses must provide reverse_dimensions() and
 * compute_output_shape().
 */
template <typename Dtype>
class BaseConvolutionLayer : public Layer<Dtype> {
 public:
  explicit BaseConvolutionLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);

  // Blob-count constraints reported by this layer.
  virtual int MinBottomBlobs() const { return 1; }
  virtual int MinTopBlobs() const { return 1; }
  virtual bool EqualNumBottomTopBlobs() const { return true; }

 protected:
  // Helpers that hide the column buffer and the gemm argument lists.
  // forward_cpu_gemm takes a trailing skip_im2col flag so that the im2col
  // step can be skipped when weight_cpu_gemm was just called on the same
  // input.
  void forward_cpu_gemm(const Dtype* input, const Dtype* weights,
                        Dtype* output, bool skip_im2col = false);
  void forward_cpu_bias(Dtype* output, const Dtype* bias);
  void backward_cpu_gemm(const Dtype* input, const Dtype* weights,
                         Dtype* output);
  void weight_cpu_gemm(const Dtype* input, const Dtype* output,
                       Dtype* weights);
  void backward_cpu_bias(Dtype* bias, const Dtype* input);

#ifndef CPU_ONLY
  // GPU counterparts of the helpers above; compiled out under CPU_ONLY.
  void forward_gpu_gemm(const Dtype* col_input, const Dtype* weights,
                        Dtype* output, bool skip_im2col = false);
  void forward_gpu_bias(Dtype* output, const Dtype* bias);
  void backward_gpu_gemm(const Dtype* input, const Dtype* weights,
                         Dtype* col_output);
  void weight_gpu_gemm(const Dtype* col_input, const Dtype* output,
                       Dtype* weights);
  void backward_gpu_bias(Dtype* bias, const Dtype* input);
#endif

  // Must return true iff the subclass implements deconvolution, so that the
  // conv helpers know which dimensions are which.
  virtual bool reverse_dimensions() = 0;
  // Must compute height_out_ and width_out_ from the other parameters.
  virtual void compute_output_shape() = 0;

  int kernel_h_;
  int kernel_w_;
  int stride_h_;
  int stride_w_;
  int num_;
  int channels_;
  int pad_h_;
  int pad_w_;
  int height_;
  int width_;
  int group_;
  int num_output_;
  int height_out_;
  int width_out_;
  bool bias_term_;
  bool is_1x1_;

 private:
  // Thin wrappers around im2col/col2im that supply the (long) geometry
  // argument lists from this layer's members.
  void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) {
    im2col_cpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
               kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
               col_buff);
  }
  void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
    col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
               kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
               data);
  }
#ifndef CPU_ONLY
  void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
    im2col_gpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
               kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
               col_buff);
  }
  void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
    col2im_gpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
               kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
               data);
  }
#endif

  int conv_out_channels_;
  int conv_in_channels_;
  int conv_out_spatial_dim_;
  int conv_in_height_;
  int conv_in_width_;
  int kernel_dim_;
  int weight_offset_;
  int col_offset_;
  int output_offset_;

  // Memory-reduction change: the column buffer used to be a per-instance
  // member (`Blob<Dtype> col_buffer_;`). It is now declared static, i.e. one
  // buffer per Dtype instantiation rather than one per layer object. Being a
  // static data member, it needs an out-of-class definition elsewhere.
  static Blob<Dtype> col_buffer_;
  Blob<Dtype> bias_multiplier_;
};
/**
* @brief Convolves the input image with a bank of learned filters,
* and (optionally) adds biases.
*
* Caffe convolves by reduction to matrix multiplication. This achieves
* high-throughput and generality of input and filter dimensions but comes at
* the cost of memory for matrices. This makes use of efficiency in BLAS.
*
* The input is "im2col" transformed to a channel K' x H x W data matrix
* for multiplication with the N x K' x H x W filter matrix to yield a
* N' x H x W output matrix that is then "col2im" restored. K' is the
* input channel * kernel height * kernel width dimension of the unrolled
* inputs so that the im2col matrix has a column for each input region to
* be filtered. col2im restores the output spatial structure by rolling up
* the output channel N' columns of the output matrix.
*/
template <typename Dtype>
class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
 public:
  /**
   * @param param provides ConvolutionParameter convolution_param,
   *    with ConvolutionLayer options:
   *  - num_output. The number of filters.
   *  - kernel_size / kernel_h / kernel_w. The filter dimensions: use
   *    kernel_size for square filters, or kernel_h and kernel_w for
   *    rectangular filters.
   *  - stride / stride_h / stride_w (\b optional, default 1). The filter
   *    stride: use stride for equal dimensions, or stride_h and stride_w for
   *    different strides. By default the convolution is dense with stride 1.
   *  - pad / pad_h / pad_w (\b optional, default 0). The zero-padding for
   *    convolution: use pad for equal dimensions, or pad_h and pad_w for
   *    different padding. Input padding is computed implicitly instead of
   *    actually padding.
   *  - group (\b optional, default 1). The number of filter groups. Group
   *    convolution reduces parameterization by selectively connecting input
   *    and output channels. The input and output channel dimensions must be
   *    divisible by the number of groups. For group @f$ \geq 1 @f$, the
   *    convolutional filters' input and output channels are separated such
   *    that each group takes 1 / group of the input channels and makes
   *    1 / group of the output channels. Concretely, 4 input channels,
   *    8 output channels, and 2 groups separate input channels 1-2 and
   *    output channels 1-4 into the first group, and input channels 3-4 and
   *    output channels 5-8 into the second group.
   *  - bias_term (\b optional, default true). Whether to have a bias.
   *  - engine: convolution has CAFFE (matrix multiplication) and CUDNN
   *    (library kernels + stream parallelism) engines.
   */
  explicit ConvolutionLayer(const LayerParameter& param)
      : BaseConvolutionLayer<Dtype>(param) {}

  virtual LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_CONVOLUTION;
  }

  // Coordinate map for this layer: the inverse of the FilterMap built from
  // the kernel, stride and pad members.
  virtual DiagonalAffineMap<Dtype> coord_map() {
    return FilterMap<Dtype>(this->kernel_h_, this->kernel_w_,
                            this->stride_h_, this->stride_w_,
                            this->pad_h_, this->pad_w_).inv();
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // Plain convolution: dimensions are not reversed.
  virtual bool reverse_dimensions() { return false; }
  virtual void compute_output_shape();
};
/**
 * @brief Deconvolution counterpart of ConvolutionLayer.
 *
 * Built on the same BaseConvolutionLayer, but reverse_dimensions() returns
 * true and coord_map() returns the FilterMap itself rather than its inverse.
 */
template <typename Dtype>
class DeconvolutionLayer : public BaseConvolutionLayer<Dtype> {
 public:
  explicit DeconvolutionLayer(const LayerParameter& param)
      : BaseConvolutionLayer<Dtype>(param) {}

  virtual LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_DECONVOLUTION;
  }

  // Coordinate map for this layer: the FilterMap built from the kernel,
  // stride and pad members, not inverted.
  virtual DiagonalAffineMap<Dtype> coord_map() {
    return FilterMap<Dtype>(this->kernel_h_, this->kernel_w_,
                            this->stride_h_, this->stride_w_,
                            this->pad_h_, this->pad_w_);
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // Deconvolution: tells the base-class helpers to treat dimensions as
  // reversed.
  virtual bool reverse_dimensions() { return true; }
  virtual void compute_output_shape();
};
#ifdef USE_CUDNN
/**
* @brief cuDNN implementation of ConvolutionLayer.
* Fallback to ConvolutionLayer for CPU mode.
*
* cuDNN accelerates convolution through forward kernels for filtering and bias
* plus backward kernels for the gradient w.r.t. the filters, biases, and
* inputs. Caffe + cuDNN further speeds up the computation through forward
* parallelism across groups and backward parallelism across gradients.
*
* The CUDNN engine does not have memory overhead for matrix buffers. For many
* input and filter regimes the CUDNN engine is faster than the CAFFE engine,
* but for fully-convolutional models and large inputs the CAFFE engine can be
* faster as long as it fits in memory.
*/
template <typename Dtype>
class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
 public:
  explicit CuDNNConvolutionLayer(const LayerParameter& param)
      : ConvolutionLayer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);
  virtual ~CuDNNConvolutionLayer();

 protected:
  // Only the GPU passes are redeclared here; the CPU passes are inherited
  // from ConvolutionLayer.
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // cuDNN handles and CUDA streams, held through raw pointers.
  cudnnHandle_t* handle_;
  cudaStream_t* stream_;

  // cuDNN descriptors for the bottom/top tensors, bias, filter and
  // convolution.
  vector<cudnnTensorDescriptor_t> bottom_descs_;
  vector<cudnnTensorDescriptor_t> top_descs_;
  cudnnTensorDescriptor_t bias_desc_;
  cudnnFilterDescriptor_t filter_desc_;
  vector<cudnnConvolutionDescriptor_t> conv_descs_;

  // Offsets used by this class. weight_offset_ is a distinct member from the
  // private one of the same name in BaseConvolutionLayer.
  int bottom_offset_;
  int top_offset_;
  int weight_offset_;
  int bias_offset_;

  // Workspace buffer and its size in bytes.
  size_t workspaceSizeInBytes;
  void* workspace;
};
#endif
/*!
* \brief A helper class for {@link MultiStageMeanfieldLayer} class, which is the Caffe layer that implements the
* CRF-RNN described in the paper: Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* This class itself is not a proper Caffe layer although it behaves like one to some degree.
*
* \authors Sadeep Jayasumana, Bernardino Romera-Paredes, Shuai Zheng, Zhizhong Su.
* \version 1.0
* \date 2015
* \copyright Torr Vision Group, University of Oxford.
* \details If you use this code, please consider citing the paper:
* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du,
* Chang Huang, Philip H. S. Torr. Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* For more information about CRF-RNN, please visit the project website http://crfasrnn.torr.vision.
*/
template <typename Dtype>
class MeanfieldIteration {
 public:
  /**
   * Virtual destructor: this class exposes virtual methods, so it may be
   * subclassed and destroyed through a base-class pointer. Without a virtual
   * destructor that would be undefined behaviour.
   */
  virtual ~MeanfieldIteration() {}

  /**
   * Must be invoked only once after the construction of the layer.
   */
  void OneTimeSetUp(
      Blob<Dtype>* const unary_terms,
      Blob<Dtype>* const softmax_input,
      Blob<Dtype>* const output_blob,
      const shared_ptr<ModifiedPermutohedral> spatial_lattice,
      const Blob<Dtype>* const spatial_norm);

  /**
   * Must be invoked before invoking {@link Forward_cpu()}
   */
  virtual void PrePass(
      const vector<shared_ptr<Blob<Dtype> > >& parameters_to_copy_from,
      const vector<shared_ptr<ModifiedPermutohedral> >* const bilateral_lattices,
      const Blob<Dtype>* const bilateral_norms);

  /**
   * Forward pass - to be called during inference.
   */
  virtual void Forward_cpu();

  /**
   * Backward pass - to be called during training.
   */
  virtual void Backward_cpu();

  // A quick hack. This should be properly encapsulated.
  // Returns a mutable reference to the internal parameter blobs.
  vector<shared_ptr<Blob<Dtype> > >& blobs() {
    return blobs_;
  }

 protected:
  vector<shared_ptr<Blob<Dtype> > > blobs_;

  // Dimension bookkeeping.
  int count_;
  int num_;
  int channels_;
  int height_;
  int width_;
  int num_pixels_;

  // Intermediate blobs owned by this iteration.
  Blob<Dtype> spatial_out_blob_;
  Blob<Dtype> bilateral_out_blob_;
  Blob<Dtype> pairwise_;
  Blob<Dtype> softmax_input_;
  Blob<Dtype> prob_;
  Blob<Dtype> message_passing_;

  // Bottom/top vectors and the internal softmax and eltwise-sum layers.
  vector<Blob<Dtype>*> softmax_top_vec_;
  vector<Blob<Dtype>*> softmax_bottom_vec_;
  vector<Blob<Dtype>*> sum_top_vec_;
  vector<Blob<Dtype>*> sum_bottom_vec_;
  shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
  shared_ptr<EltwiseLayer<Dtype> > sum_layer_;

  // Lattices and normalisation blobs. The raw pointers are non-owning as far
  // as this header shows (they are received as const pointers in
  // OneTimeSetUp/PrePass) -- TODO confirm lifetime against the caller.
  shared_ptr<ModifiedPermutohedral> spatial_lattice_;
  const vector<shared_ptr<ModifiedPermutohedral> >* bilateral_lattices_;
  const Blob<Dtype>* spatial_norm_;
  const Blob<Dtype>* bilateral_norms_;
};
/*!
* \brief The Caffe layer that implements the CRF-RNN described in the paper:
* Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* \authors Sadeep Jayasumana, Bernardino Romera-Paredes, Shuai Zheng, Zhizhong Su.
* \version 1.0
* \date 2015
* \copyright Torr Vision Group, University of Oxford.
* \details If you use this code, please consider citing the paper:
* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du,
* Chang Huang, Philip H. S. Torr. Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* For more information about CRF-RNN, please visit the project website http://crfasrnn.torr.vision.
*/
template <typename Dtype>
class MultiStageMeanfieldLayer : public Layer<Dtype> {
 public:
  explicit MultiStageMeanfieldLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);

  virtual LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_MULTI_STAGE_MEANFIELD;
  }

  // Exactly three bottom blobs in, exactly one top blob out.
  virtual int ExactNumBottomBlobs() const { return 3; }
  virtual int ExactNumTopBlobs() const { return 1; }

 protected:
  // Only CPU forward/backward passes are declared for this layer.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // Kernel builders; both write into a caller-supplied float buffer.
  virtual void compute_spatial_kernel(float* const output_kernel);
  virtual void compute_bilateral_kernel(const Blob<Dtype>* const rgb_blob,
                                        const int n,
                                        float* const output_kernel);

  // Dimension bookkeeping.
  int count_;
  int num_;
  int channels_;
  int height_;
  int width_;
  int num_pixels_;

  // Kernel parameters and the number of mean-field iterations.
  Dtype theta_alpha_;
  Dtype theta_beta_;
  Dtype theta_gamma_;
  int num_iterations_;

  boost::shared_array<Dtype> norm_feed_;
  Blob<Dtype> spatial_norm_;
  Blob<Dtype> bilateral_norms_;

  // Internal split layer with its bottom/top vectors and output blobs.
  vector<Blob<Dtype>*> split_layer_bottom_vec_;
  vector<Blob<Dtype>*> split_layer_top_vec_;
  vector<shared_ptr<Blob<Dtype> > > split_layer_out_blobs_;
  vector<shared_ptr<Blob<Dtype> > > iteration_output_blobs_;

  // One MeanfieldIteration helper per stage.
  vector<shared_ptr<MeanfieldIteration<Dtype> > > meanfield_iterations_;
  shared_ptr<SplitLayer<Dtype> > split_layer_;

  // Permutohedral lattices and the buffer used for the bilateral kernel.
  shared_ptr<ModifiedPermutohedral> spatial_lattice_;
  boost::shared_array<float> bilateral_kernel_buffer_;
  vector<shared_ptr<ModifiedPermutohedral> > bilateral_lattices_;
};
/**
* @brief A helper for image operations that rearranges image regions into
* column vectors. Used by ConvolutionLayer to perform convolution
* by matrix multiplication.
*
* TODO(dox): thorough documentation for Forward, Backward, and proto params.
*/
template <typename Dtype>
class Im2colLayer : public Layer<Dtype> {
 public:
  explicit Im2colLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);

  virtual LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_IM2COL;
  }

  // Exactly one bottom blob in, exactly one top blob out.
  virtual int ExactNumBottomBlobs() const { return 1; }
  virtual int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // Kernel, stride, input and padding geometry.
  int kernel_h_;
  int kernel_w_;
  int stride_h_;
  int stride_w_;
  int channels_;
  int height_;
  int width_;
  int pad_h_;
  int pad_w_;
};
// Forward declare PoolingLayer and SplitLayer for use in LRNLayer, which only
// holds them through shared_ptr members. PoolingLayer is defined further down
// in this file. SplitLayer is not defined in this file; it is presumably
// provided by one of the headers included above -- verify if includes change.
template <typename Dtype> class PoolingLayer;
template <typename Dtype> class SplitLayer;
/**
* @brief Normalize the input in a local region across or within feature maps.
*
* TODO(dox): thorough documentation for Forward, Backward, and proto params.
*/
template <typename Dtype>
class LRNLayer : public Layer<Dtype> {
 public:
  explicit LRNLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);

  virtual LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_LRN;
  }

  // Exactly one bottom blob in, exactly one top blob out.
  virtual int ExactNumBottomBlobs() const { return 1; }
  virtual int ExactNumTopBlobs() const { return 1; }

  // Coordinate map for this layer: the 2-dimensional identity map.
  virtual DiagonalAffineMap<Dtype> coord_map() {
    return DiagonalAffineMap<Dtype>::identity(2);
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // Forward variants: cross-channel has separate CPU and GPU entry points,
  // within-channel has a single one.
  virtual void CrossChannelForward_cpu(const vector<Blob<Dtype>*>& bottom,
                                       const vector<Blob<Dtype>*>& top);
  virtual void CrossChannelForward_gpu(const vector<Blob<Dtype>*>& bottom,
                                       const vector<Blob<Dtype>*>& top);
  virtual void WithinChannelForward(const vector<Blob<Dtype>*>& bottom,
                                    const vector<Blob<Dtype>*>& top);

  // Backward variants, mirroring the forward ones.
  virtual void CrossChannelBackward_cpu(const vector<Blob<Dtype>*>& top,
                                        const vector<bool>& propagate_down,
                                        const vector<Blob<Dtype>*>& bottom);
  virtual void CrossChannelBackward_gpu(const vector<Blob<Dtype>*>& top,
                                        const vector<bool>& propagate_down,
                                        const vector<Blob<Dtype>*>& bottom);
  virtual void WithinChannelBackward(const vector<Blob<Dtype>*>& top,
                                     const vector<bool>& propagate_down,
                                     const vector<Blob<Dtype>*>& bottom);

  // Normalization parameters.
  int size_;
  int pre_pad_;
  Dtype alpha_;
  Dtype beta_;
  Dtype k_;

  // Input dimensions.
  int num_;
  int channels_;
  int height_;
  int width_;

  // Fields used for normalization ACROSS_CHANNELS.
  // scale_ stores the intermediate summing results.
  Blob<Dtype> scale_;

  // Fields used for normalization WITHIN_CHANNEL: a set of internal layers
  // (split, square, pool, power, product) with their blobs and
  // bottom/top vectors.
  shared_ptr<SplitLayer<Dtype> > split_layer_;
  vector<Blob<Dtype>*> split_top_vec_;

  shared_ptr<PowerLayer<Dtype> > square_layer_;
  Blob<Dtype> square_input_;
  Blob<Dtype> square_output_;
  vector<Blob<Dtype>*> square_bottom_vec_;
  vector<Blob<Dtype>*> square_top_vec_;

  shared_ptr<PoolingLayer<Dtype> > pool_layer_;
  Blob<Dtype> pool_output_;
  vector<Blob<Dtype>*> pool_top_vec_;

  shared_ptr<PowerLayer<Dtype> > power_layer_;
  Blob<Dtype> power_output_;
  vector<Blob<Dtype>*> power_top_vec_;

  shared_ptr<EltwiseLayer<Dtype> > product_layer_;
  Blob<Dtype> product_input_;
  vector<Blob<Dtype>*> product_bottom_vec_;
};
/**
* @brief Pools the input image by taking the max, average, etc. within regions.
*
* TODO(dox): thorough documentation for Forward, Backward, and proto params.
*/
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
 public:
  explicit PoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);

  virtual LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_POOLING;
  }

  virtual int ExactNumBottomBlobs() const { return 1; }
  virtual int MinTopBlobs() const { return 1; }

  // MAX POOL layers can output an extra top blob for the mask;
  // others can only output the pooled inputs.
  virtual int MaxTopBlobs() const {
    if (this->layer_param_.pooling_param().pool() ==
        PoolingParameter_PoolMethod_MAX) {
      return 2;
    }
    return 1;
  }

  // Coordinate map for this layer: the inverse of the FilterMap built from
  // the kernel, stride and pad members.
  virtual DiagonalAffineMap<Dtype> coord_map() {
    return FilterMap<Dtype>(kernel_h_, kernel_w_, stride_h_, stride_w_,
                            pad_h_, pad_w_).inv();
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // Kernel, stride, padding, input and pooled-output geometry.
  int kernel_h_;
  int kernel_w_;
  int stride_h_;
  int stride_w_;
  int pad_h_;
  int pad_w_;
  int channels_;
  int height_;
  int width_;
  int pooled_height_;
  int pooled_width_;
  bool global_pooling_;

  // Index blobs kept by the layer.
  Blob<Dtype> rand_idx_;
  Blob<int> max_idx_;
};
#ifdef USE_CUDNN
/**
* @brief cuDNN implementation of PoolingLayer.
* Fallback to PoolingLayer for CPU mode.
*/
template <typename Dtype>
class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
 public:
  explicit CuDNNPoolingLayer(const LayerParameter& param)
      : PoolingLayer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);
  virtual ~CuDNNPoolingLayer();

  // Currently, cuDNN does not support the extra top blob, so exactly one top
  // blob is required. MinTopBlobs() returns -1 here, replacing the value 1
  // from PoolingLayer.
  // NOTE(review): -1 presumably means "no minimum" to the base Layer checks
  // -- confirm against Layer.
  virtual int MinTopBlobs() const { return -1; }
  virtual int ExactNumTopBlobs() const { return 1; }

 protected:
  // Only the GPU passes are redeclared here; the CPU passes are inherited
  // from PoolingLayer.
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // cuDNN handle, tensor/pooling descriptors and pooling mode.
  cudnnHandle_t handle_;
  cudnnTensorDescriptor_t bottom_desc_;
  cudnnTensorDescriptor_t top_desc_;
  cudnnPoolingDescriptor_t pooling_desc_;
  cudnnPoolingMode_t mode_;
};
#endif
/**
 * @brief Crop layer taking exactly two bottom blobs and producing one top
 *        blob; its coordinate map is built from crop_h_ and crop_w_.
 */
template <typename Dtype>
class CropLayer : public Layer<Dtype> {
 public:
  explicit CropLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);

  virtual LayerParameter_LayerType type() const {
    return LayerParameter_LayerType_CROP;
  }

  virtual int ExactNumBottomBlobs() const { return 2; }
  virtual int ExactNumTopBlobs() const { return 1; }

  // Builds a DiagonalAffineMap from two (1, -offset) coefficient pairs:
  // first the height offset, then the width offset.
  virtual DiagonalAffineMap<Dtype> coord_map() {
    vector<pair<Dtype, Dtype> > coefs;
    coefs.push_back(make_pair(1, -crop_h_));
    coefs.push_back(make_pair(1, -crop_w_));
    return DiagonalAffineMap<Dtype>(coefs);
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);

  // Crop offsets along height and width.
  int crop_h_;
  int crop_w_;
};
} // namespace caffe
#endif // CAFFE_VISION_LAYERS_HPP_