Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

neilisaac / torch   python

Repository URL to install this package:

Version: 1.8.0 

/ include / caffe2 / operators / generate_proposals_op_util_boxes.h

#ifndef CAFFE2_OPERATORS_UTILS_BOXES_H_
#define CAFFE2_OPERATORS_UTILS_BOXES_H_

#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"

// Bounding box utils for generate_proposals_op
// Reference: facebookresearch/Detectron/detectron/utils/boxes.py

namespace caffe2 {
namespace utils {

// Default upper bound applied to the log-space width/height deltas (dw, dh)
//     by the bbox_transform*() functions before they are exponentiated, i.e.
//     a box side can be scaled by at most exp(log(1000 / 16)) = 1000 / 16.
const float BBOX_XFORM_CLIP_DEFAULT = log(1000.0 / 16.0);
// Pi; used to convert angle deltas from radians to degrees.
const float PI = 3.14159265358979323846;

// Forward transform that maps proposal boxes to ground-truth boxes using
//     bounding-box regression deltas.
// boxes: pixel coordinates of the bounding boxes
//     size (M, 4), format [x1; y1; x2; y2], x2 >= x1, y2 >= y1
// deltas: bounding box translations and scales
//     size (M, 4), format [dx; dy; dw; dh]
//     dx, dy: scale-invariant translation of the center of the bounding box
//     dw, dh: log-space scaling of the width and height of the bounding box
// weights: weights [wx, wy, ww, wh] for the deltas; each delta is divided by
//     its weight. At least 4 entries are required.
// bbox_xform_clip: upper bound applied to the weighted dw and dh (log-space)
//     before exponentiation, which limits how much a box can grow
// legacy_plus_one: when true, widths/heights are computed as
//     (x2 - x1 + 1) / (y2 - y1 + 1) and 1 is subtracted from the predicted
//     x2 / y2; when false no +1/-1 offsets are applied
// return: pixel coordinates of the bounding boxes
//     size (M, 4), format [x1; y1; x2; y2]
//     If boxes has no rows, an empty (0, deltas.cols()) array is returned
//     without any further checks.
// see "Rich feature hierarchies for accurate object detection and semantic
//     segmentation" Appendix C for more details
// reference: detectron/lib/utils/boxes.py bbox_transform()
template <class Derived1, class Derived2>
EArrXXt<typename Derived1::Scalar> bbox_transform_upright(
    const Eigen::ArrayBase<Derived1>& boxes,
    const Eigen::ArrayBase<Derived2>& deltas,
    const std::vector<typename Derived2::Scalar>& weights =
        std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
    const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT,
    const bool legacy_plus_one = false) {
  using T = typename Derived1::Scalar;
  using EArrXX = EArrXXt<T>;
  using EArrX = EArrXt<T>;

  if (boxes.rows() == 0) {
    // The row count is an index, not a box scalar, so pass a plain 0 instead
    // of converting through T.
    return EArrXX::Zero(0, deltas.cols());
  }

  CAFFE_ENFORCE_EQ(boxes.rows(), deltas.rows());
  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
  CAFFE_ENFORCE_EQ(deltas.cols(), 4);
  // weights[0..3] are read below; fail loudly instead of reading out of bounds.
  CAFFE_ENFORCE(
      weights.size() >= 4,
      "bbox_transform_upright requires at least 4 weights, got ",
      weights.size());

  EArrX widths = boxes.col(2) - boxes.col(0) + T(int(legacy_plus_one));
  EArrX heights = boxes.col(3) - boxes.col(1) + T(int(legacy_plus_one));
  auto ctr_x = boxes.col(0) + T(0.5) * widths;
  auto ctr_y = boxes.col(1) + T(0.5) * heights;

  auto dx = deltas.col(0).template cast<T>() / weights[0];
  auto dy = deltas.col(1).template cast<T>() / weights[1];
  // Clip the log-space scale deltas from above so exp() cannot blow up.
  auto dw =
      (deltas.col(2).template cast<T>() / weights[2]).cwiseMin(bbox_xform_clip);
  auto dh =
      (deltas.col(3).template cast<T>() / weights[3]).cwiseMin(bbox_xform_clip);

  EArrX pred_ctr_x = dx * widths + ctr_x;
  EArrX pred_ctr_y = dy * heights + ctr_y;
  EArrX pred_w = dw.exp() * widths;
  EArrX pred_h = dh.exp() * heights;

  EArrXX pred_boxes = EArrXX::Zero(deltas.rows(), deltas.cols());
  // x1
  pred_boxes.col(0) = pred_ctr_x - T(0.5) * pred_w;
  // y1
  pred_boxes.col(1) = pred_ctr_y - T(0.5) * pred_h;
  // x2
  pred_boxes.col(2) = pred_ctr_x + T(0.5) * pred_w - T(int(legacy_plus_one));
  // y2
  pred_boxes.col(3) = pred_ctr_y + T(0.5) * pred_h - T(int(legacy_plus_one));

  return pred_boxes;
}

// Like bbox_transform_upright, but works on rotated boxes.
// boxes: pixel coordinates of the bounding boxes
//     size (M, 5), format [ctr_x; ctr_y; width; height; angle (in degrees)]
// deltas: bounding box translations and scales
//     size (M, 5), format [dx; dy; dw; dh; da]
//     dx, dy: scale-invariant translation of the center of the bounding box
//     dw, dh: log-space scaling of the width and height of the bounding box
//     da: delta for angle in radians
// weights: weights [wx, wy, ww, wh] for dx, dy, dw, dh; each of those deltas
//     is divided by its weight (da is not weighted). At least 4 entries are
//     required.
// bbox_xform_clip: upper bound applied to the weighted dw and dh (log-space)
//     before exponentiation
// angle_bound_on: when true, predicted angles below angle_bound_lo are
//     shifted up by one period and angles above angle_bound_hi are shifted
//     down by one period, where period = angle_bound_hi - angle_bound_lo.
//     Only a single shift is applied per box.
// angle_bound_lo, angle_bound_hi: angle bounds in degrees; their difference
//     must be a positive multiple of 180
// return: pixel coordinates of the bounding boxes
//     size (M, 5), format [ctr_x; ctr_y; width; height; angle (in degrees)]
//     If boxes has no rows, an empty (0, deltas.cols()) array is returned
//     without any further checks.
template <class Derived1, class Derived2>
EArrXXt<typename Derived1::Scalar> bbox_transform_rotated(
    const Eigen::ArrayBase<Derived1>& boxes,
    const Eigen::ArrayBase<Derived2>& deltas,
    const std::vector<typename Derived2::Scalar>& weights =
        std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
    const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT,
    const bool angle_bound_on = true,
    const int angle_bound_lo = -90,
    const int angle_bound_hi = 90) {
  using T = typename Derived1::Scalar;
  using EArrXX = EArrXXt<T>;

  if (boxes.rows() == 0) {
    // The row count is an index, not a box scalar, so pass a plain 0 instead
    // of converting through T.
    return EArrXX::Zero(0, deltas.cols());
  }

  CAFFE_ENFORCE_EQ(boxes.rows(), deltas.rows());
  CAFFE_ENFORCE_EQ(boxes.cols(), 5);
  CAFFE_ENFORCE_EQ(deltas.cols(), 5);
  // weights[0..3] are read below; fail loudly instead of reading out of bounds.
  CAFFE_ENFORCE(
      weights.size() >= 4,
      "bbox_transform_rotated requires at least 4 weights, got ",
      weights.size());

  const auto& ctr_x = boxes.col(0);
  const auto& ctr_y = boxes.col(1);
  const auto& widths = boxes.col(2);
  const auto& heights = boxes.col(3);
  const auto& angles = boxes.col(4);

  auto dx = deltas.col(0).template cast<T>() / weights[0];
  auto dy = deltas.col(1).template cast<T>() / weights[1];
  // Clip the log-space scale deltas from above so exp() cannot blow up.
  auto dw =
      (deltas.col(2).template cast<T>() / weights[2]).cwiseMin(bbox_xform_clip);
  auto dh =
      (deltas.col(3).template cast<T>() / weights[3]).cwiseMin(bbox_xform_clip);
  // Convert back to degrees
  auto da = deltas.col(4).template cast<T>() * 180.0 / PI;

  EArrXX pred_boxes = EArrXX::Zero(deltas.rows(), deltas.cols());
  // new ctr_x
  pred_boxes.col(0) = dx * widths + ctr_x;
  // new ctr_y
  pred_boxes.col(1) = dy * heights + ctr_y;
  // new width
  pred_boxes.col(2) = dw.exp() * widths;
  // new height
  pred_boxes.col(3) = dh.exp() * heights;
  // new angle
  pred_boxes.col(4) = da + angles;

  if (angle_bound_on) {
    // Normalize angle to be within [angle_bound_lo, angle_bound_hi].
    // Deltas are guaranteed to be <= period / 2 while computing training
    // targets by bbox_transform_inv.
    const int period = angle_bound_hi - angle_bound_lo;
    CAFFE_ENFORCE(period > 0 && period % 180 == 0);
    // Writable view of the predicted-angle column; named distinctly from the
    // input `angles` column above to avoid shadowing it.
    auto pred_angles = pred_boxes.col(4);
    for (int i = 0; i < pred_angles.size(); ++i) {
      if (pred_angles[i] < angle_bound_lo) {
        pred_angles[i] += T(period);
      } else if (pred_angles[i] > angle_bound_hi) {
        pred_angles[i] -= T(period);
      }
    }
  }

  return pred_boxes;
}

// Applies bounding-box regression deltas to `boxes`, dispatching on the box
// layout:
//     4 columns -> bbox_transform_upright() (receives legacy_plus_one)
//     5 columns -> bbox_transform_rotated() (receives the angle_bound_*
//                  arguments)
// Any other column count fails the enforce check. weights and bbox_xform_clip
// are forwarded unchanged to whichever implementation is selected.
template <class Derived1, class Derived2>
EArrXXt<typename Derived1::Scalar> bbox_transform(
    const Eigen::ArrayBase<Derived1>& boxes,
    const Eigen::ArrayBase<Derived2>& deltas,
    const std::vector<typename Derived2::Scalar>& weights =
        std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
    const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT,
    const bool legacy_plus_one = false,
    const bool angle_bound_on = true,
    const int angle_bound_lo = -90,
    const int angle_bound_hi = 90) {
  CAFFE_ENFORCE(boxes.cols() == 4 || boxes.cols() == 5);

  const bool has_angle_column = (boxes.cols() == 5);
  if (has_angle_column) {
    // Rotated boxes: [ctr_x, ctr_y, w, h, angle]
    return bbox_transform_rotated(
        boxes,
        deltas,
        weights,
        bbox_xform_clip,
        angle_bound_on,
        angle_bound_lo,
        angle_bound_hi);
  }

  // Upright boxes: [x1, y1, x2, y2]
  return bbox_transform_upright(
      boxes, deltas, weights, bbox_xform_clip, legacy_plus_one);
}

// Converts upright boxes from corner format to center/size format.
// boxes: size (M, 4), format [x1; y1; x2; y2]
// legacy_plus_one: when true, 1 is added to the computed width and height
// return: size (M, 4), format [x_ctr; y_ctr; w; h], where the center is the
//     midpoint of the two corners
template <class Derived>
EArrXXt<typename Derived::Scalar> bbox_xyxy_to_ctrwh(
    const Eigen::ArrayBase<Derived>& boxes,
    bool legacy_plus_one = false) {
  CAFFE_ENFORCE_EQ(boxes.cols(), 4);

  using T = typename Derived::Scalar;
  const int size_offset = int(legacy_plus_one);

  const auto& left = boxes.col(0);
  const auto& top = boxes.col(1);
  const auto& right = boxes.col(2);
  const auto& bottom = boxes.col(3);

  EArrXXt<T> ctrwh(boxes.rows(), 4);
  // Extents first, then centers; the columns are independent of each other.
  ctrwh.col(2) = right - left + size_offset; // w
  ctrwh.col(3) = bottom - top + size_offset; // h
  ctrwh.col(0) = (left + right) / 2.0; // x_ctr
  ctrwh.col(1) = (top + bottom) / 2.0; // y_ctr
  return ctrwh;
}

// Converts upright boxes from center/size format to corner format.
// boxes: size (M, 4), format [x_ctr; y_ctr; w; h]
// legacy_plus_one: when true, 1 is subtracted from the width and height
//     before they are halved
// return: size (M, 4), format [x1; y1; x2; y2]
template <class Derived>
EArrXXt<typename Derived::Scalar> bbox_ctrwh_to_xyxy(
    const Eigen::ArrayBase<Derived>& boxes,
    const bool legacy_plus_one = false) {
  CAFFE_ENFORCE_EQ(boxes.cols(), 4);

  using T = typename Derived::Scalar;
  const int size_offset = int(legacy_plus_one);

  const auto& center_x = boxes.col(0);
  const auto& center_y = boxes.col(1);

  // Half extents, evaluated once and shared by both corners of each axis.
  const auto half_w = ((boxes.col(2) - size_offset) / 2.0).eval();
  const auto half_h = ((boxes.col(3) - size_offset) / 2.0).eval();

  EArrXXt<T> xyxy(boxes.rows(), 4);
  xyxy.col(0) = center_x - half_w; // x1
  xyxy.col(1) = center_y - half_h; // y1
  xyxy.col(2) = center_x + half_w; // x2
  xyxy.col(3) = center_y + half_h; // y2
  return xyxy;
}

// Clip upright boxes to the image boundaries.
// boxes: pixel coordinates of bounding boxes, size (M, 4),
//     columns [x1, y1, x2, y2]
// height, width: image size in pixels
// legacy_plus_one: when true the upper clip limits are (width - 1) and
//     (height - 1); otherwise they are width and height
// return: array of the same shape with x coordinates clamped to
//     [0, width - legacy_plus_one] and y coordinates clamped to
//     [0, height - legacy_plus_one]
template <class Derived>
EArrXXt<typename Derived::Scalar> clip_boxes_upright(
    const Eigen::ArrayBase<Derived>& boxes,
    int height,
    int width,
    bool legacy_plus_one = false) {
  CAFFE_ENFORCE(boxes.cols() == 4);

  using T = typename Derived::Scalar;
  const int x_limit = width - int(legacy_plus_one);
  const int y_limit = height - int(legacy_plus_one);

  EArrXXt<T> clipped(boxes.rows(), boxes.cols());
  // Even columns (x1, x2) are bounded by the width limit, odd columns
  // (y1, y2) by the height limit.
  for (int c = 0; c < 4; ++c) {
    const int upper = (c % 2 == 0) ? x_limit : y_limit;
    clipped.col(c) = boxes.col(c).cwiseMin(upper).cwiseMax(0);
  }

  return clipped;
}

// Rotated-box counterpart of clip_boxes_upright.
// boxes: size (M, 5), format [ctr_x; ctr_y; width; height; angle (in degrees)]
// height, width: image size in pixels
// angle_thresh: boxes with |angle| <= angle_thresh are treated as upright
// legacy_plus_one: forwarded to the format conversions and to
//     clip_boxes_upright
// return: copy of boxes in which the almost-upright rows have been clipped
//     and all other rows are left untouched
//
// Only boxes that are almost upright (within the `angle_thresh` tolerance) are
// clipped, which keeps backward compatibility for non-rotated boxes.
//
// Truly rotated boxes are not clipped because:
// (1) There are potentially multiple ways to clip a rotated box to make it
//     fit within the image.
// (2) It's tricky to make the entire rectangular box fit within the image and
//     still be able to not leave out pixels of interest.
// Therefore, we rely on ops like RoIAlignRotated safely handling this.
template <class Derived>
EArrXXt<typename Derived::Scalar> clip_boxes_rotated(
    const Eigen::ArrayBase<Derived>& boxes,
    int height,
    int width,
    float angle_thresh = 1.0,
    bool legacy_plus_one = false) {
  CAFFE_ENFORCE(boxes.cols() == 5);

  using BoxArray = EArrXXt<typename Derived::Scalar>;

  // Pick out the rows whose angle magnitude is within the tolerance.
  const auto& box_angles = boxes.col(4);
  const auto& upright_rows = GetArrayIndices(box_angles.abs() <= angle_thresh);
  BoxArray upright;
  GetSubArrayRows(boxes, AsEArrXt(upright_rows), &upright);

  // [x_ctr, y_ctr, w, h] -> [x1, y1, x2, y2], then clip to the image.
  const BoxArray corners =
      bbox_ctrwh_to_xyxy(upright.leftCols(4), legacy_plus_one);
  const BoxArray clipped_corners =
      clip_boxes_upright(corners, height, width, legacy_plus_one);

  // Back to [x_ctr, y_ctr, w, h]; the angle column is left as it was.
  upright.leftCols(4) = bbox_xyxy_to_ctrwh(clipped_corners, legacy_plus_one);

  // Start from the input and overwrite only the rows that were clipped.
  BoxArray ret(boxes.rows(), boxes.cols());
  ret = boxes;
  for (int k = 0; k < upright.rows(); ++k) {
    ret.row(upright_rows[k]) = upright.row(k);
  }
  return ret;
}

// Clip boxes to image boundaries, dispatching on the box layout:
//     4 columns -> clip_boxes_upright() (angle_thresh is unused)
//     5 columns -> clip_boxes_rotated()
// Any other column count fails the enforce check.
template <class Derived>
EArrXXt<typename Derived::Scalar> clip_boxes(
    const Eigen::ArrayBase<Derived>& boxes,
    int height,
    int width,
    float angle_thresh = 1.0,
    bool legacy_plus_one = false) {
  CAFFE_ENFORCE(boxes.cols() == 4 || boxes.cols() == 5);

  const bool has_angle_column = (boxes.cols() == 5);
  if (has_angle_column) {
    // Rotated boxes: [ctr_x, ctr_y, w, h, angle]
    return clip_boxes_rotated(
        boxes, height, width, angle_thresh, legacy_plus_one);
  }

  // Upright boxes: [x1, y1, x2, y2]
  return clip_boxes_upright(boxes, height, width, legacy_plus_one);
}

// Only keep boxes with both sides >= min_size and center within the image.
// boxes: pixel coordinates of bounding box, size (M * 4)
// im_info: [height, width, img_scale]
// return: row indices for 'boxes'
template <class Derived>
std::vector<int> filter_boxes_upright(
    const Eigen::ArrayBase<Derived>& boxes,
    double min_size,
    const Eigen::Array3f& im_info,
    const bool legacy_plus_one = false) {
  CAFFE_ENFORCE_EQ(boxes.cols(), 4);

  // Scale min_size to match image scale
  min_size *= im_info[2];

  using T = typename Derived::Scalar;
  using EArrX = EArrXt<T>;

  EArrX ws = boxes.col(2) - boxes.col(0) + T(int(legacy_plus_one));
  EArrX hs = boxes.col(3) - boxes.col(1) + T(int(legacy_plus_one));
  EArrX x_ctr = boxes.col(0) + ws / T(2);
  EArrX y_ctr = boxes.col(1) + hs / T(2);

  EArrXb keep = (ws >= min_size) && (hs >= min_size) &&
      (x_ctr < T(im_info[1])) && (y_ctr < T(im_info[0]));
Loading ...