#ifndef CAFFE2_OPERATORS_UTILITY_OPS_H_
#define CAFFE2_OPERATORS_UTILITY_OPS_H_
#include <chrono>
#include <cmath>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/types.h"
#include "caffe2/operators/gather_op.h"
#include "caffe2/utils/conversions.h"
#include "caffe2/utils/math.h"
C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(GatherRangesOp);
C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(LengthsGatherOp);
namespace caffe2 {
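/**
* @brief Checks the input tensor for NaNs, reporting diagnostics if any are
* found, and otherwise passes the input through to the output. Intended as a
* debugging aid; its gradient (GetNanCheckGradient below) applies the same
* check to the gradient flowing backward.
*/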
template <class Context>
class NanCheckOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
template <class... Args>
explicit NanCheckOp(Args&&... args)
: Operator<Context>(std::forward<Args>(args)...) {}
bool RunOnDevice() override;
private:
TensorPrinter tensorPrinter_;
Tensor scratch_;
};
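// Gradient for NanCheck: apply the same check to the output gradient.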
struct GetNanCheckGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
std::vector<OperatorDef> GetGradientDefs() override {
return {CreateOperatorDef(
"NanCheck",
"",
std::vector<string>{GO(0)},
std::vector<string>{GI(0)})};
}
};
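/**
* @brief Elementwise NaN test: produces a uint8 tensor of the same shape as
* the float/double input, with 1 where the input is NaN and 0 elsewhere.
*/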
template <class Context>
class IsNanOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
IsNanOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws) {}
bool RunOnDevice() override {
return DispatchHelper<TensorTypes<float, double>>::call(this, Input(0));
}
template <typename T>
bool DoRunWithType() {
auto& X = Input(0);
auto* Y = Output(0, X.sizes(), at::dtype<uint8_t>());
const auto* X_data = X.template data<T>();
uint8_t* Y_data = Y->template mutable_data<uint8_t>();
const int64_t N = X.numel();
for (int64_t i = 0; i < N; i++) {
Y_data[i] = static_cast<uint8_t>(std::isnan(X_data[i]));
}
return true;
}
};
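/**
* @brief Outputs the current time, in nanoseconds since the clock's epoch,
* as a scalar int64 tensor. Useful for coarse timing inside a net.
*/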
template <class Context>
class WallClockTimeOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
template <class... Args>
explicit WallClockTimeOp(Args&&... args)
: Operator<Context>(std::forward<Args>(args)...) {}
bool RunOnDevice() override {
// Use int64_t (not long, which is 32-bit on some platforms) to avoid
// truncating the nanosecond count.
int64_t nanoseconds = static_cast<int64_t>(
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now().time_since_epoch())
.count());
TensorCPU* output = Output(0);
output->Resize();
*output->template mutable_data<int64_t>() = nanoseconds;
return true;
}
};
const char kPrintFileExtension[] = ".log";
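/**
* @brief Logs the content of the input tensor (for empty tensors only the
* metadata, and for non-tensor blobs only the type name). Arguments:
* to_file - if nonzero, write to <workspace root>/<input name>.log instead
* of the INFO log;
* limit - maximum number of elements to print (0 means the TensorPrinter
* default);
* every_n - print only on every N-th invocation (default 1).
*/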
template <class Context>
class PrintOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
USE_DISPATCH_HELPER;
explicit PrintOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
tensor_printer_(
operator_def.input(0),
this->template GetSingleArgument<int>("to_file", 0)
? ws->RootFolder() + "/" + operator_def.input(0) +
kPrintFileExtension
: "",
this->template GetSingleArgument<int>("limit", 0)),
every_n_(this->template GetSingleArgument<int>("every_n", 1)) {
CAFFE_ENFORCE_GE(every_n_, 1);
}
bool RunOnDevice() override {
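// Print only on every `every_n_`-th call: occurrences_mod_n_ counts
// 1..every_n_ and wraps, and we print only when it equals 1.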
if (++occurrences_mod_n_ > every_n_) {
occurrences_mod_n_ -= every_n_;
}
if (occurrences_mod_n_ != 1) {
return true;
}
if (!this->InputIsTensorType(0, Context::GetDeviceType()) &&
!this->InputIsTensorType(0, CPU)) {
LOG(INFO) << "Blob of type: "
<< OperatorBase::Inputs().at(0)->meta().name();
return true;
}
// special-case empty tensors since they may have no meta()
if (Input(0).numel() == 0) {
tensor_printer_.PrintMeta(Input(0));
return true;
}
using Types = TensorTypes<
float,
double,
int,
long,
bool,
char,
unsigned char,
std::string>;
if (this->InputIsTensorType(0, CPU)) {
return DispatchHelper<Types>::call(
this, this->template Input<Tensor>(0, CPU));
} else {
return DispatchHelper<Types>::call(this, Input(0));
}
}
private:
template <typename T>
bool DoRunWithType() {
// A simple strategy: copy the tensor to CPU if needed, and point `tensor`
// at the right instance. Note that tensor_copy_if_needed manages its own
// memory, so no smart pointer is needed.
const TensorCPU* tensor;
Tensor tensor_copy_if_needed(CPU);
if (this->InputIsTensorType(0, CPU)) {
tensor = &this->template Input<Tensor>(0, CPU);
} else {
// sync copy
tensor_copy_if_needed.CopyFrom(Input(0));
tensor = &tensor_copy_if_needed;
}
tensor_printer_.Print<T>(*tensor);
return true;
}
private:
TensorPrinter tensor_printer_;
int every_n_;
int occurrences_mod_n_{0};
};
/**
* @brief Alias op makes the output and the input share the same underlying
* storage.
*
* WARNING: in general, in caffe2's operator interface different tensors should
* have different underlying storage, which is the assumption made by
* components such as the dependency engine and memory optimization. Thus, in
* normal situations you should not use the AliasOp, especially in a normal
* forward-backward pass.
*
* The Alias op is provided so one can achieve true asynchrony, such as
* Hogwild, in a graph. But make sure you understand all the implications,
* similar to those of multi-threaded computation, before using it explicitly.
*/
template <class Context>
class AliasOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
USE_SIMPLE_CTOR_DTOR(AliasOp);
bool RunOnDevice() override {
auto& input = Input(0);
CAFFE_ENFORCE_GE(input.numel(), 0, "Tensor is not initialized");
OutputTensorAlias(0, input);
return true;
}
};
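// A minimal usage sketch (hypothetical blob names; Alias is created from an
// OperatorDef like any other operator):
//
//   OperatorDef def = CreateOperatorDef(
//       "Alias", "", std::vector<string>{"X"}, std::vector<string>{"X_alias"});
//   auto op = CreateOperator(def, &ws);
//   op->Run();  // "X_alias" now shares storage with "X".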
/**
* @brief Pass the input tensor through to the output, in place when the
* output aliases the input and by copy otherwise; typically used to ensure
* a gradient is dense.
* Input:
* DATA - dense tensor.
* Output:
* DATA - same tensor as input.
*/
template <class Context>
class EnsureDenseOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
USE_SIMPLE_CTOR_DTOR(EnsureDenseOp)
bool RunOnDevice() override {
const auto& input = Input(0);
auto* output = Output(0);
CAFFE_ENFORCE_GT(input.dim(), 0, "Input has to be at least a vector.");
// The output is allowed to overwrite the input in place; otherwise the
// input is copied into the output.
if (&input != output) {
output->ResizeLike(input);
output->CopyFrom(input, true /*async*/);
}
return true;
}
};
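// Flattens the input tensor (rank >= 1) into a 1-D vector containing the
// same elements in the same order.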
template <class Context>
class FlattenToVecOp : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
USE_SIMPLE_CTOR_DTOR(FlattenToVecOp);
bool RunOnDevice() override {
auto& input = Input(0);
auto* output = Output(0);
CAFFE_ENFORCE_GE(input.dim(), 1, "The rank of the tensor must be >= 1.");
output->Resize(input.numel());
context_.CopyItemsSameDevice(
input.dtype(),
input.numel(),
input.raw_data(),
output->raw_mutable_data(input.dtype()));
return true;
}
};
// Output gets the data of input(0), but reshapes it like input(1).
template <class Context>
class ResizeLikeOp : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
USE_SIMPLE_CTOR_DTOR(ResizeLikeOp);
bool RunOnDevice() override {
auto& input0 = Input(0);
auto& input1 = Input(1);
auto* output = Output(0);
CAFFE_ENFORCE_EQ(input0.numel(), input1.numel());
output->ResizeLike(Input(1));
context_.CopyItemsSameDevice(
input0.dtype(),
input0.numel(),
input0.raw_data(),
output->raw_mutable_data(input0.dtype()));
return true;
}
};
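// Elementwise sum of two or more same-shaped tensors (float, double, int32_t
// or int64_t). With a single input the op reduces to a (possibly in-place)
// copy; the output may alias the first input.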
template <class Context>
class SumOp : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
USE_SIMPLE_CTOR_DTOR(SumOp);
template <typename T>
bool DoRunWithType() {
auto& input0 = Input(0);
if (InputSize() == 1) {
// TODO: better TensorOptions argument passing (e.g. default argument)
OutputTensorCopyFrom(
0,
// I'll change the argument order in another diff, so that we don't need
// to write this explicit dtype.
at::dtype(input0.dtype()),
input0,
true /*async*/);
return true;
}
auto* output = Output(0, input0.sizes(), at::dtype<T>());
T* output_data = output->template mutable_data<T>();
// Dimension checking
for (int i = 1; i < InputSize(); ++i) {
if (output->sizes() != Input(i).sizes()) {
CAFFE_THROW(
"Check failed: output->sizes() == Input(i).sizes().",
"Description: Input #",
i,
", input dimension:",
Input(i).sizes(),
" should match output dimension: ",
output->sizes());
}
}
// Add the first two - works if in-place or not.
math::Add(
output->numel(),
input0.template data<T>(),
Input(1).template data<T>(),
output_data,
&context_);
// Add remaining.
for (int i = 2; i < InputSize(); ++i) {
math::Add(
output->numel(),
output_data,
Input(i).template data<T>(),
output_data,
&context_);
}
return true;
}
bool RunOnDevice() override {
return DispatchHelper<TensorTypes<float, double, int32_t, int64_t>>::call(
this, Input(0));
}
};
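// A minimal usage sketch for Sum (hypothetical blob names, following the
// same OperatorDef pattern used elsewhere in this file):
//
//   OperatorDef def = CreateOperatorDef(
//       "Sum", "", std::vector<string>{"A", "B"}, std::vector<string>{"C"});
//   auto op = CreateOperator(def, &ws);
//   op->Run();  // C[i] = A[i] + B[i]
//
// Cost inference: summing N same-shaped inputs takes N - 1 elementwise
// additions, so the single-pass pointwise estimate is scaled by
// (in.size() - 1); Sum has no parameters, hence params_bytes = 0.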
inline OpSchema::Cost CostInferenceForSum(
const OperatorDef& def,
const std::vector<TensorShape>& in) {
struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
cost.flops *= (in.size() - 1);
cost.params_bytes = 0;
return cost;
}