#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <torch/csrc/profiler/events.h>
#include <c10/util/Exception.h>
namespace torch {
namespace profiler {
namespace impl {
namespace linux_perf {
/*
 * Upper bound on the number of events that can be tracked at once.
 * The cap comes from the limited number of hardware performance counters a
 * CPU exposes, combined with the fact that time multiplexing is not supported
 * yet. Supporting it would mean either scaling the counter values in
 * proportion to the enabled and running time, or running the workload
 * several times.
 */
constexpr uint8_t MAX_EVENTS{4};
/*
 * Values associated with a single perf event.
 * Every field defaults to zero so that a default-constructed PerfCounter never
 * exposes indeterminate data; aggregate initialization keeps working.
 * NOTE(review): the field order presumably mirrors the layout the kernel
 * writes when the event is read - confirm before reordering.
 */
struct PerfCounter {
  uint64_t value = 0; /* The value of the event */
  uint64_t time_enabled = 0; /* for TIME_ENABLED */
  uint64_t time_running = 0; /* for TIME_RUNNING */
};
/*
 * Basic perf event handler for Android and Linux.
 *
 * Holds the name of one event together with the file descriptor associated
 * with it; fd_ is -1 while no descriptor is held. The type is move-only so
 * that a descriptor is never shared between two PerfEvent objects.
 */
class PerfEvent {
 public:
  /* Remember the event name; the descriptor starts out as -1. The name is only
   * copied, so const strings and temporaries are accepted as well. */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  /* Not copyable: two objects must never refer to the same descriptor */
  PerfEvent(const PerfEvent&) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Take over the name and descriptor of `other`, leaving it with fd_ == -1 */
  PerfEvent(PerfEvent&& other) noexcept
      : name_(std::move(other.name_)), fd_(std::exchange(other.fd_, -1)) {}

  /* Move-assign by exchanging state with `other`. Plainly overwriting fd_
   * would drop the descriptor this object already holds; after the swap it
   * lives in `other` instead and is released together with it.
   * NOTE(review): relies on ~PerfEvent() (defined out of line) closing a
   * valid fd_ - confirm. */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    if (this != &other) {
      std::swap(fd_, other.fd_);
      name_.swap(other.name_);
    }
    return *this;
  }

  ~PerfEvent();

  /* Setup perf events with the Linux Kernel, attaches perf to this process
   * using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns the counter value for this event as read from the kernel, on non
   * supported platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* Descriptor for the event; -1 means "none" */
  int fd_ = -1;
};
/*
 * Manages a set of PerfEvent objects and reports their counts as deltas
 * between an Enable() call and the Disable() call that follows it.
 */
class PerfProfiler {
 public:
  /* Set up all the requested events; each one is tracked as its own
   * PerfEvent */
  void Configure(std::vector<std::string>& event_names);

  /* Begin counting events from this point */
  void Enable();

  /* Stop counting and write into the caller supplied container the deltas
   * computed against the start count values from the last Enable() */
  void Disable(perf_counters_t& counters);

 private:
  /* Internal helpers */
  void StartCounting() const;
  void StopCounting() const;
  uint64_t CalcDelta(uint64_t start, uint64_t end) const;

  /* One entry per configured event */
  std::vector<PerfEvent> events_;

  /* Start count values kept for later delta calculation.
   * NOTE(review): presumably one entry is pushed per Enable() - confirm
   * against the implementation. */
  std::stack<perf_counters_t> start_values_;
};
} // namespace linux_perf
} // namespace impl
} // namespace profiler
} // namespace torch