Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

edgify / torch   python

Repository URL to install this package:

Version: 2.0.1+cpu 

/ include / torch / csrc / profiler / perf.h

#pragma once

#include <array>
#include <cstdint>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <torch/csrc/profiler/events.h>

#include <c10/util/Exception.h>

namespace torch {
namespace profiler {
namespace impl {
namespace linux_perf {

/*
 * Upper bound on the number of events that can be tracked at once.
 * The limit comes from the hardware: CPUs expose only a few performance
 * counters, and time multiplexing is not supported just yet.
 * Supporting it would mean either scaling the counter values in proportion to
 * the enabled and running time, or running the workload multiple times.
 */
constexpr uint8_t MAX_EVENTS{4};

/*
 * A single reading of one perf event: the counter value plus the two timing
 * fields that accompany it.
 * Every field defaults to zero so that a default-constructed PerfCounter
 * never exposes indeterminate values.
 */
struct PerfCounter {
  uint64_t value = 0; /* The value of the event */
  uint64_t time_enabled = 0; /* for TIME_ENABLED */
  uint64_t time_running = 0; /* for TIME_RUNNING */
};

/*
 * Basic perf event handler for Android and Linux
 *
 * Holds the event name and a file descriptor (fd_, -1 while no descriptor is
 * held). The type is move-only so that a descriptor never has two owners.
 */
class PerfEvent {
 public:
  /* Creates an event called `name`. The name is copied, so const strings and
   * temporaries are accepted. No descriptor is held yet (fd_ stays -1). */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  /* Copying would leave two objects referring to the same descriptor */
  PerfEvent(const PerfEvent&) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Takes over the name and descriptor of `other`, leaving it with fd_ == -1 */
  PerfEvent(PerfEvent&& other) noexcept
      : name_(std::move(other.name_)), fd_(std::exchange(other.fd_, -1)) {}

  /* Takes over the name and descriptor of `other`.
   * The descriptors are swapped rather than overwritten: whatever this object
   * held before ends up in `other` instead of being silently dropped.
   * NOTE(review): this assumes ~PerfEvent() releases fd_ - the destructor is
   * defined outside this header, confirm. */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    if (this != &other) {
      std::swap(fd_, other.fd_);
      name_ = std::move(other.name_);
    }
    return *this;
  }

  ~PerfEvent();

  /* Setup perf events with the Linux Kernel, attaches perf to this process
   * using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns PerfCounter values for this event from kernel, on non supported
   * platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* Descriptor for this event, -1 when none is held */
  int fd_ = -1;
};

/*
 * Drives a collection of PerfEvent objects as one unit
 */
class PerfProfiler {
 public:
  /* Set up every requested event, each one tracked as its own PerfEvent */
  void Configure(std::vector<std::string>& event_names);

  /* Begin counting events from this point */
  void Enable();

  /* Stop counting and store, in the container supplied by the caller, the
   * delta computed against the start count values since last Enable() */
  void Disable(perf_counters_t&);

 private:
  void StartCounting() const;
  void StopCounting() const;
  uint64_t CalcDelta(uint64_t start, uint64_t end) const;

  /* The events being tracked */
  std::vector<PerfEvent> events_;
  /* Start count values, kept as a stack */
  std::stack<perf_counters_t> start_values_;
};
} // namespace linux_perf
} // namespace impl
} // namespace profiler
} // namespace torch