ZeroErr
benchmark.h
Go to the documentation of this file.
1 /*
2  * This benchmark component is modified from nanobench by Martin Ankerl
3  * https://github.com/martinus/nanobench
4  */
5 
6 #pragma once
8 
9 #include <chrono>
10 #include <cstdint>
11 #include <string>
12 #include <vector>
13 
14 
16 
// ZEROERR_CREATE_BENCHMARK_FUNC(func, label) expands to three pieces, in order:
//   1. a forward declaration of the static function `func`, which takes a
//      zeroerr::TestContext*;
//   2. a static zeroerr::detail::regTest object constructed from
//      {label, __FILE__, __LINE__, func} and zeroerr::TestType::bench
//      (presumably this is what registers the benchmark -- regTest is defined
//      outside this header, confirm there);
//   3. the head of the definition of `func`, so the braces written right after
//      the macro invocation become the function body. The context parameter is
//      wrapped in ZEROERR_UNUSED.
#define ZEROERR_CREATE_BENCHMARK_FUNC(func, label)                         \
    static void func(zeroerr::TestContext*);                               \
    static zeroerr::detail::regTest ZEROERR_NAMEGEN(_zeroerr_reg)(         \
        {label, __FILE__, __LINE__, func}, zeroerr::TestType::bench);      \
    static void func(ZEROERR_UNUSED(zeroerr::TestContext* _ZEROERR_TEST_CONTEXT))

// User-facing form: BENCHMARK("some name") { ...body... }
// The function identifier is produced by ZEROERR_NAMEGEN(_zeroerr_benchmark).
#define BENCHMARK(name) \
    ZEROERR_CREATE_BENCHMARK_FUNC(ZEROERR_NAMEGEN(_zeroerr_benchmark), name)
24 
25 
26 namespace zeroerr {
27 
// PerfCountSet holds one value of type T per performance counter.
//
// The seven counters are stored in the `data` array; each named accessor below
// is an alias for one fixed slot, so a counter can be addressed either by name
// or by iterating over `data`. The array is value-initialized, so every slot
// starts out as T{}.
//
// Both mutable and read-only accessors are provided, which lets the named form
// be used through a const reference (for example the one returned by
// PerformanceCounter::val()).
template <typename T>
struct PerfCountSet {
    T data[7]{};

    // Mutable access by counter name.
    T& timeElapsed() { return data[0]; }
    T& pageFaults() { return data[1]; }
    T& cpuCycles() { return data[2]; }
    T& contextSwitches() { return data[3]; }
    T& instructions() { return data[4]; }
    T& branchInstructions() { return data[5]; }
    T& branchMisses() { return data[6]; }

    // Read-only access by counter name; same slot mapping as above.
    const T& timeElapsed() const { return data[0]; }
    const T& pageFaults() const { return data[1]; }
    const T& cpuCycles() const { return data[2]; }
    const T& contextSwitches() const { return data[3]; }
    const T& instructions() const { return data[4]; }
    const T& branchInstructions() const { return data[5]; }
    const T& branchMisses() const { return data[6]; }
};
44 
// Counter backend types. They are only forward-declared here; this header
// refers to them through pointers.
namespace detail {
struct WindowsPerformanceCounter;
struct LinuxPerformanceCounter;
}  // namespace detail

// Clock used for timing: std::chrono::high_resolution_clock when that clock is
// steady, std::chrono::steady_clock otherwise.
using Clock =
    std::conditional<std::chrono::high_resolution_clock::is_steady,
                     std::chrono::high_resolution_clock,
                     std::chrono::steady_clock>::type;
52 
59 
60  void beginMeasure();
61  void endMeasure();
62  void updateResults(uint64_t numIters);
63 
64  PerfCountSet<uint64_t> const& val() const noexcept { return _val; }
65  PerfCountSet<bool> has() const noexcept { return _has; }
66 
67  static PerformanceCounter& inst();
68 
69  Clock::duration elapsed;
70 
71 protected:
72  Clock::time_point _start;
75 
76  detail::LinuxPerformanceCounter* _perf = nullptr;
77  detail::WindowsPerformanceCounter* win_perf = nullptr;
78 };
79 
83 struct BenchResult {
84  enum Measure {
85  time_elapsed = 1 << 0,
86  iterations = 1 << 1,
87  page_faults = 1 << 2,
88  cpu_cycles = 1 << 3,
89  context_switches = 1 << 4,
90  instructions = 1 << 5,
92  branch_misses = 1 << 7,
93  all = (1 << 8) - 1,
94  };
95  std::string name;
96  std::vector<PerfCountSet<double>> epoch_details;
98 
100  PerfCountSet<double> min() const;
101  PerfCountSet<double> max() const;
102  PerfCountSet<double> mean() const;
103 };
104 
// Forward declarations. BenchState is only ever handled through a pointer in
// this header.
struct BenchState;
struct Benchmark;

// Free functions operating on a BenchState. They are declared here and defined
// out of line; Benchmark::run calls the first three:
//   - getNumIter:   its result drives the measuring loop in run(). A non-zero
//                   value is how many times the callable is invoked before the
//                   next call; zero ends the loop.
//   - runIteration: called once after each measured batch.
//   - moveResult:   called once after the loop, with the benchmark's name.
size_t getNumIter(BenchState* state);
void runIteration(BenchState* state);
void moveResult(BenchState* state, std::string name);

// Not called anywhere in this header.
void destroyBenchState(BenchState* state);
113 
114 
120 struct Benchmark {
121  std::string title = "benchmark";
122  const char* op_unit = "op";
123  const char* time_unit = "ns";
124  uint64_t epochs = 10;
125  uint64_t warmup = 0;
126  uint64_t iter_per_epoch = 0;
127 
128  using ns = std::chrono::nanoseconds;
129  using ms = std::chrono::milliseconds;
130  using time = ns;
131 
134 
135  uint64_t minimalResolutionMutipler = 1000;
136 
137  Benchmark(std::string title) { this->title = title; }
138 
139 
140  template <typename Op>
141  Benchmark& run(std::string name, Op&& op) {
142  auto* s = createBenchState(*this);
143  auto& pc = PerformanceCounter::inst();
144  while (auto n = getNumIter(s)) {
145  pc.beginMeasure();
146  while (n-- > 0) op();
147  pc.endMeasure();
148  runIteration(s);
149  }
150  moveResult(s, name);
151  return *this;
152  }
153 
154  template <typename Op>
155  Benchmark& run(Op&& op) {
156  return run("", std::forward<Op>(op));
157  }
158 
159  std::vector<BenchResult> result;
160  void report();
161 };
162 
163 
164 namespace detail {
165 
166 #if defined(_MSC_VER)
// MSVC branch: no inline assembly is used. The address of `val` is handed to
// doNotOptimizeAwaySink, which is only declared in this header and defined
// elsewhere.
167 void doNotOptimizeAwaySink(void const*);
168 
169 template <typename T>
170 void doNotOptimizeAway(T const& val) {
171  doNotOptimizeAwaySink(&val);
172 }
173 
174 #else
175 
176 // This assembly magic is taken directly from Google Benchmark. I previously used
177 // what Facebook's folly was doing, but that seemed to have compilation problems in some cases.
178 // Google Benchmark seemed to be the most well tested anyway. See
179 // https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307
//
// Read-only overload: an empty asm statement that lists `val` as an input
// operand (constraint alternatives "r" and "m") and declares a "memory" clobber.
180 template <typename T>
181 void doNotOptimizeAway(T const& val) {
182  // NOLINTNEXTLINE(hicpp-no-assembler)
183  asm volatile("" : : "r,m"(val) : "memory");
184 }
185 
// Mutable overload: the same empty asm statement, but `val` is a read-write
// ("+") operand. The two preprocessor branches differ only in the order of the
// constraint alternatives ("+r,m" under clang, "+m,r" otherwise).
186 template <typename T>
187 void doNotOptimizeAway(T& val) {
188 #if defined(__clang__)
189  // NOLINTNEXTLINE(hicpp-no-assembler)
190  asm volatile("" : "+r,m"(val) : : "memory");
191 #else
192  // NOLINTNEXTLINE(hicpp-no-assembler)
193  asm volatile("" : "+m,r"(val) : : "memory");
194 #endif
195 }
196 #endif
197 
198 } // namespace detail
199 
200 
207 template <typename Arg>
208 void doNotOptimizeAway(Arg&& arg) {
209  detail::doNotOptimizeAway(std::forward<Arg>(arg));
210 }
211 
212 } // namespace zeroerr
213 
#define ZEROERR_SUPPRESS_COMMON_WARNINGS_POP
Definition: config.h:265
#define ZEROERR_SUPPRESS_COMMON_WARNINGS_PUSH
Definition: config.h:218
void doNotOptimizeAway(T const &val)
Definition: benchmark.h:181
Definition: benchmark.cpp:17
void doNotOptimizeAway(Arg &&arg)
Makes sure none of the given arguments are optimized away by the compiler.
Definition: benchmark.h:208
void destroyBenchState(BenchState *state)
Definition: benchmark.cpp:152
void moveResult(BenchState *state, std::string name)
Definition: benchmark.cpp:179
BenchState * createBenchState(Benchmark &benchmark)
Definition: benchmark.cpp:151
void runIteration(BenchState *state)
Definition: benchmark.cpp:159
std::conditional< std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock >::type Clock
Definition: benchmark.h:46
size_t getNumIter(BenchState *state)
Definition: benchmark.cpp:154
BenchResult is a result of running the benchmark.
Definition: benchmark.h:83
Measure
Definition: benchmark.h:84
@ all
Definition: benchmark.h:93
@ iterations
Definition: benchmark.h:86
@ branch_misses
Definition: benchmark.h:92
@ context_switches
Definition: benchmark.h:89
@ instructions
Definition: benchmark.h:90
@ time_elapsed
Definition: benchmark.h:85
@ branch_instructions
Definition: benchmark.h:91
@ cpu_cycles
Definition: benchmark.h:88
@ page_faults
Definition: benchmark.h:87
PerfCountSet< double > mean() const
Definition: benchmark.cpp:229
PerfCountSet< double > average() const
Definition: benchmark.cpp:189
PerfCountSet< double > max() const
Definition: benchmark.cpp:216
PerfCountSet< double > min() const
Definition: benchmark.cpp:203
std::string name
Definition: benchmark.h:95
PerfCountSet< bool > has
Definition: benchmark.h:97
std::vector< PerfCountSet< double > > epoch_details
Definition: benchmark.h:96
Definition: benchmark.cpp:50
Benchmark creates a core object for configuring a benchmark. This class is a driver to run multip...
Definition: benchmark.h:120
uint64_t iter_per_epoch
Definition: benchmark.h:126
std::chrono::milliseconds ms
Definition: benchmark.h:129
const char * time_unit
Definition: benchmark.h:123
std::vector< BenchResult > result
Definition: benchmark.h:159
uint64_t warmup
Definition: benchmark.h:125
uint64_t minimalResolutionMutipler
Definition: benchmark.h:135
time mMaxEpochTime
Definition: benchmark.h:132
std::chrono::nanoseconds ns
Definition: benchmark.h:128
ns time
Definition: benchmark.h:130
Benchmark & run(Op &&op)
Definition: benchmark.h:155
time mMinEpochTime
Definition: benchmark.h:133
Benchmark(std::string title)
Definition: benchmark.h:137
uint64_t epochs
Definition: benchmark.h:124
void report()
Definition: benchmark.cpp:235
std::string title
Definition: benchmark.h:121
Benchmark & run(std::string name, Op &&op)
Definition: benchmark.h:141
const char * op_unit
Definition: benchmark.h:122
PerfCountSet is a set of performance counters.
Definition: benchmark.h:32
T iterations
Definition: benchmark.h:33
T & timeElapsed()
Definition: benchmark.h:36
T & branchMisses()
Definition: benchmark.h:42
T & pageFaults()
Definition: benchmark.h:37
T & branchInstructions()
Definition: benchmark.h:41
T data[7]
Definition: benchmark.h:34
T & instructions()
Definition: benchmark.h:40
T & cpuCycles()
Definition: benchmark.h:38
T & contextSwitches()
Definition: benchmark.h:39
PerformanceCounter is a class to measure the performance of a function.
Definition: benchmark.h:56
void updateResults(uint64_t numIters)
Definition: benchmark.cpp:631
PerfCountSet< bool > _has
Definition: benchmark.h:74
void endMeasure()
Definition: benchmark.cpp:625
PerformanceCounter()
Definition: benchmark.cpp:575
detail::LinuxPerformanceCounter * _perf
Definition: benchmark.h:76
Clock::time_point _start
Definition: benchmark.h:72
void beginMeasure()
Definition: benchmark.cpp:619
PerfCountSet< uint64_t > _val
Definition: benchmark.h:73
~PerformanceCounter()
Definition: benchmark.cpp:608
PerfCountSet< bool > has() const noexcept
Definition: benchmark.h:65
detail::WindowsPerformanceCounter * win_perf
Definition: benchmark.h:77
static PerformanceCounter & inst()
Definition: benchmark.cpp:614
Clock::duration elapsed
Definition: benchmark.h:69
PerfCountSet< uint64_t > const & val() const noexcept
Definition: benchmark.h:64