ZeroErr
Loading...
Searching...
No Matches
benchmark.h
Go to the documentation of this file.
1/*
2 * This benchmark component is modified from nanobench by Martin Ankerl
3 * https://github.com/martinus/nanobench
4 */
5
6#pragma once
8
9#include <chrono>
10#include <cstdint>
11#include <string>
12#include <vector>
13
14
16
// Declares, registers and opens the definition of one benchmark function.
//   function : identifier used for the generated static function
//   name     : first field of the registration record
//   ...      : optional extra values, brace-wrapped as the last field of the record
// The expansion, in order:
//   1. forward-declares `function` as a static function taking a zeroerr::TestContext*;
//   2. defines a static zeroerr::detail::regTest object (named via
//      ZEROERR_NAMEGEN(_zeroerr_reg)) built from
//      {name, __FILE__, __LINE__, function, {__VA_ARGS__}} and zeroerr::TestType::bench;
//   3. ends with the function header only, so the braces written right after the
//      macro invocation become the function body. The context parameter is wrapped
//      in ZEROERR_UNUSED.
17#define ZEROERR_CREATE_BENCHMARK_FUNC(function, name, ...) \
18 static void function(zeroerr::TestContext*); \
19 static zeroerr::detail::regTest ZEROERR_NAMEGEN(_zeroerr_reg)( \
20 {name, __FILE__, __LINE__, function, {__VA_ARGS__}}, zeroerr::TestType::bench); \
21 static void function(ZEROERR_UNUSED(zeroerr::TestContext* _ZEROERR_TEST_CONTEXT))
22
// Shorthand for ZEROERR_CREATE_BENCHMARK_FUNC: the function identifier is produced
// by ZEROERR_NAMEGEN(_zeroerr_benchmark), while `name` and any extra arguments are
// forwarded unchanged. Must be followed by the benchmark body in braces.
23#define BENCHMARK(name, ...) \
24 ZEROERR_CREATE_BENCHMARK_FUNC(ZEROERR_NAMEGEN(_zeroerr_benchmark), name, __VA_ARGS__)
25
26
27namespace zeroerr {
28
32template <typename T>
35 T data[7]{};
36
37 T& timeElapsed() { return data[0]; }
38 T& pageFaults() { return data[1]; }
39 T& cpuCycles() { return data[2]; }
40 T& contextSwitches() { return data[3]; }
41 T& instructions() { return data[4]; }
42 T& branchInstructions() { return data[5]; }
43 T& branchMisses() { return data[6]; }
44};
45
// Clock type used for time measurement in this header: std::chrono::high_resolution_clock
// when it reports is_steady, otherwise std::chrono::steady_clock. Either way the
// selected clock is a steady one.
46using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady,
47 std::chrono::high_resolution_clock, std::chrono::steady_clock>::type;
48
49namespace detail {
50struct LinuxPerformanceCounter;
51struct WindowsPerformanceCounter;
52} // namespace detail
53
60
61 void beginMeasure();
62 void endMeasure();
63 void updateResults(uint64_t numIters);
64
65 const PerfCountSet<uint64_t>& val() const noexcept { return _val; }
66 PerfCountSet<bool> has() const noexcept { return _has; }
67
68 static PerformanceCounter& inst();
69
70 Clock::duration elapsed;
71
72protected:
73 Clock::time_point _start;
76
77 detail::LinuxPerformanceCounter* _perf = nullptr;
78 detail::WindowsPerformanceCounter* win_perf = nullptr;
79};
80
85 enum Measure {
86 time_elapsed = 1 << 0,
87 iterations = 1 << 1,
88 page_faults = 1 << 2,
89 cpu_cycles = 1 << 3,
91 instructions = 1 << 5,
93 branch_misses = 1 << 7,
94 all = (1 << 8) - 1,
95 };
96 std::string name;
97 std::vector<PerfCountSet<double>> epoch_details;
99
104};
105
106struct Benchmark;
107struct BenchState;
109void destroyBenchState(BenchState* state);
110
111size_t getNumIter(BenchState* state);
112void runIteration(BenchState* state);
113void moveResult(BenchState* state, std::string name);
114
115
121struct Benchmark {
122 std::string title = "benchmark";
123 const char* op_unit = "op";
124 const char* time_unit = "ns";
125 uint64_t epochs = 10;
126 uint64_t warmup = 0;
127 uint64_t iter_per_epoch = 0;
128
129 using ns = std::chrono::nanoseconds;
130 using ms = std::chrono::milliseconds;
131 using time = ns;
132
135
137
138 Benchmark(std::string title) { this->title = title; }
139
140
141 template <typename Op>
142 Benchmark& run(std::string name, Op&& op) {
143 auto* s = createBenchState(*this);
144 auto& pc = PerformanceCounter::inst();
145 while (auto n = getNumIter(s)) {
146 pc.beginMeasure();
147 while (n-- > 0) op();
148 pc.endMeasure();
149 runIteration(s);
150 }
151 moveResult(s, name);
152 return *this;
153 }
154
// Convenience overload: runs `op` with an empty name by forwarding to
// run(std::string, Op&&). Returns *this like the named overload.
155 template <typename Op>
156 Benchmark& run(Op&& op) {
157 return run("", std::forward<Op>(op));
158 }
159
160 std::vector<BenchResult> result;
161 void report();
162};
163
164
// Implementation helpers behind zeroerr::doNotOptimizeAway. The mechanism is
// chosen at compile time: a call into an opaque sink function when _MSC_VER is
// defined, an empty inline-asm statement otherwise.
165namespace detail {
166
167#if defined(_MSC_VER)
// Sink declared here only; its definition is not part of this header.
168void doNotOptimizeAwaySink(const void*);
169
// MSVC variant: passes the address of `val` to the sink function.
170template <typename T>
171void doNotOptimizeAway(const T& val) {
172 doNotOptimizeAwaySink(&val);
173}
174
175#else
176
177// This assembly magic is taken directly from what Google Benchmark is doing. I previously used
178// what Facebook's folly was doing, but that seemed to have compilation problems in some cases.
179// Google Benchmark seemed to be the most well tested anyway. See
180// https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307
//
// Const overload: an empty asm statement that takes `val` as an input operand
// (register or memory) and declares a "memory" clobber.
181template <typename T>
182void doNotOptimizeAway(const T& val) {
183 // NOLINTNEXTLINE(hicpp-no-assembler)
184 asm volatile("" : : "r,m"(val) : "memory");
185}
186
// Non-const overload: `val` is a read-write ("+") operand of the empty asm
// statement. The only difference between the two branches is the order of the
// constraint alternatives ("r,m" for clang, "m,r" otherwise).
187template <typename T>
188void doNotOptimizeAway(T& val) {
189#if defined(__clang__)
190 // NOLINTNEXTLINE(hicpp-no-assembler)
191 asm volatile("" : "+r,m"(val) : : "memory");
192#else
193 // NOLINTNEXTLINE(hicpp-no-assembler)
194 asm volatile("" : "+m,r"(val) : : "memory");
195#endif
196}
197#endif
198
199} // namespace detail
200
201
// Public entry point: perfectly forwards `arg` to detail::doNotOptimizeAway, whose
// purpose is to keep the compiler from optimizing the given value away.
208template <typename Arg>
209void doNotOptimizeAway(Arg&& arg) {
210 detail::doNotOptimizeAway(std::forward<Arg>(arg));
211}
212
213} // namespace zeroerr
214
#define ZEROERR_SUPPRESS_COMMON_WARNINGS_POP
Definition config.h:265
#define ZEROERR_SUPPRESS_COMMON_WARNINGS_PUSH
Definition config.h:218
void doNotOptimizeAway(const T &val)
Definition benchmark.h:182
Definition benchmark.cpp:17
void doNotOptimizeAway(Arg &&arg)
Makes sure none of the given arguments are optimized away by the compiler.
Definition benchmark.h:209
void destroyBenchState(BenchState *state)
Definition benchmark.cpp:152
void moveResult(BenchState *state, std::string name)
Definition benchmark.cpp:179
BenchState * createBenchState(Benchmark &benchmark)
Definition benchmark.cpp:151
void runIteration(BenchState *state)
Definition benchmark.cpp:159
std::conditional< std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock >::type Clock
Definition benchmark.h:47
size_t getNumIter(BenchState *state)
Definition benchmark.cpp:154
BenchResult is a result of running the benchmark.
Definition benchmark.h:84
Measure
Definition benchmark.h:85
@ all
Definition benchmark.h:94
@ iterations
Definition benchmark.h:87
@ branch_misses
Definition benchmark.h:93
@ context_switches
Definition benchmark.h:90
@ instructions
Definition benchmark.h:91
@ time_elapsed
Definition benchmark.h:86
@ branch_instructions
Definition benchmark.h:92
@ cpu_cycles
Definition benchmark.h:89
@ page_faults
Definition benchmark.h:88
PerfCountSet< double > mean() const
Definition benchmark.cpp:229
PerfCountSet< double > average() const
Definition benchmark.cpp:189
PerfCountSet< double > max() const
Definition benchmark.cpp:216
PerfCountSet< double > min() const
Definition benchmark.cpp:203
std::string name
Definition benchmark.h:96
PerfCountSet< bool > has
Definition benchmark.h:98
std::vector< PerfCountSet< double > > epoch_details
Definition benchmark.h:97
Definition benchmark.cpp:50
Benchmark creates a core object for configuring a benchmark. This class is a driver to run multip...
Definition benchmark.h:121
uint64_t iter_per_epoch
Definition benchmark.h:127
std::chrono::milliseconds ms
Definition benchmark.h:130
const char * time_unit
Definition benchmark.h:124
Benchmark & run(std::string name, Op &&op)
Definition benchmark.h:142
std::vector< BenchResult > result
Definition benchmark.h:160
uint64_t warmup
Definition benchmark.h:126
uint64_t minimalResolutionMutipler
Definition benchmark.h:136
Benchmark & run(Op &&op)
Definition benchmark.h:156
time mMaxEpochTime
Definition benchmark.h:133
std::chrono::nanoseconds ns
Definition benchmark.h:129
ns time
Definition benchmark.h:131
time mMinEpochTime
Definition benchmark.h:134
Benchmark(std::string title)
Definition benchmark.h:138
uint64_t epochs
Definition benchmark.h:125
void report()
Definition benchmark.cpp:235
std::string title
Definition benchmark.h:122
const char * op_unit
Definition benchmark.h:123
PerfCountSet is a set of performance counters.
Definition benchmark.h:33
T iterations
Definition benchmark.h:34
T & timeElapsed()
Definition benchmark.h:37
T & branchInstructions()
Definition benchmark.h:42
T & pageFaults()
Definition benchmark.h:38
T & cpuCycles()
Definition benchmark.h:39
T data[7]
Definition benchmark.h:35
T & contextSwitches()
Definition benchmark.h:40
T & branchMisses()
Definition benchmark.h:43
T & instructions()
Definition benchmark.h:41
PerformanceCounter is a class to measure the performance of a function.
Definition benchmark.h:57
void updateResults(uint64_t numIters)
Definition benchmark.cpp:631
PerfCountSet< bool > _has
Definition benchmark.h:75
void endMeasure()
Definition benchmark.cpp:625
PerformanceCounter()
Definition benchmark.cpp:575
detail::LinuxPerformanceCounter * _perf
Definition benchmark.h:77
Clock::time_point _start
Definition benchmark.h:73
const PerfCountSet< uint64_t > & val() const noexcept
Definition benchmark.h:65
void beginMeasure()
Definition benchmark.cpp:619
PerfCountSet< uint64_t > _val
Definition benchmark.h:74
~PerformanceCounter()
Definition benchmark.cpp:608
PerfCountSet< bool > has() const noexcept
Definition benchmark.h:66
detail::WindowsPerformanceCounter * win_perf
Definition benchmark.h:78
static PerformanceCounter & inst()
Definition benchmark.cpp:614
Clock::duration elapsed
Definition benchmark.h:70