ZeroErr
载入中...
搜索中...
未找到
benchmark.h
浏览该文件的文档.
1/*
2 * This benchmark component is modified from nanobench by Martin Ankerl
3 * https://github.com/martinus/nanobench
4 */
5
6#pragma once
8
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
13
14
16
/**
 * Declares `function`, constructs a static `zeroerr::detail::regTest` object
 * describing it (name, file, line, function pointer, extra arguments) with
 * `zeroerr::TestType::bench`, and finally opens the definition of `function`
 * so that the macro invocation can be followed directly by a `{ ... }` body.
 *
 * @param function identifier used for the generated function
 * @param name     first field of the registration record
 * @param ...      forwarded as the brace-enclosed last field of the record
 *
 * NOTE(review): the regTest object is presumably what registers the benchmark
 * with the test runner - confirm against the regTest definition.
 */
#define ZEROERR_CREATE_BENCHMARK_FUNC(function, name, ...)                               \
    static void function(zeroerr::TestContext*);                                         \
    static zeroerr::detail::regTest ZEROERR_NAMEGEN(_zeroerr_reg)(                       \
        {name, __FILE__, __LINE__, function, {__VA_ARGS__}}, zeroerr::TestType::bench);  \
    static void function(ZEROERR_UNUSED(zeroerr::TestContext* _ZEROERR_TEST_CONTEXT))
22
/**
 * Defines a benchmark function with a generated name. All arguments are
 * forwarded to ZEROERR_CREATE_BENCHMARK_FUNC, so the first one becomes the
 * `name` field of the registration record. The expansion ends with the
 * function header, so the macro is meant to be followed by a `{ ... }` body.
 *
 * The expansion is wrapped in ZEROERR_SUPPRESS_VARIADIC_MACRO /
 * ZEROERR_SUPPRESS_VARIADIC_MACRO_POP.
 *
 * The last line deliberately has no trailing backslash: a dangling
 * continuation would splice whatever line follows into the macro.
 */
#define BENCHMARK(...)                                                              \
    ZEROERR_SUPPRESS_VARIADIC_MACRO                                                 \
    ZEROERR_CREATE_BENCHMARK_FUNC(ZEROERR_NAMEGEN(_zeroerr_benchmark), __VA_ARGS__) \
    ZEROERR_SUPPRESS_VARIADIC_MACRO_POP
28
29namespace zeroerr {
30
/**
 * PerfCountSet is a set of performance counters.
 *
 * One value of type T is kept per counter: `iterations` as a plain member and
 * the remaining seven in `data`, which the named accessors below index.
 */
template <typename T>
struct PerfCountSet {
    T iterations{};
    T data[7]{};

    // Named views onto the slots of `data`; each returns a mutable reference.
    T& timeElapsed() { return data[0]; }
    T& pageFaults() { return data[1]; }
    T& cpuCycles() { return data[2]; }
    T& contextSwitches() { return data[3]; }
    T& instructions() { return data[4]; }
    T& branchInstructions() { return data[5]; }
    T& branchMisses() { return data[6]; }
};
47
/// Clock used for timing: std::chrono::high_resolution_clock when it is
/// steady, otherwise std::chrono::steady_clock - so Clock is always steady.
using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady,
                               std::chrono::high_resolution_clock,
                               std::chrono::steady_clock>::type;
50
namespace detail {
// Only forward-declared in this header; PerformanceCounter stores a pointer
// to each of these types, so their definitions are not needed here.
struct LinuxPerformanceCounter;
struct WindowsPerformanceCounter;
}  // namespace detail
55
62
63 void beginMeasure();
64 void endMeasure();
65 void updateResults(uint64_t numIters);
66
67 const PerfCountSet<uint64_t>& val() const noexcept { return _val; }
68 PerfCountSet<bool> has() const noexcept { return _has; }
69
70 static PerformanceCounter& inst();
71
72 Clock::duration elapsed;
73
74protected:
75 Clock::time_point _start;
78
79 detail::LinuxPerformanceCounter* _perf = nullptr;
80 detail::WindowsPerformanceCounter* win_perf = nullptr;
81};
82
87 enum Measure {
88 time_elapsed = 1 << 0,
89 iterations = 1 << 1,
90 page_faults = 1 << 2,
91 cpu_cycles = 1 << 3,
93 instructions = 1 << 5,
95 branch_misses = 1 << 7,
96 all = (1 << 8) - 1,
97 };
98 std::string name;
99 std::vector<PerfCountSet<double>> epoch_details;
101
106};
107
struct Benchmark;
struct BenchState;

// Free functions operating on the opaque BenchState used by Benchmark::run.
// They are only declared here; BenchState itself is never defined in this
// header, which is why run() handles it exclusively through a pointer.
BenchState* createBenchState(Benchmark& benchmark);
void        destroyBenchState(BenchState* state);

size_t getNumIter(BenchState* state);
void   runIteration(BenchState* state);
void   moveResult(BenchState* state, std::string name);
116
117
123struct Benchmark {
124 std::string title = "benchmark";
125 const char* op_unit = "op";
126 const char* time_unit = "ns";
127 uint64_t epochs = 10;
128 uint64_t warmup = 0;
129 uint64_t iter_per_epoch = 0;
130
131 using ns = std::chrono::nanoseconds;
132 using ms = std::chrono::milliseconds;
133 using time = ns;
134
137
139
140 Benchmark(std::string title) { this->title = title; }
141
142
143 template <typename Op>
144 Benchmark& run(std::string name, Op&& op) {
145 auto* s = createBenchState(*this);
146 auto& pc = PerformanceCounter::inst();
147 while (auto n = getNumIter(s)) {
148 pc.beginMeasure();
149 while (n-- > 0) op();
150 pc.endMeasure();
151 runIteration(s);
152 }
153 moveResult(s, name);
154 return *this;
155 }
156
157 template <typename Op>
158 Benchmark& run(Op&& op) {
159 return run("", std::forward<Op>(op));
160 }
161
162 std::vector<BenchResult> result;
163 void report();
164};
165
166
namespace detail {

#if defined(_MSC_VER)
// MSVC branch: the address of the value is passed to a sink function that is
// only declared here (its definition is not part of this header).
void doNotOptimizeAwaySink(const void*);

template <typename T>
void doNotOptimizeAway(const T& val) {
    doNotOptimizeAwaySink(&val);
}

#else

// This assembly magic is taken directly from what Google Benchmark is doing. I have previously
// used what facebook's folly was doing, but this seemed to have compilation problems in some
// cases. Google Benchmark seemed to be the most well tested anyways. see
// https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307

// Read-only overload: `val` is an input operand of an empty asm statement
// that also declares a "memory" clobber.
template <typename T>
void doNotOptimizeAway(const T& val) {
    // NOLINTNEXTLINE(hicpp-no-assembler)
    asm volatile("" : : "r,m"(val) : "memory");
}

// Mutable overload: `val` is a read-write ("+") operand, so the compiler has
// to assume the empty asm statement may have modified it. Clang and the other
// compilers are given the constraint alternatives in a different order.
template <typename T>
void doNotOptimizeAway(T& val) {
#if defined(__clang__)
    // NOLINTNEXTLINE(hicpp-no-assembler)
    asm volatile("" : "+r,m"(val) : : "memory");
#else
    // NOLINTNEXTLINE(hicpp-no-assembler)
    asm volatile("" : "+m,r"(val) : : "memory");
#endif
}
#endif

}  // namespace detail
202
203
210template <typename Arg>
211void doNotOptimizeAway(Arg&& arg) {
212 detail::doNotOptimizeAway(std::forward<Arg>(arg));
213}
214
215} // namespace zeroerr
216
#define ZEROERR_SUPPRESS_COMMON_WARNINGS_POP
Definition config.h:268
#define ZEROERR_SUPPRESS_COMMON_WARNINGS_PUSH
Definition config.h:220
void doNotOptimizeAway(const T &val)
Definition benchmark.h:184
Definition benchmark.cpp:17
void doNotOptimizeAway(Arg &&arg)
Makes sure none of the given arguments are optimized away by the compiler.
Definition benchmark.h:211
void destroyBenchState(BenchState *state)
Definition benchmark.cpp:152
void moveResult(BenchState *state, std::string name)
Definition benchmark.cpp:179
BenchState * createBenchState(Benchmark &benchmark)
Definition benchmark.cpp:151
void runIteration(BenchState *state)
Definition benchmark.cpp:159
std::conditional< std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock >::type Clock
Definition benchmark.h:49
size_t getNumIter(BenchState *state)
Definition benchmark.cpp:154
BenchResult is a result of running the benchmark.
Definition benchmark.h:86
Measure
Definition benchmark.h:87
@ all
Definition benchmark.h:96
@ iterations
Definition benchmark.h:89
@ branch_misses
Definition benchmark.h:95
@ context_switches
Definition benchmark.h:92
@ instructions
Definition benchmark.h:93
@ time_elapsed
Definition benchmark.h:88
@ branch_instructions
Definition benchmark.h:94
@ cpu_cycles
Definition benchmark.h:91
@ page_faults
Definition benchmark.h:90
PerfCountSet< double > mean() const
Definition benchmark.cpp:229
PerfCountSet< double > average() const
Definition benchmark.cpp:189
PerfCountSet< double > max() const
Definition benchmark.cpp:216
PerfCountSet< double > min() const
Definition benchmark.cpp:203
std::string name
Definition benchmark.h:98
PerfCountSet< bool > has
Definition benchmark.h:100
std::vector< PerfCountSet< double > > epoch_details
Definition benchmark.h:99
Definition benchmark.cpp:50
Benchmark creates a core object for configuring a benchmark. This class is a driver to run multip...
Definition benchmark.h:123
uint64_t iter_per_epoch
Definition benchmark.h:129
std::chrono::milliseconds ms
Definition benchmark.h:132
const char * time_unit
Definition benchmark.h:126
Benchmark & run(std::string name, Op &&op)
Definition benchmark.h:144
std::vector< BenchResult > result
Definition benchmark.h:162
uint64_t warmup
Definition benchmark.h:128
uint64_t minimalResolutionMutipler
Definition benchmark.h:138
Benchmark & run(Op &&op)
Definition benchmark.h:158
time mMaxEpochTime
Definition benchmark.h:135
std::chrono::nanoseconds ns
Definition benchmark.h:131
ns time
Definition benchmark.h:133
time mMinEpochTime
Definition benchmark.h:136
Benchmark(std::string title)
Definition benchmark.h:140
uint64_t epochs
Definition benchmark.h:127
void report()
Definition benchmark.cpp:235
std::string title
Definition benchmark.h:124
const char * op_unit
Definition benchmark.h:125
PerfCountSet is a set of performance counters.
Definition benchmark.h:35
T iterations
Definition benchmark.h:36
T & timeElapsed()
Definition benchmark.h:39
T & branchInstructions()
Definition benchmark.h:44
T & pageFaults()
Definition benchmark.h:40
T & cpuCycles()
Definition benchmark.h:41
T data[7]
Definition benchmark.h:37
T & contextSwitches()
Definition benchmark.h:42
T & branchMisses()
Definition benchmark.h:45
T & instructions()
Definition benchmark.h:43
PerformanceCounter is a class to measure the performance of a function.
Definition benchmark.h:59
void updateResults(uint64_t numIters)
Definition benchmark.cpp:631
PerfCountSet< bool > _has
Definition benchmark.h:77
void endMeasure()
Definition benchmark.cpp:625
PerformanceCounter()
Definition benchmark.cpp:575
detail::LinuxPerformanceCounter * _perf
Definition benchmark.h:79
Clock::time_point _start
Definition benchmark.h:75
const PerfCountSet< uint64_t > & val() const noexcept
Definition benchmark.h:67
void beginMeasure()
Definition benchmark.cpp:619
PerfCountSet< uint64_t > _val
Definition benchmark.h:76
~PerformanceCounter()
Definition benchmark.cpp:608
PerfCountSet< bool > has() const noexcept
Definition benchmark.h:68
detail::WindowsPerformanceCounter * win_perf
Definition benchmark.h:80
static PerformanceCounter & inst()
Definition benchmark.cpp:614
Clock::duration elapsed
Definition benchmark.h:72