// benchmark v1.9.5 — benchmark.h
// (doc-site navigation residue removed: "Loading... / Searching... / No Matches")
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Support for registering benchmarks for functions.
16
17/* Example usage:
18// Define a function that executes the code to be measured a
19// specified number of times:
20static void BM_StringCreation(benchmark::State& state) {
21 for (auto _ : state)
22 std::string empty_string;
23}
24
25// Register the function as a benchmark
26BENCHMARK(BM_StringCreation);
27
28// Define another benchmark
29static void BM_StringCopy(benchmark::State& state) {
30 std::string x = "hello";
31 for (auto _ : state)
32 std::string copy(x);
33}
34BENCHMARK(BM_StringCopy);
35
36// Augment the main() program to invoke benchmarks if specified
37// via the --benchmark_filter command line flag. E.g.,
38// my_unittest --benchmark_filter=all
39// my_unittest --benchmark_filter=BM_StringCreation
40// my_unittest --benchmark_filter=String
41// my_unittest --benchmark_filter='Copy|Creation'
42int main(int argc, char** argv) {
43 benchmark::MaybeReenterWithoutASLR(argc, argv);
44 benchmark::Initialize(&argc, argv);
45 benchmark::RunSpecifiedBenchmarks();
46 benchmark::Shutdown();
47 return 0;
48}
49
50// Sometimes a family of microbenchmarks can be implemented with
51// just one routine that takes an extra argument to specify which
52// one of the family of benchmarks to run. For example, the following
53// code defines a family of microbenchmarks for measuring the speed
54// of memcpy() calls of different lengths:
55
56static void BM_memcpy(benchmark::State& state) {
57 char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
58 memset(src, 'x', state.range(0));
59 for (auto _ : state)
60 memcpy(dst, src, state.range(0));
61 state.SetBytesProcessed(state.iterations() * state.range(0));
62 delete[] src; delete[] dst;
63}
64BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
65
66// The preceding code is quite repetitive, and can be replaced with the
67// following short-hand. The following invocation will pick a few
68// appropriate arguments in the specified range and will generate a
69// microbenchmark for each such argument.
70BENCHMARK(BM_memcpy)->Range(8, 8<<10);
71
72// You might have a microbenchmark that depends on two inputs. For
73// example, the following code defines a family of microbenchmarks for
74// measuring the speed of set insertion.
75static void BM_SetInsert(benchmark::State& state) {
76 set<int> data;
77 for (auto _ : state) {
78 state.PauseTiming();
79 data = ConstructRandomSet(state.range(0));
80 state.ResumeTiming();
81 for (int j = 0; j < state.range(1); ++j)
82 data.insert(RandomNumber());
83 }
84}
85BENCHMARK(BM_SetInsert)
86 ->Args({1<<10, 128})
87 ->Args({2<<10, 128})
88 ->Args({4<<10, 128})
89 ->Args({8<<10, 128})
90 ->Args({1<<10, 512})
91 ->Args({2<<10, 512})
92 ->Args({4<<10, 512})
93 ->Args({8<<10, 512});
94
95// The preceding code is quite repetitive, and can be replaced with
96// the following short-hand. The following macro will pick a few
97// appropriate arguments in the product of the two specified ranges
98// and will generate a microbenchmark for each such pair.
99BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
100
101// For more complex patterns of inputs, passing a custom function
102// to Apply allows programmatic specification of an
103// arbitrary set of arguments to run the microbenchmark on.
104// The following example enumerates a dense range on
105// one parameter, and a sparse range on the second.
106static void CustomArguments(benchmark::Benchmark* b) {
107 for (int i = 0; i <= 10; ++i)
108 for (int j = 32; j <= 1024*1024; j *= 8)
109 b->Args({i, j});
110}
111BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
112
113// Templated microbenchmarks work the same way:
114// Produce then consume 'size' messages 'iters' times
115// Measures throughput in the absence of multiprogramming.
116template <class Q> int BM_Sequential(benchmark::State& state) {
117 Q q;
118 typename Q::value_type v;
119 for (auto _ : state) {
120 for (int i = state.range(0); i--; )
121 q.push(v);
122 for (int e = state.range(0); e--; )
123 q.Wait(&v);
124 }
125 // actually messages, not bytes:
126 state.SetBytesProcessed(state.iterations() * state.range(0));
127}
128BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
129
130Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
131benchmark. This option overrides the `benchmark_min_time` flag.
132
133void BM_test(benchmark::State& state) {
134 ... body ...
135}
136BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
137
138In a multithreaded test, it is guaranteed that none of the threads will start
139until all have reached the loop start, and all will have finished before any
140thread exits the loop body. As such, any global setup or teardown you want to
141do can be wrapped in a check against the thread index:
142
143static void BM_MultiThreaded(benchmark::State& state) {
144 if (state.thread_index() == 0) {
145 // Setup code here.
146 }
147 for (auto _ : state) {
148 // Run the test as normal.
149 }
150 if (state.thread_index() == 0) {
151 // Teardown code here.
152 }
153}
154BENCHMARK(BM_MultiThreaded)->Threads(4);
155
156
157If a benchmark runs a few milliseconds it may be hard to visually compare the
158measured times, since the output data is given in nanoseconds per default. In
159order to manually set the time unit, you can specify it manually:
160
161BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
162*/
163
164#ifndef BENCHMARK_BENCHMARK_H_
165#define BENCHMARK_BENCHMARK_H_
166
167#include <stdint.h>
168
169#include <algorithm>
170#include <atomic>
171#include <cassert>
172#include <cstddef>
173#include <functional>
174#include <iosfwd>
175#include <limits>
176#include <map>
177#include <memory>
178#include <set>
179#include <string>
180#include <type_traits>
181#include <utility>
182#include <vector>
183
184#include "benchmark/export.h"
185
186#if defined(_MSC_VER)
187#include <intrin.h> // for _ReadWriteBarrier
188#endif
189
// Deletes the copy constructor and copy-assignment operator of 'TypeName'.
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;                \
  TypeName& operator=(const TypeName&) = delete

// Marks a declaration as intentionally unused to silence compiler warnings.
#ifdef BENCHMARK_HAS_CXX17
#define BENCHMARK_UNUSED [[maybe_unused]]
#elif defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_UNUSED __attribute__((unused))
#else
#define BENCHMARK_UNUSED
#endif

// Used to annotate functions, methods and classes so they
// are not optimized by the compiler. Useful for tests
// where you expect loops to stay in place churning cycles
#if defined(__clang__)
#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
#elif defined(__GNUC__) || defined(__GNUG__)
#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
#else
// MSVC & Intel do not have a no-optimize attribute, only line pragmas
#define BENCHMARK_DONT_OPTIMIZE
#endif

#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER) && !defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __forceinline
#define __func__ __FUNCTION__
#else
#define BENCHMARK_ALWAYS_INLINE
#endif

// Two-level stringification so that macro arguments are expanded before
// being turned into string literals.
#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)

// clang-format off
// Compiler-specific hints: branch prediction, deprecation messages, and
// push/pop suppression of deprecation warnings.
#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
  _Pragma("GCC diagnostic push")             \
  _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
#elif defined(__NVCOMPILER)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
  _Pragma("diagnostic push")                 \
  _Pragma("diag_suppress deprecated_entity_with_custom_message")
#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
#elif defined(_MSC_VER)
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg) __declspec(deprecated(msg))
#define BENCHMARK_WARNING_MSG(msg)                           \
  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
      __LINE__) ") : warning note: " msg))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
  __pragma(warning(push))                    \
  __pragma(warning(disable : 4996))
#define BENCHMARK_RESTORE_DEPRECATED_WARNING __pragma(warning(pop))
#else
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
// NOTE(review): this fallback branch uses MSVC-only __pragma even though
// _MSC_VER is not defined here — looks like upstream behavior; any use of
// BENCHMARK_WARNING_MSG on such compilers would fail. Confirm before changing.
#define BENCHMARK_WARNING_MSG(msg)                           \
  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
      __LINE__) ") : warning note: " msg))
#define BENCHMARK_DISABLE_DEPRECATED_WARNING
#define BENCHMARK_RESTORE_DEPRECATED_WARNING
#endif
// clang-format on

// Numeric GCC version (e.g. 903 for GCC 9.3) for version-gated workarounds.
#if defined(__GNUC__) && !defined(__clang__)
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#endif

// Make __has_builtin usable on compilers that lack it.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Tells the optimizer a code path cannot be reached; no-op fallback otherwise.
#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#define BENCHMARK_UNREACHABLE() __assume(false)
#else
#define BENCHMARK_UNREACHABLE() ((void)0)
#endif

#if defined(__GNUC__)
// Determine the cacheline size based on architecture
#if defined(__i386__) || defined(__x86_64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#elif defined(__powerpc64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 128
#elif defined(__aarch64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#elif defined(__arm__)
// Cache line sizes for ARM: These values are not strictly correct since
// cache line sizes depend on implementations, not architectures. There
// are even implementations with cache line sizes configurable at boot
// time.
#if defined(__ARM_ARCH_5T__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 32
#elif defined(__ARM_ARCH_7A__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#endif  // ARM_ARCH
#endif  // arches
#endif  // __GNUC__

#ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE
// A reasonable default guess. Note that overestimates tend to waste more
// space, while underestimates tend to waste more time.
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#endif

#if defined(__GNUC__)
// Indicates that the declared object be cache aligned using
// `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above).
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
  __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE)))
#elif defined(_MSC_VER)
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
  __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE))
#else
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED
#endif

#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif  // _MSC_VER_

323namespace benchmark {
324
325namespace internal {
326#if (__cplusplus < 201402L || (defined(_MSC_VER) && _MSVC_LANG < 201402L))
327template <typename T, typename... Args>
328std::unique_ptr<T> make_unique(Args&&... args) {
329 return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
330}
331#else
332using ::std::make_unique;
333#endif
334} // namespace internal
335
337class State;
338
339using IterationCount = int64_t;
340
341// Define alias of Setup/Teardown callback function type
342using callback_function = std::function<void(const benchmark::State&)>;
343
344// Default number of minimum benchmark running time in seconds.
345const char kDefaultMinTimeStr[] = "0.5s";
346
347BENCHMARK_EXPORT void MaybeReenterWithoutASLR(int, char**);
348
349// Returns the version of the library.
350BENCHMARK_EXPORT std::string GetBenchmarkVersion();
351
352BENCHMARK_EXPORT void PrintDefaultHelp();
353
354BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
355 void (*HelperPrintf)() = PrintDefaultHelp);
356BENCHMARK_EXPORT void Shutdown();
357
358// Report to stdout all arguments in 'argv' as unrecognized except the first.
359// Returns true there is at least on unrecognized argument (i.e. 'argc' > 1).
360BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv);
361
362// Returns the current value of --benchmark_filter.
363BENCHMARK_EXPORT std::string GetBenchmarkFilter();
364
365// Sets a new value to --benchmark_filter. (This will override this flag's
366// current value).
367// Should be called after `benchmark::Initialize()`, as
368// `benchmark::Initialize()` will override the flag's value.
369BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value);
370
371// Returns the current value of --v (command line value for verbosity).
372BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity();
373
374// Creates a default display reporter. Used by the library when no display
375// reporter is provided, but also made available for external use in case a
376// custom reporter should respect the `--benchmark_format` flag as a fallback
377BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter();
378
379// Generate a list of benchmarks matching the specified --benchmark_filter flag
380// and if --benchmark_list_tests is specified return after printing the name
381// of each matching benchmark. Otherwise run each matching benchmark and
382// report the results.
383//
384// spec : Specify the benchmarks to run. If users do not specify this arg,
385// then the value of FLAGS_benchmark_filter
386// will be used.
387//
388// The second and third overload use the specified 'display_reporter' and
389// 'file_reporter' respectively. 'file_reporter' will write to the file
390// specified
391// by '--benchmark_out'. If '--benchmark_out' is not given the
392// 'file_reporter' is ignored.
393//
394// RETURNS: The number of matching benchmarks.
395BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks();
396BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec);
397
398BENCHMARK_EXPORT size_t
399RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
400BENCHMARK_EXPORT size_t
401RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec);
402
403BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(
404 BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter);
405BENCHMARK_EXPORT size_t
406RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
407 BenchmarkReporter* file_reporter, std::string spec);
408
// TimeUnit is passed to a benchmark in order to specify the order of magnitude
// for the measured time.
enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };

413BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit();
414
415// Sets the default time unit the benchmarks use
416// Has to be called before the benchmark loop to take effect
417BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit);
418
419// If a MemoryManager is registered (via RegisterMemoryManager()),
420// it can be used to collect and report allocation metrics for a run of the
421// benchmark.
423 public:
424 static constexpr int64_t TombstoneValue = std::numeric_limits<int64_t>::max();
425
426 struct Result {
427 Result()
428 : num_allocs(0),
429 max_bytes_used(0),
430 total_allocated_bytes(TombstoneValue),
431 net_heap_growth(TombstoneValue),
432 memory_iterations(0) {}
433
434 // The number of allocations made in total between Start and Stop.
435 int64_t num_allocs;
436
437 // The peak memory use between Start and Stop.
438 int64_t max_bytes_used;
439
440 // The total memory allocated, in bytes, between Start and Stop.
441 // Init'ed to TombstoneValue if metric not available.
442 int64_t total_allocated_bytes;
443
444 // The net changes in memory, in bytes, between Start and Stop.
445 // ie., total_allocated_bytes - total_deallocated_bytes.
446 // Init'ed to TombstoneValue if metric not available.
447 int64_t net_heap_growth;
448
449 IterationCount memory_iterations;
450 };
451
452 virtual ~MemoryManager() {}
453
454 // Implement this to start recording allocation information.
455 virtual void Start() = 0;
456
457 // Implement this to stop recording and fill out the given Result structure.
458 virtual void Stop(Result& result) = 0;
459};
460
461// Register a MemoryManager instance that will be used to collect and report
462// allocation measurements for benchmark runs.
463BENCHMARK_EXPORT
464void RegisterMemoryManager(MemoryManager* memory_manager);
465
// If a ProfilerManager is registered (via RegisterProfilerManager()), the
// benchmark will be run an additional time under the profiler to collect and
// report profile metrics for the run of the benchmark.
// NOTE(review): the `class ProfilerManager {` line was lost in extraction
// (source numbering jumps from 468 to 470); restored here.
class ProfilerManager {
 public:
  virtual ~ProfilerManager() {}

  // This is called after `Setup()` code and right before the benchmark is run.
  virtual void AfterSetupStart() = 0;

  // This is called before `Teardown()` code and right after the benchmark
  // completes.
  virtual void BeforeTeardownStop() = 0;
};

481// Register a ProfilerManager instance that will be used to collect and report
482// profile measurements for benchmark runs.
483BENCHMARK_EXPORT
484void RegisterProfilerManager(ProfilerManager* profiler_manager);
485
486// Add a key-value pair to output as part of the context stanza in the report.
487BENCHMARK_EXPORT
488void AddCustomContext(std::string key, std::string value);
489
490class Benchmark;
491
492namespace internal {
493class BenchmarkImp;
494class BenchmarkFamilies;
495
496BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext();
497
498BENCHMARK_EXPORT
499void UseCharPointer(char const volatile*);
500
501// Take ownership of the pointer and register the benchmark. Return the
502// registered benchmark.
503BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(
504 std::unique_ptr<Benchmark>);
505
506// Ensure that the standard streams are properly initialized in every TU.
507BENCHMARK_EXPORT int InitializeStreams();
508BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
509
510} // namespace internal
511
512#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
513 defined(__EMSCRIPTEN__)
514#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
515#endif
516
517// Force the compiler to flush pending writes to global memory. Acts as an
518// effective read/write barrier
519inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
520 std::atomic_signal_fence(std::memory_order_acq_rel);
521}
522
523// The DoNotOptimize(...) function can be used to prevent a value or
524// expression from being optimized away by the compiler. This function is
525// intended to add little to no overhead.
526// See: https://youtu.be/nXaxk27zwlk?t=2441
527#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
528#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
529template <class Tp>
530BENCHMARK_DEPRECATED_MSG(
531 "The const-ref version of this method can permit "
532 "undesired compiler optimizations in benchmarks")
533inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
534 asm volatile("" : : "r,m"(value) : "memory");
535}
536
537template <class Tp>
538inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
539#if defined(__clang__)
540 asm volatile("" : "+r,m"(value) : : "memory");
541#else
542 asm volatile("" : "+m,r"(value) : : "memory");
543#endif
544}
545
546template <class Tp>
547inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
548#if defined(__clang__)
549 asm volatile("" : "+r,m"(value) : : "memory");
550#else
551 asm volatile("" : "+m,r"(value) : : "memory");
552#endif
553}
554// !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
555#elif (__GNUC__ >= 5)
556// Workaround for a bug with full argument copy overhead with GCC.
557// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
558template <class Tp>
559BENCHMARK_DEPRECATED_MSG(
560 "The const-ref version of this method can permit "
561 "undesired compiler optimizations in benchmarks")
562inline BENCHMARK_ALWAYS_INLINE
563 typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
564 (sizeof(Tp) <= sizeof(Tp*))>::type
565 DoNotOptimize(Tp const& value) {
566 asm volatile("" : : "r,m"(value) : "memory");
567}
568
569template <class Tp>
570BENCHMARK_DEPRECATED_MSG(
571 "The const-ref version of this method can permit "
572 "undesired compiler optimizations in benchmarks")
573inline BENCHMARK_ALWAYS_INLINE
574 typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
575 (sizeof(Tp) > sizeof(Tp*))>::type
576 DoNotOptimize(Tp const& value) {
577 asm volatile("" : : "m"(value) : "memory");
578}
579
580template <class Tp>
581inline BENCHMARK_ALWAYS_INLINE
582 typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
583 (sizeof(Tp) <= sizeof(Tp*))>::type
584 DoNotOptimize(Tp& value) {
585 asm volatile("" : "+m,r"(value) : : "memory");
586}
587
588template <class Tp>
589inline BENCHMARK_ALWAYS_INLINE
590 typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
591 (sizeof(Tp) > sizeof(Tp*))>::type
592 DoNotOptimize(Tp& value) {
593 asm volatile("" : "+m"(value) : : "memory");
594}
595
596template <class Tp>
597inline BENCHMARK_ALWAYS_INLINE
598 typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
599 (sizeof(Tp) <= sizeof(Tp*))>::type
600 DoNotOptimize(Tp&& value) {
601 asm volatile("" : "+m,r"(value) : : "memory");
602}
603
604template <class Tp>
605inline BENCHMARK_ALWAYS_INLINE
606 typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
607 (sizeof(Tp) > sizeof(Tp*))>::type
608 DoNotOptimize(Tp&& value) {
609 asm volatile("" : "+m"(value) : : "memory");
610}
611// !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
612#endif
613
614#elif defined(_MSC_VER)
615template <class Tp>
616BENCHMARK_DEPRECATED_MSG(
617 "The const-ref version of this method can permit "
618 "undesired compiler optimizations in benchmarks")
619inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
620 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
621 _ReadWriteBarrier();
622}
623
624template <class Tp>
625inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
626 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
627 _ReadWriteBarrier();
628}
629
630template <class Tp>
631inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
632 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
633 _ReadWriteBarrier();
634}
635#else
636template <class Tp>
637inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
638 internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
639}
640// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
641#endif
642
643// This class is used for user-defined counters.
644class Counter {
645 public:
646 enum Flags {
647 kDefaults = 0,
648 // Mark the counter as a rate. It will be presented divided
649 // by the duration of the benchmark.
650 kIsRate = 1 << 0,
651 // Mark the counter as a thread-average quantity. It will be
652 // presented divided by the number of threads.
653 kAvgThreads = 1 << 1,
654 // Mark the counter as a thread-average rate. See above.
655 kAvgThreadsRate = kIsRate | kAvgThreads,
656 // Mark the counter as a constant value, valid/same for *every* iteration.
657 // When reporting, it will be *multiplied* by the iteration count.
658 kIsIterationInvariant = 1 << 2,
659 // Mark the counter as a constant rate.
660 // When reporting, it will be *multiplied* by the iteration count
661 // and then divided by the duration of the benchmark.
662 kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
663 // Mark the counter as a iteration-average quantity.
664 // It will be presented divided by the number of iterations.
665 kAvgIterations = 1 << 3,
666 // Mark the counter as a iteration-average rate. See above.
667 kAvgIterationsRate = kIsRate | kAvgIterations,
668
669 // In the end, invert the result. This is always done last!
670 kInvert = 1 << 31
671 };
672
673 enum OneK {
674 // 1'000 items per 1k
675 kIs1000 = 1000,
676 // 1'024 items per 1k
677 kIs1024 = 1024
678 };
679
680 double value;
681 Flags flags;
682 OneK oneK;
683
684 BENCHMARK_ALWAYS_INLINE
685 Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
686 : value(v), flags(f), oneK(k) {}
687
688 BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
689 BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
690};
691
692// A helper for user code to create unforeseen combinations of Flags, without
693// having to do this cast manually each time, or providing this operator.
694Counter::Flags inline operator|(const Counter::Flags& LHS,
695 const Counter::Flags& RHS) {
696 return static_cast<Counter::Flags>(static_cast<int>(LHS) |
697 static_cast<int>(RHS));
698}
699
700// This is the container for the user-defined counters.
701typedef std::map<std::string, Counter> UserCounters;
702
// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. In case oAuto is selected,
// complexity will be calculated automatically to the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };

// The input-size term 'n' used when reporting asymptotic complexity.
typedef int64_t ComplexityN;

enum StatisticUnit { kTime, kPercentage };

// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
typedef double(BigOFunc)(ComplexityN);

// StatisticsFunc is passed to a benchmark in order to compute some descriptive
// statistics over all the measurements of some type
typedef double(StatisticsFunc)(const std::vector<double>&);

721namespace internal {
722struct Statistics {
723 std::string name_;
724 StatisticsFunc* compute_;
725 StatisticUnit unit_;
726
727 Statistics(const std::string& name, StatisticsFunc* compute,
728 StatisticUnit unit = kTime)
729 : name_(name), compute_(compute), unit_(unit) {}
730};
731
733class ThreadTimer;
734class ThreadManager;
736
737enum AggregationReportMode : unsigned {
738 // The mode has not been manually specified
739 ARM_Unspecified = 0,
740 // The mode is user-specified.
741 // This may or may not be set when the following bit-flags are set.
742 ARM_Default = 1U << 0U,
743 // File reporter should only output aggregates.
744 ARM_FileReportAggregatesOnly = 1U << 1U,
745 // Display reporter should only output aggregates
746 ARM_DisplayReportAggregatesOnly = 1U << 2U,
747 // Both reporters should only display aggregates.
748 ARM_ReportAggregatesOnly =
749 ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
750};
751
752enum Skipped : unsigned {
753 NotSkipped = 0,
754 SkippedWithMessage,
755 SkippedWithError
756};
757
758} // namespace internal
759
#if defined(_MSC_VER)
#pragma warning(push)
// C4324: 'benchmark::State': structure was padded due to alignment specifier
#pragma warning(disable : 4324)
#endif  // _MSC_VER_
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
767class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
768 public:
769 struct StateIterator;
770 friend struct StateIterator;
771
772 // Returns iterators used to run each iteration of a benchmark using a
773 // C++11 ranged-based for loop. These functions should not be called directly.
774 //
775 // REQUIRES: The benchmark has not started running yet. Neither begin nor end
776 // have been called previously.
777 //
778 // NOTE: KeepRunning may not be used after calling either of these functions.
779 inline BENCHMARK_ALWAYS_INLINE StateIterator begin();
780 inline BENCHMARK_ALWAYS_INLINE StateIterator end();
781
782 // Returns true if the benchmark should continue through another iteration.
783 // NOTE: A benchmark may not return from the test until KeepRunning() has
784 // returned false.
785 inline bool KeepRunning();
786
787 // Returns true iff the benchmark should run n more iterations.
788 // REQUIRES: 'n' > 0.
789 // NOTE: A benchmark must not return from the test until KeepRunningBatch()
790 // has returned false.
791 // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
792 //
793 // Intended usage:
794 // while (state.KeepRunningBatch(1000)) {
795 // // process 1000 elements
796 // }
797 inline bool KeepRunningBatch(IterationCount n);
798
799 // REQUIRES: timer is running and 'SkipWithMessage(...)' or
800 // 'SkipWithError(...)' has not been called by the current thread.
801 // Stop the benchmark timer. If not called, the timer will be
802 // automatically stopped after the last iteration of the benchmark loop.
803 //
804 // For threaded benchmarks the PauseTiming() function only pauses the timing
805 // for the current thread.
806 //
807 // NOTE: The "real time" measurement is per-thread. If different threads
808 // report different measurements the largest one is reported.
809 //
810 // NOTE: PauseTiming()/ResumeTiming() are relatively
811 // heavyweight, and so their use should generally be avoided
812 // within each benchmark iteration, if possible.
813 void PauseTiming();
814
815 // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
816 // 'SkipWithError(...)' has not been called by the current thread.
817 // Start the benchmark timer. The timer is NOT running on entrance to the
818 // benchmark function. It begins running after control flow enters the
819 // benchmark loop.
820 //
821 // NOTE: PauseTiming()/ResumeTiming() are relatively
822 // heavyweight, and so their use should generally be avoided
823 // within each benchmark iteration, if possible.
824 void ResumeTiming();
825
826 // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
827 // called previously by the current thread.
828 // Report the benchmark as resulting in being skipped with the specified
829 // 'msg'.
830 // After this call the user may explicitly 'return' from the benchmark.
831 //
832 // If the ranged-for style of benchmark loop is used, the user must explicitly
833 // break from the loop, otherwise all future iterations will be run.
834 // If the 'KeepRunning()' loop is used the current thread will automatically
835 // exit the loop at the end of the current iteration.
836 //
837 // For threaded benchmarks only the current thread stops executing and future
838 // calls to `KeepRunning()` will block until all threads have completed
839 // the `KeepRunning()` loop. If multiple threads report being skipped only the
840 // first skip message is used.
841 //
842 // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
843 // the current scope immediately. If the function is called from within
844 // the 'KeepRunning()' loop the current iteration will finish. It is the users
845 // responsibility to exit the scope as needed.
846 void SkipWithMessage(const std::string& msg);
847
848 // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
849 // called previously by the current thread.
850 // Report the benchmark as resulting in an error with the specified 'msg'.
851 // After this call the user may explicitly 'return' from the benchmark.
852 //
853 // If the ranged-for style of benchmark loop is used, the user must explicitly
854 // break from the loop, otherwise all future iterations will be run.
855 // If the 'KeepRunning()' loop is used the current thread will automatically
856 // exit the loop at the end of the current iteration.
857 //
858 // For threaded benchmarks only the current thread stops executing and future
859 // calls to `KeepRunning()` will block until all threads have completed
860 // the `KeepRunning()` loop. If multiple threads report an error only the
861 // first error message is used.
862 //
863 // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
864 // the current scope immediately. If the function is called from within
  // the 'KeepRunning()' loop the current iteration will finish. It is the user's
866 // responsibility to exit the scope as needed.
867 void SkipWithError(const std::string& msg);
868
869 // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
870 bool skipped() const { return internal::NotSkipped != skipped_; }
871
872 // Returns true if an error has been reported with 'SkipWithError(...)'.
873 bool error_occurred() const { return internal::SkippedWithError == skipped_; }
874
875 // REQUIRES: called exactly once per iteration of the benchmarking loop.
876 // Set the manually measured time for this benchmark iteration, which
877 // is used instead of automatically measured time if UseManualTime() was
878 // specified.
879 //
880 // For threaded benchmarks the final value will be set to the largest
881 // reported values.
882 void SetIterationTime(double seconds);
883
884 // Set the number of bytes processed by the current benchmark
885 // execution. This routine is typically called once at the end of a
886 // throughput oriented benchmark.
887 //
888 // REQUIRES: a benchmark has exited its benchmarking loop.
889 BENCHMARK_ALWAYS_INLINE
890 void SetBytesProcessed(int64_t bytes) {
891 counters["bytes_per_second"] =
892 Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
893 }
894
895 BENCHMARK_ALWAYS_INLINE
896 int64_t bytes_processed() const {
897 if (counters.find("bytes_per_second") != counters.end())
898 return static_cast<int64_t>(counters.at("bytes_per_second"));
899 return 0;
900 }
901
  // If this routine is called with complexity_n > 0 and a complexity report
  // is requested for the benchmark family, then this benchmark run will be
  // included in the complexity computation, with complexity_n representing
  // the problem size N.
  BENCHMARK_ALWAYS_INLINE
  void SetComplexityN(ComplexityN complexity_n) {
    complexity_n_ = complexity_n;
  }
911
  // Returns the problem size N previously set via SetComplexityN().
  BENCHMARK_ALWAYS_INLINE
  ComplexityN complexity_length_n() const { return complexity_n_; }
914
915 // If this routine is called with items > 0, then an items/s
916 // label is printed on the benchmark report line for the currently
917 // executing benchmark. It is typically called at the end of a processing
918 // benchmark where a processing items/second output is desired.
919 //
920 // REQUIRES: a benchmark has exited its benchmarking loop.
921 BENCHMARK_ALWAYS_INLINE
922 void SetItemsProcessed(int64_t items) {
923 counters["items_per_second"] =
924 Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
925 }
926
927 BENCHMARK_ALWAYS_INLINE
928 int64_t items_processed() const {
929 if (counters.find("items_per_second") != counters.end())
930 return static_cast<int64_t>(counters.at("items_per_second"));
931 return 0;
932 }
933
934 // If this routine is called, the specified label is printed at the
935 // end of the benchmark report line for the currently executing
936 // benchmark. Example:
937 // static void BM_Compress(benchmark::State& state) {
938 // ...
939 // double compress = input_size / output_size;
940 // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
941 // }
942 // Produces output that looks like:
943 // BM_Compress 50 50 14115038 compress:27.3%
944 //
945 // REQUIRES: a benchmark has exited its benchmarking loop.
946 void SetLabel(const std::string& label);
947
  // Range argument at index 'pos' for this run. Asserts (in debug builds)
  // that an argument at index 'pos' has been set.
  BENCHMARK_ALWAYS_INLINE
  int64_t range(std::size_t pos = 0) const {
    assert(range_.size() > pos);
    return range_[pos];
  }
954
  // Deprecated: identical to range(0).
  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
  int64_t range_x() const { return range(0); }
957
  // Deprecated: identical to range(1).
  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
  int64_t range_y() const { return range(1); }
960
  // Number of threads concurrently executing the benchmark. Constant for
  // the lifetime of this State (backed by a const member).
  BENCHMARK_ALWAYS_INLINE
  int threads() const { return threads_; }
964
  // Index of the executing thread, in [0, threads()). Constant for the
  // lifetime of this State (backed by a const member).
  BENCHMARK_ALWAYS_INLINE
  int thread_index() const { return thread_index_; }
968
969 BENCHMARK_ALWAYS_INLINE
970 IterationCount iterations() const {
971 if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
972 return 0;
973 }
974 return max_iterations - total_iterations_ + batch_leftover_;
975 }
976
  // Name of the currently running benchmark; returns a copy of the stored
  // name string.
  BENCHMARK_ALWAYS_INLINE
  std::string name() const { return name_; }
979
  // Number of range arguments that were supplied for this run.
  size_t range_size() const { return range_.size(); }
981
982 private:
983 // items we expect on the first cache line (ie 64 bytes of the struct)
984 // When total_iterations_ is 0, KeepRunning() and friends will return false.
985 // May be larger than max_iterations.
986 IterationCount total_iterations_;
987
988 // When using KeepRunningBatch(), batch_leftover_ holds the number of
989 // iterations beyond max_iters that were run. Used to track
990 // completed_iterations_ accurately.
991 IterationCount batch_leftover_;
992
993 public:
994 const IterationCount max_iterations;
995
996 private:
997 bool started_;
998 bool finished_;
999 internal::Skipped skipped_;
1000
1001 // items we don't need on the first cache line
1002 std::vector<int64_t> range_;
1003
1004 ComplexityN complexity_n_;
1005
1006 public:
1007 // Container for user-defined counters.
1008 UserCounters counters;
1009
1010 private:
1011 State(std::string name, IterationCount max_iters,
1012 const std::vector<int64_t>& ranges, int thread_i, int n_threads,
1014 internal::PerfCountersMeasurement* perf_counters_measurement,
1015 ProfilerManager* profiler_manager);
1016
1017 void StartKeepRunning();
1018 // Implementation of KeepRunning() and KeepRunningBatch().
1019 // is_batch must be true unless n is 1.
1020 inline bool KeepRunningInternal(IterationCount n, bool is_batch);
1021 void FinishKeepRunning();
1022
1023 const std::string name_;
1024 const int thread_index_;
1025 const int threads_;
1026
1027 internal::ThreadTimer* const timer_;
1028 internal::ThreadManager* const manager_;
1029 internal::PerfCountersMeasurement* const perf_counters_measurement_;
1030 ProfilerManager* const profiler_manager_;
1031
1032 friend class internal::BenchmarkInstance;
1033};
1034#if defined(_MSC_VER)
1035#pragma warning(pop)
1036#endif // _MSC_VER_
1037
// Returns true while iterations remain, consuming one iteration per call.
// The first call starts the timer (via KeepRunningInternal); the final,
// false-returning call stops it.
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
  return KeepRunningInternal(1, /*is_batch=*/false);
}
1041
// Batch variant of KeepRunning(): consumes up to 'n' iterations per call.
// The final batch may overshoot max_iterations; the excess is tracked in
// batch_leftover_ (see KeepRunningInternal).
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
  return KeepRunningInternal(n, /*is_batch=*/true);
}
1045
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
                                                               bool is_batch) {
  // total_iterations_ is set to 0 by the constructor, and always set to a
  // nonzero value by StartKeepRunning().
  assert(n > 0);
  // n must be 1 unless is_batch is true.
  assert(is_batch || n == 1);
  // Fast path: enough iterations remain; consume n of them and keep going.
  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
    total_iterations_ -= n;
    return true;
  }
  // First call for this thread: start the timer, then retry the consume.
  if (!started_) {
    StartKeepRunning();
    if (!skipped() && total_iterations_ >= n) {
      total_iterations_ -= n;
      return true;
    }
  }
  // For non-batch runs, total_iterations_ must be 0 by now.
  // A batch may overshoot: run the final partial batch and record how many
  // iterations beyond max_iterations it executes in batch_leftover_.
  if (is_batch && total_iterations_ != 0) {
    batch_leftover_ = n - total_iterations_;
    total_iterations_ = 0;
    return true;
  }
  FinishKeepRunning();
  return false;
}
1073
  // Dummy value type: dereferencing the iterator yields nothing usable; the
  // iterator exists only to drive the ranged-for benchmark loop.
  struct BENCHMARK_UNUSED Value {};
  typedef std::forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef Value reference;
  typedef Value pointer;
  typedef std::ptrdiff_t difference_type;

 private:
  friend class State;
  // End-sentinel iterator: cached_ == 0 and no parent State.
  BENCHMARK_ALWAYS_INLINE
  StateIterator() : cached_(0), parent_() {}

  // Begin iterator: counts down from max_iterations (0 if the run was
  // already skipped).
  BENCHMARK_ALWAYS_INLINE
  explicit StateIterator(State* st)
      : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}

 public:
  BENCHMARK_ALWAYS_INLINE
  Value operator*() const { return Value(); }

  BENCHMARK_ALWAYS_INLINE
  StateIterator& operator++() {
    assert(cached_ > 0);
    --cached_;
    return *this;
  }

  // Only ever compared against the end sentinel: returns true while
  // iterations remain; on the final comparison it stops the timer via
  // FinishKeepRunning() and returns false, ending the loop.
  BENCHMARK_ALWAYS_INLINE
  bool operator!=(StateIterator const&) const {
    if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
    parent_->FinishKeepRunning();
    return false;
  }

 private:
  IterationCount cached_;   // iterations remaining
  State* const parent_;     // null for the end sentinel
};
1113
// Begin iterator for the ranged-for benchmark loop; counts down from
// max_iterations (or 0 if the run was already skipped).
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
  return StateIterator(this);
}
// End sentinel for the ranged-for benchmark loop. Note that end() also
// starts the benchmark timer via StartKeepRunning(), so timing begins when
// control flow enters the loop.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
  StartKeepRunning();
  return StateIterator();
}
1121
1122// Base class for user-defined multi-threading
1124 virtual ~ThreadRunnerBase() {}
1125 virtual void RunThreads(const std::function<void(int)>& fn) = 0;
1126};
1127
1128// Define alias of ThreadRunner factory function type
1129using threadrunner_factory =
1130 std::function<std::unique_ptr<ThreadRunnerBase>(int)>;
1131
1132// ------------------------------------------------------
1133// Benchmark registration object. The BENCHMARK() macro expands into a
1134// Benchmark* object. Various methods can be called on this object to
1135// change the properties of the benchmark. Each method returns "this" so
// that multiple method calls can be chained into one expression.
1137class BENCHMARK_EXPORT Benchmark {
1138 public:
1139 virtual ~Benchmark();
1140
1141 // Note: the following methods all return "this" so that multiple
1142 // method calls can be chained together in one expression.
1143
1144 // Specify the name of the benchmark
1145 Benchmark* Name(const std::string& name);
1146
1147 // Run this benchmark once with "x" as the extra argument passed
1148 // to the function.
1149 // REQUIRES: The function passed to the constructor must accept an arg1.
1150 Benchmark* Arg(int64_t x);
1151
1152 // Run this benchmark with the given time unit for the generated output report
1153 Benchmark* Unit(TimeUnit unit);
1154
1155 // Run this benchmark once for a number of values picked from the
1156 // range [start..limit]. (start and limit are always picked.)
1157 // REQUIRES: The function passed to the constructor must accept an arg1.
1158 Benchmark* Range(int64_t start, int64_t limit);
1159
1160 // Run this benchmark once for all values in the range [start..limit] with
1161 // specific step
1162 // REQUIRES: The function passed to the constructor must accept an arg1.
1163 Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
1164
1165 // Run this benchmark once with "args" as the extra arguments passed
1166 // to the function.
1167 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1168 Benchmark* Args(const std::vector<int64_t>& args);
1169
1170 // Equivalent to Args({x, y})
1171 // NOTE: This is a legacy C++03 interface provided for compatibility only.
1172 // New code should use 'Args'.
1173 Benchmark* ArgPair(int64_t x, int64_t y) {
1174 std::vector<int64_t> args;
1175 args.push_back(x);
1176 args.push_back(y);
1177 return Args(args);
1178 }
1179
1180 // Run this benchmark once for a number of values picked from the
1181 // ranges [start..limit]. (starts and limits are always picked.)
1182 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1183 Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t>>& ranges);
1184
1185 // Run this benchmark once for each combination of values in the (cartesian)
1186 // product of the supplied argument lists.
1187 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1188 Benchmark* ArgsProduct(const std::vector<std::vector<int64_t>>& arglists);
1189
1190 // Equivalent to ArgNames({name})
1191 Benchmark* ArgName(const std::string& name);
1192
1193 // Set the argument names to display in the benchmark name. If not called,
1194 // only argument values will be shown.
1195 Benchmark* ArgNames(const std::vector<std::string>& names);
1196
1197 // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
1198 // NOTE: This is a legacy C++03 interface provided for compatibility only.
1199 // New code should use 'Ranges'.
1200 Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
1201 std::vector<std::pair<int64_t, int64_t>> ranges;
1202 ranges.push_back(std::make_pair(lo1, hi1));
1203 ranges.push_back(std::make_pair(lo2, hi2));
1204 return Ranges(ranges);
1205 }
1206
1207 // Have "setup" and/or "teardown" invoked once for every benchmark run.
1208 // If the benchmark is multi-threaded (will run in k threads concurrently),
  // the setup callback will be invoked exactly once (not k times) before
1210 // each run with k threads. Time allowing (e.g. for a short benchmark), there
1211 // may be multiple such runs per benchmark, each run with its own
1212 // "setup"/"teardown".
1213 //
1214 // If the benchmark uses different size groups of threads (e.g. via
1215 // ThreadRange), the above will be true for each size group.
1216 //
1217 // The callback will be passed a State object, which includes the number
1218 // of threads, thread-index, benchmark arguments, etc.
1219 Benchmark* Setup(callback_function&&);
1220 Benchmark* Setup(const callback_function&);
1221 Benchmark* Teardown(callback_function&&);
1222 Benchmark* Teardown(const callback_function&);
1223
1224 // Pass this benchmark object to *func, which can customize
1225 // the benchmark by calling various methods like Arg, Args,
1226 // Threads, etc.
1227 Benchmark* Apply(const std::function<void(Benchmark* benchmark)>&);
1228
1229 // Set the range multiplier for non-dense range. If not called, the range
1230 // multiplier kRangeMultiplier will be used.
1231 Benchmark* RangeMultiplier(int multiplier);
1232
1233 // Set the minimum amount of time to use when running this benchmark. This
1234 // option overrides the `benchmark_min_time` flag.
1235 // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
1236 Benchmark* MinTime(double t);
1237
1238 // Set the minimum amount of time to run the benchmark before taking runtimes
1239 // of this benchmark into account. This
1240 // option overrides the `benchmark_min_warmup_time` flag.
1241 // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark.
1242 Benchmark* MinWarmUpTime(double t);
1243
1244 // Specify the amount of iterations that should be run by this benchmark.
1245 // This option overrides the `benchmark_min_time` flag.
1246 // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
1247 //
1248 // NOTE: This function should only be used when *exact* iteration control is
1249 // needed and never to control or limit how long a benchmark runs, where
1250 // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
1251 Benchmark* Iterations(IterationCount n);
1252
1253 // Specify the amount of times to repeat this benchmark. This option overrides
1254 // the `benchmark_repetitions` flag.
1255 // REQUIRES: `n > 0`
1256 Benchmark* Repetitions(int n);
1257
1258 // Specify if each repetition of the benchmark should be reported separately
1259 // or if only the final statistics should be reported. If the benchmark
1260 // is not repeated then the single result is always reported.
1261 // Applies to *ALL* reporters (display and file).
1262 Benchmark* ReportAggregatesOnly(bool value = true);
1263
1264 // Same as ReportAggregatesOnly(), but applies to display reporter only.
1265 Benchmark* DisplayAggregatesOnly(bool value = true);
1266
1267 // By default, the CPU time is measured only for the main thread, which may
1268 // be unrepresentative if the benchmark uses threads internally. If called,
1269 // the total CPU time spent by all the threads will be measured instead.
1270 // By default, only the main thread CPU time will be measured.
1271 Benchmark* MeasureProcessCPUTime();
1272
1273 // If a particular benchmark should use the Wall clock instead of the CPU time
1274 // (be it either the CPU time of the main thread only (default), or the
1275 // total CPU usage of the benchmark), call this method. If called, the elapsed
1276 // (wall) time will be used to control how many iterations are run, and in the
1277 // printing of items/second or MB/seconds values.
1278 // If not called, the CPU time used by the benchmark will be used.
1279 Benchmark* UseRealTime();
1280
  // If a benchmark must measure time manually (e.g. if GPU execution time
  // is being measured), call this method. If called, each benchmark
  // iteration should call SetIterationTime(seconds) to report the measured
  // time, which will be used to control how many iterations are run, and in
  // the printing of items/second or MB/second values.
1288 Benchmark* UseManualTime();
1289
1290 // Set the asymptotic computational complexity for the benchmark. If called
1291 // the asymptotic computational complexity will be shown on the output.
1292 Benchmark* Complexity(BigO complexity = benchmark::oAuto);
1293
1294 // Set the asymptotic computational complexity for the benchmark. If called
1295 // the asymptotic computational complexity will be shown on the output.
1296 Benchmark* Complexity(BigOFunc* complexity);
1297
  // Add this statistic to be computed over all the values of the benchmark run
1299 Benchmark* ComputeStatistics(const std::string& name,
1300 StatisticsFunc* statistics,
1301 StatisticUnit unit = kTime);
1302
1303 // Support for running multiple copies of the same benchmark concurrently
1304 // in multiple threads. This may be useful when measuring the scaling
1305 // of some piece of code.
1306
1307 // Run one instance of this benchmark concurrently in t threads.
1308 Benchmark* Threads(int t);
1309
1310 // Pick a set of values T from [min_threads,max_threads].
1311 // min_threads and max_threads are always included in T. Run this
1312 // benchmark once for each value in T. The benchmark run for a
1313 // particular value t consists of t threads running the benchmark
1314 // function concurrently. For example, consider:
1315 // BENCHMARK(Foo)->ThreadRange(1,16);
1316 // This will run the following benchmarks:
1317 // Foo in 1 thread
1318 // Foo in 2 threads
1319 // Foo in 4 threads
1320 // Foo in 8 threads
1321 // Foo in 16 threads
1322 Benchmark* ThreadRange(int min_threads, int max_threads);
1323
1324 // For each value n in the range, run this benchmark once using n threads.
1325 // min_threads and max_threads are always included in the range.
1326 // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
1327 // a benchmark with 1, 4, 7 and 8 threads.
1328 Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
1329
1330 // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
1331 Benchmark* ThreadPerCpu();
1332
1333 // Sets a user-defined threadrunner (see ThreadRunnerBase)
1334 Benchmark* ThreadRunner(threadrunner_factory&& factory);
1335
1336 virtual void Run(State& state) = 0;
1337
1338 TimeUnit GetTimeUnit() const;
1339
1340 protected:
1341 explicit Benchmark(const std::string& name);
1342 void SetName(const std::string& name);
1343
1344 public:
1345 const char* GetName() const;
1346 int ArgsCnt() const;
1347 const char* GetArgName(int arg) const;
1348
1349 private:
1350 friend class internal::BenchmarkFamilies;
1351 friend class internal::BenchmarkInstance;
1352
1353 std::string name_;
1354 internal::AggregationReportMode aggregation_report_mode_;
1355 std::vector<std::string> arg_names_; // Args for all benchmark runs
1356 std::vector<std::vector<int64_t>> args_; // Args for all benchmark runs
1357
1358 TimeUnit time_unit_;
1359 bool use_default_time_unit_;
1360
1361 int range_multiplier_;
1362 double min_time_;
1363 double min_warmup_time_;
1364 IterationCount iterations_;
1365 int repetitions_;
1366 bool measure_process_cpu_time_;
1367 bool use_real_time_;
1368 bool use_manual_time_;
1369 BigO complexity_;
1370 BigOFunc* complexity_lambda_;
1371 std::vector<internal::Statistics> statistics_;
1372 std::vector<int> thread_counts_;
1373
1374 callback_function setup_;
1375 callback_function teardown_;
1376
1377 threadrunner_factory threadrunner_;
1378
1379 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark);
1380};
1381
1382namespace internal {
1383
1384// clang-format off
1385typedef BENCHMARK_DEPRECATED_MSG("Use ::benchmark::Benchmark instead")
1386 ::benchmark::Benchmark Benchmark;
1387typedef BENCHMARK_DEPRECATED_MSG(
1388 "Use ::benchmark::threadrunner_factory instead")
1389 ::benchmark::threadrunner_factory threadrunner_factory;
1390// clang-format on
1391
1392typedef void(Function)(State&);
1393
1394} // namespace internal
1395
1396// Create and register a benchmark with the specified 'name' that invokes
1397// the specified functor 'fn'.
1398//
1399// RETURNS: A pointer to the registered benchmark.
1400Benchmark* RegisterBenchmark(const std::string& name, internal::Function* fn);
1401
1402template <class Lambda>
1403Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
1404
1405// Remove all registered benchmarks. All pointers to previously registered
1406// benchmarks are invalidated.
1407BENCHMARK_EXPORT void ClearRegisteredBenchmarks();
1408
1409namespace internal {
// The class used to hold all Benchmarks created from a static function
// (i.e. those created using the BENCHMARK(...) macros).
class BENCHMARK_EXPORT FunctionBenchmark : public benchmark::Benchmark {
 public:
  FunctionBenchmark(const std::string& name, Function* func)
      : Benchmark(name), func_(func) {}

  // Invokes the wrapped function pointer with 'st'. Defined out of line.
  void Run(State& st) override;

 private:
  Function* func_;  // non-owning pointer to the benchmark function
};
1422
1423template <class Lambda>
1424class LambdaBenchmark : public benchmark::Benchmark {
1425 public:
1426 void Run(State& st) override { lambda_(st); }
1427
1428 template <class OLambda>
1429 LambdaBenchmark(const std::string& name, OLambda&& lam)
1430 : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
1431
1432 private:
1433 LambdaBenchmark(LambdaBenchmark const&) = delete;
1434 Lambda lambda_;
1435};
1436} // namespace internal
1437
1438inline Benchmark* RegisterBenchmark(const std::string& name,
1439 internal::Function* fn) {
1440 return internal::RegisterBenchmarkInternal(
1441 ::benchmark::internal::make_unique<internal::FunctionBenchmark>(name,
1442 fn));
1443}
1444
1445template <class Lambda>
1446Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
1447 using BenchType =
1448 internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
1449 return internal::RegisterBenchmarkInternal(
1450 ::benchmark::internal::make_unique<BenchType>(name,
1451 std::forward<Lambda>(fn)));
1452}
1453
1454template <class Lambda, class... Args>
1455Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
1456 Args&&... args) {
1457 return benchmark::RegisterBenchmark(
1458 name, [=](benchmark::State& st) { fn(st, args...); });
1459}
1460
// The base class for all fixture tests.
class Fixture : public Benchmark {
 public:
  Fixture() : Benchmark("") {}

  // Runs one benchmark case: SetUp, the user-defined BenchmarkCase, then
  // TearDown. NOTE(review): if BenchmarkCase() throws, TearDown() is not
  // invoked.
  void Run(State& st) override {
    this->SetUp(st);
    this->BenchmarkCase(st);
    this->TearDown(st);
  }

  // These will be deprecated ...
  virtual void SetUp(const State&) {}
  virtual void TearDown(const State&) {}
  // ... In favor of these. The defaults forward to the const overloads.
  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }

 protected:
  // The benchmark body; defined by BENCHMARK_DEFINE_F/BENCHMARK_F macros.
  virtual void BenchmarkCase(State&) = 0;
};
1482} // namespace benchmark
1483
1484// ------------------------------------------------------
1485// Macro to register benchmarks
1486
1487// clang-format off
1488#if defined(__clang__)
1489#define BENCHMARK_DISABLE_COUNTER_WARNING \
1490 _Pragma("GCC diagnostic push") \
1491 _Pragma("GCC diagnostic ignored \"-Wunknown-warning-option\"") \
1492 _Pragma("GCC diagnostic ignored \"-Wc2y-extensions\"")
1493#define BENCHMARK_RESTORE_COUNTER_WARNING _Pragma("GCC diagnostic pop")
1494#else
1495#define BENCHMARK_DISABLE_COUNTER_WARNING
1496#define BENCHMARK_RESTORE_COUNTER_WARNING
1497#endif
1498// clang-format on
1499
1500// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
1501// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
1502// empty. If X is empty the expression becomes (+1 == +0).
1503BENCHMARK_DISABLE_COUNTER_WARNING
1504#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
1505#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
1506#else
1507#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
1508#endif
1509BENCHMARK_RESTORE_COUNTER_WARNING
1510
1511// Helpers for generating unique variable names
1512#define BENCHMARK_PRIVATE_NAME(...) \
1513 BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
1514 __VA_ARGS__)
1515
1516#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
1517#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
1518// Helper for concatenation with macro name expansion
1519#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
1520 BaseClass##_##Method##_Benchmark
1521
1522#define BENCHMARK_PRIVATE_DECLARE(n) \
1523 BENCHMARK_DISABLE_COUNTER_WARNING \
1524 /* NOLINTNEXTLINE(misc-use-anonymous-namespace) */ \
1525 static ::benchmark::Benchmark const* const BENCHMARK_PRIVATE_NAME(n) \
1526 BENCHMARK_RESTORE_COUNTER_WARNING BENCHMARK_UNUSED
1527
1528#define BENCHMARK(...) \
1529 BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1530 (::benchmark::internal::RegisterBenchmarkInternal( \
1531 ::benchmark::internal::make_unique< \
1532 ::benchmark::internal::FunctionBenchmark>( \
1533 #__VA_ARGS__, \
1534 static_cast<::benchmark::internal::Function*>(__VA_ARGS__))))
1535
1536// Old-style macros
1537#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
1538#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
1539#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
1540#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
1541#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
1542 BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
1543
1544// Register a benchmark which invokes the function specified by `func`
1545// with the additional arguments specified by `...`.
1546//
1547// For example:
1548//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//   [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
1554// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
1555#define BENCHMARK_CAPTURE(func, test_case_name, ...) \
1556 BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1557 (::benchmark::internal::RegisterBenchmarkInternal( \
1558 ::benchmark::internal::make_unique< \
1559 ::benchmark::internal::FunctionBenchmark>( \
1560 #func "/" #test_case_name, \
1561 [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1562
1563// This will register a benchmark for a templatized function. For example:
1564//
1565// template<int arg>
1566// void BM_Foo(int iters);
1567//
1568// BENCHMARK_TEMPLATE(BM_Foo, 1);
1569//
1570// will register BM_Foo<1> as a benchmark.
1571#define BENCHMARK_TEMPLATE1(n, a) \
1572 BENCHMARK_PRIVATE_DECLARE(n) = \
1573 (::benchmark::internal::RegisterBenchmarkInternal( \
1574 ::benchmark::internal::make_unique< \
1575 ::benchmark::internal::FunctionBenchmark>( \
1576 #n "<" #a ">", \
1577 static_cast<::benchmark::internal::Function*>(n<a>))))
1578
1579#define BENCHMARK_TEMPLATE2(n, a, b) \
1580 BENCHMARK_PRIVATE_DECLARE(n) = \
1581 (::benchmark::internal::RegisterBenchmarkInternal( \
1582 ::benchmark::internal::make_unique< \
1583 ::benchmark::internal::FunctionBenchmark>( \
1584 #n "<" #a "," #b ">", \
1585 static_cast<::benchmark::internal::Function*>(n<a, b>))))
1586
1587#define BENCHMARK_TEMPLATE(n, ...) \
1588 BENCHMARK_PRIVATE_DECLARE(n) = \
1589 (::benchmark::internal::RegisterBenchmarkInternal( \
1590 ::benchmark::internal::make_unique< \
1591 ::benchmark::internal::FunctionBenchmark>( \
1592 #n "<" #__VA_ARGS__ ">", \
1593 static_cast<::benchmark::internal::Function*>(n<__VA_ARGS__>))))
1594
1595// This will register a benchmark for a templatized function,
1596// with the additional arguments specified by `...`.
1597//
1598// For example:
1599//
// template <typename T, class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//   [...]
// }
// /* Registers a benchmark named "BM_takes_args<void>/int_string_test" */
// BENCHMARK_TEMPLATE1_CAPTURE(BM_takes_args, void, int_string_test, 42,
//                             std::string("abc"));
1607#define BENCHMARK_TEMPLATE1_CAPTURE(func, a, test_case_name, ...) \
1608 BENCHMARK_CAPTURE(func<a>, test_case_name, __VA_ARGS__)
1609
1610#define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \
1611 BENCHMARK_PRIVATE_DECLARE(func) = \
1612 (::benchmark::internal::RegisterBenchmarkInternal( \
1613 ::benchmark::internal::make_unique< \
1614 ::benchmark::internal::FunctionBenchmark>( \
1615 #func "<" #a "," #b ">" \
1616 "/" #test_case_name, \
1617 [](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); })))
1618
1619#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1620 class BaseClass##_##Method##_Benchmark : public BaseClass { \
1621 public: \
1622 BaseClass##_##Method##_Benchmark() { \
1623 this->SetName(#BaseClass "/" #Method); \
1624 } \
1625 \
1626 protected: \
1627 void BenchmarkCase(::benchmark::State&) override; \
1628 };
1629
1630#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1631 class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
1632 public: \
1633 BaseClass##_##Method##_Benchmark() { \
1634 this->SetName(#BaseClass "<" #a ">/" #Method); \
1635 } \
1636 \
1637 protected: \
1638 void BenchmarkCase(::benchmark::State&) override; \
1639 };
1640
1641#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1642 class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
1643 public: \
1644 BaseClass##_##Method##_Benchmark() { \
1645 this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
1646 } \
1647 \
1648 protected: \
1649 void BenchmarkCase(::benchmark::State&) override; \
1650 };
1651
1652#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
1653 class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
1654 public: \
1655 BaseClass##_##Method##_Benchmark() { \
1656 this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \
1657 } \
1658 \
1659 protected: \
1660 void BenchmarkCase(::benchmark::State&) override; \
1661 };
1662
// Declares the fixture benchmark class and opens the definition of its
// BenchmarkCase() body; the user writes the body after the macro invocation.
// Note: this only defines the benchmark — it must still be registered with
// BENCHMARK_REGISTER_F to actually run.
1663#define BENCHMARK_DEFINE_F(BaseClass, Method) \
1664 BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1665 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1666
// Define-only variant for a fixture template with one template argument.
1667#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \
1668 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1669 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1670
// Define-only variant for a fixture template with two template arguments.
1671#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \
1672 BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1673 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1674
// Define-only variant for a fixture template with any number of arguments.
1675#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \
1676 BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1677 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1678
// Registers a fixture benchmark previously defined with BENCHMARK_DEFINE_F
// (or one of its template variants) so that it will be run.
1679#define BENCHMARK_REGISTER_F(BaseClass, Method) \
1680 BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))
1681
// Implementation detail: creates a file-scope variable holding the result of
// registering a heap-allocated instance of the generated benchmark class.
1682#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
1683 BENCHMARK_PRIVATE_DECLARE(TestName) = \
1684 (::benchmark::internal::RegisterBenchmarkInternal( \
1685 ::benchmark::internal::make_unique<TestName>()))
1686
// Implementation detail: name of the class template generated by
// BENCHMARK_TEMPLATE_METHOD_F (distinct from the non-template "_Benchmark"
// name so the two macro families do not collide).
1687#define BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \
1688 BaseClass##_##Method##_BenchmarkTemplate
1689
// Defines a templated BenchmarkCase() over a fixture class template and opens
// its definition; the user writes the body after the macro invocation.
// Concrete instantiations are created later via BENCHMARK_TEMPLATE_INSTANTIATE_F.
// 'Base' is provided as a convenience alias for the instantiated fixture.
1690#define BENCHMARK_TEMPLATE_METHOD_F(BaseClass, Method) \
1691 template <class... Args> \
1692 class BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \
1693 : public BaseClass<Args...> { \
1694 protected: \
1695 using Base = BaseClass<Args...>; \
1696 void BenchmarkCase(::benchmark::State&) override; \
1697 }; \
1698 template <class... Args> \
1699 void BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \
1700 BaseClass, Method)<Args...>::BenchmarkCase
1701
// Implementation detail: instantiates the class template above with the given
// template arguments, wraps it in a uniquely named class that sets the display
// name, and registers one instance of it.
1702#define BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F(BaseClass, Method, \
1703 UniqueName, ...) \
1704 class UniqueName : public BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \
1705 BaseClass, Method)<__VA_ARGS__> { \
1706 public: \
1707 UniqueName() { this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); } \
1708 }; \
1709 BENCHMARK_PRIVATE_DECLARE(BaseClass##_##Method##_Benchmark) = \
1710 (::benchmark::internal::RegisterBenchmarkInternal( \
1711 ::benchmark::internal::make_unique<UniqueName>()))
1712
// Public entry point: instantiates and registers a BENCHMARK_TEMPLATE_METHOD_F
// benchmark for the given template argument list. The DISABLE/RESTORE pair
// suppresses a compiler warning around the generated registration variable.
1713#define BENCHMARK_TEMPLATE_INSTANTIATE_F(BaseClass, Method, ...) \
1714 BENCHMARK_DISABLE_COUNTER_WARNING \
1715 BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F( \
1716 BaseClass, Method, BENCHMARK_PRIVATE_NAME(BaseClass##Method), \
1717 __VA_ARGS__) \
1718 BENCHMARK_RESTORE_COUNTER_WARNING
1719
// This macro will define and register a benchmark within a fixture class.
// Equivalent to BENCHMARK_DEFINE_F + BENCHMARK_REGISTER_F in one step; the
// user writes the BenchmarkCase() body after the macro invocation.
1721#define BENCHMARK_F(BaseClass, Method) \
1722 BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1723 BENCHMARK_REGISTER_F(BaseClass, Method); \
1724 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1725
// Define-and-register variant for a fixture template with one argument.
1726#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \
1727 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1728 BENCHMARK_REGISTER_F(BaseClass, Method); \
1729 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1730
// Define-and-register variant for a fixture template with two arguments.
1731#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \
1732 BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1733 BENCHMARK_REGISTER_F(BaseClass, Method); \
1734 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1735
// Define-and-register variant for a fixture template with any argument count.
1736#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \
1737 BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1738 BENCHMARK_REGISTER_F(BaseClass, Method); \
1739 void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1740
// Helper macro to create a main routine in a test that runs the benchmarks
// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL.
// In that case a one-element argv ("benchmark") is substituted so that
// Initialize() always receives a valid argument vector.
// The trailing 'int main(int, char**)' re-declaration exists so that the
// macro invocation can be written with a terminating semicolon:
//   BENCHMARK_MAIN();
1743#define BENCHMARK_MAIN() \
1744 int main(int argc, char** argv) { \
1745 benchmark::MaybeReenterWithoutASLR(argc, argv); \
1746 char arg0_default[] = "benchmark"; \
1747 char* args_default = reinterpret_cast<char*>(arg0_default); \
1748 if (!argv) { \
1749 argc = 1; \
1750 argv = &args_default; \
1751 } \
1752 ::benchmark::Initialize(&argc, argv); \
1753 if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
1754 ::benchmark::RunSpecifiedBenchmarks(); \
1755 ::benchmark::Shutdown(); \
1756 return 0; \
1757 } \
1758 int main(int, char**)
1759
1760// ------------------------------------------------------
1761// Benchmark Reporters
1762
1763namespace benchmark {
1764
// CPU characteristics gathered once and exposed through the Get() singleton.
1765struct BENCHMARK_EXPORT CPUInfo {
  // Describes one CPU cache (one entry per cache in 'caches' below).
1766 struct CacheInfo {
    // Cache type, e.g. data vs. instruction — NOTE(review): exact strings are
    // produced by platform-specific code outside this header; confirm there.
1767 std::string type;
1768 int level;
    // Cache size — presumably in bytes; TODO confirm against sysinfo.cc.
1769 int size;
    // Number of logical CPUs sharing this cache.
1770 int num_sharing;
1771 };
1772
  // Whether CPU frequency scaling is active (affects timing stability).
1773 enum Scaling { UNKNOWN, ENABLED, DISABLED };
1774
1775 int num_cpus;
1776 Scaling scaling;
1777 double cycles_per_second;
1778 std::vector<CacheInfo> caches;
  // System load averages — presumably the 1/5/15-minute values; TODO confirm.
1779 std::vector<double> load_avg;
1780
  // Returns the lazily constructed singleton instance.
1781 static const CPUInfo& Get();
1782
1783 private:
  // Private: construction only through Get(); copying disallowed below.
1784 CPUInfo();
1785 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
1786};
1787
// Host/system information (name, ASLR status) exposed through the Get()
// singleton, analogous to CPUInfo above.
1789struct BENCHMARK_EXPORT SystemInfo {
  // Whether address-space layout randomization is active for this process.
1790 enum class ASLR { UNKNOWN, ENABLED, DISABLED };
1791
  // System name — presumably the hostname; TODO confirm in sysinfo.cc.
1792 std::string name;
1793 ASLR ASLRStatus;
  // Returns the lazily constructed singleton instance.
1794 static const SystemInfo& Get();
1795
1796 private:
  // Private: construction only through Get(); copying disallowed below.
1797 SystemInfo();
1798 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
1799};
1800
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
// Each field holds the already-formatted text of one name component.
1804struct BENCHMARK_EXPORT BenchmarkName {
1805 std::string function_name;
1806 std::string args;
1807 std::string min_time;
1808 std::string min_warmup_time;
1809 std::string iterations;
1810 std::string repetitions;
1811 std::string time_type;
1812 std::string threads;
1813
1814 // Return the full name of the benchmark with each non-empty
1815 // field separated by a '/'
1816 std::string str() const;
1817};
1818
// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
1824class BENCHMARK_EXPORT BenchmarkReporter {
1825 public:
  // Per-suite information handed to ReportContext() before any runs start.
1826 struct Context {
1827 CPUInfo const& cpu_info;
1828 SystemInfo const& sys_info;
1829 // The number of chars in the longest benchmark name.
1830 size_t name_field_width = 0;
  // NOTE(review): appears to hold the benchmark executable's name; it is
  // defined/assigned outside this header — confirm in the .cc files.
1831 static const char* executable_name;
1832 Context();
1833 };
1834
  // The result of a single benchmark run (or of an aggregate over runs).
1835 struct BENCHMARK_EXPORT Run {
  // Sentinel used in 'repetition_index' — presumably marks runs that are
  // not one of a set of repetitions (e.g. aggregates); TODO confirm.
1836 static const int64_t no_repetition_index = -1;
1837 enum RunType { RT_Iteration, RT_Aggregate };
1838
1839 Run()
1840 : run_type(RT_Iteration),
1841 aggregate_unit(kTime),
1842 skipped(internal::NotSkipped),
1843 iterations(1),
1844 threads(1),
1845 time_unit(GetDefaultTimeUnit()),
1846 real_accumulated_time(0),
1847 cpu_accumulated_time(0),
1848 max_heapbytes_used(0),
1849 use_real_time_for_initial_big_o(false),
1850 complexity(oNone),
1851 complexity_lambda(),
1852 complexity_n(0),
1853 statistics(),
1854 report_big_o(false),
1855 report_rms(false),
1856 allocs_per_iter(0.0) {}
1857
  // Full display name built from 'run_name' (and aggregate suffix, if any).
1858 std::string benchmark_name() const;
1859 BenchmarkName run_name;
  // Indices identifying the benchmark family and the instance within it.
  // NOTE(review): assigned by the runner outside this header; note they are
  // not initialized by the default constructor above.
1860 int64_t family_index;
1861 int64_t per_family_instance_index;
1862 RunType run_type;
  // For RT_Aggregate runs: which statistic this row represents and its unit.
1863 std::string aggregate_name;
1864 StatisticUnit aggregate_unit;
1865 std::string report_label; // Empty if not set by benchmark.
1866 internal::Skipped skipped;
1867 std::string skip_message;
1868
1869 IterationCount iterations;
1870 int64_t threads;
  // Which repetition this run is (see no_repetition_index above) and how
  // many repetitions were requested in total.
1871 int64_t repetition_index;
1872 int64_t repetitions;
1873 TimeUnit time_unit;
1874 double real_accumulated_time;
1875 double cpu_accumulated_time;
1876
1877 // Return a value representing the real time per iteration in the unit
1878 // specified by 'time_unit'.
1879 // NOTE: If 'iterations' is zero the returned value represents the
1880 // accumulated time.
1881 double GetAdjustedRealTime() const;
1882
1883 // Return a value representing the cpu time per iteration in the unit
1884 // specified by 'time_unit'.
1885 // NOTE: If 'iterations' is zero the returned value represents the
1886 // accumulated time.
1887 double GetAdjustedCPUTime() const;
1888
1889 // This is set to 0.0 if memory tracing is not enabled.
1890 double max_heapbytes_used;
1891
1892 // By default Big-O is computed for CPU time, but that is not what you want
1893 // to happen when manual time was requested, which is stored as real time.
1894 bool use_real_time_for_initial_big_o;
1895
1896 // Keep track of arguments to compute asymptotic complexity
1897 BigO complexity;
1898 BigOFunc* complexity_lambda;
1899 ComplexityN complexity_n;
1900
1901 // what statistics to compute from the measurements
1902 const std::vector<internal::Statistics>* statistics;
1903
1904 // Inform print function whether the current run is a complexity report
1905 bool report_big_o;
1906 bool report_rms;
1907
  // User-defined counters — presumably populated from State::counters by the
  // runner; TODO confirm.
1908 UserCounters counters;
1909
1910 // Memory metrics.
1911 MemoryManager::Result memory_result;
1912 double allocs_per_iter;
1913 };
1914
  // Bookkeeping used to know when a whole benchmark family has finished.
1915 struct PerFamilyRunReports {
1916 PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
1917
1918 // How many runs will all instances of this benchmark perform?
1919 int num_runs_total;
1920
1921 // How many runs have happened already?
1922 int num_runs_done;
1923
1924 // The reports about (non-erroneous!) runs of this family.
1925 std::vector<BenchmarkReporter::Run> Runs;
1926 };
1927
1928 // Construct a BenchmarkReporter with the output stream set to 'std::cout'
1929 // and the error stream set to 'std::cerr'
1930 BenchmarkReporter();
1931
1932 // Called once for every suite of benchmarks run.
1933 // The parameter "context" contains information that the
1934 // reporter may wish to use when generating its report, for example the
1935 // platform under which the benchmarks are running. The benchmark run is
1936 // never started if this function returns false, allowing the reporter
1937 // to skip runs based on the context information.
1938 virtual bool ReportContext(const Context& context) = 0;
1939
1940 // Called once for each group of benchmark runs, gives information about
1941 // the configurations of the runs.
1942 virtual void ReportRunsConfig(double /*min_time*/,
1943 bool /*has_explicit_iters*/,
1944 IterationCount /*iters*/) {}
1945
1946 // Called once for each group of benchmark runs, gives information about
1947 // cpu-time and heap memory usage during the benchmark run. If the group
1948 // of runs contained more than two entries then 'report' contains additional
1949 // elements representing the mean and standard deviation of those runs.
1950 // Additionally if this group of runs was the last in a family of benchmarks
1951 // 'reports' contains additional entries representing the asymptotic
1952 // complexity and RMS of that benchmark family.
1953 virtual void ReportRuns(const std::vector<Run>& report) = 0;
1954
1955 // Called once and only once after every group of benchmarks is run and
1956 // reported.
1957 virtual void Finalize() {}
1958
1959 // REQUIRES: The object referenced by 'out' is valid for the lifetime
1960 // of the reporter.
1961 void SetOutputStream(std::ostream* out) {
1962 assert(out);
1963 output_stream_ = out;
1964 }
1965
1966 // REQUIRES: The object referenced by 'err' is valid for the lifetime
1967 // of the reporter.
1968 void SetErrorStream(std::ostream* err) {
1969 assert(err);
1970 error_stream_ = err;
1971 }
1972
1973 std::ostream& GetOutputStream() const { return *output_stream_; }
1974
1975 std::ostream& GetErrorStream() const { return *error_stream_; }
1976
1977 virtual ~BenchmarkReporter();
1978
1979 // Write a human readable string to 'out' representing the specified
1980 // 'context'.
1981 // REQUIRES: 'out' is non-null.
1982 static void PrintBasicContext(std::ostream* out, Context const& context);
1983
1984 private:
  // Non-owning; see SetOutputStream/SetErrorStream lifetime requirements.
1985 std::ostream* output_stream_;
1986 std::ostream* error_stream_;
1987};
1988
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
1991class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
1992 public:
  // Bit flags controlling console output; combine with '|'.
1993 enum OutputOptions {
1994 OO_None = 0,
1995 OO_Color = 1,
1996 OO_Tabular = 2,
1997 OO_ColorTabular = OO_Color | OO_Tabular,
1998 OO_Defaults = OO_ColorTabular
1999 };
2000 explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
2001 : output_options_(opts_), name_field_width_(0), printed_header_(false) {}
2002
2003 bool ReportContext(const Context& context) override;
2004 void ReportRuns(const std::vector<Run>& reports) override;
2005
2006 protected:
  // Overridable hooks so subclasses can customize per-run/header formatting.
2007 virtual void PrintRunData(const Run& result);
2008 virtual void PrintHeader(const Run& run);
2009
2010 OutputOptions output_options_;
  // Width of the name column; taken from Context::name_field_width.
2011 size_t name_field_width_;
  // Counters seen on the previous run — presumably used to decide when the
  // header must be reprinted; TODO confirm in reporter.cc.
2012 UserCounters prev_counters_;
2013 bool printed_header_;
2014};
2015
// Reporter that emits results as JSON; Finalize() closes the document.
2016class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
2017 public:
2018 JSONReporter() : first_report_(true) {}
2019 bool ReportContext(const Context& context) override;
2020 void ReportRuns(const std::vector<Run>& reports) override;
2021 void Finalize() override;
2022
2023 private:
2024 void PrintRunData(const Run& run);
2025
  // True until the first run is printed — presumably controls comma placement
  // between JSON array elements; TODO confirm.
2026 bool first_report_;
2027};
2028
// Reporter that emits results as CSV. Deprecated — see the message below.
2029class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
2030 "The CSV Reporter will be removed in a future release") CSVReporter
2031 : public BenchmarkReporter {
2032 public:
2033 CSVReporter() : printed_header_(false) {}
2034 bool ReportContext(const Context& context) override;
2035 void ReportRuns(const std::vector<Run>& reports) override;
2036
2037 private:
2038 void PrintRunData(const Run& run);
2039
2040 bool printed_header_;
  // User-counter column names seen so far, kept so the CSV columns stay
  // consistent across runs.
2041 std::set<std::string> user_counter_names_;
2042};
2043
2044inline const char* GetTimeUnitString(TimeUnit unit) {
2045 switch (unit) {
2046 case kSecond:
2047 return "s";
2048 case kMillisecond:
2049 return "ms";
2050 case kMicrosecond:
2051 return "us";
2052 case kNanosecond:
2053 return "ns";
2054 }
2055 BENCHMARK_UNREACHABLE();
2056}
2057
2058inline double GetTimeUnitMultiplier(TimeUnit unit) {
2059 switch (unit) {
2060 case kSecond:
2061 return 1;
2062 case kMillisecond:
2063 return 1e3;
2064 case kMicrosecond:
2065 return 1e6;
2066 case kNanosecond:
2067 return 1e9;
2068 }
2069 BENCHMARK_UNREACHABLE();
2070}
2071
// Creates a list of integer values for the given range and multiplier.
// This can be used together with ArgsProduct() to allow multiple ranges
// with different multipliers.
// Example:
// ArgsProduct({
// CreateRange(0, 1024, /*multi=*/32),
// CreateRange(0, 100, /*multi=*/4),
// CreateDenseRange(0, 4, /*step=*/1),
// });
2081BENCHMARK_EXPORT
2082std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
2083
// Creates a list of integer values for the given range and step.
// Unlike CreateRange, values advance additively ('step') rather than
// multiplicatively.
2085BENCHMARK_EXPORT
2086std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
2087
2088} // namespace benchmark
2089
2090#if defined(_MSC_VER)
2091#pragma warning(pop)
2092#endif
2093
2094#endif // BENCHMARK_BENCHMARK_H_
Definition benchmark.h:1824
Definition benchmark.h:1137
Definition benchmark.h:644
Definition benchmark.h:422
Definition benchmark.h:469
Definition benchmark.h:767
Definition benchmark_register.cc:73
Definition benchmark_api_internal.h:18
Definition perf_counters.h:149
Definition thread_manager.h:12
Definition thread_timer.h:10
Definition benchmark.h:1804
Definition benchmark.h:1826
Definition benchmark.h:1835
Definition benchmark.h:1766
Definition benchmark.h:1765
Definition benchmark.h:426
Definition benchmark.h:1075
Definition benchmark.h:1074
Definition benchmark.h:1789
Definition benchmark.h:1123