From 3e131ce8c69f44e1499426446e53b5c38580af4f Mon Sep 17 00:00:00 2001 From: Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com> Date: Sun, 18 Aug 2024 18:59:50 -0500 Subject: [PATCH] Experiment with some benchmarking --- CMakeLists.txt | 4 +++ README.md | 10 +++---- benchmarking/unwinding.cpp | 55 +++++++++++++++++++++++++++++++++++++ cmake/OptionVariables.cmake | 2 ++ 4 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 benchmarking/unwinding.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ff285d0..8f0d225 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -605,3 +605,7 @@ if(CPPTRACE_BUILD_TESTING) endif() add_subdirectory(test) endif() + +if(CPPTRACE_BUILD_BENCHMARKING) + add_subdirectory(benchmarking) +endif() diff --git a/README.md b/README.md index 1dd1c9d..ee79652 100644 --- a/README.md +++ b/README.md @@ -382,14 +382,14 @@ CPPTRACE_TRY { } ``` -This functionality is entirely opt-in, the macros are in `cpptrace/from_current.hpp`. +This functionality is entirely opt-in, to access this use `#include <cpptrace/from_current.hpp>`. Any declarator `catch` accepts works with `CPPTRACE_CATCH`, including `...`. ![from_current](res/from_current.png) -There are a few extraneous frames at the top of the stack corresponding to internals of exception handling in the -standard library. These are a small price to pay for stack traces on all exceptions. +There are a few extraneous frames at the top of the stack corresponding to standard library exception handling +internals. These are a small price to pay for stack traces on all exceptions. API functions: - `cpptrace::raw_trace_from_current_exception`: Returns `const raw_trace&` from the current exception. @@ -420,8 +420,8 @@ paths, how exception handling is usually used, and the shallowness of most call aware of. To put the scale of this performance consideration into perspective: In my benchmarking I have found generation of raw -traces to take on the order of `75ns` per frame. 
Thus, even if there were 100 non-matching handlers before a matching -handler in a 100-deep call stack the total time would stil be on the order of less than one millisecond. +traces to take on the order of `100ns` per frame. Thus, even if there were 100 non-matching handlers before a matching +handler in a 100-deep call stack the total time would still be on the order of one millisecond. It's possible to avoid this by adding some bookkeeping to the `CPPTRACE_TRY` block. With the tradeoff between zero-overhead try-catch in the happy path and a little extra overhead in the unhappy throwing path I decided to keep diff --git a/benchmarking/unwinding.cpp b/benchmarking/unwinding.cpp new file mode 100644 index 0000000..f05b391 --- /dev/null +++ b/benchmarking/unwinding.cpp @@ -0,0 +1,55 @@ +#include + +#include + +#include + +struct unwind_benchmark_info { + benchmark::State& state; + size_t& stack_depth; +}; + +void unwind_loop(unwind_benchmark_info info) { + auto& [state, depth] = info; + depth = cpptrace::generate_raw_trace().frames.size(); + for(auto _ : state) { + benchmark::DoNotOptimize(cpptrace::generate_raw_trace()); + } +} + +void foo(unwind_benchmark_info info, int n) { + if(n == 0) { + unwind_loop(info); + } else { + foo(info, n - 1); + } +} + +template +void foo(unwind_benchmark_info info, int, Args... args) { + foo(info, args...); +} + +void function_two(unwind_benchmark_info info, int, float) { + foo(info, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +} + +void function_one(unwind_benchmark_info info, int) { + function_two(info, 0, 0); +} + +static void unwinding(benchmark::State& state) { + size_t stack_depth = 0; + function_one({state, stack_depth}, 0); + static bool did_print = false; + if(!did_print) { + did_print = true; + std::cerr<<"[info] Unwinding benchmark stack depth: "<