Experiment with some benchmarking

2024-08-18 18:59:50 -05:00 · 2024-08-18 18:59:50 -05:00 · 3e131ce8c6
commit 3e131ce8c6
parent 5e30d2ae60
4 changed files with 66 additions and 5 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -605,3 +605,7 @@ if(CPPTRACE_BUILD_TESTING)
  endif()
  add_subdirectory(test)
 endif()
+
+if(CPPTRACE_BUILD_BENCHMARKING) # opt-in: the benchmarking directory is only configured when this is set
+  add_subdirectory(benchmarking) # NOTE(review): presumably expects benchmarking/CMakeLists.txt - confirm it exists
+endif()
--- a/README.md
+++ b/README.md
@ -382,14 +382,14 @@ CPPTRACE_TRY {
 }
 ```

-This functionality is entirely opt-in, the macros are in `cpptrace/from_current.hpp`.
+This functionality is entirely opt-in; to access it, use `#include <cpptrace/from_current.hpp>`.

 Any declarator `catch` accepts works with `CPPTRACE_CATCH`, including `...`.

 ![from_current](res/from_current.png)

-There are a few extraneous frames at the top of the stack corresponding to internals of exception handling in the
-standard library. These are a small price to pay for stack traces on all exceptions.
+There are a few extraneous frames at the top of the stack corresponding to standard library exception handling
+internals. These are a small price to pay for stack traces on all exceptions.

 API functions:
 - `cpptrace::raw_trace_from_current_exception`: Returns `const raw_trace&` from the current exception.
@ -420,8 +420,8 @@ paths, how exception handling is usually used, and the shallowness of most call
 aware of.

 To put the scale of this performance consideration into perspective: In my benchmarking I have found generation of raw
-traces to take on the order of `75ns` per frame. Thus, even if there were 100 non-matching handlers before a matching
-handler in a 100-deep call stack the total time would stil be on the order of less than one millisecond.
+traces to take on the order of `100ns` per frame. Thus, even if there were 100 non-matching handlers before a matching
+handler in a 100-deep call stack the total time would still be on the order of one millisecond.

 It's possible to avoid this by adding some bookkeeping to the `CPPTRACE_TRY` block. With the tradeoff between
 zero-overhead try-catch in the happy path and a little extra overhead in the unhappy throwing path I decided to keep
--- a/benchmarking/unwinding.cpp
+++ b/benchmarking/unwinding.cpp
@ -0,0 +1,55 @@
+#include <cpptrace/cpptrace.hpp>
+
+#include <benchmark/benchmark.h>
+
+#include <iostream>
+
+struct unwind_benchmark_info { // bundle handed by value down the call chain to unwind_loop
+    benchmark::State& state; // benchmark state that unwind_loop iterates over
+    size_t& stack_depth; // out-parameter: unwind_loop stores the measured frame count here
+};
+
+// Stores the frame count of one raw trace, then generates a raw trace per benchmark iteration.
+void unwind_loop(unwind_benchmark_info info) {
+    info.stack_depth = cpptrace::generate_raw_trace().frames.size(); // written through the reference member
+    for(auto _ : info.state) {
+        benchmark::DoNotOptimize(cpptrace::generate_raw_trace()); // result is otherwise unused
+    }
+}
+
+void foo(unwind_benchmark_info info, int n) { // recurses n more times, then calls unwind_loop
+    if(n == 0) {
+        unwind_loop(info); // bottom of the call chain: traces are generated from here
+    } else {
+        foo(info, n - 1); // NOTE(review): tail position - an optimizer may elide this frame; check the printed depth
+    }
+}
+
+template<typename... Args>
+void foo(unwind_benchmark_info info, int, Args... args) { // discards the first int and forwards the rest
+    foo(info, args...); // picks this template again, or foo(info, n) once a single argument remains
+}
+
+void function_two(unwind_benchmark_info info, int, float) { // the int and float parameters are unused
+    foo(info, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); // the variadic foo drops one int per call until foo(info, 10) remains
+}
+
+void function_one(unwind_benchmark_info info, int) { // first helper called by the benchmark; the int is unused
+    function_two(info, 0, 0);
+}
+
+// Benchmark body: runs the unwinding loop beneath the helper call chain and prints the depth once.
+static void unwinding(benchmark::State& state) {
+    static bool depth_reported = false; // static, so the message is printed at most once per process
+    size_t measured_depth = 0; // set by unwind_loop through the reference stored in the info struct
+    function_one({state, measured_depth}, 0);
+    if(depth_reported) {
+        return;
+    }
+    depth_reported = true;
+    std::cerr<<"[info] Unwinding benchmark stack depth: "<<measured_depth<<std::endl;
+}
+
+// Register unwinding with Google Benchmark
+BENCHMARK(unwinding);
+
+// BENCHMARK_MAIN() supplies the program's main(), which runs the registered benchmarks
+BENCHMARK_MAIN();
--- a/cmake/OptionVariables.cmake
+++ b/cmake/OptionVariables.cmake
@ -151,11 +151,13 @@ option(CPPTRACE_ADDR2LINE_SEARCH_SYSTEM_PATH "" OFF)

 if(PROJECT_IS_TOP_LEVEL)
  option(CPPTRACE_BUILD_TESTING "" OFF)
+  option(CPPTRACE_BUILD_BENCHMARKING "" OFF) # gates add_subdirectory(benchmarking) in the top-level CMakeLists.txt
  option(CPPTRACE_BUILD_TESTING_SPLIT_DWARF "" OFF)
  set(CPPTRACE_BUILD_TESTING_DWARF_VERSION "0" CACHE STRING "")
  option(CPPTRACE_BUILD_TEST_RDYNAMIC "" OFF)
  mark_as_advanced(
    CPPTRACE_BUILD_TESTING
+    CPPTRACE_BUILD_BENCHMARKING
    CPPTRACE_BUILD_TESTING_SPLIT_DWARF
    CPPTRACE_BUILD_TESTING_DWARF_VERSION
    CPPTRACE_BUILD_TEST_RDYNAMIC