From 3e131ce8c69f44e1499426446e53b5c38580af4f Mon Sep 17 00:00:00 2001
From: Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com>
Date: Sun, 18 Aug 2024 18:59:50 -0500
Subject: [PATCH] Experiment with some benchmarking

---
 CMakeLists.txt              |  4 +++
 README.md                   | 10 +++----
 benchmarking/unwinding.cpp  | 55 +++++++++++++++++++++++++++++++++++++
 cmake/OptionVariables.cmake |  2 ++
 4 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 benchmarking/unwinding.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ff285d0..8f0d225 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -605,3 +605,7 @@ if(CPPTRACE_BUILD_TESTING)
   endif()
   add_subdirectory(test)
 endif()
+
+if(CPPTRACE_BUILD_BENCHMARKING) # benchmarks are opt-in: only descend into benchmarking/ when this variable is truthy
+  add_subdirectory(benchmarking) # NOTE(review): requires benchmarking/CMakeLists.txt, which this patch does not add - confirm it exists
+endif()
diff --git a/README.md b/README.md
index 1dd1c9d..ee79652 100644
--- a/README.md
+++ b/README.md
@@ -382,14 +382,14 @@ CPPTRACE_TRY {
 }
 ```
 
-This functionality is entirely opt-in, the macros are in `cpptrace/from_current.hpp`.
+This functionality is entirely opt-in; to access it, use `#include <cpptrace/from_current.hpp>`.
 
 Any declarator `catch` accepts works with `CPPTRACE_CATCH`, including `...`.
 
 ![from_current](res/from_current.png)
 
-There are a few extraneous frames at the top of the stack corresponding to internals of exception handling in the
-standard library. These are a small price to pay for stack traces on all exceptions.
+There are a few extraneous frames at the top of the stack corresponding to standard library exception handling
+internals. These are a small price to pay for stack traces on all exceptions.
 
 API functions:
 - `cpptrace::raw_trace_from_current_exception`: Returns `const raw_trace&` from the current exception.
@@ -420,8 +420,8 @@ paths, how exception handling is usually used, and the shallowness of most call
 aware of.
 
 To put the scale of this performance consideration into perspective: In my benchmarking I have found generation of raw
-traces to take on the order of `75ns` per frame. Thus, even if there were 100 non-matching handlers before a matching
-handler in a 100-deep call stack the total time would stil be on the order of less than one millisecond.
+traces to take on the order of `100ns` per frame. Thus, even if there were 100 non-matching handlers before a matching
+handler in a 100-deep call stack the total time would still be on the order of one millisecond.
 
 It's possible to avoid this by adding some bookkeeping to the `CPPTRACE_TRY` block. With the tradeoff between
 zero-overhead try-catch in the happy path and a little extra overhead in the unhappy throwing path I decided to keep
diff --git a/benchmarking/unwinding.cpp b/benchmarking/unwinding.cpp
new file mode 100644
index 0000000..f05b391
--- /dev/null
+++ b/benchmarking/unwinding.cpp
@@ -0,0 +1,55 @@
+#include <cpptrace/cpptrace.hpp>
+
+#include <benchmark/benchmark.h>
+
+#include <iostream>
+
+struct unwind_benchmark_info { // bundle passed by value down the call chain; both members are references
+    benchmark::State& state; // benchmark state that unwind_loop iterates over
+    size_t& stack_depth; // out-slot: unwind_loop stores the captured frame count here
+};
+
+void unwind_loop(unwind_benchmark_info info) { // innermost frame: records the trace depth, then runs the benchmark loop
+    auto& [state, depth] = info; // bind names to the two reference members of info
+    depth = cpptrace::generate_raw_trace().frames.size(); // one trace taken outside the benchmark loop, only to report the frame count
+    for(auto _ : state) {
+        benchmark::DoNotOptimize(cpptrace::generate_raw_trace()); // keep the result observable so the call is not optimized away
+    }
+}
+
+void foo(unwind_benchmark_info info, int n) { // recurses n more times, then hands off to unwind_loop
+    if(n == 0) {
+        unwind_loop(info);
+    } else {
+        foo(info, n - 1); // NOTE(review): an optimizer may turn this tail call into a jump; the printed depth shows the real frame count
+    }
+}
+
+template<typename... Args> // variadic overload: each distinct argument count instantiates a separate function
+void foo(unwind_benchmark_info info, int, Args... args) { // ignores its first int and forwards the remaining arguments
+    foo(info, args...); // with a single int left, overload resolution picks the non-template foo(info, n)
+}
+
+void function_two(unwind_benchmark_info info, int, float) { // int and float parameters are unused; presumably only there to vary the signature
+    foo(info, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); // goes through nine variadic foo instantiations, ending in foo(info, 10)
+}
+
+void function_one(unwind_benchmark_info info, int) { // outermost helper in the call chain; the int parameter is unused
+    function_two(info, 0, 0); // the second 0 converts implicitly to float
+}
+
+static void unwinding(benchmark::State& state) { // benchmark body: builds a call chain and generates raw traces at its bottom
+    size_t stack_depth = 0; // written by unwind_loop through the reference stored in unwind_benchmark_info
+    function_one({state, stack_depth}, 0); // the benchmark loop over state runs inside unwind_loop at the end of this chain
+    static bool did_print = false; // function-local static: report the depth only the first time this function is called
+    if(!did_print) {
+        did_print = true;
+        std::cerr<<"[info] Unwinding benchmark stack depth: "<<stack_depth<<std::endl;
+    }
+}
+
+// Register `unwinding` with Google Benchmark
+BENCHMARK(unwinding);
+
+// Expands to the program's main(), which runs the registered benchmarks
+BENCHMARK_MAIN();
diff --git a/cmake/OptionVariables.cmake b/cmake/OptionVariables.cmake
index ad619e9..1a4aa79 100644
--- a/cmake/OptionVariables.cmake
+++ b/cmake/OptionVariables.cmake
@@ -151,11 +151,13 @@ option(CPPTRACE_ADDR2LINE_SEARCH_SYSTEM_PATH "" OFF)
 
 if(PROJECT_IS_TOP_LEVEL)
   option(CPPTRACE_BUILD_TESTING "" OFF)
+  option(CPPTRACE_BUILD_BENCHMARKING "" OFF) # name must match the variable tested before add_subdirectory(benchmarking) and listed in mark_as_advanced
   option(CPPTRACE_BUILD_TESTING_SPLIT_DWARF "" OFF)
   set(CPPTRACE_BUILD_TESTING_DWARF_VERSION "0" CACHE STRING "")
   option(CPPTRACE_BUILD_TEST_RDYNAMIC "" OFF)
   mark_as_advanced(
     CPPTRACE_BUILD_TESTING
+    CPPTRACE_BUILD_BENCHMARKING
     CPPTRACE_BUILD_TESTING_SPLIT_DWARF
     CPPTRACE_BUILD_TESTING_DWARF_VERSION
     CPPTRACE_BUILD_TEST_RDYNAMIC