From f153d39e67dbc89ef6fbf2a69eb55a0f1b82ee4b Mon Sep 17 00:00:00 2001 From: Jeremy <51220084+jeremy-rifkin@users.noreply.github.com> Date: Mon, 27 May 2024 17:12:52 -0500 Subject: [PATCH] Scraps of an implementation of debug-fission handling --- README.md | 21 ++- src/symbols/dwarf/dwarf_resolver.cpp | 214 +++++++++++++++++++++++++-- src/utils/dwarf.hpp | 10 +- src/utils/path.hpp | 41 +++++ 4 files changed, 263 insertions(+), 23 deletions(-) create mode 100644 src/utils/path.hpp diff --git a/README.md b/README.md index 0b3d5e7..2051189 100644 --- a/README.md +++ b/README.md @@ -572,14 +572,19 @@ namespace cpptrace { # Supported Debug Formats -| Format | Supported | -| ------------------------------------------------ | --------- | -| DWARF in binary | ✔️ | -| DWARF in separate binary (binary gnu debug link) | ️️✔️ | -| DWARF in separate binary (split dwarf) | ✔️ | -| DWARF in dSYM | ✔️ | -| DWARF in via Mach-O debug map | ✔️ | -| Windows debug symbols in PDB | ✔️ | +| Format | Supported | +| --------------------------------- | --------- | +| DWARF in binary | ✔️ | +| GNU debug link | ️️✔️ | +| Split dwarf with dwarf 5 | ✔️* | +| Split dwarf with extended dwarf 4 | ❌** | +| DWARF in dSYM | ✔️ | +| DWARF via Mach-O debug map | ✔️ | +| Windows debug symbols in PDB | ✔️ | + +*There seem to be a couple of issues upstream with libdwarf; however, they will hopefully be resolved soon. + +**Waiting on support from upstream libdwarf. DWARF5 added DWARF package files. As far as I can tell no compiler implements these yet. 
diff --git a/src/symbols/dwarf/dwarf_resolver.cpp b/src/symbols/dwarf/dwarf_resolver.cpp index fa4da59..1eadd62 100644 --- a/src/symbols/dwarf/dwarf_resolver.cpp +++ b/src/symbols/dwarf/dwarf_resolver.cpp @@ -8,6 +8,7 @@ #include "../../utils/dwarf.hpp" // has dwarf #includes #include "../../utils/error.hpp" #include "../../utils/utils.hpp" +#include "../../utils/path.hpp" #include "../../utils/program_name.hpp" // For CPPTRACE_MAX_PATH #include "../../binary/mach-o.hpp" @@ -33,7 +34,7 @@ namespace detail { namespace libdwarf { // printbugging as we go constexpr bool dump_dwarf = false; - constexpr bool trace_dwarf = false; + constexpr bool trace_dwarf = true; struct subprogram_entry { die_object die; @@ -67,6 +68,16 @@ namespace libdwarf { std::vector line_entries; }; + using addr_table = std::vector; + + // used to describe data from an upstream binary to a resolver for the .dwo + struct split_info { + // addr_table addresses; + // std::size_t addr_base; + rangelist_entries ranges_vec; + Dwarf_Debug executable_dbg; + }; + class dwarf_resolver : public symbol_resolver { std::string object_path; Dwarf_Debug dbg = nullptr; @@ -83,6 +94,8 @@ namespace libdwarf { bool generated_cu_cache = false; // Map from CU -> {srcfiles, count} std::unordered_map> srcfiles_cache; + // info for resolving a dwo object + optional parent_info; private: // Error handling helper @@ -112,8 +125,10 @@ namespace libdwarf { public: CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING - dwarf_resolver(const std::string& _object_path) { - object_path = _object_path; + dwarf_resolver(const std::string& object_path, optional parent_info_ = nullopt) + : object_path(object_path), + parent_info(std::move(parent_info_)) + { // use a buffer when invoking dwarf_init_path, which allows it to automatically find debuglink or dSYM // sources bool use_buffer = true; @@ -170,10 +185,15 @@ namespace libdwarf { PANIC("Unknown return code from dwarf_init_path"); } + if(parent_info) { + VERIFY(wrap(dwarf_set_tied_dbg, dbg, 
parent_info.unwrap().executable_dbg) == DW_DLV_OK); + } + if(ok) { // Check for .debug_aranges for fast lookup wrap(dwarf_get_aranges, dbg, &aranges, &arange_count); } + std::cout< vec; preprocess_subprograms(cu_die, dwversion, vec); std::sort(vec.begin(), vec.end(), [] (const subprogram_entry& a, const subprogram_entry& b) { return a.low < b.low; }); + std::cout<<"---"< lookup_cu(Dwarf_Addr pc) { // Check for .debug_aranges for fast lookup - if(aranges) { + if(aranges && !parent_info) { // don't bother under split dwarf // Try to find pc in aranges Dwarf_Arange arange; if(wrap(dwarf_get_arange, aranges, arange_count, pc, &arange) == DW_DLV_OK) { @@ -850,7 +891,7 @@ namespace libdwarf { if(get_cache_mode() == cache_mode::prioritize_memory) { // walk for the cu and go from there optional info; - walk_compilation_units([pc, &info] (const die_object& cu_die) { + walk_compilation_units([this, pc, &info] (const die_object& cu_die) { Dwarf_Half offset_size = 0; Dwarf_Half dwversion = 0; dwarf_get_version_of_die(cu_die.get(), &dwversion, &offset_size); @@ -860,7 +901,8 @@ namespace libdwarf { if(trace_dwarf) { std::fprintf(stderr, "CU: %d %s\n", dwversion, cu_die.get_name().c_str()); } - if(cu_die.pc_in_die(dwversion, pc)) { + // TODO: Somewhat hackilly I'm assuming a single CU and the address should be in it + if(parent_info || cu_die.pc_in_die(dwversion, pc)) { if(trace_dwarf) { std::fprintf( stderr, @@ -890,12 +932,16 @@ namespace libdwarf { ); // If the vector has been empty this can happen if(vec_it != cu_cache.end()) { + std::cout<<"Here2"<die.print(); - if(vec_it->die.pc_in_die(vec_it->dwversion, pc)) { + // TODO: Cache the range list? 
+ // TODO: Assumption + if(parent_info || vec_it->die.pc_in_die(vec_it->dwversion, pc)) { // resolve_pc(vec_it->die, vec_it->dwversion, pc, frame, inlines); return cu_info{maybe_owned_die_object::ref(vec_it->die), vec_it->dwversion}; } } else { + std::cout<<"Here3"< get_dwo_name(const die_object& cu_die) { + if(auto dwo_name = cu_die.get_string_attribute(DW_AT_GNU_dwo_name)) { + return dwo_name; + } else if(auto dwo_name = cu_die.get_string_attribute(DW_AT_dwo_name)) { + return dwo_name; + } else { + return nullopt; + } + } + + void perform_dwarf_fission_resolution( + Dwarf_Addr pc, + const die_object& cu_die, + const object_frame& object_frame_info, + stacktrace_frame& frame, + std::vector& inlines + ) { + // Split dwarf / debug fission / dwo is handled here + // https://gcc.gnu.org/wiki/DebugFission + // Some oddities: + // - The .debug_line table remains in the main object + // - Due to relocations the dwo file won't have object addresses and instead DW_AT_low_pc will be of + // type DW_FORM_addr_index referencing the .debug_addr table stored in the main object + // - These index is relative to the DW_AT_addr_base + // DW_AT_dwo_name/DW_AT_GNU_dwo_name + // DW_AT_comp_dir + // DW_AT_addr_base/DW_AT_GNU_addr_base + // DW_AT_GNU_dwo_id/...? + // DW_TAG_skeleton_unit vs ... 
+ // TODO: Handle gnu dwarf4 extensions for this + // TODO: DWO ID + std::cout<<"------------------------"<& inlines ) { @@ -915,10 +1095,17 @@ namespace libdwarf { std::fprintf(stderr, "%llx\n", to_ull(pc)); } optional cu = lookup_cu(pc); + std::cout<<"Here1 "< inlines; resolve_pc( frame_info.object_address, + frame_info, frame, inlines ); diff --git a/src/utils/dwarf.hpp b/src/utils/dwarf.hpp index 3cbef03..8243841 100644 --- a/src/utils/dwarf.hpp +++ b/src/utils/dwarf.hpp @@ -28,6 +28,8 @@ namespace libdwarf { static_assert(std::is_pointer::value, "Dwarf_Die not a pointer"); static_assert(std::is_pointer::value, "Dwarf_Debug not a pointer"); + using rangelist_entries = std::vector>; + [[noreturn]] inline void handle_dwarf_error(Dwarf_Debug dbg, Dwarf_Error error) { Dwarf_Unsigned ev = dwarf_errno(error); char* msg = dwarf_errmsg(error); @@ -387,10 +389,13 @@ namespace libdwarf { // callback should return true to keep going void dwarf_ranges(int version, F callback) const { Dwarf_Addr lowpc = (std::numeric_limits::max)(); + std::cout<<1<= 5) { dwarf5_ranges(callback); } else { @@ -406,8 +412,8 @@ namespace libdwarf { } } - std::vector> get_rangelist_entries(int version) const { - std::vector> vec; + rangelist_entries get_rangelist_entries(int version) const { + rangelist_entries vec; dwarf_ranges(version, [&vec] (Dwarf_Addr low, Dwarf_Addr high) { // Simple coalescing optimization: // Sometimes the range list entries are really continuous: [100, 200), [200, 300) diff --git a/src/utils/path.hpp b/src/utils/path.hpp new file mode 100644 index 0000000..be23302 --- /dev/null +++ b/src/utils/path.hpp @@ -0,0 +1,41 @@ +#ifndef PATH_HPP +#define PATH_HPP + +#include "common.hpp" + +#if IS_WINDOWS +#include +#endif + +namespace cpptrace { +namespace detail { + #if IS_WINDOWS + constexpr char PATH_SEP = '\\'; + inline bool is_absolute(const std::string& path) { + // I don't want to bring in shlwapi as a dependency just for PathIsRelativeA so I'm following the guidance of 
+ // https://stackoverflow.com/a/71941552/15675011 and + // https://github.com/wine-mirror/wine/blob/b210a204137dec8d2126ca909d762454fd47e963/dlls/kernelbase/path.c#L982 + if(path.empty() || IsDBCSLeadByte(path[0])) { // empty path or DBCS lead byte: reported as not absolute + return false; + } + if(path[0] == '\\') { // leading backslash + return true; + } + if(path.size() >= 2 && std::isalpha(static_cast<unsigned char>(path[0])) && path[1] == ':') { // drive-letter prefix such as "C:"; the cast avoids undefined behavior in std::isalpha for negative char values + return true; + } + return false; + } + #else + constexpr char PATH_SEP = '/'; + inline bool is_absolute(const std::string& path) { // absolute iff the path is non-empty and starts with '/' + if(path.empty()) { + return false; + } + return path[0] == '/'; + } + #endif +} +} + +#endif