From b364d37f780d928f96ca8296a59585138615f862 Mon Sep 17 00:00:00 2001 From: Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com> Date: Sun, 18 Aug 2024 15:51:40 -0600 Subject: [PATCH] Implement better dwarf rangelist base address logic (#157) Related to https://github.com/davea42/libdwarf-code/issues/255, allows re-enabling the clang+sanitizer+rel+dsym tests. --- ci/unittest.py | 7 ----- src/symbols/dwarf/dwarf.hpp | 47 ++++++++++++++++++++-------- src/symbols/dwarf/dwarf_resolver.cpp | 42 ++++++++++++++++--------- 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/ci/unittest.py b/ci/unittest.py index a520bcf..f2b9b28 100644 --- a/ci/unittest.py +++ b/ci/unittest.py @@ -150,13 +150,6 @@ def run_macos_matrix(): "compiler": "g++-12", "sanitizers": "ON", }, - { - # disabled until https://github.com/davea42/libdwarf-code/issues/255 is fixed - "compiler": "clang++", - "sanitizers": "ON", - "build_type": "RelWithDebInfo", - "dSYM": True - }, ] ).run(build_and_test) diff --git a/src/symbols/dwarf/dwarf.hpp b/src/symbols/dwarf/dwarf.hpp index 8583f1b..073bae4 100644 --- a/src/symbols/dwarf/dwarf.hpp +++ b/src/symbols/dwarf/dwarf.hpp @@ -241,6 +241,30 @@ namespace libdwarf { } } + Dwarf_Unsigned get_ranges_base_address(const die_object& cu_die) const { + // After libdwarf v0.11.0 this can use dwarf_get_ranges_baseaddress, however, in the interest of not + // requiring v0.11.0 just yet the logic is implemented here too. 
+ // The base address is: + // - If the die has a rangelist, use the low_pc for that die + // - Otherwise use the low_pc from the CU if present + // - Otherwise 0 + if(has_attr(DW_AT_ranges)) { + if(has_attr(DW_AT_low_pc)) { + Dwarf_Addr lowpc; + if(wrap(dwarf_lowpc, die, &lowpc) == DW_DLV_OK) { + return lowpc; + } + } + } + if(cu_die.has_attr(DW_AT_low_pc)) { + Dwarf_Addr lowpc; + if(wrap(dwarf_lowpc, cu_die.get(), &lowpc) == DW_DLV_OK) { + return lowpc; + } + } + return 0; + } + Dwarf_Unsigned get_ranges_offset(Dwarf_Attribute attr) const { Dwarf_Unsigned off = 0; Dwarf_Half form = 0; @@ -334,7 +358,7 @@ namespace libdwarf { template<typename F> // callback should return true to keep going - void dwarf4_ranges(Dwarf_Addr lowpc, F callback) const { + void dwarf4_ranges(Dwarf_Addr baseaddr, F callback) const { Dwarf_Attribute attr = nullptr; if(wrap(dwarf_attr, die, DW_AT_ranges, &attr) != DW_DLV_OK) { return; @@ -344,10 +368,7 @@ namespace libdwarf { if(wrap(dwarf_global_formref, attr, &offset) != DW_DLV_OK) { return; } - Dwarf_Addr baseaddr = 0; - if(lowpc != (std::numeric_limits<Dwarf_Addr>::max)()) { - baseaddr = lowpc; - } + Dwarf_Addr baseaddr_original = baseaddr; Dwarf_Ranges* ranges = nullptr; Dwarf_Signed count = 0; VERIFY( @@ -375,15 +396,15 @@ namespace libdwarf { baseaddr = ranges[i].dwr_addr2; } else { ASSERT(ranges[i].dwr_type == DW_RANGES_END); - baseaddr = lowpc; + baseaddr = baseaddr_original; } } } template<typename F> // callback should return true to keep going - void dwarf_ranges(int version, F callback) const { - Dwarf_Addr lowpc = (std::numeric_limits<Dwarf_Addr>::max)(); + void dwarf_ranges(const die_object& cu_die, int version, F callback) const { + Dwarf_Addr lowpc; if(wrap(dwarf_lowpc, die, &lowpc) == DW_DLV_OK) { Dwarf_Addr highpc = 0; enum Dwarf_Form_Class return_class; @@ -399,13 +420,13 @@ namespace libdwarf { if(version >= 5) { dwarf5_ranges(callback); } else { - dwarf4_ranges(lowpc, callback); + dwarf4_ranges(get_ranges_base_address(cu_die), callback); } } - rangelist_entries 
get_rangelist_entries(int version) const { + rangelist_entries get_rangelist_entries(const die_object& cu_die, int version) const { rangelist_entries vec; - dwarf_ranges(version, [&vec] (Dwarf_Addr low, Dwarf_Addr high) { + dwarf_ranges(cu_die, version, [&vec] (Dwarf_Addr low, Dwarf_Addr high) { // Simple coalescing optimization: // Sometimes the range list entries are really continuous: [100, 200), [200, 300) // Other times there's just one byte of separation [300, 399), [400, 500) @@ -422,9 +443,9 @@ namespace libdwarf { return vec; } - Dwarf_Bool pc_in_die(int version, Dwarf_Addr pc) const { + Dwarf_Bool pc_in_die(const die_object& cu_die, int version, Dwarf_Addr pc) const { bool found = false; - dwarf_ranges(version, [&found, pc] (Dwarf_Addr low, Dwarf_Addr high) { + dwarf_ranges(cu_die, version, [&found, pc] (Dwarf_Addr low, Dwarf_Addr high) { if(pc >= low && pc < high) { found = true; return false; diff --git a/src/symbols/dwarf/dwarf_resolver.cpp b/src/symbols/dwarf/dwarf_resolver.cpp index 144a90d..76edc0e 100644 --- a/src/symbols/dwarf/dwarf_resolver.cpp +++ b/src/symbols/dwarf/dwarf_resolver.cpp @@ -281,14 +281,15 @@ namespace libdwarf { // NOTE: If we have a corresponding skeleton, we assume we have one CU matching the skeleton CU // Precedence for this assumption is https://dwarfstd.org/doc/DWARF5.pdf#subsection.3.1.3 // TODO: Also assuming same dwversion - auto ranges_vec = skeleton.unwrap().cu_die.get_rangelist_entries(dwversion); + const auto& skeleton_cu = skeleton.unwrap().cu_die; + auto ranges_vec = skeleton_cu.get_rangelist_entries(skeleton_cu, dwversion); for(auto range : ranges_vec) { // TODO: Reduce cloning here cu_cache.push_back({ cu_die.clone(), dwversion, range.first, range.second }); } return false; } else { - auto ranges_vec = cu_die.get_rangelist_entries(dwversion); + auto ranges_vec = cu_die.get_rangelist_entries(cu_die, dwversion); for(auto range : ranges_vec) { // TODO: Reduce cloning here cu_cache.push_back({ cu_die.clone(), 
dwversion, range.first, range.second }); @@ -388,7 +389,7 @@ namespace libdwarf { walk_die_list( child, [this, &cu_die, pc, dwversion, &inlines, &target_die, &current_obj_holder] (const die_object& die) { - if(die.get_tag() == DW_TAG_inlined_subroutine && die.pc_in_die(dwversion, pc)) { + if(die.get_tag() == DW_TAG_inlined_subroutine && die.pc_in_die(cu_die, dwversion, pc)) { const auto name = subprogram_symbol(die, dwversion); auto file_i = die.get_unsigned_attribute(DW_AT_call_file); // TODO: Refactor.... Probably put logic in resolve_filename. @@ -480,7 +481,7 @@ namespace libdwarf { die.get_name().c_str() ); } - if(!(die.get_tag() == DW_TAG_namespace || die.pc_in_die(dwversion, pc))) { + if(!(die.get_tag() == DW_TAG_namespace || die.pc_in_die(cu_die, dwversion, pc))) { if(dump_dwarf) { std::fprintf(stderr, "pc not in die\n"); } @@ -522,17 +523,18 @@ namespace libdwarf { CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING void preprocess_subprograms( + const die_object& cu_die, const die_object& die, Dwarf_Half dwversion, std::vector<subprogram_entry>& vec ) { walk_die_list( die, - [this, dwversion, &vec] (const die_object& die) { + [this, &cu_die, dwversion, &vec] (const die_object& die) { switch(die.get_tag()) { case DW_TAG_subprogram: { - auto ranges_vec = die.get_rangelist_entries(dwversion); + auto ranges_vec = die.get_rangelist_entries(cu_die, dwversion); // TODO: Feels super inefficient and some day should maybe use an interval tree. 
for(auto range : ranges_vec) { // TODO: Reduce cloning here @@ -543,7 +545,7 @@ namespace libdwarf { // On clang it's better auto child = die.get_child(); if(child) { - preprocess_subprograms(child, dwversion, vec); + preprocess_subprograms(cu_die, child, dwversion, vec); } } break; @@ -556,7 +558,7 @@ namespace libdwarf { { auto child = die.get_child(); if(child) { - preprocess_subprograms(child, dwversion, vec); + preprocess_subprograms(cu_die, child, dwversion, vec); } } break; @@ -587,7 +589,7 @@ namespace libdwarf { if(it == subprograms_cache.end()) { // TODO: Refactor. Do the sort in the preprocess function and return the vec directly. std::vector<subprogram_entry> vec; - preprocess_subprograms(cu_die, dwversion, vec); + preprocess_subprograms(cu_die, cu_die, dwversion, vec); std::sort(vec.begin(), vec.end(), [] (const subprogram_entry& a, const subprogram_entry& b) { return a.low < b.low; }); @@ -605,7 +607,7 @@ namespace libdwarf { ); // If the vector has been empty this can happen if(vec_it != vec.end()) { - if(vec_it->die.pc_in_die(dwversion, pc)) { + if(vec_it->die.pc_in_die(cu_die, dwversion, pc)) { frame.symbol = retrieve_symbol_for_subprogram(cu_die, vec_it->die, pc, dwversion, inlines); } } else { @@ -882,8 +884,14 @@ namespace libdwarf { } // NOTE: If we have a corresponding skeleton, we assume we have one CU matching the skeleton CU if( - (skeleton && skeleton.unwrap().cu_die.pc_in_die(skeleton.unwrap().dwversion, pc)) - || cu_die.pc_in_die(dwversion, pc) + ( + skeleton + && skeleton.unwrap().cu_die.pc_in_die( + skeleton.unwrap().cu_die, + skeleton.unwrap().dwversion, + pc + ) + ) || cu_die.pc_in_die(cu_die, dwversion, pc) ) { if(trace_dwarf) { std::fprintf( @@ -917,8 +925,14 @@ namespace libdwarf { // TODO: Cache the range list? 
// NOTE: If we have a corresponding skeleton, we assume we have one CU matching the skeleton CU if( - (skeleton && skeleton.unwrap().cu_die.pc_in_die(skeleton.unwrap().dwversion, pc)) - || vec_it->die.pc_in_die(vec_it->dwversion, pc) + ( + skeleton + && skeleton.unwrap().cu_die.pc_in_die( + skeleton.unwrap().cu_die, + skeleton.unwrap().dwversion, + pc + ) + ) || vec_it->die.pc_in_die(vec_it->die, vec_it->dwversion, pc) ) { return cu_info{maybe_owned_die_object::ref(vec_it->die), vec_it->dwversion}; }