Implement better dwarf rangelist base address logic (#157)

Related to https://github.com/davea42/libdwarf-code/issues/255, allows
re-enabling the clang+sanitizer+rel+dsym tests.
This commit is contained in:
Jeremy Rifkin 2024-08-18 15:51:40 -06:00 committed by GitHub
parent a4d75a3894
commit b364d37f78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 62 additions and 34 deletions

View File

@ -150,13 +150,6 @@ def run_macos_matrix():
"compiler": "g++-12",
"sanitizers": "ON",
},
{
# disabled until https://github.com/davea42/libdwarf-code/issues/255 is fixed
"compiler": "clang++",
"sanitizers": "ON",
"build_type": "RelWithDebInfo",
"dSYM": True
},
]
).run(build_and_test)

View File

@ -241,6 +241,30 @@ namespace libdwarf {
}
}
Dwarf_Unsigned get_ranges_base_address(const die_object& cu_die) const {
    // Picks the base address handed to the DWARF 4 range-list walker.
    //
    // libdwarf v0.11.0 and later expose dwarf_get_ranges_baseaddress for this; the
    // selection is reimplemented here so that v0.11.0 is not yet a hard requirement.
    //
    // Order of preference:
    //   1. this die's own low_pc, when the die carries both DW_AT_ranges and DW_AT_low_pc
    //   2. the low_pc of the compilation unit die passed in as cu_die
    //   3. 0 when neither lookup yields a value
    const bool die_has_ranges_and_low_pc = has_attr(DW_AT_ranges) && has_attr(DW_AT_low_pc);
    if(die_has_ranges_and_low_pc) {
        Dwarf_Addr die_low_pc;
        const auto status = wrap(dwarf_lowpc, die, &die_low_pc);
        if(status == DW_DLV_OK) {
            return die_low_pc;
        }
        // Lookup did not return DW_DLV_OK: fall through to the CU-level low_pc.
    }
    if(!cu_die.has_attr(DW_AT_low_pc)) {
        return 0;
    }
    Dwarf_Addr cu_low_pc;
    const auto cu_status = wrap(dwarf_lowpc, cu_die.get(), &cu_low_pc);
    return cu_status == DW_DLV_OK ? cu_low_pc : 0;
}
Dwarf_Unsigned get_ranges_offset(Dwarf_Attribute attr) const {
Dwarf_Unsigned off = 0;
Dwarf_Half form = 0;
@ -334,7 +358,7 @@ namespace libdwarf {
template<typename F>
// callback should return true to keep going
void dwarf4_ranges(Dwarf_Addr lowpc, F callback) const {
void dwarf4_ranges(Dwarf_Addr baseaddr, F callback) const {
Dwarf_Attribute attr = nullptr;
if(wrap(dwarf_attr, die, DW_AT_ranges, &attr) != DW_DLV_OK) {
return;
@ -344,10 +368,7 @@ namespace libdwarf {
if(wrap(dwarf_global_formref, attr, &offset) != DW_DLV_OK) {
return;
}
Dwarf_Addr baseaddr = 0;
if(lowpc != (std::numeric_limits<Dwarf_Addr>::max)()) {
baseaddr = lowpc;
}
Dwarf_Addr baseaddr_original = baseaddr;
Dwarf_Ranges* ranges = nullptr;
Dwarf_Signed count = 0;
VERIFY(
@ -375,15 +396,15 @@ namespace libdwarf {
baseaddr = ranges[i].dwr_addr2;
} else {
ASSERT(ranges[i].dwr_type == DW_RANGES_END);
baseaddr = lowpc;
baseaddr = baseaddr_original;
}
}
}
template<typename F>
// callback should return true to keep going
void dwarf_ranges(int version, F callback) const {
Dwarf_Addr lowpc = (std::numeric_limits<Dwarf_Addr>::max)();
void dwarf_ranges(const die_object& cu_die, int version, F callback) const {
Dwarf_Addr lowpc;
if(wrap(dwarf_lowpc, die, &lowpc) == DW_DLV_OK) {
Dwarf_Addr highpc = 0;
enum Dwarf_Form_Class return_class;
@ -399,13 +420,13 @@ namespace libdwarf {
if(version >= 5) {
dwarf5_ranges(callback);
} else {
dwarf4_ranges(lowpc, callback);
dwarf4_ranges(get_ranges_base_address(cu_die), callback);
}
}
rangelist_entries get_rangelist_entries(int version) const {
rangelist_entries get_rangelist_entries(const die_object& cu_die, int version) const {
rangelist_entries vec;
dwarf_ranges(version, [&vec] (Dwarf_Addr low, Dwarf_Addr high) {
dwarf_ranges(cu_die, version, [&vec] (Dwarf_Addr low, Dwarf_Addr high) {
// Simple coalescing optimization:
// Sometimes the range list entries are really continuous: [100, 200), [200, 300)
// Other times there's just one byte of separation [300, 399), [400, 500)
@ -422,9 +443,9 @@ namespace libdwarf {
return vec;
}
Dwarf_Bool pc_in_die(int version, Dwarf_Addr pc) const {
Dwarf_Bool pc_in_die(const die_object& cu_die, int version, Dwarf_Addr pc) const {
bool found = false;
dwarf_ranges(version, [&found, pc] (Dwarf_Addr low, Dwarf_Addr high) {
dwarf_ranges(cu_die, version, [&found, pc] (Dwarf_Addr low, Dwarf_Addr high) {
if(pc >= low && pc < high) {
found = true;
return false;

View File

@ -281,14 +281,15 @@ namespace libdwarf {
// NOTE: If we have a corresponding skeleton, we assume we have one CU matching the skeleton CU
// Precedent for this assumption is https://dwarfstd.org/doc/DWARF5.pdf#subsection.3.1.3
// TODO: Also assuming same dwversion
auto ranges_vec = skeleton.unwrap().cu_die.get_rangelist_entries(dwversion);
const auto& skeleton_cu = skeleton.unwrap().cu_die;
auto ranges_vec = skeleton_cu.get_rangelist_entries(skeleton_cu, dwversion);
for(auto range : ranges_vec) {
// TODO: Reduce cloning here
cu_cache.push_back({ cu_die.clone(), dwversion, range.first, range.second });
}
return false;
} else {
auto ranges_vec = cu_die.get_rangelist_entries(dwversion);
auto ranges_vec = cu_die.get_rangelist_entries(cu_die, dwversion);
for(auto range : ranges_vec) {
// TODO: Reduce cloning here
cu_cache.push_back({ cu_die.clone(), dwversion, range.first, range.second });
@ -388,7 +389,7 @@ namespace libdwarf {
walk_die_list(
child,
[this, &cu_die, pc, dwversion, &inlines, &target_die, &current_obj_holder] (const die_object& die) {
if(die.get_tag() == DW_TAG_inlined_subroutine && die.pc_in_die(dwversion, pc)) {
if(die.get_tag() == DW_TAG_inlined_subroutine && die.pc_in_die(cu_die, dwversion, pc)) {
const auto name = subprogram_symbol(die, dwversion);
auto file_i = die.get_unsigned_attribute(DW_AT_call_file);
// TODO: Refactor.... Probably put logic in resolve_filename.
@ -480,7 +481,7 @@ namespace libdwarf {
die.get_name().c_str()
);
}
if(!(die.get_tag() == DW_TAG_namespace || die.pc_in_die(dwversion, pc))) {
if(!(die.get_tag() == DW_TAG_namespace || die.pc_in_die(cu_die, dwversion, pc))) {
if(dump_dwarf) {
std::fprintf(stderr, "pc not in die\n");
}
@ -522,17 +523,18 @@ namespace libdwarf {
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
void preprocess_subprograms(
const die_object& cu_die,
const die_object& die,
Dwarf_Half dwversion,
std::vector<subprogram_entry>& vec
) {
walk_die_list(
die,
[this, dwversion, &vec] (const die_object& die) {
[this, &cu_die, dwversion, &vec] (const die_object& die) {
switch(die.get_tag()) {
case DW_TAG_subprogram:
{
auto ranges_vec = die.get_rangelist_entries(dwversion);
auto ranges_vec = die.get_rangelist_entries(cu_die, dwversion);
// TODO: Feels super inefficient and some day should maybe use an interval tree.
for(auto range : ranges_vec) {
// TODO: Reduce cloning here
@ -543,7 +545,7 @@ namespace libdwarf {
// On clang it's better
auto child = die.get_child();
if(child) {
preprocess_subprograms(child, dwversion, vec);
preprocess_subprograms(cu_die, child, dwversion, vec);
}
}
break;
@ -556,7 +558,7 @@ namespace libdwarf {
{
auto child = die.get_child();
if(child) {
preprocess_subprograms(child, dwversion, vec);
preprocess_subprograms(cu_die, child, dwversion, vec);
}
}
break;
@ -587,7 +589,7 @@ namespace libdwarf {
if(it == subprograms_cache.end()) {
// TODO: Refactor. Do the sort in the preprocess function and return the vec directly.
std::vector<subprogram_entry> vec;
preprocess_subprograms(cu_die, dwversion, vec);
preprocess_subprograms(cu_die, cu_die, dwversion, vec);
std::sort(vec.begin(), vec.end(), [] (const subprogram_entry& a, const subprogram_entry& b) {
return a.low < b.low;
});
@ -605,7 +607,7 @@ namespace libdwarf {
);
// This can happen if the vector is empty
if(vec_it != vec.end()) {
if(vec_it->die.pc_in_die(dwversion, pc)) {
if(vec_it->die.pc_in_die(cu_die, dwversion, pc)) {
frame.symbol = retrieve_symbol_for_subprogram(cu_die, vec_it->die, pc, dwversion, inlines);
}
} else {
@ -882,8 +884,14 @@ namespace libdwarf {
}
// NOTE: If we have a corresponding skeleton, we assume we have one CU matching the skeleton CU
if(
(skeleton && skeleton.unwrap().cu_die.pc_in_die(skeleton.unwrap().dwversion, pc))
|| cu_die.pc_in_die(dwversion, pc)
(
skeleton
&& skeleton.unwrap().cu_die.pc_in_die(
skeleton.unwrap().cu_die,
skeleton.unwrap().dwversion,
pc
)
) || cu_die.pc_in_die(cu_die, dwversion, pc)
) {
if(trace_dwarf) {
std::fprintf(
@ -917,8 +925,14 @@ namespace libdwarf {
// TODO: Cache the range list?
// NOTE: If we have a corresponding skeleton, we assume we have one CU matching the skeleton CU
if(
(skeleton && skeleton.unwrap().cu_die.pc_in_die(skeleton.unwrap().dwversion, pc))
|| vec_it->die.pc_in_die(vec_it->dwversion, pc)
(
skeleton
&& skeleton.unwrap().cu_die.pc_in_die(
skeleton.unwrap().cu_die,
skeleton.unwrap().dwversion,
pc
)
) || vec_it->die.pc_in_die(vec_it->die, vec_it->dwversion, pc)
) {
return cu_info{maybe_owned_die_object::ref(vec_it->die), vec_it->dwversion};
}