Parsing of mach-o symbol tables, generation of debug maps, and resolution through object files (#82)

This commit is contained in:
Jeremy Rifkin 2024-01-29 22:12:59 -06:00 committed by GitHub
parent ea30c99f35
commit 79931c8823
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 622 additions and 106 deletions

View File

@ -23,8 +23,8 @@ jobs:
mkdir libdwarf
cd libdwarf
git init
git remote add origin https://github.com/davea42/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7
git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD
mkdir build
cd build
@ -52,8 +52,8 @@ jobs:
mkdir libdwarf
cd libdwarf
git init
git remote add origin https://github.com/davea42/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7
git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD
mkdir build
cd build

View File

@ -26,8 +26,8 @@ jobs:
mkdir libdwarf
cd libdwarf
git init
git remote add origin https://github.com/davea42/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7
git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD
mkdir build
cd build
@ -53,8 +53,8 @@ jobs:
mkdir libdwarf
cd libdwarf
git init
git remote add origin https://github.com/davea42/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7
git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD
mkdir build
cd build
@ -68,6 +68,17 @@ jobs:
- name: build and test
run: |
python3 ci/test-all-configs.py --${{matrix.compiler}}
# - name: bundle artifacts
# if: always()
# run: |
# tar czfH bundle.tar.gz build
# - name: upload artifacts
# uses: actions/upload-artifact@v4
# if: always()
# with:
# name: build-macos-${{matrix.compiler}}${{matrix.shared}}
# path: bundle.tar.gz
# retention-days: 2
test-windows:
runs-on: windows-2022
strategy:

View File

@ -27,12 +27,12 @@ if(PROJECT_IS_TOP_LEVEL)
if(CMAKE_GENERATOR STREQUAL "Ninja")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
endif()
if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
# The C flags must be selected from the C compiler's ID (not the C++ compiler's) for both Clang and AppleClang
elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcolor-diagnostics")
endif()
endif()
@ -358,10 +358,13 @@ if(CPPTRACE_GET_SYMBOLS_WITH_LIBDWARF)
# GIT_REPOSITORY https://github.com/davea42/libdwarf-code.git
# GIT_TAG 6216e185863f41d6f19ab850caabfff7326020d7 # v0.8.0
# GIT_TAG 8b0bd09d8c77d45a68cb1bb00a54186a92b683d9 # v0.9.0
# GIT_TAG c0cfba34ec80996426b5be2523f6447a2c9b7b39 # v0.9.0 + mach-o changes
# Using a lightweight mirror that's optimized for clone + configure speed
GIT_REPOSITORY https://github.com/jeremy-rifkin/libdwarf-lite.git
GIT_TAG c78e984f3abbd20f6e01d6f51819e826b1691f65 # v0.8.0
# GIT_REPOSITORY https://github.com/jeremy-rifkin/libdwarf-lite.git
# GIT_TAG c78e984f3abbd20f6e01d6f51819e826b1691f65 # v0.8.0
# GIT_TAG 71090c680b4c943448ba87a0f1f864f174e4edda # v0.9.0
GIT_REPOSITORY https://github.com/flagarde/libdwarf-code.git
GIT_TAG d1a559b7af0840194dfa51f7e3013e0f80614032
GIT_SHALLOW 1
)
FetchContent_MakeAvailable(libdwarf)
@ -533,20 +536,13 @@ macro(add_test_dependencies exec_name)
if(CPPTRACE_BUILD_TEST_RDYNAMIC)
set_property(TARGET ${exec_name} PROPERTY ENABLE_EXPORTS ON)
endif()
if(APPLE) # TODO: Temporary
add_custom_command(
TARGET ${exec_name}
POST_BUILD
COMMAND dsymutil $<TARGET_FILE:${exec_name}>
)
endif()
endmacro()
if(CPPTRACE_BUILD_TESTING)
add_executable(test test/test.cpp)
add_executable(demo test/demo.cpp)
add_executable(c_demo test/ctrace_demo.cpp)
add_test_dependencies(test)
add_test_dependencies(demo)
add_test_dependencies(c_demo)

View File

@ -115,7 +115,7 @@ endif()
Be sure to configure with `-DCMAKE_BUILD_TYPE=Debug` or `-DCMAKE_BUILD_TYPE=RelWithDebInfo` for symbols and line
information.
On macos a little extra work to generate a .dSYM file is required, see [Platform Logistics](#platform-logistics) below.
On macOS it is recommended to generate a .dSYM file, see [Platform Logistics](#platform-logistics) below.
For other ways to use the library, such as through package managers, a system-wide installation, or on a platform
without internet access see [Usage](#usage) below.
@ -140,9 +140,6 @@ method to get lightweight raw traces, which are just vectors of program counters
**Note:** Debug info (`-g`/`/Z7`/`/Zi`/`/DEBUG`) is generally required for good trace information.
**Note:** Currently on Mac .dSYM files are required, which can be generated with `dsymutil yourbinary`. A cmake snippet
for generating these is provided in [Platform Logistics](#platform-logistics) below.
All functions are thread-safe unless otherwise noted.
### Stack Traces
@ -506,7 +503,7 @@ namespace cpptrace {
| DWARF in separate binary (binary gnu debug link) | ️️✔️ |
| DWARF in separate binary (split dwarf) | ✔️ |
| DWARF in dSYM | ✔️ |
| DWARF in via Mach-O debug map | Soon |
| DWARF via Mach-O debug map | ✔️ |
| Windows debug symbols in PDB | ✔️ |
DWARF5 added DWARF package files. As far as I can tell no compiler implements these yet.
@ -719,7 +716,7 @@ if(WIN32)
endif()
```
Generating a .dSYM file on macos:
On macOS it's recommended to generate a dSYM file containing debug information for your program:
In xcode cmake this can be done with
@ -872,7 +869,6 @@ and time-memory tradeoffs. If you find the current implementation is either slow
to explore some of these options.
A couple things I'd like to improve in the future:
- On MacOS .dSYM files are required
- On Windows when collecting symbols with dbghelp (msvc/clang) parameter types are almost perfect but due to limitations
in dbghelp the library cannot accurately show const and volatile qualifiers or rvalue references (these appear as
pointers).

View File

@ -30,7 +30,7 @@ def similarity(name: str, target: List[str]) -> int:
return -1
return c
def output_matches(output: str, params: Tuple[str]):
def output_matches(raw_output: str, params: Tuple[str]):
target = []
if params[0].startswith("gcc") or params[0].startswith("g++"):
@ -72,31 +72,41 @@ def output_matches(output: str, params: Tuple[str]):
print(f"Reading from {file}")
with open(os.path.join(expected_dir, file), "r") as f:
expected = f.read()
raw_expected = f.read()
if output.strip() == "":
if raw_output.strip() == "":
print(f"Error: No output from test")
return False
expected = [line.strip().split("||") for line in expected.split("\n")]
output = [line.strip().split("||") for line in output.split("\n")]
expected = [line.strip().split("||") for line in raw_expected.split("\n")]
output = [line.strip().split("||") for line in raw_output.split("\n")]
max_line_diff = 0
errored = False
for i, ((output_file, output_line, output_symbol), (expected_file, expected_line, expected_symbol)) in enumerate(zip(output, expected)):
if output_file != expected_file:
print(f"Error: File name mismatch on line {i + 1}, found \"{output_file}\" expected \"{expected_file}\"")
errored = True
if abs(int(output_line) - int(expected_line)) > max_line_diff:
print(f"Error: File line mismatch on line {i + 1}, found {output_line} expected {expected_line}")
errored = True
if output_symbol != expected_symbol:
print(f"Error: File symbol mismatch on line {i + 1}, found \"{output_symbol}\" expected \"{expected_symbol}\"")
errored = True
if expected_symbol == "main" or expected_symbol == "main()":
break
try:
for i, ((output_file, output_line, output_symbol), (expected_file, expected_line, expected_symbol)) in enumerate(zip(output, expected)):
if output_file != expected_file:
print(f"Error: File name mismatch on line {i + 1}, found \"{output_file}\" expected \"{expected_file}\"")
errored = True
if abs(int(output_line) - int(expected_line)) > max_line_diff:
print(f"Error: File line mismatch on line {i + 1}, found {output_line} expected {expected_line}")
errored = True
if output_symbol != expected_symbol:
print(f"Error: File symbol mismatch on line {i + 1}, found \"{output_symbol}\" expected \"{expected_symbol}\"")
errored = True
if expected_symbol == "main" or expected_symbol == "main()":
break
except ValueError:
print("ValueError during output checking")
errored = True
if errored:
print("Output:")
print(raw_output)
print("Expected:")
print(raw_expected)
return not errored
@ -126,7 +136,7 @@ def run_test(test_binary, params: Tuple[str]):
print(Style.RESET_ALL, end="") # makefile in parallel sometimes messes up colors
if test.returncode != 0:
print("[🔴 Test command failed]")
print(f"[🔴 Test command failed with code {test.returncode}]")
print("stderr:")
print(test_stderr.decode("utf-8"), end="")
print("stdout:")

View File

@ -12,7 +12,13 @@
#include <cstdio>
#include <cstring>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include <iostream>
#include <iomanip>
#include <mach-o/loader.h>
#include <mach-o/swap.h>
@ -23,7 +29,7 @@
namespace cpptrace {
namespace detail {
static bool is_mach_o(std::uint32_t magic) {
inline bool is_mach_o(std::uint32_t magic) {
switch(magic) {
case FAT_MAGIC:
case FAT_CIGAM:
@ -37,36 +43,57 @@ namespace detail {
}
}
static bool is_fat_magic(std::uint32_t magic) {
// Returns true if the file at object_path can be opened and its first four bytes are a mach-o magic number
// (as decided by is_mach_o). Never throws: a file that cannot be opened or read is reported as false.
inline bool file_is_mach_o(const std::string& object_path) noexcept {
    FILE* file = std::fopen(object_path.c_str(), "rb");
    if(file == nullptr) {
        return false;
    }
    bool result = false;
    try {
        result = is_mach_o(load_bytes<std::uint32_t>(file, 0));
    } catch(...) {
        // a failed read just means this is not something we can treat as mach-o
        result = false;
    }
    // release the handle on both the success and the failure path so repeated probing doesn't leak descriptors
    std::fclose(file);
    return result;
}
inline bool is_fat_magic(std::uint32_t magic) {
return magic == FAT_MAGIC || magic == FAT_CIGAM;
}
// Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
// and https://lowlevelbits.org/parsing-mach-o-files/
static bool is_magic_64(std::uint32_t magic) {
inline bool is_magic_64(std::uint32_t magic) {
return magic == MH_MAGIC_64 || magic == MH_CIGAM_64;
}
static bool should_swap_bytes(std::uint32_t magic) {
inline bool should_swap_bytes(std::uint32_t magic) {
return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM;
}
static void swap_mach_header(mach_header_64& header) {
inline void swap_mach_header(mach_header_64& header) {
swap_mach_header_64(&header, NX_UnknownByteOrder);
}
static void swap_mach_header(mach_header& header) {
inline void swap_mach_header(mach_header& header) {
swap_mach_header(&header, NX_UnknownByteOrder);
}
static void swap_segment_command(segment_command_64& segment) {
inline void swap_segment_command(segment_command_64& segment) {
swap_segment_command_64(&segment, NX_UnknownByteOrder);
}
static void swap_segment_command(segment_command& segment) {
inline void swap_segment_command(segment_command& segment) {
swap_segment_command(&segment, NX_UnknownByteOrder);
}
// Reference-taking convenience overloads used by load_symtab_entry so the 32-bit and 64-bit entry types can be
// swapped through one name. Each forwards a single entry (count of 1) to the pointer-taking swap routine
// (presumably the ones declared in <mach-o/swap.h>), passing NX_UnknownByteOrder like the other swap helpers here.
inline void swap_nlist(struct nlist& entry) {
swap_nlist(&entry, 1, NX_UnknownByteOrder);
}
inline void swap_nlist(struct nlist_64& entry) {
swap_nlist_64(&entry, 1, NX_UnknownByteOrder);
}
#ifdef __LP64__
#define LP(x) x##_64
#else
@ -89,12 +116,28 @@ namespace detail {
std::uint32_t n_load_commands;
std::uint32_t sizeof_load_commands;
std::uint32_t flags;
std::size_t bits = 0; // 32 or 64 once load_mach is called
std::size_t load_base = 0;
std::size_t fat_index = std::numeric_limits<std::size_t>::max();
std::vector<load_command_entry> load_commands;
// Pairs a symbol table load command with the string table buffer loaded for it.
struct symtab_info_data {
    symtab_command symtab;
    std::unique_ptr<char[]> stringtab;
    // Returns a pointer to byte `index` of the string table.
    // Throws std::runtime_error when no string table is held or `index` is not below symtab.strsize.
    const char* get_string(std::size_t index) const {
        const bool retrievable = stringtab && index < symtab.strsize;
        if(!retrievable) {
            throw std::runtime_error("can't retrieve symbol from symtab");
        }
        return stringtab.get() + index;
    }
};
bool tried_to_load_symtab = false;
optional<symtab_info_data> symtab_info;
public:
mach_o(const std::string& object_path) : object_path(object_path) {
file = std::fopen(object_path.c_str(), "rb");
@ -117,7 +160,7 @@ namespace detail {
~mach_o() {
if(file) {
fclose(file);
std::fclose(file);
}
}
@ -163,12 +206,195 @@ namespace detail {
}
}
// Returns the cached symbol table information, loading it on first use from the first LC_SYMTAB load command.
// The optional stays empty if no such command exists or if loading threw on an earlier call.
optional<symtab_info_data>& get_symtab_info() {
    if(symtab_info.has_value() || tried_to_load_symtab) {
        return symtab_info;
    }
    // record the attempt before doing any work so a failure below is not retried on every call
    tried_to_load_symtab = true;
    for(const auto& load_command : load_commands) {
        if(load_command.cmd != LC_SYMTAB) {
            continue;
        }
        symtab_info_data loaded;
        loaded.symtab = load_symbol_table_command(load_command.file_offset);
        loaded.stringtab = load_string_table(loaded.symtab.stroff, loaded.symtab.strsize);
        symtab_info = std::move(loaded);
        break;
    }
    return symtab_info;
}
// Debugging aid: writes one symbol table entry to stderr as a single row.
//   entry      the entry to print (already widened to nlist_64)
//   stringtab  string table used to look up the entry's name, may be null
//   stringsize size of the string table in bytes, used to bounds-check n_strx
//   j          index of the entry, printed in the first column
void print_symbol_table_entry(
    const nlist_64& entry,
    const std::unique_ptr<char[]>& stringtab,
    std::size_t stringsize,
    std::size_t j
) const {
    // label for the entry kind; stays empty for kinds that aren't recognized
    const char* type = "";
    if(entry.n_type & N_STAB) {
        if(entry.n_type == N_SO) {
            type = "N_SO";
        } else if(entry.n_type == N_OSO) {
            type = "N_OSO";
        } else if(entry.n_type == N_BNSYM) {
            type = "N_BNSYM";
        } else if(entry.n_type == N_ENSYM) {
            type = "N_ENSYM";
        } else if(entry.n_type == N_FUN) {
            type = "N_FUN";
        }
    } else if((entry.n_type & N_TYPE) == N_SECT) {
        type = "N_SECT";
    }
    // resolve the name, substituting a diagnostic when the string table can't be used
    const char* name;
    if(stringtab == nullptr) {
        name = "Stringtab error";
    } else if(entry.n_un.n_strx < stringsize) {
        name = stringtab.get() + entry.n_un.n_strx;
    } else {
        name = "String index out of bounds";
    }
    fprintf(
        stderr,
        "%5llu %8llx %2llx %7s %2llu %4llx %16llx %s\n",
        to_ull(j),
        to_ull(entry.n_un.n_strx),
        to_ull(entry.n_type),
        type,
        to_ull(entry.n_sect),
        to_ull(entry.n_desc),
        to_ull(entry.n_value),
        name
    );
}
// Debugging aid: for every LC_SYMTAB load command, prints the command's fields followed by each of its symbol
// table entries to stderr.
void print_symbol_table() {
    int i = 0; // index of the current load command, counted across all command kinds
    for(const auto& command : load_commands) {
        if(command.cmd == LC_SYMTAB) {
            auto symtab = load_symbol_table_command(command.file_offset);
            fprintf(stderr, "Load command %d\n", i);
            fprintf(stderr, " cmd %llu\n", to_ull(symtab.cmd));
            fprintf(stderr, " cmdsize %llu\n", to_ull(symtab.cmdsize));
            // the offsets are printed with a 0x prefix, so they have to use a hexadecimal conversion
            fprintf(stderr, " symoff 0x%llx\n", to_ull(symtab.symoff));
            fprintf(stderr, " nsyms %llu\n", to_ull(symtab.nsyms));
            fprintf(stderr, " stroff 0x%llx\n", to_ull(symtab.stroff));
            fprintf(stderr, " strsize %llu\n", to_ull(symtab.strsize));
            auto stringtab = load_string_table(symtab.stroff, symtab.strsize);
            for(std::size_t j = 0; j < symtab.nsyms; j++) {
                // entries are read at the width of the loaded image and widened to nlist_64
                nlist_64 entry = bits == 32
                    ? load_symtab_entry<32>(symtab.symoff, j)
                    : load_symtab_entry<64>(symtab.symoff, j);
                print_symbol_table_entry(entry, stringtab, symtab.strsize, j);
            }
        }
        i++;
    }
}
// One function recorded by get_debug_map, assembled from an opening and a closing N_FUN symbol table entry.
struct debug_map_entry {
// n_value of the opening (named) N_FUN entry
uint64_t source_address;
// n_value of the closing (empty-named) N_FUN entry
uint64_t size;
// string table name of the opening N_FUN entry
std::string name;
};
// One entry produced by symbol_table(): a non-debug symbol defined in a section.
struct symbol_entry {
// n_value of the symbol table entry
uint64_t address;
// string table name of the symbol table entry
std::string name;
};
// map from object file to a vector of symbols to resolve
using debug_map = std::unordered_map<std::string, std::vector<debug_map_entry>>;
// produce information similar to dsymutil -dump-debug-map
//
// Walks the symbolic debug (N_STAB) entries of the symbol table and groups functions by the object file they came
// from. An N_OSO entry sets the current object file; a named N_FUN entry opens a function and records its address;
// the following empty-named N_FUN entry closes it and records its size. Closing entries with no open function are
// skipped rather than aborting the whole map.
// Throws if the symbol table could not be loaded (get_symtab_info() is empty) or a string index is invalid.
debug_map get_debug_map() {
    // we have a bunch of symbols in our binary we need to pair up with symbols from various .o files
    // first collect symbols and the objects they come from
    debug_map result;
    const auto& info = get_symtab_info().unwrap();
    const auto& symtab = info.symtab;
    // TODO: Take timestamp into account?
    std::string current_module;
    debug_map_entry current_function;
    // true between a named N_FUN entry and the empty-named N_FUN entry that closes it
    bool function_open = false;
    for(std::size_t j = 0; j < symtab.nsyms; j++) {
        const nlist_64 entry = bits == 32
            ? load_symtab_entry<32>(symtab.symoff, j)
            : load_symtab_entry<64>(symtab.symoff, j);
        // entry.n_type & N_STAB indicates symbolic debug info
        if(!(entry.n_type & N_STAB)) {
            continue;
        }
        switch(entry.n_type) {
            case N_OSO:
                // sets the module
                current_module = info.get_string(entry.n_un.n_strx);
                break;
            case N_FUN:
                {
                    const char* str = info.get_string(entry.n_un.n_strx);
                    if(str[0] != 0) {
                        // start of function scope
                        current_function = debug_map_entry{};
                        current_function.source_address = entry.n_value;
                        current_function.name = str;
                        function_open = true;
                    } else if(function_open) {
                        // end of function scope
                        current_function.size = entry.n_value;
                        result[current_module].push_back(std::move(current_function));
                        function_open = false;
                    }
                    // otherwise: an end marker without a matching start, nothing to record
                }
                break;
            default:
                // N_SO encodes path and filename for the module, N_BNSYM / N_ENSYM bracket a symbol; none of these
                // (nor any other stab kind) contribute to the map
                break;
        }
    }
    return result;
}
std::vector<symbol_entry> symbol_table() {
// we have a bunch of symbols in our binary we need to pair up with symbols from various .o files
// first collect symbols and the objects they come from
std::vector<symbol_entry> symbols;
const auto& symtab_info = get_symtab_info().unwrap();
const auto& symtab = symtab_info.symtab;
// TODO: Take timestamp into account?
for(std::size_t j = 0; j < symtab.nsyms; j++) {
nlist_64 entry = bits == 32
? load_symtab_entry<32>(symtab.symoff, j)
: load_symtab_entry<64>(symtab.symoff, j);
if(entry.n_type & N_STAB) {
continue;
}
if((entry.n_type & N_TYPE) == N_SECT) {
symbols.push_back({
entry.n_value,
symtab_info.get_string(entry.n_un.n_strx)
});
}
}
return symbols;
}
// produce information similar to dsymutil -dump-debug-map
//
// Debugging aid: prints each object file of the debug map followed by its symbols (name, address and size in hex).
// Everything goes to stderr so the object headers and their symbols stay on one stream, in order.
static void print_debug_map(const debug_map& debug_map) {
    for(const auto& entry : debug_map) {
        std::cerr<<entry.first<<": "<<std::endl;
        for(const auto& symbol : entry.second) {
            std::cerr
                << " "
                << symbol.name
                << " "
                << std::hex
                << symbol.source_address
                << " "
                << symbol.size
                << std::dec
                << std::endl;
        }
    }
}
private:
template<std::size_t Bits>
void load_mach(
bool allow_arch_mismatch
) {
static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument");
bits = Bits;
using Mach_Header = typename std::conditional<Bits == 32, mach_header, mach_header_64>::type;
std::size_t header_size = sizeof(Mach_Header);
Mach_Header header = load_bytes<Mach_Header>(file, load_base);
@ -268,6 +494,41 @@ namespace detail {
return common;
}
// Reads the symtab_command stored at file offset `offset` and byte-swaps it when should_swap() says the file
// needs it. Asserts that what was read really is an LC_SYMTAB command.
symtab_command load_symbol_table_command(std::uint32_t offset) const {
    symtab_command symtab = load_bytes<symtab_command>(file, offset);
    if(should_swap()) {
        swap_symtab_command(&symtab, NX_UnknownByteOrder);
    }
    // check the command id only after swapping: in a file of the opposite byte order the raw cmd field is
    // byte-reversed and would never compare equal to LC_SYMTAB
    ASSERT(symtab.cmd == LC_SYMTAB);
    return symtab;
}
// Reads symbol table entry number `index` from the symbol table starting at `symbol_base` (relative to load_base),
// using the 32-bit or 64-bit on-disk layout selected by Bits, byte-swaps it if required, and returns it widened
// to nlist_64 so callers can handle both layouts uniformly.
template<std::size_t Bits>
nlist_64 load_symtab_entry(std::uint32_t symbol_base, std::size_t index) const {
    static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument");
    using Nlist = typename std::conditional<Bits == 32, struct nlist, struct nlist_64>::type;
    // keep the offset at full width; load_base and index are std::size_t and the sum need not fit in 32 bits
    const std::size_t offset = load_base + symbol_base + index * sizeof(Nlist);
    Nlist entry = load_bytes<Nlist>(file, offset);
    if(should_swap()) {
        swap_nlist(entry);
    }
    // fields match just u64 instead of u32
    nlist_64 common;
    common.n_un.n_strx = entry.n_un.n_strx;
    common.n_type = entry.n_type;
    common.n_sect = entry.n_sect;
    common.n_desc = entry.n_desc;
    common.n_value = entry.n_value;
    return common;
}
// Reads `byte_count` bytes starting at `offset` (relative to load_base) into a freshly allocated buffer and
// appends a terminating zero byte. Fails via VERIFY if seeking or reading the full amount does not succeed.
std::unique_ptr<char[]> load_string_table(std::uint32_t offset, std::uint32_t byte_count) const {
    // widen before adding the terminator slot: in 32-bit arithmetic a byte_count of UINT32_MAX (e.g. from a
    // malformed file) would wrap the allocation size around to zero
    const std::size_t buffer_size = static_cast<std::size_t>(byte_count) + 1;
    std::unique_ptr<char[]> buffer(new char[buffer_size]);
    VERIFY(std::fseek(file, load_base + offset, SEEK_SET) == 0, "fseek error");
    VERIFY(std::fread(buffer.get(), sizeof(char), byte_count, file) == byte_count, "fread error");
    buffer[byte_count] = 0; // just out of an abundance of caution
    return buffer;
}
bool should_swap() const {
return should_swap_bytes(magic);
}

View File

@ -20,6 +20,9 @@
#include <unordered_map>
#include <vector>
#include <iostream>
#include <iomanip>
// It's been tricky to piece together how to handle all this dwarf stuff. Some resources I've used are
// https://www.prevanders.net/libdwarf.pdf
// https://github.com/davea42/libdwarf-addr2line
@ -74,7 +77,14 @@ namespace libdwarf {
std::vector<line_entry> line_entries;
};
struct dwarf_resolver {
// Common interface for the resolvers in this file: turns one object frame into a resolved frame plus any inlined
// frames. Implemented by dwarf_resolver and the other resolver classes below.
class symbol_resolver {
public:
    // Resolve a single frame; implementations decide what to return when no information is available.
    CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
    virtual frame_with_inlines resolve_frame(const object_frame& frame_info) = 0;

    // Resolvers are owned and destroyed through pointers to this base class.
    virtual ~symbol_resolver() = default;
};
class dwarf_resolver : public symbol_resolver {
std::string object_path;
Dwarf_Debug dbg = nullptr;
bool ok = false;
@ -91,6 +101,7 @@ namespace libdwarf {
// Map from CU -> {srcfiles, count}
std::unordered_map<Dwarf_Off, std::pair<char**, Dwarf_Signed>> srcfiles_cache;
private:
// Error handling helper
// For some reason R (*f)(Args..., void*)-style deduction isn't possible, seems like a bug in all compilers
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56190
@ -116,14 +127,25 @@ namespace libdwarf {
return ret;
}
public:
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
dwarf_resolver(const std::string& _object_path) {
object_path = _object_path;
// use a buffer when invoking dwarf_init_path, which allows it to automatically find debuglink or dSYM
// sources
bool use_buffer = true;
// for universal / fat mach-o files
unsigned universal_number = 0;
#if IS_APPLE
if(directory_exists(object_path + ".dSYM")) {
object_path += ".dSYM/Contents/Resources/DWARF/" + basename(object_path);
// Possibly depends on the build system but a obj.cpp.o.dSYM/Contents/Resources/DWARF/obj.cpp.o can be
// created alongside .o files. These are text files containing directives, as opposed to something we
// can actually use
std::string dsym_resource = object_path + ".dSYM/Contents/Resources/DWARF/" + basename(object_path);
if(file_is_mach_o(dsym_resource)) {
object_path = std::move(dsym_resource);
}
use_buffer = false; // we resolved dSYM above as appropriate
}
if(macho_is_fat(object_path)) {
universal_number = mach_o(object_path).get_fat_index();
@ -132,7 +154,10 @@ namespace libdwarf {
// Giving libdwarf a buffer for a true output path is needed for its automatic resolution of debuglink and
// dSYM files. We don't utilize the dSYM logic here, we just care about debuglink.
std::unique_ptr<char[]> buffer(new char[CPPTRACE_MAX_PATH]);
std::unique_ptr<char[]> buffer;
if(use_buffer) {
buffer = std::unique_ptr<char[]>(new char[CPPTRACE_MAX_PATH]);
}
auto ret = wrap(
dwarf_init_path_a,
object_path.c_str(),
@ -214,6 +239,7 @@ namespace libdwarf {
return *this;
}
private:
// walk all CU's in a dbg, callback is called on each die and should return true to
// continue traversal
void walk_compilation_units(const std::function<bool(const die_object&)>& fn) {
@ -521,19 +547,14 @@ namespace libdwarf {
it = subprograms_cache.find(off);
}
auto& vec = it->second;
auto vec_it = std::lower_bound(
auto vec_it = first_less_than_or_equal(
vec.begin(),
vec.end(),
pc,
[] (const subprogram_entry& entry, Dwarf_Addr pc) {
return entry.low < pc;
[] (Dwarf_Addr pc, const subprogram_entry& entry) {
return pc < entry.low;
}
);
// vec_it is first >= pc
// we want first <= pc
if(vec_it != vec.begin()) {
vec_it--;
}
// If the vector has been empty this can happen
if(vec_it != vec.end()) {
//vec_it->die.print();
@ -648,19 +669,14 @@ namespace libdwarf {
if(get_cache_mode() == cache_mode::prioritize_speed) {
// Lookup in the table
auto& line_entries = table_info.line_entries;
auto table_it = std::lower_bound(
auto table_it = first_less_than_or_equal(
line_entries.begin(),
line_entries.end(),
pc,
[] (const line_entry& entry, Dwarf_Addr pc) {
return entry.low < pc;
[] (Dwarf_Addr pc, const line_entry& entry) {
return pc < entry.low;
}
);
// vec_it is first >= pc
// we want first <= pc
if(table_it != line_entries.begin()) {
table_it--;
}
// If the vector has been empty this can happen
if(table_it != line_entries.end()) {
Dwarf_Line line = table_it->line;
@ -824,19 +840,14 @@ namespace libdwarf {
} else {
lazy_generate_cu_cache();
// look up the cu
auto vec_it = std::lower_bound(
auto vec_it = first_less_than_or_equal(
cu_cache.begin(),
cu_cache.end(),
pc,
[] (const cu_entry& entry, Dwarf_Addr pc) {
return entry.low < pc;
[] (Dwarf_Addr pc, const cu_entry& entry) {
return pc < entry.low;
}
);
// vec_it is first >= pc
// we want first <= pc
if(vec_it != cu_cache.begin()) {
vec_it--;
}
// If the vector has been empty this can happen
if(vec_it != cu_cache.end()) {
//vec_it->die.print();
@ -850,8 +861,22 @@ namespace libdwarf {
}
}
public:
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
frame_with_inlines resolve_frame(const object_frame& frame_info) {
frame_with_inlines resolve_frame(const object_frame& frame_info) override {
if(!ok) {
return {
{
frame_info.raw_address,
nullable<std::uint32_t>::null(),
nullable<std::uint32_t>::null(),
frame_info.object_path,
"",
false
},
{}
};
}
stacktrace_frame frame = null_frame;
frame.filename = frame_info.object_path;
frame.address = frame_info.raw_address;
@ -873,55 +898,249 @@ namespace libdwarf {
}
};
// Resolver that has no debug information: every frame comes back with only its raw address and object path
// filled in, no line/column, an empty symbol and no inlines.
class null_resolver : public symbol_resolver {
public:
    null_resolver() = default;
    // Accepts (and ignores) an object path so it can be constructed like the other resolvers.
    null_resolver(const std::string&) {}

    CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
    frame_with_inlines resolve_frame(const object_frame& frame_info) override {
        const auto no_value = nullable<std::uint32_t>::null();
        frame_with_inlines unresolved = {
            {
                frame_info.raw_address,
                no_value,
                no_value,
                frame_info.object_path,
                "",
                false
            },
            {}
        };
        return unresolved;
    }
};
#if IS_APPLE
// An object file referenced by a debug map. Lazily loads the object's symbol table and a dwarf resolver for it,
// and resolves frames given as "symbol name + offset" within that object.
struct target_object {
    std::string object_path;
    bool path_ok = true;
    optional<std::unordered_map<std::string, uint64_t>> symbols;
    std::unique_ptr<symbol_resolver> resolver;

    target_object(std::string object_path) : object_path(object_path) {}

    // Returns the resolver for this object, creating it on first use.
    std::unique_ptr<symbol_resolver>& get_resolver() {
        if(resolver) {
            return resolver;
        }
        // Install a null_resolver before attempting the real one: if constructing the dwarf_resolver throws (e.g.
        // the path doesn't exist) the placeholder stays in place and construction isn't attempted again.
        resolver = std::unique_ptr<null_resolver>(new null_resolver);
        resolver = std::unique_ptr<dwarf_resolver>(new dwarf_resolver(object_path));
        return resolver;
    }

    // Returns the name -> address table of this object's symbols, building it on first use.
    std::unordered_map<std::string, uint64_t>& get_symbols() {
        if(symbols.has_value()) {
            return symbols.unwrap();
        }
        // Store an empty table before reading the mach-o file: if processing throws (e.g. the path doesn't
        // exist) the empty table remains and the file isn't reprocessed on later calls.
        std::unordered_map<std::string, uint64_t> table;
        this->symbols = table;
        auto entries = mach_o(object_path).symbol_table();
        for(const auto& entry : entries) {
            table[entry.name] = entry.address;
        }
        this->symbols = std::move(table);
        return symbols.unwrap();
    }

    // Resolves a frame located `offset` bytes past the start of `symbol_name` in this object. If the object has
    // no such symbol the returned frame carries only the raw address, object path and symbol name.
    CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
    frame_with_inlines resolve_frame(
        const object_frame& frame_info,
        const std::string& symbol_name,
        std::size_t offset
    ) {
        const auto& table = get_symbols();
        const auto found = table.find(symbol_name);
        if(found == table.end()) {
            return {
                {
                    frame_info.raw_address,
                    nullable<std::uint32_t>::null(),
                    nullable<std::uint32_t>::null(),
                    frame_info.object_path,
                    symbol_name,
                    false
                },
                {}
            };
        }
        // rebase onto the symbol's address inside this object and let the resolver handle the rest
        auto rebased = frame_info;
        rebased.object_address = found->second + offset;
        return get_resolver()->resolve_frame(rebased);
    }
};
// One debug map symbol as stored by debug_map_resolver, which keeps these sorted by source_address for lookup.
struct debug_map_symbol_info {
// address, size and name copied from the corresponding debug map entry
uint64_t source_address;
uint64_t size;
std::string name;
// set to null when the entry is created by debug_map_resolver
nullable<uint64_t> target_address; // T(-1) is used as a sentinel
// index into debug_map_resolver::target_objects of the object file this symbol came from
std::size_t object_index;
};
class debug_map_resolver : public symbol_resolver {
std::vector<target_object> target_objects;
std::vector<debug_map_symbol_info> symbols;
public:
debug_map_resolver(const std::string& source_object_path) {
// load mach-o
// TODO: Cache somehow?
mach_o source_mach(source_object_path);
auto source_debug_map = source_mach.get_debug_map();
// get symbol entries from debug map, as well as the various object files used to make this binary
for(auto& entry : source_debug_map) {
// object it came from
target_objects.push_back({std::move(entry.first)});
// push the symbols
auto& map_entry_symbols = entry.second;
symbols.reserve(symbols.size() + map_entry_symbols.size());
for(auto& symbol : map_entry_symbols) {
symbols.push_back({
symbol.source_address,
symbol.size,
std::move(symbol.name),
nullable<uint64_t>::null(),
target_objects.size() - 1
});
}
}
// sort for binary lookup later
std::sort(
symbols.begin(),
symbols.end(),
[] (
const debug_map_symbol_info& a,
const debug_map_symbol_info& b
) {
return a.source_address < b.source_address;
}
);
}
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
frame_with_inlines resolve_frame(const object_frame& frame_info) override {
// resolve object frame:
// find the symbol in this executable corresponding to the object address
// resolve the symbol in the object it came from, based on the symbol name
auto closest_symbol_it = first_less_than_or_equal(
symbols.begin(),
symbols.end(),
frame_info.object_address,
[] (
Dwarf_Addr pc,
const debug_map_symbol_info& symbol
) {
return pc < symbol.source_address;
}
);
if(closest_symbol_it != symbols.end()) {
if(frame_info.object_address <= closest_symbol_it->source_address + closest_symbol_it->size) {
return target_objects[closest_symbol_it->object_index].resolve_frame(
{
frame_info.raw_address,
// the resolver doesn't care about the object address here, only the offset from the start
// of the symbol and it'll lookup the symbol's base-address
0,
frame_info.object_path
},
closest_symbol_it->name,
frame_info.object_address - closest_symbol_it->source_address
);
}
}
// There was either no closest symbol or the closest symbol didn't end up containing the address we're
// looking for, so just return a blank frame
return {
{
frame_info.raw_address,
nullable<std::uint32_t>::null(),
nullable<std::uint32_t>::null(),
frame_info.object_path,
"",
false
},
{}
};
};
};
#endif
std::unique_ptr<symbol_resolver> get_resolver_for_object(const std::string& object_path) {
#if IS_APPLE
// Check if dSYM exist, if not fallback to debug map
if(!directory_exists(object_path + ".dSYM")) {
return std::unique_ptr<debug_map_resolver>(new debug_map_resolver(object_path));
}
#endif
return std::unique_ptr<dwarf_resolver>(new dwarf_resolver(object_path));
}
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
std::vector<stacktrace_frame> resolve_frames(const std::vector<object_frame>& frames) {
std::vector<frame_with_inlines> trace(frames.size(), {null_frame, {}});
static std::mutex mutex;
// cache resolvers since objects are likely to be traced more than once
static std::unordered_map<std::string, dwarf_resolver> resolver_map;
static std::unordered_map<std::string, std::unique_ptr<symbol_resolver>> resolver_map;
// Locking around all libdwarf interaction per https://github.com/davea42/libdwarf-code/discussions/184
// And also interactions with the above static map
const std::lock_guard<std::mutex> lock(mutex);
for(const auto& object_entry : collate_frames(frames, trace)) {
try {
const auto& object_name = object_entry.first;
optional<dwarf_resolver> resolver_object = nullopt;
dwarf_resolver* resolver = nullptr;
std::unique_ptr<symbol_resolver> resolver_object;
symbol_resolver* resolver = nullptr;
auto it = resolver_map.find(object_name);
if(it != resolver_map.end()) {
resolver = &it->second;
resolver = it->second.get();
} else {
resolver_object = dwarf_resolver(object_name);
resolver = &resolver_object.unwrap();
resolver_object = get_resolver_for_object(object_name);
resolver = resolver_object.get();
}
// If there's no debug information it'll mark itself as not ok
if(resolver->ok) {
for(const auto& entry : object_entry.second) {
try {
const auto& dlframe = entry.first.get();
auto& frame = entry.second.get();
frame = resolver->resolve_frame(dlframe);
} catch(...) {
if(!should_absorb_trace_exceptions()) {
throw;
}
}
}
} else {
// at least copy the addresses
for(const auto& entry : object_entry.second) {
for(const auto& entry : object_entry.second) {
try {
const auto& dlframe = entry.first.get();
auto& frame = entry.second.get();
frame.frame.address = dlframe.raw_address;
frame = resolver->resolve_frame(dlframe);
} catch(...) {
if(!should_absorb_trace_exceptions()) {
throw;
}
}
}
if(resolver_object.has_value() && get_cache_mode() == cache_mode::prioritize_speed) {
if(resolver_object && get_cache_mode() == cache_mode::prioritize_speed) {
// .emplace needed, for some reason .insert tries to copy <= gcc 7.2
resolver_map.emplace(object_name, std::move(resolver_object).unwrap());
resolver_map.emplace(object_name, std::move(resolver_object));
}
} catch(...) { // NOSONAR
if(!should_absorb_trace_exceptions()) {
throw;
}
for(const auto& entry : object_entry.second) {
const auto& dlframe = entry.first.get();
auto& frame = entry.second.get();
frame = {
{
dlframe.raw_address,
nullable<std::uint32_t>::null(),
nullable<std::uint32_t>::null(),
dlframe.object_path,
"",
false
},
{}
};
}
}
}
// flatten trace with inlines

View File

@ -1,6 +1,7 @@
#ifndef UTILS_HPP
#define UTILS_HPP
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
@ -72,6 +73,28 @@ namespace detail {
return str;
}
// Finds the last element of the sorted range [begin, end) that is <= value, i.e. the element just before the
// first one greater than value. Returns end when there is no such element (empty range, or every element is
// greater than value). The iterators must support decrementing.
template<typename ForwardIt, typename T>
ForwardIt first_less_than_or_equal(ForwardIt begin, ForwardIt end, const T& value) {
    // upper_bound gives the first element > value; the one we want sits directly in front of it
    ForwardIt candidate = std::upper_bound(begin, end, value);
    if(candidate == begin) {
        return end;
    }
    --candidate;
    return candidate;
}
// Finds the last element of the sorted range [begin, end) that is <= value according to compare, i.e. the
// element just before the first one for which compare(value, element) is true. Returns end when there is no such
// element. compare is passed to std::upper_bound, so it is called as compare(value, element). The iterators must
// support decrementing.
template<typename ForwardIt, typename T, typename Compare>
ForwardIt first_less_than_or_equal(ForwardIt begin, ForwardIt end, const T& value, Compare compare) {
    // upper_bound gives the first element > value; the one we want sits directly in front of it
    ForwardIt candidate = std::upper_bound(begin, end, value, compare);
    if(candidate == begin) {
        return end;
    }
    --candidate;
    return candidate;
}
constexpr const char* const whitespace = " \t\n\r\f\v";
inline std::string trim(const std::string& str) {