Macos addr2line / atos support (#14)

This commit is contained in:
Jeremy Rifkin 2023-07-23 09:27:01 -04:00 committed by GitHub
parent 1feee6ee14
commit baf785cc49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 229 additions and 5 deletions

View File

@ -129,7 +129,7 @@ can hold addresses for 100 frames (beyond the `skip` frames). This is configurab
| Library | CMake config | Platforms | Info | | Library | CMake config | Platforms | Info |
| ------------ | ---------------------------------------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------ | ---------------------------------------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| libbacktrace | `CPPTRACE_GET_SYMBOLS_WITH_LIBBACKTRACE` | linux, macos*, mingw* | Libbacktrace is already installed on most systems or available through the compiler directly. For clang you must specify the absolute path to `backtrace.h` using `CPPTRACE_BACKTRACE_PATH`. | | libbacktrace | `CPPTRACE_GET_SYMBOLS_WITH_LIBBACKTRACE` | linux, macos*, mingw* | Libbacktrace is already installed on most systems or available through the compiler directly. For clang you must specify the absolute path to `backtrace.h` using `CPPTRACE_BACKTRACE_PATH`. |
| addr2line | `CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE` | linux, mingw | Symbols are resolved by invoking `addr2line` via `fork()` (on linux/unix, and `popen` under mingw). | | addr2line | `CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE` | linux, macos, mingw | Symbols are resolved by invoking `addr2line` (or `atos` on mac) via `fork()` (on linux/unix, and `popen` under mingw). |
| dbghelp | `CPPTRACE_GET_SYMBOLS_WITH_DBGHELP` | windows | Dbghelp.h allows access to symbols via debug info. | | dbghelp | `CPPTRACE_GET_SYMBOLS_WITH_DBGHELP` | windows | Dbghelp.h allows access to symbols via debug info. |
| libdl | `CPPTRACE_GET_SYMBOLS_WITH_LIBDL` | linux, macos | Libdl uses dynamic export information. Compiling with `-rdynamic` is needed for symbol information to be retrievable. Line numbers won't be retrievable. | | libdl | `CPPTRACE_GET_SYMBOLS_WITH_LIBDL` | linux, macos | Libdl uses dynamic export information. Compiling with `-rdynamic` is needed for symbol information to be retrievable. Line numbers won't be retrievable. |
| N/A | `CPPTRACE_GET_SYMBOLS_WITH_NOTHING` | all | No attempt is made to resolve symbols. | | N/A | `CPPTRACE_GET_SYMBOLS_WITH_NOTHING` | all | No attempt is made to resolve symbols. |

View File

@ -336,7 +336,7 @@ def main():
"symbols": [ "symbols": [
#"CPPTRACE_GET_SYMBOLS_WITH_LIBBACKTRACE", #"CPPTRACE_GET_SYMBOLS_WITH_LIBBACKTRACE",
"CPPTRACE_GET_SYMBOLS_WITH_LIBDL", "CPPTRACE_GET_SYMBOLS_WITH_LIBDL",
#"CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE", "CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE",
#"CPPTRACE_GET_SYMBOLS_WITH_NOTHING", #"CPPTRACE_GET_SYMBOLS_WITH_NOTHING",
], ],
"demangle": [ "demangle": [

View File

@ -0,0 +1,161 @@
#ifndef CPPTRACE_MACHO_HPP
#define CPPTRACE_MACHO_HPP
#if IS_APPLE
#include <cstdio>
#include <cstring>
#include <type_traits>
#include <mach-o/loader.h>
#include <mach-o/swap.h>
#include <mach-o/fat.h>
// Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
// and https://lowlevelbits.org/parsing-mach-o-files/
/**
 * Reads one object of type T from obj_file, starting at the absolute byte offset `offset`.
 *
 * The bytes are copied verbatim; no byte-order conversion is performed here.
 *
 * @param obj_file  stream to read from (a null stream is tolerated)
 * @param offset    absolute position, in bytes, from the beginning of the file
 * @return the object read, or a value-initialized (all-zero) T if the stream is null,
 *         the seek fails, or fewer than sizeof(T) bytes are available
 */
template<typename T>
T load_bytes(FILE* obj_file, off_t offset) {
    static_assert(std::is_pod<T>::value, "Expected POD type");
    if(obj_file == nullptr) {
        return T{};
    }
    if(fseek(obj_file, static_cast<long>(offset), SEEK_SET) != 0) {
        return T{};
    }
    T object{};
    if(fread(&object, sizeof(T), 1, obj_file) != 1) {
        // A short read may have partially filled `object`; never hand back half-read data
        return T{};
    }
    return object;
}
// Tells whether `magic` is one of the two 64-bit Mach-O magic numbers
// (MH_MAGIC_64 or its byte-swapped counterpart MH_CIGAM_64).
static bool is_magic_64(uint32_t magic) {
    switch(magic) {
        case MH_MAGIC_64:
        case MH_CIGAM_64:
            return true;
        default:
            return false;
    }
}
// Tells whether `magic` is one of the byte-swapped ("CIGAM") magic numbers,
// i.e. MH_CIGAM, MH_CIGAM_64 or FAT_CIGAM.
static bool should_swap_bytes(uint32_t magic) {
    switch(magic) {
        case MH_CIGAM:
        case MH_CIGAM_64:
        case FAT_CIGAM:
            return true;
        default:
            return false;
    }
}
// CURRENT_CPU is the Mach-O CPU_TYPE_* constant for the architecture this code is being compiled for.
// It is chosen from the compiler's predefined architecture macros; compilation fails for any
// architecture that is not listed here.
#if defined(__aarch64__)
#define CURRENT_CPU CPU_TYPE_ARM64
#elif defined(__arm__)
#define CURRENT_CPU CPU_TYPE_ARM
#elif defined(__amd64__)
#define CURRENT_CPU CPU_TYPE_X86_64
#elif defined(__i386__)
#define CURRENT_CPU CPU_TYPE_I386
#else
#error "Unknown CPU architecture"
#endif
/**
 * Walks the load commands of a Mach-O image looking for the __TEXT segment.
 *
 * @param obj_file     open file containing the image
 * @param offset       absolute file offset of the first load command
 * @param should_swap  whether structures read from the file must be byte-swapped
 * @param ncmds        number of load commands to visit
 * @return the vmaddr of the first __TEXT segment found (LC_SEGMENT or LC_SEGMENT_64),
 *         or 0 if no such segment is found
 */
static uintptr_t get_text_vmaddr_from_segments(FILE* obj_file, off_t offset, bool should_swap, uint32_t ncmds) {
    off_t actual_offset = offset;
    for(uint32_t i = 0; i < ncmds; i++) {
        load_command cmd = load_bytes<load_command>(obj_file, actual_offset);
        if(should_swap) {
            swap_load_command(&cmd, NX_UnknownByteOrder);
        }
        if(cmd.cmdsize < sizeof(load_command)) {
            // A command can't be smaller than its own header. The data is truncated or corrupt and
            // there is no way to advance to the next command, so stop instead of re-reading this one.
            break;
        }
        if(cmd.cmd == LC_SEGMENT_64) {
            segment_command_64 segment = load_bytes<segment_command_64>(obj_file, actual_offset);
            if(should_swap) {
                swap_segment_command_64(&segment, NX_UnknownByteOrder);
            }
            // segname is a fixed-size char array that isn't necessarily NUL-terminated, so the
            // comparison has to be bounded by the array size
            if(strncmp(segment.segname, "__TEXT", sizeof(segment.segname)) == 0) {
                return segment.vmaddr;
            }
        } else if(cmd.cmd == LC_SEGMENT) {
            segment_command segment = load_bytes<segment_command>(obj_file, actual_offset);
            if(should_swap) {
                swap_segment_command(&segment, NX_UnknownByteOrder);
            }
            if(strncmp(segment.segname, "__TEXT", sizeof(segment.segname)) == 0) {
                return segment.vmaddr;
            }
        }
        actual_offset += cmd.cmdsize;
    }
    // somehow no __TEXT segment was found...
    return 0;
}
/**
 * Finds the vmaddr of the __TEXT segment of a single (non-fat) Mach-O image.
 *
 * @param obj_file     open file containing the image
 * @param offset       absolute file offset of the image's mach header (0 for a thin file, the
 *                     architecture's offset for an image inside a fat file)
 * @param is_64        whether the image uses the 64-bit header layout
 * @param should_swap  whether structures read from the file must be byte-swapped
 * @return the __TEXT vmaddr, or 0 if the image is for a CPU type other than CURRENT_CPU or no
 *         __TEXT segment is found
 */
static uintptr_t get_text_vmaddr_mach(FILE* obj_file, off_t offset, bool is_64, bool should_swap) {
    uint32_t ncmds;
    off_t load_commands_offset = offset;
    if(is_64) {
        mach_header_64 header = load_bytes<mach_header_64>(obj_file, offset);
        // Swap before looking at any field: cputype is only meaningful once the header is in
        // host byte order
        if(should_swap) {
            swap_mach_header_64(&header, NX_UnknownByteOrder);
        }
        if(header.cputype != CURRENT_CPU) {
            return 0;
        }
        ncmds = header.ncmds;
        // the load commands start right after the header
        load_commands_offset += sizeof(mach_header_64);
    } else {
        mach_header header = load_bytes<mach_header>(obj_file, offset);
        if(should_swap) {
            swap_mach_header(&header, NX_UnknownByteOrder);
        }
        if(header.cputype != CURRENT_CPU) {
            return 0;
        }
        ncmds = header.ncmds;
        load_commands_offset += sizeof(mach_header);
    }
    return get_text_vmaddr_from_segments(obj_file, load_commands_offset, should_swap, ncmds);
}
static uintptr_t get_text_vmaddr_fat(FILE* obj_file, bool should_swap) {
size_t header_size = sizeof(fat_header);
size_t arch_size = sizeof(fat_arch);
fat_header header = load_bytes<fat_header>(obj_file, 0);
if(should_swap) {
swap_fat_header(&header, NX_UnknownByteOrder);
}
off_t arch_offset = (off_t)header_size;
uintptr_t text_vmaddr = 0;
for(uint32_t i = 0; i < header.nfat_arch; i++) {
fat_arch arch = load_bytes<fat_arch>(obj_file, arch_offset);
if(should_swap) {
swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
}
off_t mach_header_offset = (off_t)arch.offset;
arch_offset += arch_size;
uint32_t magic = load_bytes<uint32_t>(obj_file, mach_header_offset);
text_vmaddr = get_text_vmaddr_mach(
obj_file,
mach_header_offset,
is_magic_64(magic),
should_swap_bytes(magic)
);
if(text_vmaddr != 0) {
return text_vmaddr;
}
}
// If this is reached... something went wrong. The cpu we're on wasn't found.
return text_vmaddr;
}
/**
 * Opens the Mach-O file at `path` and returns the vmaddr of its __TEXT segment.
 *
 * The leading magic number decides whether the file is treated as a fat file or as a single
 * image, and whether its structures need byte-swapping.
 *
 * @param path  path of the object file to inspect
 * @return the __TEXT vmaddr, or 0 if the file cannot be opened or no vmaddr could be determined
 */
static uintptr_t get_text_vmaddr(const char* path) {
    FILE* obj_file = fopen(path, "rb");
    if(obj_file == nullptr) {
        // Nothing to parse; passing a null stream on to the readers or to fclose would be an error
        return 0;
    }
    const uint32_t magic = load_bytes<uint32_t>(obj_file, 0);
    const bool is_64 = is_magic_64(magic);
    const bool should_swap = should_swap_bytes(magic);
    uintptr_t addr;
    if(magic == FAT_MAGIC || magic == FAT_CIGAM) {
        addr = get_text_vmaddr_fat(obj_file, should_swap);
    } else {
        addr = get_text_vmaddr_mach(obj_file, 0, is_64, should_swap);
    }
    fclose(obj_file);
    return addr;
}
#endif
#endif

View File

@ -18,6 +18,9 @@
// NOLINTNEXTLINE(misc-include-cleaner) // NOLINTNEXTLINE(misc-include-cleaner)
#include <sys/types.h> #include <sys/types.h>
#include <sys/wait.h> #include <sys/wait.h>
#if IS_APPLE
#include "../platform/cpptrace_macho.hpp"
#endif
#elif IS_WINDOWS #elif IS_WINDOWS
#include <windows.h> #include <windows.h>
#endif #endif
@ -61,8 +64,13 @@ namespace cpptrace {
if(pid == -1) { return false; } if(pid == -1) { return false; }
if(pid == 0) { // child if(pid == 0) { // child
close(STDOUT_FILENO); close(STDOUT_FILENO);
close(STDERR_FILENO); // atos --help writes to stderr
// TODO: path // TODO: path
execlp("addr2line", "addr2line", "--help", nullptr); #if !IS_APPLE
execlp("addr2line", "addr2line", "--help", nullptr);
#else
execlp("atos", "atos", "--help", nullptr);
#endif
_exit(magic); _exit(magic);
} }
int status; int status;
@ -99,7 +107,11 @@ namespace cpptrace {
close(input_pipe.write_end); close(input_pipe.write_end);
close(STDERR_FILENO); // TODO: Might be worth conditionally enabling or piping close(STDERR_FILENO); // TODO: Might be worth conditionally enabling or piping
// TODO: Prevent against path injection? // TODO: Prevent against path injection?
execlp("addr2line", "addr2line", "-e", executable.c_str(), "-f", "-C", "-p", nullptr); #if !IS_APPLE
execlp("addr2line", "addr2line", "-e", executable.c_str(), "-f", "-C", "-p", nullptr);
#else
execlp("atos", "atos", "-o", executable.c_str(), nullptr);
#endif
_exit(1); // TODO: Diagnostic? _exit(1); // TODO: Diagnostic?
} }
internal_verify(write(input_pipe.write_end, addresses.data(), addresses.size()) != -1); internal_verify(write(input_pipe.write_end, addresses.data(), addresses.size()) != -1);
@ -118,10 +130,19 @@ namespace cpptrace {
return output; return output;
} }
#if !IS_APPLE
uintptr_t get_module_image_base(const dlframe &entry) { uintptr_t get_module_image_base(const dlframe &entry) {
(void)entry; (void)entry;
return 0; return 0;
} }
#else
// Apple variant: returns whatever get_text_vmaddr reports for the object file at entry.obj_path.
uintptr_t get_module_image_base(const dlframe &entry) {
// We have to parse the Mach-O file to find the address of its __TEXT segment.
// I don't know how addresses are handled if there is more than one __TEXT load command. I'm assuming for
// now that there is only one, and only the first matching load command is used.
return get_text_vmaddr(entry.obj_path.c_str());
}
#endif
#elif IS_WINDOWS #elif IS_WINDOWS
// dladdr queries are needed to get pre-ASLR addresses and targets to run addr2line on // dladdr queries are needed to get pre-ASLR addresses and targets to run addr2line on
std::vector<dlframe> backtrace_frames(const std::vector<void*>& addrs) { std::vector<dlframe> backtrace_frames(const std::vector<void*>& addrs) {
@ -255,7 +276,8 @@ namespace cpptrace {
// NOLINTNEXTLINE(readability-convert-member-functions-to-static) // NOLINTNEXTLINE(readability-convert-member-functions-to-static)
void update_trace(const std::string& line, size_t entry_index, const target_vec& entries_vec) { void update_trace(const std::string& line, size_t entry_index, const target_vec& entries_vec) {
// Result will be of the form <identifier> " at " path:line #if !IS_APPLE
// Result will be of the form "<symbol> at path:line"
// The path may be ?? if addr2line cannot resolve, line may be ? // The path may be ?? if addr2line cannot resolve, line may be ?
// Edge cases: // Edge cases:
// ?? ??:0 // ?? ??:0
@ -285,6 +307,47 @@ namespace cpptrace {
if(!symbol.empty()) { if(!symbol.empty()) {
entries_vec[entry_index].second.get().symbol = symbol; entries_vec[entry_index].second.get().symbol = symbol;
} }
#else
// Result will be of the form "<symbol> (in <object name>) (file:line)"
// The symbol may just be the given address if atos can't resolve it
// Examples:
// trace() (in demo) (demo.cpp:8)
// 0x100003b70 (in demo)
// 0xffffffffffffffff
// foo (in bar) + 14
// I'm making some assumptions here. Support may need to be improved later. This is tricky output to
// parse.
const std::size_t in_location = line.find(" (in ");
if(in_location == std::string::npos) {
    // presumably the 0xffffffffffffffff case
    return;
}
// everything before " (in " is the symbol
entries_vec[entry_index].second.get().symbol = line.substr(0, in_location);
const std::size_t obj_end = line.find(")", in_location);
internal_verify(
    obj_end != std::string::npos,
    "Unexpected edge case while processing addr2line/atos output"
);
// Check for the ") (" separator before skipping past it: adding 3 to npos wraps around to a
// small index, so a combined "find(...) + 3 == npos" test can never detect a missing separator
const std::size_t separator = line.find(") (", obj_end);
if(separator == std::string::npos) {
    // presumably something like 0x100003b70 (in demo) or foo (in bar) + 14
    return;
}
const std::size_t filename_start = separator + 3;
const std::size_t filename_end = line.find(":", filename_start);
internal_verify(
    filename_end != std::string::npos,
    "Unexpected edge case while processing addr2line/atos output"
);
entries_vec[entry_index].second.get().filename = line.substr(filename_start, filename_end - filename_start);
const std::size_t line_start = filename_end + 1;
const std::size_t line_end = line.find(")", filename_end);
// the closing parenthesis of "(file:line)" is expected to be the last character of the output
internal_verify(
    line_end == line.size() - 1,
    "Unexpected edge case while processing addr2line/atos output"
);
entries_vec[entry_index].second.get().line = std::stoi(line.substr(line_start, line_end - line_start));
#endif
} }
// NOLINTNEXTLINE(readability-convert-member-functions-to-static) // NOLINTNEXTLINE(readability-convert-member-functions-to-static)