From baf785cc492d0999fd1ed5ec771e1806e8f0a090 Mon Sep 17 00:00:00 2001
From: Jeremy Rifkin <51220084+jeremy-rifkin@users.noreply.github.com>
Date: Sun, 23 Jul 2023 09:27:01 -0400
Subject: [PATCH] Macos addr2line / atos support (#14)

---
Review notes (not part of the commit message):
 * The line structure of this patch was restored from a whitespace-collapsed copy. Indentation of
   context lines, `#include <...>` targets, template arguments and `<...>` placeholders in comments
   are reconstructed from usage; verify them against the tree before applying.
 * Added lines were amended during review, so the diffstat and hunk counts are updated while the
   blob ids on the `index` lines still refer to the contents as originally submitted.

 README.md                              |   2 +-
 ci/test-all-configs.py                 |   2 +-
 src/platform/cpptrace_macho.hpp        | 177 +++++++++++++++++++++++++
 src/symbols/symbols_with_addr2line.cpp |  71 ++++++++++-
 4 files changed, 247 insertions(+), 5 deletions(-)
 create mode 100644 src/platform/cpptrace_macho.hpp

diff --git a/README.md b/README.md
index 9147db2..512e6ab 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,7 @@ can hold addresses for 100 frames (beyond the `skip` frames). This is configurab
 | Library | CMake config | Platforms | Info |
 | ------------ | ---------------------------------------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | libbacktrace | `CPPTRACE_GET_SYMBOLS_WITH_LIBBACKTRACE` | linux, macos*, mingw* | Libbacktrace is already installed on most systems or available through the compiler directly. For clang you must specify the absolute path to `backtrace.h` using `CPPTRACE_BACKTRACE_PATH`. |
-| addr2line | `CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE` | linux, mingw | Symbols are resolved by invoking `addr2line` via `fork()` (on linux/unix, and `popen` under mingw). |
+| addr2line | `CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE` | linux, macos, mingw | Symbols are resolved by invoking `addr2line` (or `atos` on mac) via `fork()` (on linux/unix, and `popen` under mingw). |
 | dbghelp | `CPPTRACE_GET_SYMBOLS_WITH_DBGHELP` | windows | Dbghelp.h allows access to symbols via debug info. |
 | libdl | `CPPTRACE_GET_SYMBOLS_WITH_LIBDL` | linux, macos | Libdl uses dynamic export information. Compiling with `-rdynamic` is needed for symbol information to be retrievable. Line numbers won't be retrievable. |
 | N/A | `CPPTRACE_GET_SYMBOLS_WITH_NOTHING` | all | No attempt is made to resolve symbols. |
diff --git a/ci/test-all-configs.py b/ci/test-all-configs.py
index b2d9620..2276f5e 100644
--- a/ci/test-all-configs.py
+++ b/ci/test-all-configs.py
@@ -336,7 +336,7 @@ def main():
             "symbols": [
                 #"CPPTRACE_GET_SYMBOLS_WITH_LIBBACKTRACE",
                 "CPPTRACE_GET_SYMBOLS_WITH_LIBDL",
-                #"CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE",
+                "CPPTRACE_GET_SYMBOLS_WITH_ADDR2LINE",
                 #"CPPTRACE_GET_SYMBOLS_WITH_NOTHING",
             ],
             "demangle": [
diff --git a/src/platform/cpptrace_macho.hpp b/src/platform/cpptrace_macho.hpp
new file mode 100644
index 0000000..86bfd0c
--- /dev/null
+++ b/src/platform/cpptrace_macho.hpp
@@ -0,0 +1,177 @@
+#ifndef CPPTRACE_MACHO_HPP
+#define CPPTRACE_MACHO_HPP
+
+#if IS_APPLE
+#include <cstdio>
+#include <cstring>
+#include <type_traits>
+
+#include <mach-o/loader.h>
+#include <mach-o/swap.h>
+#include <mach-o/fat.h>
+
+// Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
+// and https://lowlevelbits.org/parsing-mach-o-files/
+
+// Reads one object of type T from obj_file at absolute byte offset `offset`.
+// A failed seek or short read yields an all-zero object instead of indeterminate bytes.
+template<typename T>
+T load_bytes(FILE* obj_file, off_t offset) {
+    static_assert(std::is_pod<T>::value, "Expected POD type");
+    T object{};
+    if(fseek(obj_file, offset, SEEK_SET) != 0 || fread(&object, sizeof(T), 1, obj_file) != 1) {
+        // discard whatever a partial read may have left behind
+        object = T{};
+    }
+    return object;
+}
+
+// Whether magic is a 64-bit Mach-O magic number, in either byte order.
+static bool is_magic_64(uint32_t magic) {
+    return magic == MH_MAGIC_64 || magic == MH_CIGAM_64;
+}
+
+// Whether magic is one of the byte-swapped (CIGAM) magic numbers.
+static bool should_swap_bytes(uint32_t magic) {
+    return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM;
+}
+
+#if defined(__aarch64__)
+ #define CURRENT_CPU CPU_TYPE_ARM64
+#elif defined(__arm__)
+ #define CURRENT_CPU CPU_TYPE_ARM
+#elif defined(__amd64__)
+ #define CURRENT_CPU CPU_TYPE_X86_64
+#elif defined(__i386__)
+ #define CURRENT_CPU CPU_TYPE_I386
+#else
+ #error "Unknown CPU architecture"
+#endif
+
+// Scans ncmds load commands starting at offset and returns the vmaddr of the first segment named __TEXT,
+// or 0 when no such segment is present.
+static uintptr_t get_text_vmaddr_from_segments(FILE* obj_file, off_t offset, bool should_swap, uint32_t ncmds) {
+    off_t actual_offset = offset;
+    for(uint32_t i = 0; i < ncmds; i++) {
+        load_command cmd = load_bytes<load_command>(obj_file, actual_offset);
+        if(should_swap) {
+            swap_load_command(&cmd, NX_UnknownByteOrder);
+        }
+        if(cmd.cmd == LC_SEGMENT_64) {
+            segment_command_64 segment = load_bytes<segment_command_64>(obj_file, actual_offset);
+            if(should_swap) {
+                swap_segment_command_64(&segment, NX_UnknownByteOrder);
+            }
+            //printf("segname(64): %s\n", segment.segname);
+            //printf(" %d\n", segment.nsects);
+            //printf(" %p\n", segment.vmaddr);
+            //printf(" %p\n", segment.vmsize);
+            if(strcmp(segment.segname, "__TEXT") == 0) {
+                return segment.vmaddr;
+            }
+        } else if(cmd.cmd == LC_SEGMENT) {
+            segment_command segment = load_bytes<segment_command>(obj_file, actual_offset);
+            if(should_swap) {
+                swap_segment_command(&segment, NX_UnknownByteOrder);
+            }
+            //printf("segname: %s\n", segment.segname);
+            if(strcmp(segment.segname, "__TEXT") == 0) {
+                return segment.vmaddr;
+            }
+        }
+        actual_offset += cmd.cmdsize;
+    }
+    // somehow no __TEXT segment was found...
+    return 0;
+}
+
+// Reads the Mach-O header at offset and, if it is for the CPU this code was built for, returns the
+// __TEXT vmaddr found in its load commands. Returns 0 for any other CPU type.
+static uintptr_t get_text_vmaddr_mach(FILE* obj_file, off_t offset, bool is_64, bool should_swap) {
+    uint32_t ncmds;
+    off_t load_commands_offset = offset;
+    if(is_64) {
+        size_t header_size = sizeof(mach_header_64);
+        mach_header_64 header = load_bytes<mach_header_64>(obj_file, offset);
+        // Swap before inspecting any field: cputype is only meaningful in host byte order
+        if(should_swap) {
+            swap_mach_header_64(&header, NX_UnknownByteOrder);
+        }
+        if(header.cputype != CURRENT_CPU) {
+            return 0;
+        }
+        ncmds = header.ncmds;
+        load_commands_offset += header_size;
+    } else {
+        size_t header_size = sizeof(mach_header);
+        mach_header header = load_bytes<mach_header>(obj_file, offset);
+        // Swap before inspecting any field: cputype is only meaningful in host byte order
+        if(should_swap) {
+            swap_mach_header(&header, NX_UnknownByteOrder);
+        }
+        if(header.cputype != CURRENT_CPU) {
+            return 0;
+        }
+        ncmds = header.ncmds;
+        load_commands_offset += header_size;
+    }
+    return get_text_vmaddr_from_segments(obj_file, load_commands_offset, should_swap, ncmds);
+}
+
+// Walks the fat_arch entries of a universal binary and returns the first non-zero result of
+// get_text_vmaddr_mach, or 0 if every slice yields 0.
+static uintptr_t get_text_vmaddr_fat(FILE* obj_file, bool should_swap) {
+    size_t header_size = sizeof(fat_header);
+    size_t arch_size = sizeof(fat_arch);
+    fat_header header = load_bytes<fat_header>(obj_file, 0);
+    if(should_swap) {
+        swap_fat_header(&header, NX_UnknownByteOrder);
+    }
+    off_t arch_offset = (off_t)header_size;
+    uintptr_t text_vmaddr = 0;
+    for(uint32_t i = 0; i < header.nfat_arch; i++) {
+        fat_arch arch = load_bytes<fat_arch>(obj_file, arch_offset);
+        if(should_swap) {
+            swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
+        }
+        off_t mach_header_offset = (off_t)arch.offset;
+        arch_offset += arch_size;
+        uint32_t magic = load_bytes<uint32_t>(obj_file, mach_header_offset);
+        text_vmaddr = get_text_vmaddr_mach(
+            obj_file,
+            mach_header_offset,
+            is_magic_64(magic),
+            should_swap_bytes(magic)
+        );
+        if(text_vmaddr != 0) {
+            return text_vmaddr;
+        }
+    }
+    // If this is reached... something went wrong. The cpu we're on wasn't found.
+    return text_vmaddr;
+}
+
+// Returns the __TEXT vmaddr of the Mach-O (thin or fat) file at path, or 0 if the file cannot be
+// opened or no matching __TEXT segment is found.
+static uintptr_t get_text_vmaddr(const char* path) {
+    FILE* obj_file = fopen(path, "rb");
+    if(obj_file == nullptr) {
+        // Nothing to parse; report 0 like the other not-found paths
+        return 0;
+    }
+    uint32_t magic = load_bytes<uint32_t>(obj_file, 0);
+    bool is_64 = is_magic_64(magic);
+    bool should_swap = should_swap_bytes(magic);
+    uintptr_t addr;
+    if(magic == FAT_MAGIC || magic == FAT_CIGAM) {
+        addr = get_text_vmaddr_fat(obj_file, should_swap);
+    } else {
+        addr = get_text_vmaddr_mach(obj_file, 0, is_64, should_swap);
+    }
+    fclose(obj_file);
+    return addr;
+}
+
+#endif
+
+#endif
diff --git a/src/symbols/symbols_with_addr2line.cpp b/src/symbols/symbols_with_addr2line.cpp
index e0eac4f..27596f2 100644
--- a/src/symbols/symbols_with_addr2line.cpp
+++ b/src/symbols/symbols_with_addr2line.cpp
@@ -18,6 +18,9 @@
  // NOLINTNEXTLINE(misc-include-cleaner)
  #include <sys/types.h>
  #include <sys/wait.h>
+ #if IS_APPLE
+  #include "../platform/cpptrace_macho.hpp"
+ #endif
 #elif IS_WINDOWS
  #include <windows.h>
 #endif
@@ -61,8 +64,13 @@ namespace cpptrace {
             if(pid == -1) { return false; }
             if(pid == 0) { // child
                 close(STDOUT_FILENO);
+                close(STDERR_FILENO); // atos --help writes to stderr
                 // TODO: path
-                execlp("addr2line", "addr2line", "--help", nullptr);
+                #if !IS_APPLE
+                execlp("addr2line", "addr2line", "--help", nullptr);
+                #else
+                execlp("atos", "atos", "--help", nullptr);
+                #endif
                 _exit(magic);
             }
             int status;
@@ -99,7 +107,11 @@ namespace cpptrace {
                 close(input_pipe.write_end);
                 close(STDERR_FILENO); // TODO: Might be worth conditionally enabling or piping
                 // TODO: Prevent against path injection?
-                execlp("addr2line", "addr2line", "-e", executable.c_str(), "-f", "-C", "-p", nullptr);
+                #if !IS_APPLE
+                execlp("addr2line", "addr2line", "-e", executable.c_str(), "-f", "-C", "-p", nullptr);
+                #else
+                execlp("atos", "atos", "-o", executable.c_str(), nullptr);
+                #endif
                 _exit(1); // TODO: Diagnostic?
             }
             internal_verify(write(input_pipe.write_end, addresses.data(), addresses.size()) != -1);
@@ -118,10 +130,19 @@ namespace cpptrace {
             return output;
         }
 
+        #if !IS_APPLE
         uintptr_t get_module_image_base(const dlframe &entry) {
             (void)entry;
             return 0;
         }
+        #else
+        uintptr_t get_module_image_base(const dlframe &entry) {
+            // We have to parse the Mach-O to find the offset of the text section.....
+            // I don't know how addresses are handled if there is more than one __TEXT load command. I'm assuming for
+            // now that there is only one, and I'm using only the first section entry within that load command.
+            return get_text_vmaddr(entry.obj_path.c_str());
+        }
+        #endif
         #elif IS_WINDOWS
         // aladdr queries are needed to get pre-ASLR addresses and targets to run addr2line on
         std::vector<dlframe> backtrace_frames(const std::vector<void*>& addrs) {
@@ -255,7 +276,8 @@ namespace cpptrace {
 
         // NOLINTNEXTLINE(readability-convert-member-functions-to-static)
         void update_trace(const std::string& line, size_t entry_index, const target_vec& entries_vec) {
-            // Result will be of the form "<symbol> at " path:line
+            #if !IS_APPLE
+            // Result will be of the form "<symbol> at path:line"
             // The path may be ?? if addr2line cannot resolve, line may be ?
             // Edge cases:
             // ?? ??:0
@@ -285,6 +307,49 @@ namespace cpptrace {
             if(!symbol.empty()) {
                 entries_vec[entry_index].second.get().symbol = symbol;
             }
+            #else
+            // Result will be of the form "<symbol> (in <object>) (file:line)"
+            // The symbol may just be the given address if atos can't resolve it
+            // Examples:
+            //   trace() (in demo) (demo.cpp:8)
+            //   0x100003b70 (in demo)
+            //   0xffffffffffffffff
+            //   foo (in bar) + 14
+            // I'm making some assumptions here. Support may need to be improved later. This is tricky output to
+            // parse.
+            const std::size_t in_location = line.find(" (in ");
+            if(in_location == std::string::npos) {
+                // presumably the 0xffffffffffffffff case
+                return;
+            }
+            const std::size_t symbol_end = in_location;
+            entries_vec[entry_index].second.get().symbol = line.substr(0, symbol_end);
+            const std::size_t obj_end = line.find(")", in_location);
+            internal_verify(
+                obj_end != std::string::npos,
+                "Unexpected edge case while processing addr2line/atos output"
+            );
+            const std::size_t filename_separator = line.find(") (", obj_end);
+            if(filename_separator == std::string::npos) {
+                // presumably something like 0x100003b70 (in demo) or foo (in bar) + 14
+                return;
+            }
+            // Only step past the ") (" once it is known to exist; npos + 3 would wrap around to 2
+            const std::size_t filename_start = filename_separator + 3;
+            const std::size_t filename_end = line.find(":", filename_start);
+            internal_verify(
+                filename_end != std::string::npos,
+                "Unexpected edge case while processing addr2line/atos output"
+            );
+            entries_vec[entry_index].second.get().filename = line.substr(filename_start, filename_end - filename_start);
+            const std::size_t line_start = filename_end + 1;
+            const std::size_t line_end = line.find(")", filename_end);
+            internal_verify(
+                line_end == line.size() - 1,
+                "Unexpected edge case while processing addr2line/atos output"
+            );
+            entries_vec[entry_index].second.get().line = std::stoi(line.substr(line_start, line_end - line_start));
+            #endif
         }
 
         // NOLINTNEXTLINE(readability-convert-member-functions-to-static)