diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 04031bc..399e889 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,8 +23,8 @@ jobs: mkdir libdwarf cd libdwarf git init - git remote add origin https://github.com/davea42/libdwarf-code.git - git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 + git remote add origin https://github.com/flagarde/libdwarf-code.git + git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032 git checkout FETCH_HEAD mkdir build cd build @@ -52,8 +52,8 @@ jobs: mkdir libdwarf cd libdwarf git init - git remote add origin https://github.com/davea42/libdwarf-code.git - git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 + git remote add origin https://github.com/flagarde/libdwarf-code.git + git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032 git checkout FETCH_HEAD mkdir build cd build diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 624ec53..c7cb37d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,8 +26,8 @@ jobs: mkdir libdwarf cd libdwarf git init - git remote add origin https://github.com/davea42/libdwarf-code.git - git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 + git remote add origin https://github.com/flagarde/libdwarf-code.git + git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032 git checkout FETCH_HEAD mkdir build cd build @@ -53,8 +53,8 @@ jobs: mkdir libdwarf cd libdwarf git init - git remote add origin https://github.com/davea42/libdwarf-code.git - git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 + git remote add origin https://github.com/flagarde/libdwarf-code.git + git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032 git checkout FETCH_HEAD mkdir build cd build @@ -68,6 +68,17 @@ jobs: - name: build and test run: | python3 ci/test-all-configs.py --${{matrix.compiler}} + # - name: bundle artifacts + # if: always() + # run: | + # tar czfH bundle.tar.gz build + # - name: upload artifacts + # uses: actions/upload-artifact@v4 + # if: always() + # with: + # name: build-macos-${{matrix.compiler}}${{matrix.shared}} + # path: bundle.tar.gz + # retention-days: 2 test-windows: runs-on: windows-2022 strategy: diff --git a/CMakeLists.txt b/CMakeLists.txt index c42206f..eaa8341 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,12 +27,12 @@ if(PROJECT_IS_TOP_LEVEL) if(CMAKE_GENERATOR STREQUAL "Ninja") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always") - elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics") endif() if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always") - elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcolor-diagnostics") endif() endif() @@ -358,10 +358,13 @@ if(CPPTRACE_GET_SYMBOLS_WITH_LIBDWARF) # GIT_REPOSITORY https://github.com/davea42/libdwarf-code.git # GIT_TAG 6216e185863f41d6f19ab850caabfff7326020d7 # v0.8.0 # GIT_TAG 8b0bd09d8c77d45a68cb1bb00a54186a92b683d9 # v0.9.0 + # GIT_TAG c0cfba34ec80996426b5be2523f6447a2c9b7b39 # v0.9.0 + mach-o changes # Using a lightweight mirror that's optimized for clone + configure speed - GIT_REPOSITORY https://github.com/jeremy-rifkin/libdwarf-lite.git - GIT_TAG c78e984f3abbd20f6e01d6f51819e826b1691f65 # v0.8.0 + # GIT_REPOSITORY https://github.com/jeremy-rifkin/libdwarf-lite.git + # GIT_TAG c78e984f3abbd20f6e01d6f51819e826b1691f65 # v0.8.0 # GIT_TAG 71090c680b4c943448ba87a0f1f864f174e4edda # v0.9.0 + GIT_REPOSITORY https://github.com/flagarde/libdwarf-code.git + GIT_TAG d1a559b7af0840194dfa51f7e3013e0f80614032 GIT_SHALLOW 1 ) FetchContent_MakeAvailable(libdwarf) @@ -533,20 +536,13 @@ macro(add_test_dependencies exec_name) if(CPPTRACE_BUILD_TEST_RDYNAMIC) set_property(TARGET ${exec_name} PROPERTY ENABLE_EXPORTS ON) endif() - if(APPLE) # TODO: Temporary - add_custom_command( - TARGET ${exec_name} - POST_BUILD - COMMAND dsymutil $ - ) - endif() endmacro() if(CPPTRACE_BUILD_TESTING) add_executable(test test/test.cpp) add_executable(demo test/demo.cpp) add_executable(c_demo test/ctrace_demo.cpp) - + add_test_dependencies(test) add_test_dependencies(demo) add_test_dependencies(c_demo) diff --git a/README.md b/README.md index 800469a..d454897 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ endif() Be sure to configure with `-DCMAKE_BUILD_TYPE=Debug` or `-DDCMAKE_BUILD_TYPE=RelWithDebInfo` for symbols and line information. -On macos a little extra work to generate a .dSYM file is required, see [Platform Logistics](#platform-logistics) below. +On macOS it is recommended to generate a .dSYM file, see [Platform Logistics](#platform-logistics) below. For other ways to use the library, such as through package managers, a system-wide installation, or on a platform without internet access see [Usage](#usage) below. @@ -140,9 +140,6 @@ method to get lightweight raw traces, which are just vectors of program counters **Note:** Debug info (`-g`/`/Z7`/`/Zi`/`/DEBUG`) is generally required for good trace information. -**Note:** Currently on Mac .dSYM files are required, which can be generated with `dsymutil yourbinary`. A cmake snippet -for generating these is provided in [Platform Logistics](#platform-logistics) below. - All functions are thread-safe unless otherwise noted. ### Stack Traces @@ -506,7 +503,7 @@ namespace cpptrace { | DWARF in separate binary (binary gnu debug link) | ️️✔️ | | DWARF in separate binary (split dwarf) | ✔️ | | DWARF in dSYM | ✔️ | -| DWARF in via Mach-O debug map | Soon | +| DWARF in via Mach-O debug map | ✔️ | | Windows debug symbols in PDB | ✔️ | DWARF5 added DWARF package files. As far as I can tell no compiler implements these yet. @@ -719,7 +716,7 @@ if(WIN32) endif() ``` -Generating a .dSYM file on macos: +On macOS it's recommended to generate a dSYM file containing debug information for your program: In xcode cmake this can be done with @@ -872,7 +869,6 @@ and time-memory tradeoffs. If you find the current implementation is either slow to explore some of these options. A couple things I'd like to improve in the future: -- On MacOS .dSYM files are required - On Windows when collecting symbols with dbghelp (msvc/clang) parameter types are almost perfect but due to limitations in dbghelp the library cannot accurately show const and volatile qualifiers or rvalue references (these appear as pointers). diff --git a/ci/test-all-configs.py b/ci/test-all-configs.py index abdd7c0..76a5bca 100644 --- a/ci/test-all-configs.py +++ b/ci/test-all-configs.py @@ -30,7 +30,7 @@ def similarity(name: str, target: List[str]) -> int: return -1 return c -def output_matches(output: str, params: Tuple[str]): +def output_matches(raw_output: str, params: Tuple[str]): target = [] if params[0].startswith("gcc") or params[0].startswith("g++"): @@ -72,31 +72,41 @@ def output_matches(output: str, params: Tuple[str]): print(f"Reading from {file}") with open(os.path.join(expected_dir, file), "r") as f: - expected = f.read() + raw_expected = f.read() - if output.strip() == "": + if raw_output.strip() == "": print(f"Error: No output from test") return False - expected = [line.strip().split("||") for line in expected.split("\n")] - output = [line.strip().split("||") for line in output.split("\n")] + expected = [line.strip().split("||") for line in raw_expected.split("\n")] + output = [line.strip().split("||") for line in raw_output.split("\n")] max_line_diff = 0 errored = False - for i, ((output_file, output_line, output_symbol), (expected_file, expected_line, expected_symbol)) in enumerate(zip(output, expected)): - if output_file != expected_file: - print(f"Error: File name mismatch on line {i + 1}, found \"{output_file}\" expected \"{expected_file}\"") - errored = True - if abs(int(output_line) - int(expected_line)) > max_line_diff: - print(f"Error: File line mismatch on line {i + 1}, found {output_line} expected {expected_line}") - errored = True - if output_symbol != expected_symbol: - print(f"Error: File symbol mismatch on line {i + 1}, found \"{output_symbol}\" expected \"{expected_symbol}\"") - errored = True - if expected_symbol == "main" or expected_symbol == "main()": - break + try: + for i, ((output_file, output_line, output_symbol), (expected_file, expected_line, expected_symbol)) in enumerate(zip(output, expected)): + if output_file != expected_file: + print(f"Error: File name mismatch on line {i + 1}, found \"{output_file}\" expected \"{expected_file}\"") + errored = True + if abs(int(output_line) - int(expected_line)) > max_line_diff: + print(f"Error: File line mismatch on line {i + 1}, found {output_line} expected {expected_line}") + errored = True + if output_symbol != expected_symbol: + print(f"Error: File symbol mismatch on line {i + 1}, found \"{output_symbol}\" expected \"{expected_symbol}\"") + errored = True + if expected_symbol == "main" or expected_symbol == "main()": + break + except ValueError: + print("ValueError during output checking") + errored = True + + if errored: + print("Output:") + print(raw_output) + print("Expected:") + print(raw_expected) return not errored @@ -126,7 +136,7 @@ def run_test(test_binary, params: Tuple[str]): print(Style.RESET_ALL, end="") # makefile in parallel sometimes messes up colors if test.returncode != 0: - print("[🔴 Test command failed]") + print(f"[🔴 Test command failed with code {test.returncode}]") print("stderr:") print(test_stderr.decode("utf-8"), end="") print("stdout:") diff --git a/src/binary/mach-o.hpp b/src/binary/mach-o.hpp index 23153f5..02fd776 100644 --- a/src/binary/mach-o.hpp +++ b/src/binary/mach-o.hpp @@ -12,7 +12,13 @@ #include #include +#include #include +#include +#include + +#include +#include #include #include @@ -23,7 +29,7 @@ namespace cpptrace { namespace detail { - static bool is_mach_o(std::uint32_t magic) { + inline bool is_mach_o(std::uint32_t magic) { switch(magic) { case FAT_MAGIC: case FAT_CIGAM: @@ -37,36 +43,57 @@ namespace detail { } } - static bool is_fat_magic(std::uint32_t magic) { + inline bool file_is_mach_o(const std::string& object_path) noexcept { + try { + FILE* file = std::fopen(object_path.c_str(), "rb"); + if(file == nullptr) { + return false; + } + auto magic = load_bytes(file, 0); + return is_mach_o(magic); + } catch(...) { + return false; + } + } + + inline bool is_fat_magic(std::uint32_t magic) { return magic == FAT_MAGIC || magic == FAT_CIGAM; } // Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c // and https://lowlevelbits.org/parsing-mach-o-files/ - static bool is_magic_64(std::uint32_t magic) { + inline bool is_magic_64(std::uint32_t magic) { return magic == MH_MAGIC_64 || magic == MH_CIGAM_64; } - static bool should_swap_bytes(std::uint32_t magic) { + inline bool should_swap_bytes(std::uint32_t magic) { return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM; } - static void swap_mach_header(mach_header_64& header) { + inline void swap_mach_header(mach_header_64& header) { swap_mach_header_64(&header, NX_UnknownByteOrder); } - static void swap_mach_header(mach_header& header) { + inline void swap_mach_header(mach_header& header) { swap_mach_header(&header, NX_UnknownByteOrder); } - static void swap_segment_command(segment_command_64& segment) { + inline void swap_segment_command(segment_command_64& segment) { swap_segment_command_64(&segment, NX_UnknownByteOrder); } - static void swap_segment_command(segment_command& segment) { + inline void swap_segment_command(segment_command& segment) { swap_segment_command(&segment, NX_UnknownByteOrder); } + inline void swap_nlist(struct nlist& entry) { + swap_nlist(&entry, 1, NX_UnknownByteOrder); + } + + inline void swap_nlist(struct nlist_64& entry) { + swap_nlist_64(&entry, 1, NX_UnknownByteOrder); + } + #ifdef __LP64__ #define LP(x) x##_64 #else @@ -89,12 +116,28 @@ namespace detail { std::uint32_t n_load_commands; std::uint32_t sizeof_load_commands; std::uint32_t flags; + std::size_t bits = 0; // 32 or 64 once load_mach is called std::size_t load_base = 0; std::size_t fat_index = std::numeric_limits::max(); std::vector load_commands; + struct symtab_info_data { + symtab_command symtab; + std::unique_ptr stringtab; + const char* get_string(std::size_t index) const { + if(stringtab && index < symtab.strsize) { + return stringtab.get() + index; + } else { + throw std::runtime_error("can't retrieve symbol from symtab"); + } + } + }; + + bool tried_to_load_symtab = false; + optional symtab_info; + public: mach_o(const std::string& object_path) : object_path(object_path) { file = std::fopen(object_path.c_str(), "rb"); @@ -117,7 +160,7 @@ namespace detail { ~mach_o() { if(file) { - fclose(file); + std::fclose(file); } } @@ -163,12 +206,195 @@ namespace detail { } } + optional& get_symtab_info() { + if(!symtab_info.has_value() && !tried_to_load_symtab) { + // don't try to load the symtab again if for some reason loading here fails + tried_to_load_symtab = true; + for(const auto& command : load_commands) { + if(command.cmd == LC_SYMTAB) { + symtab_info_data info; + info.symtab = load_symbol_table_command(command.file_offset); + info.stringtab = load_string_table(info.symtab.stroff, info.symtab.strsize); + symtab_info = std::move(info); + break; + } + } + } + return symtab_info; + } + + void print_symbol_table_entry( + const nlist_64& entry, + const std::unique_ptr& stringtab, + std::size_t stringsize, + std::size_t j + ) const { + const char* type = ""; + if(entry.n_type & N_STAB) { + switch(entry.n_type) { + case N_SO: type = "N_SO"; break; + case N_OSO: type = "N_OSO"; break; + case N_BNSYM: type = "N_BNSYM"; break; + case N_ENSYM: type = "N_ENSYM"; break; + case N_FUN: type = "N_FUN"; break; + } + } else if((entry.n_type & N_TYPE) == N_SECT) { + type = "N_SECT"; + } + fprintf( + stderr, + "%5llu %8llx %2llx %7s %2llu %4llx %16llx %s\n", + to_ull(j), + to_ull(entry.n_un.n_strx), + to_ull(entry.n_type), + type, + to_ull(entry.n_sect), + to_ull(entry.n_desc), + to_ull(entry.n_value), + stringtab == nullptr + ? "Stringtab error" + : entry.n_un.n_strx < stringsize + ? stringtab.get() + entry.n_un.n_strx + : "String index out of bounds" + ); + } + + void print_symbol_table() { + int i = 0; + for(const auto& command : load_commands) { + if(command.cmd == LC_SYMTAB) { + auto symtab = load_symbol_table_command(command.file_offset); + fprintf(stderr, "Load command %d\n", i); + fprintf(stderr, " cmd %llu\n", to_ull(symtab.cmd)); + fprintf(stderr, " cmdsize %llu\n", to_ull(symtab.cmdsize)); + fprintf(stderr, " symoff 0x%llu\n", to_ull(symtab.symoff)); + fprintf(stderr, " nsyms %llu\n", to_ull(symtab.nsyms)); + fprintf(stderr, " stroff 0x%llu\n", to_ull(symtab.stroff)); + fprintf(stderr, " strsize %llu\n", to_ull(symtab.strsize)); + auto stringtab = load_string_table(symtab.stroff, symtab.strsize); + for(std::size_t j = 0; j < symtab.nsyms; j++) { + nlist_64 entry = bits == 32 + ? load_symtab_entry<32>(symtab.symoff, j) + : load_symtab_entry<64>(symtab.symoff, j); + print_symbol_table_entry(entry, stringtab, symtab.strsize, j); + } + } + i++; + } + } + + struct debug_map_entry { + uint64_t source_address; + uint64_t size; + std::string name; + }; + + struct symbol_entry { + uint64_t address; + std::string name; + }; + + // map from object file to a vector of symbols to resolve + using debug_map = std::unordered_map>; + + // produce information similar to dsymutil -dump-debug-map + debug_map get_debug_map() { + // we have a bunch of symbols in our binary we need to pair up with symbols from various .o files + // first collect symbols and the objects they come from + debug_map debug_map; + const auto& symtab_info = get_symtab_info().unwrap(); + const auto& symtab = symtab_info.symtab; + // TODO: Take timestamp into account? + std::string current_module; + optional current_function; + for(std::size_t j = 0; j < symtab.nsyms; j++) { + nlist_64 entry = bits == 32 + ? load_symtab_entry<32>(symtab.symoff, j) + : load_symtab_entry<64>(symtab.symoff, j); + // entry.n_type & N_STAB indicates symbolic debug info + if(!(entry.n_type & N_STAB)) { + continue; + } + switch(entry.n_type) { + case N_SO: + // pass - these encode path and filename for the module, if applicable + break; + case N_OSO: + // sets the module + current_module = symtab_info.get_string(entry.n_un.n_strx); + break; + case N_BNSYM: break; // pass + case N_ENSYM: break; // pass + case N_FUN: + { + const char* str = symtab_info.get_string(entry.n_un.n_strx); + if(str[0] == 0) { + // end of function scope + if(!current_function) { /**/ } + current_function.unwrap().size = entry.n_value; + debug_map[current_module].push_back(std::move(current_function).unwrap()); + } else { + current_function = debug_map_entry{}; + current_function.unwrap().source_address = entry.n_value; + current_function.unwrap().name = str; + } + } + break; + } + } + return debug_map; + } + + std::vector symbol_table() { + // we have a bunch of symbols in our binary we need to pair up with symbols from various .o files + // first collect symbols and the objects they come from + std::vector symbols; + const auto& symtab_info = get_symtab_info().unwrap(); + const auto& symtab = symtab_info.symtab; + // TODO: Take timestamp into account? + for(std::size_t j = 0; j < symtab.nsyms; j++) { + nlist_64 entry = bits == 32 + ? load_symtab_entry<32>(symtab.symoff, j) + : load_symtab_entry<64>(symtab.symoff, j); + if(entry.n_type & N_STAB) { + continue; + } + if((entry.n_type & N_TYPE) == N_SECT) { + symbols.push_back({ + entry.n_value, + symtab_info.get_string(entry.n_un.n_strx) + }); + } + } + return symbols; + } + + // produce information similar to dsymutil -dump-debug-map + static void print_debug_map(const debug_map& debug_map) { + for(const auto& entry : debug_map) { + std::cout< void load_mach( bool allow_arch_mismatch ) { static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument"); + bits = Bits; using Mach_Header = typename std::conditional::type; std::size_t header_size = sizeof(Mach_Header); Mach_Header header = load_bytes(file, load_base); @@ -268,6 +494,41 @@ namespace detail { return common; } + symtab_command load_symbol_table_command(std::uint32_t offset) const { + symtab_command symtab = load_bytes(file, offset); + ASSERT(symtab.cmd == LC_SYMTAB); + if(should_swap()) { + swap_symtab_command(&symtab, NX_UnknownByteOrder); + } + return symtab; + } + + template + nlist_64 load_symtab_entry(std::uint32_t symbol_base, std::size_t index) const { + using Nlist = typename std::conditional::type; + uint32_t offset = load_base + symbol_base + index * sizeof(Nlist); + Nlist entry = load_bytes(file, offset); + if(should_swap()) { + swap_nlist(entry); + } + // fields match just u64 instead of u32 + nlist_64 common; + common.n_un.n_strx = entry.n_un.n_strx; + common.n_type = entry.n_type; + common.n_sect = entry.n_sect; + common.n_desc = entry.n_desc; + common.n_value = entry.n_value; + return common; + } + + std::unique_ptr load_string_table(std::uint32_t offset, std::uint32_t byte_count) const { + std::unique_ptr buffer(new char[byte_count + 1]); + VERIFY(std::fseek(file, load_base + offset, SEEK_SET) == 0, "fseek error"); + VERIFY(std::fread(buffer.get(), sizeof(char), byte_count, file) == byte_count, "fread error"); + buffer[byte_count] = 0; // just out of an abundance of caution + return buffer; + } + bool should_swap() const { return should_swap_bytes(magic); } diff --git a/src/symbols/symbols_with_libdwarf.cpp b/src/symbols/symbols_with_libdwarf.cpp index a260cc4..5e8e9ad 100644 --- a/src/symbols/symbols_with_libdwarf.cpp +++ b/src/symbols/symbols_with_libdwarf.cpp @@ -20,6 +20,9 @@ #include #include +#include +#include + // It's been tricky to piece together how to handle all this dwarf stuff. Some resources I've used are // https://www.prevanders.net/libdwarf.pdf // https://github.com/davea42/libdwarf-addr2line @@ -74,7 +77,14 @@ namespace libdwarf { std::vector line_entries; }; - struct dwarf_resolver { + class symbol_resolver { + public: + virtual ~symbol_resolver() = default; + CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING + virtual frame_with_inlines resolve_frame(const object_frame& frame_info) = 0; + }; + + class dwarf_resolver : public symbol_resolver { std::string object_path; Dwarf_Debug dbg = nullptr; bool ok = false; @@ -91,6 +101,7 @@ namespace libdwarf { // Map from CU -> {srcfiles, count} std::unordered_map> srcfiles_cache; + private: // Error handling helper // For some reason R (*f)(Args..., void*)-style deduction isn't possible, seems like a bug in all compilers // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56190 @@ -116,14 +127,25 @@ namespace libdwarf { return ret; } + public: CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING dwarf_resolver(const std::string& _object_path) { object_path = _object_path; + // use a buffer when invoking dwarf_init_path, which allows it to automatically find debuglink or dSYM + // sources + bool use_buffer = true; // for universal / fat mach-o files unsigned universal_number = 0; #if IS_APPLE if(directory_exists(object_path + ".dSYM")) { - object_path += ".dSYM/Contents/Resources/DWARF/" + basename(object_path); + // Possibly depends on the build system but a obj.cpp.o.dSYM/Contents/Resources/DWARF/obj.cpp.o can be + // created alongside .o files. These are text files containing directives, as opposed to something we + // can actually use + std::string dsym_resource = object_path + ".dSYM/Contents/Resources/DWARF/" + basename(object_path); + if(file_is_mach_o(dsym_resource)) { + object_path = std::move(dsym_resource); + } + use_buffer = false; // we resolved dSYM above as appropriate } if(macho_is_fat(object_path)) { universal_number = mach_o(object_path).get_fat_index(); @@ -132,7 +154,10 @@ namespace libdwarf { // Giving libdwarf a buffer for a true output path is needed for its automatic resolution of debuglink and // dSYM files. We don't utilize the dSYM logic here, we just care about debuglink. - std::unique_ptr buffer(new char[CPPTRACE_MAX_PATH]); + std::unique_ptr buffer; + if(use_buffer) { + buffer = std::unique_ptr(new char[CPPTRACE_MAX_PATH]); + } auto ret = wrap( dwarf_init_path_a, object_path.c_str(), @@ -214,6 +239,7 @@ namespace libdwarf { return *this; } + private: // walk all CU's in a dbg, callback is called on each die and should return true to // continue traversal void walk_compilation_units(const std::function& fn) { @@ -521,19 +547,14 @@ namespace libdwarf { it = subprograms_cache.find(off); } auto& vec = it->second; - auto vec_it = std::lower_bound( + auto vec_it = first_less_than_or_equal( vec.begin(), vec.end(), pc, - [] (const subprogram_entry& entry, Dwarf_Addr pc) { - return entry.low < pc; + [] (Dwarf_Addr pc, const subprogram_entry& entry) { + return pc < entry.low; } ); - // vec_it is first >= pc - // we want first <= pc - if(vec_it != vec.begin()) { - vec_it--; - } // If the vector has been empty this can happen if(vec_it != vec.end()) { //vec_it->die.print(); @@ -648,19 +669,14 @@ namespace libdwarf { if(get_cache_mode() == cache_mode::prioritize_speed) { // Lookup in the table auto& line_entries = table_info.line_entries; - auto table_it = std::lower_bound( + auto table_it = first_less_than_or_equal( line_entries.begin(), line_entries.end(), pc, - [] (const line_entry& entry, Dwarf_Addr pc) { - return entry.low < pc; + [] (Dwarf_Addr pc, const line_entry& entry) { + return pc < entry.low; } ); - // vec_it is first >= pc - // we want first <= pc - if(table_it != line_entries.begin()) { - table_it--; - } // If the vector has been empty this can happen if(table_it != line_entries.end()) { Dwarf_Line line = table_it->line; @@ -824,19 +840,14 @@ namespace libdwarf { } else { lazy_generate_cu_cache(); // look up the cu - auto vec_it = std::lower_bound( + auto vec_it = first_less_than_or_equal( cu_cache.begin(), cu_cache.end(), pc, - [] (const cu_entry& entry, Dwarf_Addr pc) { - return entry.low < pc; + [] (Dwarf_Addr pc, const cu_entry& entry) { + return pc < entry.low; } ); - // vec_it is first >= pc - // we want first <= pc - if(vec_it != cu_cache.begin()) { - vec_it--; - } // If the vector has been empty this can happen if(vec_it != cu_cache.end()) { //vec_it->die.print(); @@ -850,8 +861,22 @@ namespace libdwarf { } } + public: CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING - frame_with_inlines resolve_frame(const object_frame& frame_info) { + frame_with_inlines resolve_frame(const object_frame& frame_info) override { + if(!ok) { + return { + { + frame_info.raw_address, + nullable::null(), + nullable::null(), + frame_info.object_path, + "", + false + }, + {} + }; + } stacktrace_frame frame = null_frame; frame.filename = frame_info.object_path; frame.address = frame_info.raw_address; @@ -873,55 +898,249 @@ namespace libdwarf { } }; + class null_resolver : public symbol_resolver { + public: + null_resolver() = default; + null_resolver(const std::string&) {} + + CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING + frame_with_inlines resolve_frame(const object_frame& frame_info) override { + return { + { + frame_info.raw_address, + nullable::null(), + nullable::null(), + frame_info.object_path, + "", + false + }, + {} + }; + }; + }; + + #if IS_APPLE + struct target_object { + std::string object_path; + bool path_ok = true; + optional> symbols; + std::unique_ptr resolver; + + target_object(std::string object_path) : object_path(object_path) {} + + std::unique_ptr& get_resolver() { + if(!resolver) { + // this seems silly but it's an attempt to not repeatedly try to initialize new dwarf_resolvers if + // exceptions are thrown, e.g. if the path doesn't exist + resolver = std::unique_ptr(new null_resolver); + resolver = std::unique_ptr(new dwarf_resolver(object_path)); + } + return resolver; + } + + std::unordered_map& get_symbols() { + if(!symbols) { + // this is an attempt to not repeatedly try to reprocess mach-o files if exceptions are thrown, e.g. if + // the path doesn't exist + std::unordered_map symbols; + this->symbols = symbols; + auto symbol_table = mach_o(object_path).symbol_table(); + for(const auto& symbol : symbol_table) { + symbols[symbol.name] = symbol.address; + } + this->symbols = std::move(symbols); + } + return symbols.unwrap(); + } + + CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING + frame_with_inlines resolve_frame( + const object_frame& frame_info, + const std::string& symbol_name, + std::size_t offset + ) { + const auto& symbol_table = get_symbols(); + auto it = symbol_table.find(symbol_name); + if(it != symbol_table.end()) { + auto frame = frame_info; + frame.object_address = it->second + offset; + return get_resolver()->resolve_frame(frame); + } else { + return { + { + frame_info.raw_address, + nullable::null(), + nullable::null(), + frame_info.object_path, + symbol_name, + false + }, + {} + }; + } + } + }; + + struct debug_map_symbol_info { + uint64_t source_address; + uint64_t size; + std::string name; + nullable target_address; // T(-1) is used as a sentinel + std::size_t object_index; + }; + + class debug_map_resolver : public symbol_resolver { + std::vector target_objects; + std::vector symbols; + public: + debug_map_resolver(const std::string& source_object_path) { + // load mach-o + // TODO: Cache somehow? + mach_o source_mach(source_object_path); + auto source_debug_map = source_mach.get_debug_map(); + // get symbol entries from debug map, as well as the various object files used to make this binary + for(auto& entry : source_debug_map) { + // object it came from + target_objects.push_back({std::move(entry.first)}); + // push the symbols + auto& map_entry_symbols = entry.second; + symbols.reserve(symbols.size() + map_entry_symbols.size()); + for(auto& symbol : map_entry_symbols) { + symbols.push_back({ + symbol.source_address, + symbol.size, + std::move(symbol.name), + nullable::null(), + target_objects.size() - 1 + }); + } + } + // sort for binary lookup later + std::sort( + symbols.begin(), + symbols.end(), + [] ( + const debug_map_symbol_info& a, + const debug_map_symbol_info& b + ) { + return a.source_address < b.source_address; + } + ); + } + CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING + frame_with_inlines resolve_frame(const object_frame& frame_info) override { + // resolve object frame: + // find the symbol in this executable corresponding to the object address + // resolve the symbol in the object it came from, based on the symbol name + auto closest_symbol_it = first_less_than_or_equal( + symbols.begin(), + symbols.end(), + frame_info.object_address, + [] ( + Dwarf_Addr pc, + const debug_map_symbol_info& symbol + ) { + return pc < symbol.source_address; + } + ); + if(closest_symbol_it != symbols.end()) { + if(frame_info.object_address <= closest_symbol_it->source_address + closest_symbol_it->size) { + return target_objects[closest_symbol_it->object_index].resolve_frame( + { + frame_info.raw_address, + // the resolver doesn't care about the object address here, only the offset from the start + // of the symbol and it'll lookup the symbol's base-address + 0, + frame_info.object_path + }, + closest_symbol_it->name, + frame_info.object_address - closest_symbol_it->source_address + ); + } + } + // There was either no closest symbol or the closest symbol didn't end up containing the address we're + // looking for, so just return a blank frame + return { + { + frame_info.raw_address, + nullable::null(), + nullable::null(), + frame_info.object_path, + "", + false + }, + {} + }; + }; + }; + #endif + + std::unique_ptr get_resolver_for_object(const std::string& object_path) { + #if IS_APPLE + // Check if dSYM exist, if not fallback to debug map + if(!directory_exists(object_path + ".dSYM")) { + return std::unique_ptr(new debug_map_resolver(object_path)); + } + #endif + return std::unique_ptr(new dwarf_resolver(object_path)); + } + CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING std::vector resolve_frames(const std::vector& frames) { std::vector trace(frames.size(), {null_frame, {}}); static std::mutex mutex; // cache resolvers since objects are likely to be traced more than once - static std::unordered_map resolver_map; + static std::unordered_map> resolver_map; // Locking around all libdwarf interaction per https://github.com/davea42/libdwarf-code/discussions/184 + // And also interactions with the above static map const std::lock_guard lock(mutex); for(const auto& object_entry : collate_frames(frames, trace)) { try { const auto& object_name = object_entry.first; - optional resolver_object = nullopt; - dwarf_resolver* resolver = nullptr; + std::unique_ptr resolver_object; + symbol_resolver* resolver = nullptr; auto it = resolver_map.find(object_name); if(it != resolver_map.end()) { - resolver = &it->second; + resolver = it->second.get(); } else { - resolver_object = dwarf_resolver(object_name); - resolver = &resolver_object.unwrap(); + resolver_object = get_resolver_for_object(object_name); + resolver = resolver_object.get(); } // If there's no debug information it'll mark itself as not ok - if(resolver->ok) { - for(const auto& entry : object_entry.second) { - try { - const auto& dlframe = entry.first.get(); - auto& frame = entry.second.get(); - frame = resolver->resolve_frame(dlframe); - } catch(...) { - if(!should_absorb_trace_exceptions()) { - throw; - } - } - } - } else { - // at least copy the addresses - for(const auto& entry : object_entry.second) { + for(const auto& entry : object_entry.second) { + try { const auto& dlframe = entry.first.get(); auto& frame = entry.second.get(); - frame.frame.address = dlframe.raw_address; + frame = resolver->resolve_frame(dlframe); + } catch(...) { + if(!should_absorb_trace_exceptions()) { + throw; + } } } - if(resolver_object.has_value() && get_cache_mode() == cache_mode::prioritize_speed) { + if(resolver_object && get_cache_mode() == cache_mode::prioritize_speed) { // .emplace needed, for some reason .insert tries to copy <= gcc 7.2 - resolver_map.emplace(object_name, std::move(resolver_object).unwrap()); + resolver_map.emplace(object_name, std::move(resolver_object)); } } catch(...) { // NOSONAR if(!should_absorb_trace_exceptions()) { throw; } + for(const auto& entry : object_entry.second) { + const auto& dlframe = entry.first.get(); + auto& frame = entry.second.get(); + frame = { + { + dlframe.raw_address, + nullable::null(), + nullable::null(), + dlframe.object_path, + "", + false + }, + {} + }; + } } } // flatten trace with inlines diff --git a/src/utils/utils.hpp b/src/utils/utils.hpp index ea0c53f..c51a2a2 100644 --- a/src/utils/utils.hpp +++ b/src/utils/utils.hpp @@ -1,6 +1,7 @@ #ifndef UTILS_HPP #define UTILS_HPP +#include #include #include #include @@ -72,6 +73,28 @@ namespace detail { return str; } + // first value in a sorted range such that *it <= value + template + ForwardIt first_less_than_or_equal(ForwardIt begin, ForwardIt end, const T& value) { + auto it = std::upper_bound(begin, end, value); + // it is first > value, we want first <= value + if(it != begin) { + return --it; + } + return end; + } + + // first value in a sorted range such that *it <= value + template + ForwardIt first_less_than_or_equal(ForwardIt begin, ForwardIt end, const T& value, Compare compare) { + auto it = std::upper_bound(begin, end, value, compare); + // it is first > value, we want first <= value + if(it != begin) { + return --it; + } + return end; + } + constexpr const char* const whitespace = " \t\n\r\f\v"; inline std::string trim(const std::string& str) {