Parsing of mach-o symbol tables, generation of debug maps, and resolution through object files (#82)

This commit is contained in:
Jeremy Rifkin 2024-01-29 22:12:59 -06:00 committed by GitHub
parent ea30c99f35
commit 79931c8823
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 622 additions and 106 deletions

View File

@ -23,8 +23,8 @@ jobs:
mkdir libdwarf mkdir libdwarf
cd libdwarf cd libdwarf
git init git init
git remote add origin https://github.com/davea42/libdwarf-code.git git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD git checkout FETCH_HEAD
mkdir build mkdir build
cd build cd build
@ -52,8 +52,8 @@ jobs:
mkdir libdwarf mkdir libdwarf
cd libdwarf cd libdwarf
git init git init
git remote add origin https://github.com/davea42/libdwarf-code.git git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD git checkout FETCH_HEAD
mkdir build mkdir build
cd build cd build

View File

@ -26,8 +26,8 @@ jobs:
mkdir libdwarf mkdir libdwarf
cd libdwarf cd libdwarf
git init git init
git remote add origin https://github.com/davea42/libdwarf-code.git git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD git checkout FETCH_HEAD
mkdir build mkdir build
cd build cd build
@ -53,8 +53,8 @@ jobs:
mkdir libdwarf mkdir libdwarf
cd libdwarf cd libdwarf
git init git init
git remote add origin https://github.com/davea42/libdwarf-code.git git remote add origin https://github.com/flagarde/libdwarf-code.git
git fetch --depth 1 origin 6216e185863f41d6f19ab850caabfff7326020d7 git fetch --depth 1 origin d1a559b7af0840194dfa51f7e3013e0f80614032
git checkout FETCH_HEAD git checkout FETCH_HEAD
mkdir build mkdir build
cd build cd build
@ -68,6 +68,17 @@ jobs:
- name: build and test - name: build and test
run: | run: |
python3 ci/test-all-configs.py --${{matrix.compiler}} python3 ci/test-all-configs.py --${{matrix.compiler}}
# - name: bundle artifacts
# if: always()
# run: |
# tar czfH bundle.tar.gz build
# - name: upload artifacts
# uses: actions/upload-artifact@v4
# if: always()
# with:
# name: build-macos-${{matrix.compiler}}${{matrix.shared}}
# path: bundle.tar.gz
# retention-days: 2
test-windows: test-windows:
runs-on: windows-2022 runs-on: windows-2022
strategy: strategy:

View File

@ -27,12 +27,12 @@ if(PROJECT_IS_TOP_LEVEL)
if(CMAKE_GENERATOR STREQUAL "Ninja") if(CMAKE_GENERATOR STREQUAL "Ninja")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
endif() endif()
# C counterpart of the C++ color-diagnostics check: every comparison here must look at the C compiler id
if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang")
  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fcolor-diagnostics")
endif()
endif() endif()
@ -358,10 +358,13 @@ if(CPPTRACE_GET_SYMBOLS_WITH_LIBDWARF)
# GIT_REPOSITORY https://github.com/davea42/libdwarf-code.git # GIT_REPOSITORY https://github.com/davea42/libdwarf-code.git
# GIT_TAG 6216e185863f41d6f19ab850caabfff7326020d7 # v0.8.0 # GIT_TAG 6216e185863f41d6f19ab850caabfff7326020d7 # v0.8.0
# GIT_TAG 8b0bd09d8c77d45a68cb1bb00a54186a92b683d9 # v0.9.0 # GIT_TAG 8b0bd09d8c77d45a68cb1bb00a54186a92b683d9 # v0.9.0
# GIT_TAG c0cfba34ec80996426b5be2523f6447a2c9b7b39 # v0.9.0 + mach-o changes
# Using a lightweight mirror that's optimized for clone + configure speed # Using a lightweight mirror that's optimized for clone + configure speed
GIT_REPOSITORY https://github.com/jeremy-rifkin/libdwarf-lite.git # GIT_REPOSITORY https://github.com/jeremy-rifkin/libdwarf-lite.git
GIT_TAG c78e984f3abbd20f6e01d6f51819e826b1691f65 # v0.8.0 # GIT_TAG c78e984f3abbd20f6e01d6f51819e826b1691f65 # v0.8.0
# GIT_TAG 71090c680b4c943448ba87a0f1f864f174e4edda # v0.9.0 # GIT_TAG 71090c680b4c943448ba87a0f1f864f174e4edda # v0.9.0
GIT_REPOSITORY https://github.com/flagarde/libdwarf-code.git
GIT_TAG d1a559b7af0840194dfa51f7e3013e0f80614032
GIT_SHALLOW 1 GIT_SHALLOW 1
) )
FetchContent_MakeAvailable(libdwarf) FetchContent_MakeAvailable(libdwarf)
@ -533,13 +536,6 @@ macro(add_test_dependencies exec_name)
if(CPPTRACE_BUILD_TEST_RDYNAMIC) if(CPPTRACE_BUILD_TEST_RDYNAMIC)
set_property(TARGET ${exec_name} PROPERTY ENABLE_EXPORTS ON) set_property(TARGET ${exec_name} PROPERTY ENABLE_EXPORTS ON)
endif() endif()
if(APPLE) # TODO: Temporary
add_custom_command(
TARGET ${exec_name}
POST_BUILD
COMMAND dsymutil $<TARGET_FILE:${exec_name}>
)
endif()
endmacro() endmacro()
if(CPPTRACE_BUILD_TESTING) if(CPPTRACE_BUILD_TESTING)

View File

@ -115,7 +115,7 @@ endif()
Be sure to configure with `-DCMAKE_BUILD_TYPE=Debug` or `-DCMAKE_BUILD_TYPE=RelWithDebInfo` for symbols and line Be sure to configure with `-DCMAKE_BUILD_TYPE=Debug` or `-DCMAKE_BUILD_TYPE=RelWithDebInfo` for symbols and line
information. information.
On macos a little extra work to generate a .dSYM file is required, see [Platform Logistics](#platform-logistics) below. On macOS it is recommended to generate a .dSYM file, see [Platform Logistics](#platform-logistics) below.
For other ways to use the library, such as through package managers, a system-wide installation, or on a platform For other ways to use the library, such as through package managers, a system-wide installation, or on a platform
without internet access see [Usage](#usage) below. without internet access see [Usage](#usage) below.
@ -140,9 +140,6 @@ method to get lightweight raw traces, which are just vectors of program counters
**Note:** Debug info (`-g`/`/Z7`/`/Zi`/`/DEBUG`) is generally required for good trace information. **Note:** Debug info (`-g`/`/Z7`/`/Zi`/`/DEBUG`) is generally required for good trace information.
**Note:** Currently on Mac .dSYM files are required, which can be generated with `dsymutil yourbinary`. A cmake snippet
for generating these is provided in [Platform Logistics](#platform-logistics) below.
All functions are thread-safe unless otherwise noted. All functions are thread-safe unless otherwise noted.
### Stack Traces ### Stack Traces
@ -506,7 +503,7 @@ namespace cpptrace {
| DWARF in separate binary (binary gnu debug link) | ️️✔️ | | DWARF in separate binary (binary gnu debug link) | ️️✔️ |
| DWARF in separate binary (split dwarf) | ✔️ | | DWARF in separate binary (split dwarf) | ✔️ |
| DWARF in dSYM | ✔️ | | DWARF in dSYM | ✔️ |
| DWARF via Mach-O debug map | Soon | | DWARF via Mach-O debug map | ✔️ |
| Windows debug symbols in PDB | ✔️ | | Windows debug symbols in PDB | ✔️ |
DWARF5 added DWARF package files. As far as I can tell no compiler implements these yet. DWARF5 added DWARF package files. As far as I can tell no compiler implements these yet.
@ -719,7 +716,7 @@ if(WIN32)
endif() endif()
``` ```
Generating a .dSYM file on macos: On macOS it's recommended to generate a dSYM file containing debug information for your program:
In xcode cmake this can be done with In xcode cmake this can be done with
@ -872,7 +869,6 @@ and time-memory tradeoffs. If you find the current implementation is either slow
to explore some of these options. to explore some of these options.
A couple things I'd like to improve in the future: A couple things I'd like to improve in the future:
- On MacOS .dSYM files are required
- On Windows when collecting symbols with dbghelp (msvc/clang) parameter types are almost perfect but due to limitations - On Windows when collecting symbols with dbghelp (msvc/clang) parameter types are almost perfect but due to limitations
in dbghelp the library cannot accurately show const and volatile qualifiers or rvalue references (these appear as in dbghelp the library cannot accurately show const and volatile qualifiers or rvalue references (these appear as
pointers). pointers).

View File

@ -30,7 +30,7 @@ def similarity(name: str, target: List[str]) -> int:
return -1 return -1
return c return c
def output_matches(output: str, params: Tuple[str]): def output_matches(raw_output: str, params: Tuple[str]):
target = [] target = []
if params[0].startswith("gcc") or params[0].startswith("g++"): if params[0].startswith("gcc") or params[0].startswith("g++"):
@ -72,31 +72,41 @@ def output_matches(output: str, params: Tuple[str]):
print(f"Reading from {file}") print(f"Reading from {file}")
with open(os.path.join(expected_dir, file), "r") as f: with open(os.path.join(expected_dir, file), "r") as f:
expected = f.read() raw_expected = f.read()
if output.strip() == "": if raw_output.strip() == "":
print(f"Error: No output from test") print(f"Error: No output from test")
return False return False
expected = [line.strip().split("||") for line in expected.split("\n")] expected = [line.strip().split("||") for line in raw_expected.split("\n")]
output = [line.strip().split("||") for line in output.split("\n")] output = [line.strip().split("||") for line in raw_output.split("\n")]
max_line_diff = 0 max_line_diff = 0
errored = False errored = False
for i, ((output_file, output_line, output_symbol), (expected_file, expected_line, expected_symbol)) in enumerate(zip(output, expected)): try:
if output_file != expected_file: for i, ((output_file, output_line, output_symbol), (expected_file, expected_line, expected_symbol)) in enumerate(zip(output, expected)):
print(f"Error: File name mismatch on line {i + 1}, found \"{output_file}\" expected \"{expected_file}\"") if output_file != expected_file:
errored = True print(f"Error: File name mismatch on line {i + 1}, found \"{output_file}\" expected \"{expected_file}\"")
if abs(int(output_line) - int(expected_line)) > max_line_diff: errored = True
print(f"Error: File line mismatch on line {i + 1}, found {output_line} expected {expected_line}") if abs(int(output_line) - int(expected_line)) > max_line_diff:
errored = True print(f"Error: File line mismatch on line {i + 1}, found {output_line} expected {expected_line}")
if output_symbol != expected_symbol: errored = True
print(f"Error: File symbol mismatch on line {i + 1}, found \"{output_symbol}\" expected \"{expected_symbol}\"") if output_symbol != expected_symbol:
errored = True print(f"Error: File symbol mismatch on line {i + 1}, found \"{output_symbol}\" expected \"{expected_symbol}\"")
if expected_symbol == "main" or expected_symbol == "main()": errored = True
break if expected_symbol == "main" or expected_symbol == "main()":
break
except ValueError:
print("ValueError during output checking")
errored = True
if errored:
print("Output:")
print(raw_output)
print("Expected:")
print(raw_expected)
return not errored return not errored
@ -126,7 +136,7 @@ def run_test(test_binary, params: Tuple[str]):
print(Style.RESET_ALL, end="") # makefile in parallel sometimes messes up colors print(Style.RESET_ALL, end="") # makefile in parallel sometimes messes up colors
if test.returncode != 0: if test.returncode != 0:
print("[🔴 Test command failed]") print(f"[🔴 Test command failed with code {test.returncode}]")
print("stderr:") print("stderr:")
print(test_stderr.decode("utf-8"), end="") print(test_stderr.decode("utf-8"), end="")
print("stdout:") print("stdout:")

View File

@ -12,7 +12,13 @@
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <string>
#include <type_traits> #include <type_traits>
#include <unordered_map>
#include <vector>
#include <iostream>
#include <iomanip>
#include <mach-o/loader.h> #include <mach-o/loader.h>
#include <mach-o/swap.h> #include <mach-o/swap.h>
@ -23,7 +29,7 @@
namespace cpptrace { namespace cpptrace {
namespace detail { namespace detail {
static bool is_mach_o(std::uint32_t magic) { inline bool is_mach_o(std::uint32_t magic) {
switch(magic) { switch(magic) {
case FAT_MAGIC: case FAT_MAGIC:
case FAT_CIGAM: case FAT_CIGAM:
@ -37,36 +43,57 @@ namespace detail {
} }
} }
static bool is_fat_magic(std::uint32_t magic) { inline bool file_is_mach_o(const std::string& object_path) noexcept {
try {
FILE* file = std::fopen(object_path.c_str(), "rb");
if(file == nullptr) {
return false;
}
auto magic = load_bytes<std::uint32_t>(file, 0);
return is_mach_o(magic);
} catch(...) {
return false;
}
}
inline bool is_fat_magic(std::uint32_t magic) {
return magic == FAT_MAGIC || magic == FAT_CIGAM; return magic == FAT_MAGIC || magic == FAT_CIGAM;
} }
// Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c // Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
// and https://lowlevelbits.org/parsing-mach-o-files/ // and https://lowlevelbits.org/parsing-mach-o-files/
static bool is_magic_64(std::uint32_t magic) { inline bool is_magic_64(std::uint32_t magic) {
return magic == MH_MAGIC_64 || magic == MH_CIGAM_64; return magic == MH_MAGIC_64 || magic == MH_CIGAM_64;
} }
static bool should_swap_bytes(std::uint32_t magic) { inline bool should_swap_bytes(std::uint32_t magic) {
return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM; return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM;
} }
static void swap_mach_header(mach_header_64& header) { inline void swap_mach_header(mach_header_64& header) {
swap_mach_header_64(&header, NX_UnknownByteOrder); swap_mach_header_64(&header, NX_UnknownByteOrder);
} }
static void swap_mach_header(mach_header& header) { inline void swap_mach_header(mach_header& header) {
swap_mach_header(&header, NX_UnknownByteOrder); swap_mach_header(&header, NX_UnknownByteOrder);
} }
static void swap_segment_command(segment_command_64& segment) { inline void swap_segment_command(segment_command_64& segment) {
swap_segment_command_64(&segment, NX_UnknownByteOrder); swap_segment_command_64(&segment, NX_UnknownByteOrder);
} }
static void swap_segment_command(segment_command& segment) { inline void swap_segment_command(segment_command& segment) {
swap_segment_command(&segment, NX_UnknownByteOrder); swap_segment_command(&segment, NX_UnknownByteOrder);
} }
// Reference-taking overload for 32-bit symbol table entries: forwards to the pointer/count swap_nlist for
// exactly one entry, which is modified in place.
inline void swap_nlist(struct nlist& entry) {
swap_nlist(&entry, 1, NX_UnknownByteOrder);
}
// Same as above for 64-bit symbol table entries; forwards to swap_nlist_64 for exactly one entry.
inline void swap_nlist(struct nlist_64& entry) {
swap_nlist_64(&entry, 1, NX_UnknownByteOrder);
}
#ifdef __LP64__ #ifdef __LP64__
#define LP(x) x##_64 #define LP(x) x##_64
#else #else
@ -89,12 +116,28 @@ namespace detail {
std::uint32_t n_load_commands; std::uint32_t n_load_commands;
std::uint32_t sizeof_load_commands; std::uint32_t sizeof_load_commands;
std::uint32_t flags; std::uint32_t flags;
std::size_t bits = 0; // 32 or 64 once load_mach is called
std::size_t load_base = 0; std::size_t load_base = 0;
std::size_t fat_index = std::numeric_limits<std::size_t>::max(); std::size_t fat_index = std::numeric_limits<std::size_t>::max();
std::vector<load_command_entry> load_commands; std::vector<load_command_entry> load_commands;
// An LC_SYMTAB load command bundled with the string table it refers to
struct symtab_info_data {
    symtab_command symtab;
    std::unique_ptr<char[]> stringtab;

    // Returns a pointer to byte `index` of the string table.
    // Throws std::runtime_error if no string table is held or `index` is not below symtab.strsize.
    const char* get_string(std::size_t index) const {
        const bool usable = stringtab && index < symtab.strsize;
        if(!usable) {
            throw std::runtime_error("can't retrieve symbol from symtab");
        }
        return stringtab.get() + index;
    }
};
bool tried_to_load_symtab = false;
optional<symtab_info_data> symtab_info;
public: public:
mach_o(const std::string& object_path) : object_path(object_path) { mach_o(const std::string& object_path) : object_path(object_path) {
file = std::fopen(object_path.c_str(), "rb"); file = std::fopen(object_path.c_str(), "rb");
@ -117,7 +160,7 @@ namespace detail {
~mach_o() { ~mach_o() {
if(file) { if(file) {
fclose(file); std::fclose(file);
} }
} }
@ -163,12 +206,195 @@ namespace detail {
} }
} }
// Returns the cached symbol table info, loading it from the first LC_SYMTAB load command on first use.
// The returned optional stays empty when there is no LC_SYMTAB command.
optional<symtab_info_data>& get_symtab_info() {
    if(symtab_info.has_value() || tried_to_load_symtab) {
        return symtab_info;
    }
    // mark the attempt up front so a failure while loading below is not retried on later calls
    tried_to_load_symtab = true;
    for(const auto& candidate : load_commands) {
        if(candidate.cmd != LC_SYMTAB) {
            continue;
        }
        symtab_info_data loaded;
        loaded.symtab = load_symbol_table_command(candidate.file_offset);
        loaded.stringtab = load_string_table(loaded.symtab.stroff, loaded.symtab.strsize);
        symtab_info = std::move(loaded);
        break; // only the first LC_SYMTAB command is used
    }
    return symtab_info;
}
// Debug helper: prints one symbol table entry as a single row on stderr.
// `stringtab` / `stringsize` are the string table and its size in bytes, `j` is the entry's index in the table.
void print_symbol_table_entry(
    const nlist_64& entry,
    const std::unique_ptr<char[]>& stringtab,
    std::size_t stringsize,
    std::size_t j
) const {
    // label the entry kinds of interest; anything else keeps an empty label
    const char* type = "";
    if((entry.n_type & N_STAB) == 0) {
        if((entry.n_type & N_TYPE) == N_SECT) {
            type = "N_SECT";
        }
    } else if(entry.n_type == N_SO) {
        type = "N_SO";
    } else if(entry.n_type == N_OSO) {
        type = "N_OSO";
    } else if(entry.n_type == N_BNSYM) {
        type = "N_BNSYM";
    } else if(entry.n_type == N_ENSYM) {
        type = "N_ENSYM";
    } else if(entry.n_type == N_FUN) {
        type = "N_FUN";
    }
    // resolve the name, substituting a diagnostic when the string table is missing or the index is bad
    const char* name;
    if(stringtab == nullptr) {
        name = "Stringtab error";
    } else if(entry.n_un.n_strx < stringsize) {
        name = stringtab.get() + entry.n_un.n_strx;
    } else {
        name = "String index out of bounds";
    }
    fprintf(
        stderr,
        "%5llu %8llx %2llx %7s %2llu %4llx %16llx %s\n",
        to_ull(j),
        to_ull(entry.n_un.n_strx),
        to_ull(entry.n_type),
        type,
        to_ull(entry.n_sect),
        to_ull(entry.n_desc),
        to_ull(entry.n_value),
        name
    );
}
// Debug helper: dumps every LC_SYMTAB load command, followed by all of its symbol entries, to stderr.
void print_symbol_table() {
    int i = 0; // position among all load commands, not just the LC_SYMTAB ones
    for(const auto& command : load_commands) {
        if(command.cmd == LC_SYMTAB) {
            auto symtab = load_symbol_table_command(command.file_offset);
            fprintf(stderr, "Load command %d\n", i);
            fprintf(stderr, " cmd %llu\n", to_ull(symtab.cmd));
            fprintf(stderr, " cmdsize %llu\n", to_ull(symtab.cmdsize));
            // the two offsets are printed with a 0x prefix, so they must be formatted as hex
            fprintf(stderr, " symoff 0x%llx\n", to_ull(symtab.symoff));
            fprintf(stderr, " nsyms %llu\n", to_ull(symtab.nsyms));
            fprintf(stderr, " stroff 0x%llx\n", to_ull(symtab.stroff));
            fprintf(stderr, " strsize %llu\n", to_ull(symtab.strsize));
            auto stringtab = load_string_table(symtab.stroff, symtab.strsize);
            for(std::size_t j = 0; j < symtab.nsyms; j++) {
                // entries are read with the width recorded by load_mach and widened to nlist_64
                nlist_64 entry = bits == 32
                    ? load_symtab_entry<32>(symtab.symoff, j)
                    : load_symtab_entry<64>(symtab.symoff, j);
                print_symbol_table_entry(entry, stringtab, symtab.strsize, j);
            }
        }
        i++;
    }
}
// One function recorded in the debug map. get_debug_map fills these in from a pair of N_FUN symbol entries.
struct debug_map_entry {
// n_value of the named N_FUN entry that opens the function
uint64_t source_address;
// n_value of the empty-named N_FUN entry that closes the function
uint64_t size;
// string table name of the opening N_FUN entry
std::string name;
};
// One symbol as returned by symbol_table(): a non-STAB entry whose type is N_SECT
struct symbol_entry {
// n_value of the symbol table entry
uint64_t address;
// string table name of the symbol table entry
std::string name;
};
// map from object file to a vector of symbols to resolve
using debug_map = std::unordered_map<std::string, std::vector<debug_map_entry>>;
// Builds the debug map of this binary, producing information similar to dsymutil -dump-debug-map.
// Each N_OSO stab names the object file that the following stabs belong to; each function is described by a
// named N_FUN stab (n_value = address) followed by an empty-named N_FUN stab (n_value = size).
// get_string throws std::runtime_error on an out-of-range string index; a missing symbol table is left to
// unwrap() to reject.
debug_map get_debug_map() {
    debug_map result;
    const auto& info = get_symtab_info().unwrap();
    const auto& symtab = info.symtab;
    // TODO: Take timestamp into account?
    std::string current_module;
    optional<debug_map_entry> current_function;
    for(std::size_t j = 0; j < symtab.nsyms; j++) {
        nlist_64 entry = bits == 32
            ? load_symtab_entry<32>(symtab.symoff, j)
            : load_symtab_entry<64>(symtab.symoff, j);
        // entry.n_type & N_STAB indicates symbolic debug info
        if(!(entry.n_type & N_STAB)) {
            continue;
        }
        switch(entry.n_type) {
            case N_SO:
                // pass - these encode path and filename for the module, if applicable
                break;
            case N_OSO:
                // sets the module
                current_module = info.get_string(entry.n_un.n_strx);
                break;
            case N_BNSYM: break; // pass
            case N_ENSYM: break; // pass
            case N_FUN:
                {
                    const char* str = info.get_string(entry.n_un.n_strx);
                    if(str[0] != 0) {
                        // start of function scope
                        current_function = debug_map_entry{};
                        current_function.unwrap().source_address = entry.n_value;
                        current_function.unwrap().name = str;
                    } else if(!current_function) {
                        // end-of-scope marker without a preceding start: nothing to record, skip it rather
                        // than unwrapping an empty optional
                    } else {
                        // end of function scope
                        current_function.unwrap().size = entry.n_value;
                        result[current_module].push_back(std::move(current_function).unwrap());
                    }
                }
                break;
        }
    }
    return result;
}
std::vector<symbol_entry> symbol_table() {
// we have a bunch of symbols in our binary we need to pair up with symbols from various .o files
// first collect symbols and the objects they come from
std::vector<symbol_entry> symbols;
const auto& symtab_info = get_symtab_info().unwrap();
const auto& symtab = symtab_info.symtab;
// TODO: Take timestamp into account?
for(std::size_t j = 0; j < symtab.nsyms; j++) {
nlist_64 entry = bits == 32
? load_symtab_entry<32>(symtab.symoff, j)
: load_symtab_entry<64>(symtab.symoff, j);
if(entry.n_type & N_STAB) {
continue;
}
if((entry.n_type & N_TYPE) == N_SECT) {
symbols.push_back({
entry.n_value,
symtab_info.get_string(entry.n_un.n_strx)
});
}
}
return symbols;
}
// Debug helper: prints a debug map to stderr in a layout similar to dsymutil -dump-debug-map.
// Each object file path is followed by one indented row per symbol: name, address (hex), size (hex).
static void print_debug_map(const debug_map& debug_map) {
    for(const auto& entry : debug_map) {
        // header and rows go to the same stream so they cannot be separated or reordered by redirection
        std::cerr << entry.first << ": " << '\n';
        for(const auto& symbol : entry.second) {
            std::cerr
                << " "
                << symbol.name
                << " "
                << std::hex
                << symbol.source_address
                << " "
                << symbol.size
                << std::dec
                << '\n';
        }
    }
}
private: private:
template<std::size_t Bits> template<std::size_t Bits>
void load_mach( void load_mach(
bool allow_arch_mismatch bool allow_arch_mismatch
) { ) {
static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument"); static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument");
bits = Bits;
using Mach_Header = typename std::conditional<Bits == 32, mach_header, mach_header_64>::type; using Mach_Header = typename std::conditional<Bits == 32, mach_header, mach_header_64>::type;
std::size_t header_size = sizeof(Mach_Header); std::size_t header_size = sizeof(Mach_Header);
Mach_Header header = load_bytes<Mach_Header>(file, load_base); Mach_Header header = load_bytes<Mach_Header>(file, load_base);
@ -268,6 +494,41 @@ namespace detail {
return common; return common;
} }
// Loads a symtab_command through load_bytes at `offset` and returns it in host byte order.
symtab_command load_symbol_table_command(std::uint32_t offset) const {
    symtab_command symtab = load_bytes<symtab_command>(file, offset);
    if(should_swap()) {
        swap_symtab_command(&symtab, NX_UnknownByteOrder);
    }
    // validate cmd only after the swap: in a byte-swapped file the raw field never compares equal to LC_SYMTAB
    ASSERT(symtab.cmd == LC_SYMTAB);
    return symtab;
}
// Loads symbol table entry number `index` from the table starting at `symbol_base` (relative to load_base),
// reading it as a 32- or 64-bit nlist according to Bits and returning it widened to nlist_64.
template<std::size_t Bits>
nlist_64 load_symtab_entry(std::uint32_t symbol_base, std::size_t index) const {
    using Nlist = typename std::conditional<Bits == 32, struct nlist, struct nlist_64>::type;
    // NOTE(review): the offset is held in 32 bits although load_base and index are std::size_t
    uint32_t offset = load_base + symbol_base + index * sizeof(Nlist);
    Nlist raw = load_bytes<Nlist>(file, offset);
    if(should_swap()) {
        swap_nlist(raw);
    }
    // copy field by field; the layouts agree except that the 64-bit form has a wider n_value
    nlist_64 widened;
    widened.n_value = raw.n_value;
    widened.n_desc = raw.n_desc;
    widened.n_sect = raw.n_sect;
    widened.n_type = raw.n_type;
    widened.n_un.n_strx = raw.n_un.n_strx;
    return widened;
}
// Reads `byte_count` bytes located at load_base + offset in the file into a newly allocated buffer and returns
// it. The buffer is one byte longer than requested and that final byte is set to 0.
// The VERIFY checks reject a failed seek and a read that returns fewer than byte_count bytes.
std::unique_ptr<char[]> load_string_table(std::uint32_t offset, std::uint32_t byte_count) const {
// one extra byte for the terminator written below
std::unique_ptr<char[]> buffer(new char[byte_count + 1]);
VERIFY(std::fseek(file, load_base + offset, SEEK_SET) == 0, "fseek error");
VERIFY(std::fread(buffer.get(), sizeof(char), byte_count, file) == byte_count, "fread error");
buffer[byte_count] = 0; // just out of an abundance of caution
return buffer;
}
bool should_swap() const { bool should_swap() const {
return should_swap_bytes(magic); return should_swap_bytes(magic);
} }

View File

@ -20,6 +20,9 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include <iostream>
#include <iomanip>
// It's been tricky to piece together how to handle all this dwarf stuff. Some resources I've used are // It's been tricky to piece together how to handle all this dwarf stuff. Some resources I've used are
// https://www.prevanders.net/libdwarf.pdf // https://www.prevanders.net/libdwarf.pdf
// https://github.com/davea42/libdwarf-addr2line // https://github.com/davea42/libdwarf-addr2line
@ -74,7 +77,14 @@ namespace libdwarf {
std::vector<line_entry> line_entries; std::vector<line_entry> line_entries;
}; };
struct dwarf_resolver { class symbol_resolver {
public:
virtual ~symbol_resolver() = default;
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
virtual frame_with_inlines resolve_frame(const object_frame& frame_info) = 0;
};
class dwarf_resolver : public symbol_resolver {
std::string object_path; std::string object_path;
Dwarf_Debug dbg = nullptr; Dwarf_Debug dbg = nullptr;
bool ok = false; bool ok = false;
@ -91,6 +101,7 @@ namespace libdwarf {
// Map from CU -> {srcfiles, count} // Map from CU -> {srcfiles, count}
std::unordered_map<Dwarf_Off, std::pair<char**, Dwarf_Signed>> srcfiles_cache; std::unordered_map<Dwarf_Off, std::pair<char**, Dwarf_Signed>> srcfiles_cache;
private:
// Error handling helper // Error handling helper
// For some reason R (*f)(Args..., void*)-style deduction isn't possible, seems like a bug in all compilers // For some reason R (*f)(Args..., void*)-style deduction isn't possible, seems like a bug in all compilers
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56190 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56190
@ -116,14 +127,25 @@ namespace libdwarf {
return ret; return ret;
} }
public:
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
dwarf_resolver(const std::string& _object_path) { dwarf_resolver(const std::string& _object_path) {
object_path = _object_path; object_path = _object_path;
// use a buffer when invoking dwarf_init_path, which allows it to automatically find debuglink or dSYM
// sources
bool use_buffer = true;
// for universal / fat mach-o files // for universal / fat mach-o files
unsigned universal_number = 0; unsigned universal_number = 0;
#if IS_APPLE #if IS_APPLE
if(directory_exists(object_path + ".dSYM")) { if(directory_exists(object_path + ".dSYM")) {
object_path += ".dSYM/Contents/Resources/DWARF/" + basename(object_path); // Possibly depends on the build system but a obj.cpp.o.dSYM/Contents/Resources/DWARF/obj.cpp.o can be
// created alongside .o files. These are text files containing directives, as opposed to something we
// can actually use
std::string dsym_resource = object_path + ".dSYM/Contents/Resources/DWARF/" + basename(object_path);
if(file_is_mach_o(dsym_resource)) {
object_path = std::move(dsym_resource);
}
use_buffer = false; // we resolved dSYM above as appropriate
} }
if(macho_is_fat(object_path)) { if(macho_is_fat(object_path)) {
universal_number = mach_o(object_path).get_fat_index(); universal_number = mach_o(object_path).get_fat_index();
@ -132,7 +154,10 @@ namespace libdwarf {
// Giving libdwarf a buffer for a true output path is needed for its automatic resolution of debuglink and // Giving libdwarf a buffer for a true output path is needed for its automatic resolution of debuglink and
// dSYM files. We don't utilize the dSYM logic here, we just care about debuglink. // dSYM files. We don't utilize the dSYM logic here, we just care about debuglink.
std::unique_ptr<char[]> buffer(new char[CPPTRACE_MAX_PATH]); std::unique_ptr<char[]> buffer;
if(use_buffer) {
buffer = std::unique_ptr<char[]>(new char[CPPTRACE_MAX_PATH]);
}
auto ret = wrap( auto ret = wrap(
dwarf_init_path_a, dwarf_init_path_a,
object_path.c_str(), object_path.c_str(),
@ -214,6 +239,7 @@ namespace libdwarf {
return *this; return *this;
} }
private:
// walk all CU's in a dbg, callback is called on each die and should return true to // walk all CU's in a dbg, callback is called on each die and should return true to
// continue traversal // continue traversal
void walk_compilation_units(const std::function<bool(const die_object&)>& fn) { void walk_compilation_units(const std::function<bool(const die_object&)>& fn) {
@ -521,19 +547,14 @@ namespace libdwarf {
it = subprograms_cache.find(off); it = subprograms_cache.find(off);
} }
auto& vec = it->second; auto& vec = it->second;
auto vec_it = std::lower_bound( auto vec_it = first_less_than_or_equal(
vec.begin(), vec.begin(),
vec.end(), vec.end(),
pc, pc,
[] (const subprogram_entry& entry, Dwarf_Addr pc) { [] (Dwarf_Addr pc, const subprogram_entry& entry) {
return entry.low < pc; return pc < entry.low;
} }
); );
// vec_it is first >= pc
// we want first <= pc
if(vec_it != vec.begin()) {
vec_it--;
}
// If the vector has been empty this can happen // If the vector has been empty this can happen
if(vec_it != vec.end()) { if(vec_it != vec.end()) {
//vec_it->die.print(); //vec_it->die.print();
@ -648,19 +669,14 @@ namespace libdwarf {
if(get_cache_mode() == cache_mode::prioritize_speed) { if(get_cache_mode() == cache_mode::prioritize_speed) {
// Lookup in the table // Lookup in the table
auto& line_entries = table_info.line_entries; auto& line_entries = table_info.line_entries;
auto table_it = std::lower_bound( auto table_it = first_less_than_or_equal(
line_entries.begin(), line_entries.begin(),
line_entries.end(), line_entries.end(),
pc, pc,
[] (const line_entry& entry, Dwarf_Addr pc) { [] (Dwarf_Addr pc, const line_entry& entry) {
return entry.low < pc; return pc < entry.low;
} }
); );
// vec_it is first >= pc
// we want first <= pc
if(table_it != line_entries.begin()) {
table_it--;
}
// If the vector has been empty this can happen // If the vector has been empty this can happen
if(table_it != line_entries.end()) { if(table_it != line_entries.end()) {
Dwarf_Line line = table_it->line; Dwarf_Line line = table_it->line;
@ -824,19 +840,14 @@ namespace libdwarf {
} else { } else {
lazy_generate_cu_cache(); lazy_generate_cu_cache();
// look up the cu // look up the cu
auto vec_it = std::lower_bound( auto vec_it = first_less_than_or_equal(
cu_cache.begin(), cu_cache.begin(),
cu_cache.end(), cu_cache.end(),
pc, pc,
[] (const cu_entry& entry, Dwarf_Addr pc) { [] (Dwarf_Addr pc, const cu_entry& entry) {
return entry.low < pc; return pc < entry.low;
} }
); );
// vec_it is first >= pc
// we want first <= pc
if(vec_it != cu_cache.begin()) {
vec_it--;
}
// If the vector has been empty this can happen // If the vector has been empty this can happen
if(vec_it != cu_cache.end()) { if(vec_it != cu_cache.end()) {
//vec_it->die.print(); //vec_it->die.print();
@ -850,8 +861,22 @@ namespace libdwarf {
} }
} }
public:
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
frame_with_inlines resolve_frame(const object_frame& frame_info) { frame_with_inlines resolve_frame(const object_frame& frame_info) override {
if(!ok) {
return {
{
frame_info.raw_address,
nullable<std::uint32_t>::null(),
nullable<std::uint32_t>::null(),
frame_info.object_path,
"",
false
},
{}
};
}
stacktrace_frame frame = null_frame; stacktrace_frame frame = null_frame;
frame.filename = frame_info.object_path; frame.filename = frame_info.object_path;
frame.address = frame_info.raw_address; frame.address = frame_info.raw_address;
@ -873,55 +898,249 @@ namespace libdwarf {
} }
}; };
// Resolver that performs no symbol lookup: every frame is returned with only its raw address and object path
// filled in. target_object installs one as a placeholder before attempting to construct a dwarf_resolver.
class null_resolver : public symbol_resolver {
public:
    null_resolver() = default;
    // accepts and ignores an object path
    null_resolver(const std::string&) {}

    // Returns a frame carrying frame_info's raw address and object path, both nullable<std::uint32_t> slots
    // null, an empty symbol string, and no inlines.
    CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
    frame_with_inlines resolve_frame(const object_frame& frame_info) override {
        return {
            {
                frame_info.raw_address,
                nullable<std::uint32_t>::null(),
                nullable<std::uint32_t>::null(),
                frame_info.object_path,
                "",
                false
            },
            {}
        };
    }
};
#if IS_APPLE
// One object file referenced from a debug map. Its symbol table and its resolver are both created lazily, on
// the first call that needs them.
struct target_object {
    std::string object_path;
    bool path_ok = true;
    // symbol name -> address within the object file, filled in by get_symbols
    optional<std::unordered_map<std::string, uint64_t>> symbols;
    std::unique_ptr<symbol_resolver> resolver;

    // Deliberately left implicit: debug_map_resolver builds these with a braced initializer in push_back.
    // The by-value parameter is moved into the member rather than copied a second time.
    target_object(std::string object_path) : object_path(std::move(object_path)) {}

    // Returns the resolver for this object file, creating it on first use.
    std::unique_ptr<symbol_resolver>& get_resolver() {
        if(!resolver) {
            // this seems silly but it's an attempt to not repeatedly try to initialize new dwarf_resolvers if
            // exceptions are thrown, e.g. if the path doesn't exist
            resolver = std::unique_ptr<null_resolver>(new null_resolver);
            resolver = std::unique_ptr<dwarf_resolver>(new dwarf_resolver(object_path));
        }
        return resolver;
    }

    // Returns the name -> address table of this object file, reading it from the mach-o on first use.
    std::unordered_map<std::string, uint64_t>& get_symbols() {
        if(!symbols) {
            // this is an attempt to not repeatedly try to reprocess mach-o files if exceptions are thrown, e.g. if
            // the path doesn't exist: an empty table is stored before the file is touched
            std::unordered_map<std::string, uint64_t> loaded;
            symbols = loaded;
            auto symbol_table = mach_o(object_path).symbol_table();
            for(const auto& symbol : symbol_table) {
                loaded[symbol.name] = symbol.address;
            }
            symbols = std::move(loaded);
        }
        return symbols.unwrap();
    }

    // Resolves a frame that lies `offset` bytes into the symbol `symbol_name`.
    // When the symbol exists in this object file, the frame's object_address is rewritten to the symbol's
    // address here plus `offset` and handed to the resolver. Otherwise a frame carrying only the raw address,
    // object path and symbol name is returned.
    CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
    frame_with_inlines resolve_frame(
        const object_frame& frame_info,
        const std::string& symbol_name,
        std::size_t offset
    ) {
        const auto& symbol_table = get_symbols();
        auto it = symbol_table.find(symbol_name);
        if(it != symbol_table.end()) {
            auto frame = frame_info;
            frame.object_address = it->second + offset;
            return get_resolver()->resolve_frame(frame);
        } else {
            return {
                {
                    frame_info.raw_address,
                    nullable<std::uint32_t>::null(),
                    nullable<std::uint32_t>::null(),
                    frame_info.object_path,
                    symbol_name,
                    false
                },
                {}
            };
        }
    }
};
// One symbol taken from a binary's debug map, tagged with the object file it came from.
// NOTE: instances are created with positional brace-initialization, so the member order matters.
struct debug_map_symbol_info {
// Address of the symbol in the source binary; debug_map_resolver sorts and searches on this field
uint64_t source_address;
// Size of the symbol; used together with source_address to check that an address lies within the symbol
uint64_t size;
// Symbol name; used to look the symbol up in the object file it came from
std::string name;
// Initialized to null by debug_map_resolver. NOTE(review): presumably the sentinel remark refers to how
// nullable<T> represents null - confirm.
nullable<uint64_t> target_address; // T(-1) is used as a sentinel
// Index into debug_map_resolver's list of target objects
std::size_t object_index;
};
class debug_map_resolver : public symbol_resolver {
std::vector<target_object> target_objects;
std::vector<debug_map_symbol_info> symbols;
public:
debug_map_resolver(const std::string& source_object_path) {
// load mach-o
// TODO: Cache somehow?
mach_o source_mach(source_object_path);
auto source_debug_map = source_mach.get_debug_map();
// get symbol entries from debug map, as well as the various object files used to make this binary
for(auto& entry : source_debug_map) {
// object it came from
target_objects.push_back({std::move(entry.first)});
// push the symbols
auto& map_entry_symbols = entry.second;
symbols.reserve(symbols.size() + map_entry_symbols.size());
for(auto& symbol : map_entry_symbols) {
symbols.push_back({
symbol.source_address,
symbol.size,
std::move(symbol.name),
nullable<uint64_t>::null(),
target_objects.size() - 1
});
}
}
// sort for binary lookup later
std::sort(
symbols.begin(),
symbols.end(),
[] (
const debug_map_symbol_info& a,
const debug_map_symbol_info& b
) {
return a.source_address < b.source_address;
}
);
}
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
frame_with_inlines resolve_frame(const object_frame& frame_info) override {
// resolve object frame:
// find the symbol in this executable corresponding to the object address
// resolve the symbol in the object it came from, based on the symbol name
auto closest_symbol_it = first_less_than_or_equal(
symbols.begin(),
symbols.end(),
frame_info.object_address,
[] (
Dwarf_Addr pc,
const debug_map_symbol_info& symbol
) {
return pc < symbol.source_address;
}
);
if(closest_symbol_it != symbols.end()) {
if(frame_info.object_address <= closest_symbol_it->source_address + closest_symbol_it->size) {
return target_objects[closest_symbol_it->object_index].resolve_frame(
{
frame_info.raw_address,
// the resolver doesn't care about the object address here, only the offset from the start
// of the symbol and it'll lookup the symbol's base-address
0,
frame_info.object_path
},
closest_symbol_it->name,
frame_info.object_address - closest_symbol_it->source_address
);
}
}
// There was either no closest symbol or the closest symbol didn't end up containing the address we're
// looking for, so just return a blank frame
return {
{
frame_info.raw_address,
nullable<std::uint32_t>::null(),
nullable<std::uint32_t>::null(),
frame_info.object_path,
"",
false
},
{}
};
};
};
#endif
// Creates the resolver to use for the object at object_path.
// On Apple platforms a debug_map_resolver is used when no "<object_path>.dSYM" directory exists; in every
// other case a dwarf_resolver is used.
std::unique_ptr<symbol_resolver> get_resolver_for_object(const std::string& object_path) {
    #if IS_APPLE
    // No dSYM next to the object: fall back to the debug map
    const std::string dsym_path = object_path + ".dSYM";
    if(!directory_exists(dsym_path)) {
        return std::unique_ptr<symbol_resolver>(new debug_map_resolver(object_path));
    }
    #endif
    return std::unique_ptr<symbol_resolver>(new dwarf_resolver(object_path));
}
CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING CPPTRACE_FORCE_NO_INLINE_FOR_PROFILING
std::vector<stacktrace_frame> resolve_frames(const std::vector<object_frame>& frames) { std::vector<stacktrace_frame> resolve_frames(const std::vector<object_frame>& frames) {
std::vector<frame_with_inlines> trace(frames.size(), {null_frame, {}}); std::vector<frame_with_inlines> trace(frames.size(), {null_frame, {}});
static std::mutex mutex; static std::mutex mutex;
// cache resolvers since objects are likely to be traced more than once // cache resolvers since objects are likely to be traced more than once
static std::unordered_map<std::string, dwarf_resolver> resolver_map; static std::unordered_map<std::string, std::unique_ptr<symbol_resolver>> resolver_map;
// Locking around all libdwarf interaction per https://github.com/davea42/libdwarf-code/discussions/184 // Locking around all libdwarf interaction per https://github.com/davea42/libdwarf-code/discussions/184
// And also interactions with the above static map
const std::lock_guard<std::mutex> lock(mutex); const std::lock_guard<std::mutex> lock(mutex);
for(const auto& object_entry : collate_frames(frames, trace)) { for(const auto& object_entry : collate_frames(frames, trace)) {
try { try {
const auto& object_name = object_entry.first; const auto& object_name = object_entry.first;
optional<dwarf_resolver> resolver_object = nullopt; std::unique_ptr<symbol_resolver> resolver_object;
dwarf_resolver* resolver = nullptr; symbol_resolver* resolver = nullptr;
auto it = resolver_map.find(object_name); auto it = resolver_map.find(object_name);
if(it != resolver_map.end()) { if(it != resolver_map.end()) {
resolver = &it->second; resolver = it->second.get();
} else { } else {
resolver_object = dwarf_resolver(object_name); resolver_object = get_resolver_for_object(object_name);
resolver = &resolver_object.unwrap(); resolver = resolver_object.get();
} }
// If there's no debug information it'll mark itself as not ok // If there's no debug information it'll mark itself as not ok
if(resolver->ok) { for(const auto& entry : object_entry.second) {
for(const auto& entry : object_entry.second) { try {
try {
const auto& dlframe = entry.first.get();
auto& frame = entry.second.get();
frame = resolver->resolve_frame(dlframe);
} catch(...) {
if(!should_absorb_trace_exceptions()) {
throw;
}
}
}
} else {
// at least copy the addresses
for(const auto& entry : object_entry.second) {
const auto& dlframe = entry.first.get(); const auto& dlframe = entry.first.get();
auto& frame = entry.second.get(); auto& frame = entry.second.get();
frame.frame.address = dlframe.raw_address; frame = resolver->resolve_frame(dlframe);
} catch(...) {
if(!should_absorb_trace_exceptions()) {
throw;
}
} }
} }
if(resolver_object.has_value() && get_cache_mode() == cache_mode::prioritize_speed) { if(resolver_object && get_cache_mode() == cache_mode::prioritize_speed) {
// .emplace needed, for some reason .insert tries to copy <= gcc 7.2 // .emplace needed, for some reason .insert tries to copy <= gcc 7.2
resolver_map.emplace(object_name, std::move(resolver_object).unwrap()); resolver_map.emplace(object_name, std::move(resolver_object));
} }
} catch(...) { // NOSONAR } catch(...) { // NOSONAR
if(!should_absorb_trace_exceptions()) { if(!should_absorb_trace_exceptions()) {
throw; throw;
} }
for(const auto& entry : object_entry.second) {
const auto& dlframe = entry.first.get();
auto& frame = entry.second.get();
frame = {
{
dlframe.raw_address,
nullable<std::uint32_t>::null(),
nullable<std::uint32_t>::null(),
dlframe.object_path,
"",
false
},
{}
};
}
} }
} }
// flatten trace with inlines // flatten trace with inlines

View File

@ -1,6 +1,7 @@
#ifndef UTILS_HPP #ifndef UTILS_HPP
#define UTILS_HPP #define UTILS_HPP
#include <algorithm>
#include <cstdint> #include <cstdint>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
@ -72,6 +73,28 @@ namespace detail {
return str; return str;
} }
// Finds the last element of the sorted range [begin, end) that is <= value, i.e. the element immediately
// before the first one that is > value. Returns end when no element is <= value (including an empty range).
// Elements are compared with operator<. Despite the parameter name the iterator must support decrement.
template<typename ForwardIt, typename T>
ForwardIt first_less_than_or_equal(ForwardIt begin, ForwardIt end, const T& value) {
    // upper_bound yields the first element > value
    ForwardIt candidate = std::upper_bound(begin, end, value);
    if(candidate == begin) {
        // nothing precedes it, so nothing in the range is <= value
        return end;
    }
    --candidate;
    return candidate;
}
// Finds the last element of the sorted range [begin, end) that is <= value, i.e. the element immediately
// before the first one that is > value. Returns end when no element is <= value (including an empty range).
// compare is forwarded to std::upper_bound, so it is called as compare(value, element) and must return true
// when value orders before element. Despite the parameter name the iterator must support decrement.
template<typename ForwardIt, typename T, typename Compare>
ForwardIt first_less_than_or_equal(ForwardIt begin, ForwardIt end, const T& value, Compare compare) {
    // upper_bound yields the first element > value
    ForwardIt candidate = std::upper_bound(begin, end, value, compare);
    if(candidate == begin) {
        // nothing precedes it, so nothing in the range is <= value
        return end;
    }
    --candidate;
    return candidate;
}
constexpr const char* const whitespace = " \t\n\r\f\v"; constexpr const char* const whitespace = " \t\n\r\f\v";
inline std::string trim(const std::string& str) { inline std::string trim(const std::string& str) {