#ifndef MACHO_HPP #define MACHO_HPP #include "../utils/common.hpp" #include "../utils/utils.hpp" #if IS_APPLE // A number of mach-o functions are deprecated as of macos 13 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace cpptrace { namespace detail { inline bool is_mach_o(std::uint32_t magic) { switch(magic) { case FAT_MAGIC: case FAT_CIGAM: case MH_MAGIC: case MH_CIGAM: case MH_MAGIC_64: case MH_CIGAM_64: return true; default: return false; } } inline bool file_is_mach_o(const std::string& object_path) noexcept { try { FILE* file = std::fopen(object_path.c_str(), "rb"); if(file == nullptr) { return false; } auto magic = load_bytes(file, 0); return is_mach_o(magic); } catch(...) { return false; } } inline bool is_fat_magic(std::uint32_t magic) { return magic == FAT_MAGIC || magic == FAT_CIGAM; } // Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c // and https://lowlevelbits.org/parsing-mach-o-files/ inline bool is_magic_64(std::uint32_t magic) { return magic == MH_MAGIC_64 || magic == MH_CIGAM_64; } inline bool should_swap_bytes(std::uint32_t magic) { return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM; } inline void swap_mach_header(mach_header_64& header) { swap_mach_header_64(&header, NX_UnknownByteOrder); } inline void swap_mach_header(mach_header& header) { swap_mach_header(&header, NX_UnknownByteOrder); } inline void swap_segment_command(segment_command_64& segment) { swap_segment_command_64(&segment, NX_UnknownByteOrder); } inline void swap_segment_command(segment_command& segment) { swap_segment_command(&segment, NX_UnknownByteOrder); } inline void swap_nlist(struct nlist& entry) { swap_nlist(&entry, 1, NX_UnknownByteOrder); } inline void swap_nlist(struct nlist_64& entry) { swap_nlist_64(&entry, 1, NX_UnknownByteOrder); } #ifdef __LP64__ #define LP(x) x##_64 #else #define LP(x) x #endif struct load_command_entry { std::uint32_t file_offset; std::uint32_t cmd; std::uint32_t cmdsize; }; class mach_o { std::FILE* file = nullptr; std::string object_path; std::uint32_t magic; cpu_type_t cputype; cpu_subtype_t cpusubtype; std::uint32_t filetype; std::uint32_t n_load_commands; std::uint32_t sizeof_load_commands; std::uint32_t flags; std::size_t bits = 0; // 32 or 64 once load_mach is called std::size_t load_base = 0; std::size_t fat_index = std::numeric_limits::max(); std::vector load_commands; struct symtab_info_data { symtab_command symtab; std::unique_ptr stringtab; const char* get_string(std::size_t index) const { if(stringtab && index < symtab.strsize) { return stringtab.get() + index; } else { throw std::runtime_error("can't retrieve symbol from symtab"); } } }; bool tried_to_load_symtab = false; optional symtab_info; public: mach_o(const std::string& object_path) : object_path(object_path) { file = std::fopen(object_path.c_str(), "rb"); if(file == nullptr) { throw file_error("Unable to read object file " + object_path); } magic = load_bytes(file, 0); VERIFY(is_mach_o(magic), "File is not Mach-O " + object_path); if(magic == FAT_MAGIC || magic == FAT_CIGAM) { load_fat_mach(); } else { fat_index = 0; if(is_magic_64(magic)) { load_mach<64>(false); } else { load_mach<32>(false); } } } ~mach_o() { if(file) { std::fclose(file); } } std::uintptr_t get_text_vmaddr() { for(const auto& command : load_commands) { if(command.cmd == LC_SEGMENT_64 || command.cmd == LC_SEGMENT) { auto segment = command.cmd == LC_SEGMENT_64 ? load_segment_command<64>(command.file_offset) : load_segment_command<32>(command.file_offset); if(std::strcmp(segment.segname, "__TEXT") == 0) { return segment.vmaddr; } } } // somehow no __TEXT section was found... PANIC("Couldn't find __TEXT section while parsing Mach-O object"); return 0; } std::size_t get_fat_index() const { VERIFY(fat_index != std::numeric_limits::max()); return fat_index; } void print_segments() const { int i = 0; for(const auto& command : load_commands) { if(command.cmd == LC_SEGMENT_64 || command.cmd == LC_SEGMENT) { auto segment = command.cmd == LC_SEGMENT_64 ? load_segment_command<64>(command.file_offset) : load_segment_command<32>(command.file_offset); fprintf(stderr, "Load command %d\n", i); fprintf(stderr, " cmd %u\n", segment.cmd); fprintf(stderr, " cmdsize %u\n", segment.cmdsize); fprintf(stderr, " segname %s\n", segment.segname); fprintf(stderr, " vmaddr 0x%llx\n", segment.vmaddr); fprintf(stderr, " vmsize 0x%llx\n", segment.vmsize); fprintf(stderr, " off 0x%llx\n", segment.fileoff); fprintf(stderr, " filesize %llu\n", segment.filesize); fprintf(stderr, " nsects %u\n", segment.nsects); } i++; } } optional& get_symtab_info() { if(!symtab_info.has_value() && !tried_to_load_symtab) { // don't try to load the symtab again if for some reason loading here fails tried_to_load_symtab = true; for(const auto& command : load_commands) { if(command.cmd == LC_SYMTAB) { symtab_info_data info; info.symtab = load_symbol_table_command(command.file_offset); info.stringtab = load_string_table(info.symtab.stroff, info.symtab.strsize); symtab_info = std::move(info); break; } } } return symtab_info; } void print_symbol_table_entry( const nlist_64& entry, const std::unique_ptr& stringtab, std::size_t stringsize, std::size_t j ) const { const char* type = ""; if(entry.n_type & N_STAB) { switch(entry.n_type) { case N_SO: type = "N_SO"; break; case N_OSO: type = "N_OSO"; break; case N_BNSYM: type = "N_BNSYM"; break; case N_ENSYM: type = "N_ENSYM"; break; case N_FUN: type = "N_FUN"; break; } } else if((entry.n_type & N_TYPE) == N_SECT) { type = "N_SECT"; } fprintf( stderr, "%5llu %8llx %2llx %7s %2llu %4llx %16llx %s\n", to_ull(j), to_ull(entry.n_un.n_strx), to_ull(entry.n_type), type, to_ull(entry.n_sect), to_ull(entry.n_desc), to_ull(entry.n_value), stringtab == nullptr ? "Stringtab error" : entry.n_un.n_strx < stringsize ? stringtab.get() + entry.n_un.n_strx : "String index out of bounds" ); } void print_symbol_table() { int i = 0; for(const auto& command : load_commands) { if(command.cmd == LC_SYMTAB) { auto symtab = load_symbol_table_command(command.file_offset); fprintf(stderr, "Load command %d\n", i); fprintf(stderr, " cmd %llu\n", to_ull(symtab.cmd)); fprintf(stderr, " cmdsize %llu\n", to_ull(symtab.cmdsize)); fprintf(stderr, " symoff 0x%llu\n", to_ull(symtab.symoff)); fprintf(stderr, " nsyms %llu\n", to_ull(symtab.nsyms)); fprintf(stderr, " stroff 0x%llu\n", to_ull(symtab.stroff)); fprintf(stderr, " strsize %llu\n", to_ull(symtab.strsize)); auto stringtab = load_string_table(symtab.stroff, symtab.strsize); for(std::size_t j = 0; j < symtab.nsyms; j++) { nlist_64 entry = bits == 32 ? load_symtab_entry<32>(symtab.symoff, j) : load_symtab_entry<64>(symtab.symoff, j); print_symbol_table_entry(entry, stringtab, symtab.strsize, j); } } i++; } } struct debug_map_entry { uint64_t source_address; uint64_t size; std::string name; }; struct symbol_entry { uint64_t address; std::string name; }; // map from object file to a vector of symbols to resolve using debug_map = std::unordered_map>; // produce information similar to dsymutil -dump-debug-map debug_map get_debug_map() { // we have a bunch of symbols in our binary we need to pair up with symbols from various .o files // first collect symbols and the objects they come from debug_map debug_map; const auto& symtab_info = get_symtab_info().unwrap(); const auto& symtab = symtab_info.symtab; // TODO: Take timestamp into account? std::string current_module; optional current_function; for(std::size_t j = 0; j < symtab.nsyms; j++) { nlist_64 entry = bits == 32 ? load_symtab_entry<32>(symtab.symoff, j) : load_symtab_entry<64>(symtab.symoff, j); // entry.n_type & N_STAB indicates symbolic debug info if(!(entry.n_type & N_STAB)) { continue; } switch(entry.n_type) { case N_SO: // pass - these encode path and filename for the module, if applicable break; case N_OSO: // sets the module current_module = symtab_info.get_string(entry.n_un.n_strx); break; case N_BNSYM: break; // pass case N_ENSYM: break; // pass case N_FUN: { const char* str = symtab_info.get_string(entry.n_un.n_strx); if(str[0] == 0) { // end of function scope if(!current_function) { /**/ } current_function.unwrap().size = entry.n_value; debug_map[current_module].push_back(std::move(current_function).unwrap()); } else { current_function = debug_map_entry{}; current_function.unwrap().source_address = entry.n_value; current_function.unwrap().name = str; } } break; } } return debug_map; } std::vector symbol_table() { // we have a bunch of symbols in our binary we need to pair up with symbols from various .o files // first collect symbols and the objects they come from std::vector symbols; const auto& symtab_info = get_symtab_info().unwrap(); const auto& symtab = symtab_info.symtab; // TODO: Take timestamp into account? for(std::size_t j = 0; j < symtab.nsyms; j++) { nlist_64 entry = bits == 32 ? load_symtab_entry<32>(symtab.symoff, j) : load_symtab_entry<64>(symtab.symoff, j); if(entry.n_type & N_STAB) { continue; } if((entry.n_type & N_TYPE) == N_SECT) { symbols.push_back({ entry.n_value, symtab_info.get_string(entry.n_un.n_strx) }); } } return symbols; } // produce information similar to dsymutil -dump-debug-map static void print_debug_map(const debug_map& debug_map) { for(const auto& entry : debug_map) { std::cout< void load_mach( bool allow_arch_mismatch ) { static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument"); bits = Bits; using Mach_Header = typename std::conditional::type; std::size_t header_size = sizeof(Mach_Header); Mach_Header header = load_bytes(file, load_base); magic = header.magic; if(should_swap()) { swap_mach_header(header); } thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader(); if( header.cputype != mhp->cputype || static_cast(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) != header.cpusubtype ) { if(allow_arch_mismatch) { return; } else { PANIC("Mach-O file cpu type and subtype do not match current machine " + object_path); } } cputype = header.cputype; cpusubtype = header.cpusubtype; filetype = header.filetype; n_load_commands = header.ncmds; sizeof_load_commands = header.sizeofcmds; flags = header.flags; // handle load commands std::uint32_t ncmds = header.ncmds; std::uint32_t load_commands_offset = load_base + header_size; // iterate load commands std::uint32_t actual_offset = load_commands_offset; for(std::uint32_t i = 0; i < ncmds; i++) { load_command cmd = load_bytes(file, actual_offset); if(should_swap()) { swap_load_command(&cmd, NX_UnknownByteOrder); } load_commands.push_back({ actual_offset, cmd.cmd, cmd.cmdsize }); actual_offset += cmd.cmdsize; } } void load_fat_mach() { std::size_t header_size = sizeof(fat_header); std::size_t arch_size = sizeof(fat_arch); fat_header header = load_bytes(file, 0); if(should_swap()) { swap_fat_header(&header, NX_UnknownByteOrder); } thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader(); off_t arch_offset = (off_t)header_size; for(std::size_t i = 0; i < header.nfat_arch; i++) { fat_arch arch = load_bytes(file, arch_offset); if(should_swap()) { swap_fat_arch(&arch, 1, NX_UnknownByteOrder); } off_t mach_header_offset = (off_t)arch.offset; arch_offset += arch_size; std::uint32_t magic = load_bytes(file, mach_header_offset); if( arch.cputype == mhp->cputype && static_cast(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) == arch.cpusubtype ) { load_base = mach_header_offset; fat_index = i; if(is_magic_64(magic)) { load_mach<64>(true); } else { load_mach<32>(true); } return; } } // If this is reached... something went wrong. The cpu we're on wasn't found. PANIC("Couldn't find appropriate architecture in fat Mach-O"); } template segment_command_64 load_segment_command(std::uint32_t offset) const { using Segment_Command = typename std::conditional::type; Segment_Command segment = load_bytes(file, offset); ASSERT(segment.cmd == LC_SEGMENT_64 || segment.cmd == LC_SEGMENT); if(should_swap()) { swap_segment_command(segment); } // fields match just u64 instead of u32 segment_command_64 common; common.cmd = segment.cmd; common.cmdsize = segment.cmdsize; static_assert(sizeof common.segname == 16 && sizeof segment.segname == 16, "xx"); memcpy(common.segname, segment.segname, 16); common.vmaddr = segment.vmaddr; common.vmsize = segment.vmsize; common.fileoff = segment.fileoff; common.filesize = segment.filesize; common.maxprot = segment.maxprot; common.initprot = segment.initprot; common.nsects = segment.nsects; common.flags = segment.flags; return common; } symtab_command load_symbol_table_command(std::uint32_t offset) const { symtab_command symtab = load_bytes(file, offset); ASSERT(symtab.cmd == LC_SYMTAB); if(should_swap()) { swap_symtab_command(&symtab, NX_UnknownByteOrder); } return symtab; } template nlist_64 load_symtab_entry(std::uint32_t symbol_base, std::size_t index) const { using Nlist = typename std::conditional::type; uint32_t offset = load_base + symbol_base + index * sizeof(Nlist); Nlist entry = load_bytes(file, offset); if(should_swap()) { swap_nlist(entry); } // fields match just u64 instead of u32 nlist_64 common; common.n_un.n_strx = entry.n_un.n_strx; common.n_type = entry.n_type; common.n_sect = entry.n_sect; common.n_desc = entry.n_desc; common.n_value = entry.n_value; return common; } std::unique_ptr load_string_table(std::uint32_t offset, std::uint32_t byte_count) const { std::unique_ptr buffer(new char[byte_count + 1]); VERIFY(std::fseek(file, load_base + offset, SEEK_SET) == 0, "fseek error"); VERIFY(std::fread(buffer.get(), sizeof(char), byte_count, file) == byte_count, "fread error"); buffer[byte_count] = 0; // just out of an abundance of caution return buffer; } bool should_swap() const { return should_swap_bytes(magic); } }; inline bool macho_is_fat(const std::string& object_path) { auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter); if(file == nullptr) { throw file_error("Unable to read object file " + object_path); } std::uint32_t magic = load_bytes(file, 0); return is_fat_magic(magic); } } } #pragma GCC diagnostic pop #endif #endif