Mach-o refactoring (#77)

This is the first step towards a more comprehensive Mach-O system. The next
step will be to add support for symbol table parsing.
This commit is contained in:
Jeremy Rifkin 2024-01-14 23:36:30 -06:00 committed by GitHub
parent 7f6e91e0ff
commit a654f2082e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 192 additions and 156 deletions

View File

@ -73,134 +73,205 @@ namespace detail {
#define LP(x) x #define LP(x) x
#endif #endif
template<std::size_t Bits> struct load_command_entry {
static optional<std::uintptr_t> macho_get_text_vmaddr_mach( std::uint32_t file_offset;
std::FILE* object_file, std::uint32_t cmd;
const std::string& object_path, std::uint32_t cmdsize;
off_t offset, };
bool should_swap,
bool allow_arch_mismatch class mach_o {
) { std::FILE* file = nullptr;
static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument"); std::string object_path;
using Mach_Header = typename std::conditional<Bits == 32, mach_header, mach_header_64>::type; std::uint32_t magic;
using Segment_Command = typename std::conditional<Bits == 32, segment_command, segment_command_64>::type; cpu_type_t cputype;
std::uint32_t ncmds; cpu_subtype_t cpusubtype;
off_t load_commands_offset = offset; std::uint32_t filetype;
std::size_t header_size = sizeof(Mach_Header); std::uint32_t n_load_commands;
Mach_Header header = load_bytes<Mach_Header>(object_file, offset); std::uint32_t sizeof_load_commands;
if(should_swap) { std::uint32_t flags;
swap_mach_header(header);
std::size_t load_base = 0;
std::size_t fat_index = std::numeric_limits<std::size_t>::max();
std::vector<load_command_entry> load_commands;
public:
mach_o(const std::string& object_path) : object_path(object_path) {
file = std::fopen(object_path.c_str(), "rb");
if(file == nullptr) {
throw file_error("Unable to read object file " + object_path);
} }
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader(); magic = load_bytes<std::uint32_t>(file, 0);
//std::fprintf( VERIFY(is_mach_o(magic), "File is not Mach-O " + object_path);
// stderr, if(magic == FAT_MAGIC || magic == FAT_CIGAM) {
// "----> %d %d; %d %d\n", load_fat_mach();
// header.cputype,
// mhp->cputype,
// static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK),
// header.cpusubtype
//);
if(
header.cputype != mhp->cputype ||
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) != header.cpusubtype
) {
if(allow_arch_mismatch) {
return nullopt;
} else { } else {
PANIC("Mach-O file cpu type and subtype do not match current machine " + object_path); fat_index = 0;
if(is_magic_64(magic)) {
load_mach<64>(false);
} else {
load_mach<32>(false);
} }
} }
ncmds = header.ncmds;
load_commands_offset += header_size;
// iterate load commands
off_t actual_offset = load_commands_offset;
for(std::uint32_t i = 0; i < ncmds; i++) {
load_command cmd = load_bytes<load_command>(object_file, actual_offset);
if(should_swap) {
swap_load_command(&cmd, NX_UnknownByteOrder);
} }
// TODO: This is a mistake? Need to check cmd.cmd == LC_SEGMENT_64 / cmd.cmd == LC_SEGMENT
Segment_Command segment = load_bytes<Segment_Command>(object_file, actual_offset); ~mach_o() {
if(should_swap) { if(file) {
swap_segment_command(segment); fclose(file);
} }
}
std::uintptr_t get_text_vmaddr() {
for(const auto& command : load_commands) {
if(command.cmd == LC_SEGMENT_64 || command.cmd == LC_SEGMENT) {
auto segment = command.cmd == LC_SEGMENT_64
? load_segment_command<64>(command.file_offset)
: load_segment_command<32>(command.file_offset);
if(std::strcmp(segment.segname, "__TEXT") == 0) { if(std::strcmp(segment.segname, "__TEXT") == 0) {
return segment.vmaddr; return segment.vmaddr;
} }
actual_offset += cmd.cmdsize; }
} }
// somehow no __TEXT section was found... // somehow no __TEXT section was found...
PANIC("Couldn't find __TEXT section while parsing Mach-O object"); PANIC("Couldn't find __TEXT section while parsing Mach-O object");
return 0; return 0;
} }
static std::uintptr_t macho_get_text_vmaddr_fat( std::size_t get_fat_index() const {
std::FILE* object_file, VERIFY(fat_index != std::numeric_limits<std::size_t>::max());
const std::string& object_path, return fat_index;
bool should_swap }
// Debug helper: writes every LC_SEGMENT / LC_SEGMENT_64 load command to stderr.
// The number printed after "Load command" is the entry's position among all load commands,
// not just among the segment commands.
void print_segments() const {
    for(std::size_t index = 0; index < load_commands.size(); index++) {
        const auto& entry = load_commands[index];
        const bool is_segment_64 = entry.cmd == LC_SEGMENT_64;
        if(!is_segment_64 && entry.cmd != LC_SEGMENT) {
            continue;
        }
        // load_segment_command returns a segment_command_64 for either width, so the
        // 64-bit format specifiers below are valid for both
        const auto segment = is_segment_64
            ? load_segment_command<64>(entry.file_offset)
            : load_segment_command<32>(entry.file_offset);
        fprintf(stderr, "Load command %d\n", static_cast<int>(index));
        fprintf(stderr, " cmd %u\n", segment.cmd);
        fprintf(stderr, " cmdsize %u\n", segment.cmdsize);
        // segname is a fixed-size char array that is not guaranteed to be NUL-terminated,
        // so bound the read to the array size instead of relying on a terminator
        fprintf(stderr, " segname %.*s\n", static_cast<int>(sizeof segment.segname), segment.segname);
        fprintf(stderr, " vmaddr 0x%llx\n", segment.vmaddr);
        fprintf(stderr, " vmsize 0x%llx\n", segment.vmsize);
        fprintf(stderr, " off 0x%llx\n", segment.fileoff);
        fprintf(stderr, " filesize %llu\n", segment.filesize);
        fprintf(stderr, " nsects %u\n", segment.nsects);
    }
}
private:
// Reads the Mach-O header of the given width (Bits is 32 or 64) located at load_base in the
// file, copies its fields into this object, and records one { file offset, cmd, cmdsize }
// entry per load command in load_commands.
//
// allow_arch_mismatch: when the header's cputype / cpusubtype differ from the header returned
// by _NSGetMachExecuteHeader(), true makes this function return early and false makes it PANIC.
// On the early return only `magic` has been updated; the remaining header fields and
// load_commands are not populated.
template<std::size_t Bits>
void load_mach(
bool allow_arch_mismatch
// NOTE(review): the next line carries a second `) {` -- this looks like residue from the
// removed side of the diff view rather than real code; confirm against the actual file
) { ) {
static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument");
using Mach_Header = typename std::conditional<Bits == 32, mach_header, mach_header_64>::type;
std::size_t header_size = sizeof(Mach_Header);
Mach_Header header = load_bytes<Mach_Header>(file, load_base);
// magic is taken from the header as read, before any swapping; should_swap() consults this
// member, so it must be assigned before the should_swap() call below
magic = header.magic;
if(should_swap()) {
swap_mach_header(header);
}
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader();
// the subtype comparison masks CPU_SUBTYPE_MASK off mhp's subtype only, not the file's
if(
header.cputype != mhp->cputype ||
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) != header.cpusubtype
) {
if(allow_arch_mismatch) {
return;
} else {
PANIC("Mach-O file cpu type and subtype do not match current machine " + object_path);
}
}
cputype = header.cputype;
cpusubtype = header.cpusubtype;
filetype = header.filetype;
n_load_commands = header.ncmds;
sizeof_load_commands = header.sizeofcmds;
flags = header.flags;
// handle load commands
std::uint32_t ncmds = header.ncmds;
// load commands start immediately after the header
// NOTE(review): load_base and header_size are std::size_t but the sum is stored in a
// std::uint32_t -- confirm offsets can never exceed 32 bits
std::uint32_t load_commands_offset = load_base + header_size;
// iterate load commands
std::uint32_t actual_offset = load_commands_offset;
for(std::uint32_t i = 0; i < ncmds; i++) {
// only the generic load_command prefix (cmd, cmdsize) is read here; the full command is
// re-read later from the recorded offset when needed
load_command cmd = load_bytes<load_command>(file, actual_offset);
if(should_swap()) {
swap_load_command(&cmd, NX_UnknownByteOrder);
}
load_commands.push_back({ actual_offset, cmd.cmd, cmd.cmdsize });
// NOTE(review): cmdsize is not validated here; a zero value would leave the offset unchanged
actual_offset += cmd.cmdsize;
}
}
void load_fat_mach() {
std::size_t header_size = sizeof(fat_header); std::size_t header_size = sizeof(fat_header);
std::size_t arch_size = sizeof(fat_arch); std::size_t arch_size = sizeof(fat_arch);
fat_header header = load_bytes<fat_header>(object_file, 0); fat_header header = load_bytes<fat_header>(file, 0);
if(should_swap) { if(should_swap()) {
swap_fat_header(&header, NX_UnknownByteOrder); swap_fat_header(&header, NX_UnknownByteOrder);
} }
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader();
off_t arch_offset = (off_t)header_size; off_t arch_offset = (off_t)header_size;
optional<std::uintptr_t> text_vmaddr; for(std::size_t i = 0; i < header.nfat_arch; i++) {
for(std::uint32_t i = 0; i < header.nfat_arch; i++) { fat_arch arch = load_bytes<fat_arch>(file, arch_offset);
fat_arch arch = load_bytes<fat_arch>(object_file, arch_offset); if(should_swap()) {
if(should_swap) {
swap_fat_arch(&arch, 1, NX_UnknownByteOrder); swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
} }
off_t mach_header_offset = (off_t)arch.offset; off_t mach_header_offset = (off_t)arch.offset;
arch_offset += arch_size; arch_offset += arch_size;
std::uint32_t magic = load_bytes<std::uint32_t>(object_file, mach_header_offset); std::uint32_t magic = load_bytes<std::uint32_t>(file, mach_header_offset);
if(
arch.cputype == mhp->cputype &&
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) == arch.cpusubtype
) {
load_base = mach_header_offset;
fat_index = i;
if(is_magic_64(magic)) { if(is_magic_64(magic)) {
text_vmaddr = macho_get_text_vmaddr_mach<64>( load_mach<64>(true);
object_file,
object_path,
mach_header_offset,
should_swap_bytes(magic),
true
);
} else { } else {
text_vmaddr = macho_get_text_vmaddr_mach<32>( load_mach<32>(true);
object_file,
object_path,
mach_header_offset,
should_swap_bytes(magic),
true
);
} }
if(text_vmaddr.has_value()) { return;
return text_vmaddr.unwrap();
} }
} }
// If this is reached... something went wrong. The cpu we're on wasn't found. // If this is reached... something went wrong. The cpu we're on wasn't found.
PANIC("Couldn't find appropriate architecture in fat Mach-O"); PANIC("Couldn't find appropriate architecture in fat Mach-O");
return 0;
} }
static std::uintptr_t macho_get_text_vmaddr(const std::string& object_path) { template<std::size_t Bits>
//std::fprintf(stderr, "--%s--\n", object_path.c_str()); segment_command_64 load_segment_command(std::uint32_t offset) const {
auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter); using Segment_Command = typename std::conditional<Bits == 32, segment_command, segment_command_64>::type;
if(file == nullptr) { Segment_Command segment = load_bytes<Segment_Command>(file, offset);
throw file_error("Unable to read object file " + object_path); ASSERT(segment.cmd == LC_SEGMENT_64 || segment.cmd == LC_SEGMENT);
} if(should_swap()) {
std::uint32_t magic = load_bytes<std::uint32_t>(file, 0); swap_segment_command(segment);
VERIFY(is_mach_o(magic), "File is not Mach-O " + object_path);
bool is_64 = is_magic_64(magic);
bool should_swap = should_swap_bytes(magic);
if(magic == FAT_MAGIC || magic == FAT_CIGAM) {
return macho_get_text_vmaddr_fat(file, object_path, should_swap);
} else {
if(is_64) {
return macho_get_text_vmaddr_mach<64>(file, object_path, 0, should_swap, false).unwrap();
} else {
return macho_get_text_vmaddr_mach<32>(file, object_path, 0, should_swap, false).unwrap();
} }
// fields match just u64 instead of u32
segment_command_64 common;
common.cmd = segment.cmd;
common.cmdsize = segment.cmdsize;
static_assert(sizeof common.segname == 16 && sizeof segment.segname == 16, "xx");
memcpy(common.segname, segment.segname, 16);
common.vmaddr = segment.vmaddr;
common.vmsize = segment.vmsize;
common.fileoff = segment.fileoff;
common.filesize = segment.filesize;
common.maxprot = segment.maxprot;
common.initprot = segment.initprot;
common.nsects = segment.nsects;
common.flags = segment.flags;
return common;
} }
// Reports whether values read from the file need byte swapping, as decided by
// should_swap_bytes() applied to the currently stored magic.
bool should_swap() const {
return should_swap_bytes(magic);
} }
};
inline bool macho_is_fat(const std::string& object_path) { inline bool macho_is_fat(const std::string& object_path) {
auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter); auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter);
@ -210,41 +281,6 @@ namespace detail {
std::uint32_t magic = load_bytes<std::uint32_t>(file, 0); std::uint32_t magic = load_bytes<std::uint32_t>(file, 0);
return is_fat_magic(magic); return is_fat_magic(magic);
} }
// returns index of the appropriate mach-o binary in the universal binary
// TODO: Code duplication with macho_get_text_vmaddr_fat
inline unsigned get_fat_macho_index(const std::string& object_path) {
auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter);
if(file == nullptr) {
throw file_error("Unable to read object file " + object_path);
}
std::uint32_t magic = load_bytes<std::uint32_t>(file, 0);
VERIFY(is_fat_magic(magic));
bool should_swap = should_swap_bytes(magic);
std::size_t header_size = sizeof(fat_header);
std::size_t arch_size = sizeof(fat_arch);
fat_header header = load_bytes<fat_header>(file, 0);
if(should_swap) {
swap_fat_header(&header, NX_UnknownByteOrder);
}
off_t arch_offset = (off_t)header_size;
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader();
for(std::uint32_t i = 0; i < header.nfat_arch; i++) {
fat_arch arch = load_bytes<fat_arch>(file, arch_offset);
if(should_swap) {
swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
}
arch_offset += arch_size;
if(
arch.cputype == mhp->cputype &&
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) == arch.cpusubtype
) {
return i;
}
}
// If this is reached... something went wrong. The cpu we're on wasn't found.
PANIC("Couldn't find appropriate architecture in fat Mach-O");
}
} }
} }

View File

@ -56,7 +56,7 @@ namespace detail {
if(it == cache.end()) { if(it == cache.end()) {
// arguably it'd be better to release the lock while computing this, but also arguably it's good to not // arguably it'd be better to release the lock while computing this, but also arguably it's good to not
// have two threads try to do the same computation // have two threads try to do the same computation
auto base = macho_get_text_vmaddr(object_path); auto base = mach_o(object_path).get_text_vmaddr();
cache.insert(it, {object_path, base}); cache.insert(it, {object_path, base});
return base; return base;
} else { } else {

View File

@ -126,7 +126,7 @@ namespace libdwarf {
object_path += ".dSYM/Contents/Resources/DWARF/" + basename(object_path); object_path += ".dSYM/Contents/Resources/DWARF/" + basename(object_path);
} }
if(macho_is_fat(object_path)) { if(macho_is_fat(object_path)) {
universal_number = get_fat_macho_index(object_path); universal_number = mach_o(object_path).get_fat_index();
} }
#endif #endif