Mach-O refactoring (#77)
This is the first step towards a more comprehensive Mach-O system. The next step will be to add support for symbol table parsing.
This commit is contained in:
parent
7f6e91e0ff
commit
a654f2082e
@ -73,134 +73,205 @@ namespace detail {
|
|||||||
#define LP(x) x
|
#define LP(x) x
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<std::size_t Bits>
|
struct load_command_entry {
|
||||||
static optional<std::uintptr_t> macho_get_text_vmaddr_mach(
|
std::uint32_t file_offset;
|
||||||
std::FILE* object_file,
|
std::uint32_t cmd;
|
||||||
const std::string& object_path,
|
std::uint32_t cmdsize;
|
||||||
off_t offset,
|
};
|
||||||
bool should_swap,
|
|
||||||
bool allow_arch_mismatch
|
class mach_o {
|
||||||
) {
|
std::FILE* file = nullptr;
|
||||||
static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument");
|
std::string object_path;
|
||||||
using Mach_Header = typename std::conditional<Bits == 32, mach_header, mach_header_64>::type;
|
std::uint32_t magic;
|
||||||
using Segment_Command = typename std::conditional<Bits == 32, segment_command, segment_command_64>::type;
|
cpu_type_t cputype;
|
||||||
std::uint32_t ncmds;
|
cpu_subtype_t cpusubtype;
|
||||||
off_t load_commands_offset = offset;
|
std::uint32_t filetype;
|
||||||
std::size_t header_size = sizeof(Mach_Header);
|
std::uint32_t n_load_commands;
|
||||||
Mach_Header header = load_bytes<Mach_Header>(object_file, offset);
|
std::uint32_t sizeof_load_commands;
|
||||||
if(should_swap) {
|
std::uint32_t flags;
|
||||||
swap_mach_header(header);
|
|
||||||
|
std::size_t load_base = 0;
|
||||||
|
std::size_t fat_index = std::numeric_limits<std::size_t>::max();
|
||||||
|
|
||||||
|
std::vector<load_command_entry> load_commands;
|
||||||
|
|
||||||
|
public:
|
||||||
|
mach_o(const std::string& object_path) : object_path(object_path) {
|
||||||
|
file = std::fopen(object_path.c_str(), "rb");
|
||||||
|
if(file == nullptr) {
|
||||||
|
throw file_error("Unable to read object file " + object_path);
|
||||||
}
|
}
|
||||||
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader();
|
magic = load_bytes<std::uint32_t>(file, 0);
|
||||||
//std::fprintf(
|
VERIFY(is_mach_o(magic), "File is not Mach-O " + object_path);
|
||||||
// stderr,
|
if(magic == FAT_MAGIC || magic == FAT_CIGAM) {
|
||||||
// "----> %d %d; %d %d\n",
|
load_fat_mach();
|
||||||
// header.cputype,
|
|
||||||
// mhp->cputype,
|
|
||||||
// static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK),
|
|
||||||
// header.cpusubtype
|
|
||||||
//);
|
|
||||||
if(
|
|
||||||
header.cputype != mhp->cputype ||
|
|
||||||
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) != header.cpusubtype
|
|
||||||
) {
|
|
||||||
if(allow_arch_mismatch) {
|
|
||||||
return nullopt;
|
|
||||||
} else {
|
} else {
|
||||||
PANIC("Mach-O file cpu type and subtype do not match current machine " + object_path);
|
fat_index = 0;
|
||||||
|
if(is_magic_64(magic)) {
|
||||||
|
load_mach<64>(false);
|
||||||
|
} else {
|
||||||
|
load_mach<32>(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ncmds = header.ncmds;
|
|
||||||
load_commands_offset += header_size;
|
|
||||||
// iterate load commands
|
|
||||||
off_t actual_offset = load_commands_offset;
|
|
||||||
for(std::uint32_t i = 0; i < ncmds; i++) {
|
|
||||||
load_command cmd = load_bytes<load_command>(object_file, actual_offset);
|
|
||||||
if(should_swap) {
|
|
||||||
swap_load_command(&cmd, NX_UnknownByteOrder);
|
|
||||||
}
|
}
|
||||||
// TODO: This is a mistake? Need to check cmd.cmd == LC_SEGMENT_64 / cmd.cmd == LC_SEGMENT
|
|
||||||
Segment_Command segment = load_bytes<Segment_Command>(object_file, actual_offset);
|
~mach_o() {
|
||||||
if(should_swap) {
|
if(file) {
|
||||||
swap_segment_command(segment);
|
fclose(file);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::uintptr_t get_text_vmaddr() {
|
||||||
|
for(const auto& command : load_commands) {
|
||||||
|
if(command.cmd == LC_SEGMENT_64 || command.cmd == LC_SEGMENT) {
|
||||||
|
auto segment = command.cmd == LC_SEGMENT_64
|
||||||
|
? load_segment_command<64>(command.file_offset)
|
||||||
|
: load_segment_command<32>(command.file_offset);
|
||||||
if(std::strcmp(segment.segname, "__TEXT") == 0) {
|
if(std::strcmp(segment.segname, "__TEXT") == 0) {
|
||||||
return segment.vmaddr;
|
return segment.vmaddr;
|
||||||
}
|
}
|
||||||
actual_offset += cmd.cmdsize;
|
}
|
||||||
}
|
}
|
||||||
// somehow no __TEXT section was found...
|
// somehow no __TEXT section was found...
|
||||||
PANIC("Couldn't find __TEXT section while parsing Mach-O object");
|
PANIC("Couldn't find __TEXT section while parsing Mach-O object");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::uintptr_t macho_get_text_vmaddr_fat(
|
std::size_t get_fat_index() const {
|
||||||
std::FILE* object_file,
|
VERIFY(fat_index != std::numeric_limits<std::size_t>::max());
|
||||||
const std::string& object_path,
|
return fat_index;
|
||||||
bool should_swap
|
}
|
||||||
|
|
||||||
|
void print_segments() const {
|
||||||
|
int i = 0;
|
||||||
|
for(const auto& command : load_commands) {
|
||||||
|
if(command.cmd == LC_SEGMENT_64 || command.cmd == LC_SEGMENT) {
|
||||||
|
auto segment = command.cmd == LC_SEGMENT_64
|
||||||
|
? load_segment_command<64>(command.file_offset)
|
||||||
|
: load_segment_command<32>(command.file_offset);
|
||||||
|
fprintf(stderr, "Load command %d\n", i);
|
||||||
|
fprintf(stderr, " cmd %u\n", segment.cmd);
|
||||||
|
fprintf(stderr, " cmdsize %u\n", segment.cmdsize);
|
||||||
|
fprintf(stderr, " segname %s\n", segment.segname);
|
||||||
|
fprintf(stderr, " vmaddr 0x%llx\n", segment.vmaddr);
|
||||||
|
fprintf(stderr, " vmsize 0x%llx\n", segment.vmsize);
|
||||||
|
fprintf(stderr, " off 0x%llx\n", segment.fileoff);
|
||||||
|
fprintf(stderr, " filesize %llu\n", segment.filesize);
|
||||||
|
fprintf(stderr, " nsects %u\n", segment.nsects);
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<std::size_t Bits>
|
||||||
|
void load_mach(
|
||||||
|
bool allow_arch_mismatch
|
||||||
) {
|
) {
|
||||||
|
static_assert(Bits == 32 || Bits == 64, "Unexpected Bits argument");
|
||||||
|
using Mach_Header = typename std::conditional<Bits == 32, mach_header, mach_header_64>::type;
|
||||||
|
std::size_t header_size = sizeof(Mach_Header);
|
||||||
|
Mach_Header header = load_bytes<Mach_Header>(file, load_base);
|
||||||
|
magic = header.magic;
|
||||||
|
if(should_swap()) {
|
||||||
|
swap_mach_header(header);
|
||||||
|
}
|
||||||
|
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader();
|
||||||
|
if(
|
||||||
|
header.cputype != mhp->cputype ||
|
||||||
|
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) != header.cpusubtype
|
||||||
|
) {
|
||||||
|
if(allow_arch_mismatch) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
PANIC("Mach-O file cpu type and subtype do not match current machine " + object_path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cputype = header.cputype;
|
||||||
|
cpusubtype = header.cpusubtype;
|
||||||
|
filetype = header.filetype;
|
||||||
|
n_load_commands = header.ncmds;
|
||||||
|
sizeof_load_commands = header.sizeofcmds;
|
||||||
|
flags = header.flags;
|
||||||
|
// handle load commands
|
||||||
|
std::uint32_t ncmds = header.ncmds;
|
||||||
|
std::uint32_t load_commands_offset = load_base + header_size;
|
||||||
|
// iterate load commands
|
||||||
|
std::uint32_t actual_offset = load_commands_offset;
|
||||||
|
for(std::uint32_t i = 0; i < ncmds; i++) {
|
||||||
|
load_command cmd = load_bytes<load_command>(file, actual_offset);
|
||||||
|
if(should_swap()) {
|
||||||
|
swap_load_command(&cmd, NX_UnknownByteOrder);
|
||||||
|
}
|
||||||
|
load_commands.push_back({ actual_offset, cmd.cmd, cmd.cmdsize });
|
||||||
|
actual_offset += cmd.cmdsize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void load_fat_mach() {
|
||||||
std::size_t header_size = sizeof(fat_header);
|
std::size_t header_size = sizeof(fat_header);
|
||||||
std::size_t arch_size = sizeof(fat_arch);
|
std::size_t arch_size = sizeof(fat_arch);
|
||||||
fat_header header = load_bytes<fat_header>(object_file, 0);
|
fat_header header = load_bytes<fat_header>(file, 0);
|
||||||
if(should_swap) {
|
if(should_swap()) {
|
||||||
swap_fat_header(&header, NX_UnknownByteOrder);
|
swap_fat_header(&header, NX_UnknownByteOrder);
|
||||||
}
|
}
|
||||||
|
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader();
|
||||||
off_t arch_offset = (off_t)header_size;
|
off_t arch_offset = (off_t)header_size;
|
||||||
optional<std::uintptr_t> text_vmaddr;
|
for(std::size_t i = 0; i < header.nfat_arch; i++) {
|
||||||
for(std::uint32_t i = 0; i < header.nfat_arch; i++) {
|
fat_arch arch = load_bytes<fat_arch>(file, arch_offset);
|
||||||
fat_arch arch = load_bytes<fat_arch>(object_file, arch_offset);
|
if(should_swap()) {
|
||||||
if(should_swap) {
|
|
||||||
swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
|
swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
|
||||||
}
|
}
|
||||||
off_t mach_header_offset = (off_t)arch.offset;
|
off_t mach_header_offset = (off_t)arch.offset;
|
||||||
arch_offset += arch_size;
|
arch_offset += arch_size;
|
||||||
std::uint32_t magic = load_bytes<std::uint32_t>(object_file, mach_header_offset);
|
std::uint32_t magic = load_bytes<std::uint32_t>(file, mach_header_offset);
|
||||||
|
if(
|
||||||
|
arch.cputype == mhp->cputype &&
|
||||||
|
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) == arch.cpusubtype
|
||||||
|
) {
|
||||||
|
load_base = mach_header_offset;
|
||||||
|
fat_index = i;
|
||||||
if(is_magic_64(magic)) {
|
if(is_magic_64(magic)) {
|
||||||
text_vmaddr = macho_get_text_vmaddr_mach<64>(
|
load_mach<64>(true);
|
||||||
object_file,
|
|
||||||
object_path,
|
|
||||||
mach_header_offset,
|
|
||||||
should_swap_bytes(magic),
|
|
||||||
true
|
|
||||||
);
|
|
||||||
} else {
|
} else {
|
||||||
text_vmaddr = macho_get_text_vmaddr_mach<32>(
|
load_mach<32>(true);
|
||||||
object_file,
|
|
||||||
object_path,
|
|
||||||
mach_header_offset,
|
|
||||||
should_swap_bytes(magic),
|
|
||||||
true
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
if(text_vmaddr.has_value()) {
|
return;
|
||||||
return text_vmaddr.unwrap();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If this is reached... something went wrong. The cpu we're on wasn't found.
|
// If this is reached... something went wrong. The cpu we're on wasn't found.
|
||||||
PANIC("Couldn't find appropriate architecture in fat Mach-O");
|
PANIC("Couldn't find appropriate architecture in fat Mach-O");
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::uintptr_t macho_get_text_vmaddr(const std::string& object_path) {
|
template<std::size_t Bits>
|
||||||
//std::fprintf(stderr, "--%s--\n", object_path.c_str());
|
segment_command_64 load_segment_command(std::uint32_t offset) const {
|
||||||
auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter);
|
using Segment_Command = typename std::conditional<Bits == 32, segment_command, segment_command_64>::type;
|
||||||
if(file == nullptr) {
|
Segment_Command segment = load_bytes<Segment_Command>(file, offset);
|
||||||
throw file_error("Unable to read object file " + object_path);
|
ASSERT(segment.cmd == LC_SEGMENT_64 || segment.cmd == LC_SEGMENT);
|
||||||
}
|
if(should_swap()) {
|
||||||
std::uint32_t magic = load_bytes<std::uint32_t>(file, 0);
|
swap_segment_command(segment);
|
||||||
VERIFY(is_mach_o(magic), "File is not Mach-O " + object_path);
|
|
||||||
bool is_64 = is_magic_64(magic);
|
|
||||||
bool should_swap = should_swap_bytes(magic);
|
|
||||||
if(magic == FAT_MAGIC || magic == FAT_CIGAM) {
|
|
||||||
return macho_get_text_vmaddr_fat(file, object_path, should_swap);
|
|
||||||
} else {
|
|
||||||
if(is_64) {
|
|
||||||
return macho_get_text_vmaddr_mach<64>(file, object_path, 0, should_swap, false).unwrap();
|
|
||||||
} else {
|
|
||||||
return macho_get_text_vmaddr_mach<32>(file, object_path, 0, should_swap, false).unwrap();
|
|
||||||
}
|
}
|
||||||
|
// fields match just u64 instead of u32
|
||||||
|
segment_command_64 common;
|
||||||
|
common.cmd = segment.cmd;
|
||||||
|
common.cmdsize = segment.cmdsize;
|
||||||
|
static_assert(sizeof common.segname == 16 && sizeof segment.segname == 16, "xx");
|
||||||
|
memcpy(common.segname, segment.segname, 16);
|
||||||
|
common.vmaddr = segment.vmaddr;
|
||||||
|
common.vmsize = segment.vmsize;
|
||||||
|
common.fileoff = segment.fileoff;
|
||||||
|
common.filesize = segment.filesize;
|
||||||
|
common.maxprot = segment.maxprot;
|
||||||
|
common.initprot = segment.initprot;
|
||||||
|
common.nsects = segment.nsects;
|
||||||
|
common.flags = segment.flags;
|
||||||
|
return common;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool should_swap() const {
|
||||||
|
return should_swap_bytes(magic);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
inline bool macho_is_fat(const std::string& object_path) {
|
inline bool macho_is_fat(const std::string& object_path) {
|
||||||
auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter);
|
auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter);
|
||||||
@ -210,41 +281,6 @@ namespace detail {
|
|||||||
std::uint32_t magic = load_bytes<std::uint32_t>(file, 0);
|
std::uint32_t magic = load_bytes<std::uint32_t>(file, 0);
|
||||||
return is_fat_magic(magic);
|
return is_fat_magic(magic);
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns index of the appropriate mach-o binary in the universal binary
|
|
||||||
// TODO: Code duplication with macho_get_text_vmaddr_fat
|
|
||||||
inline unsigned get_fat_macho_index(const std::string& object_path) {
|
|
||||||
auto file = raii_wrap(std::fopen(object_path.c_str(), "rb"), file_deleter);
|
|
||||||
if(file == nullptr) {
|
|
||||||
throw file_error("Unable to read object file " + object_path);
|
|
||||||
}
|
|
||||||
std::uint32_t magic = load_bytes<std::uint32_t>(file, 0);
|
|
||||||
VERIFY(is_fat_magic(magic));
|
|
||||||
bool should_swap = should_swap_bytes(magic);
|
|
||||||
std::size_t header_size = sizeof(fat_header);
|
|
||||||
std::size_t arch_size = sizeof(fat_arch);
|
|
||||||
fat_header header = load_bytes<fat_header>(file, 0);
|
|
||||||
if(should_swap) {
|
|
||||||
swap_fat_header(&header, NX_UnknownByteOrder);
|
|
||||||
}
|
|
||||||
off_t arch_offset = (off_t)header_size;
|
|
||||||
thread_local static struct LP(mach_header)* mhp = _NSGetMachExecuteHeader();
|
|
||||||
for(std::uint32_t i = 0; i < header.nfat_arch; i++) {
|
|
||||||
fat_arch arch = load_bytes<fat_arch>(file, arch_offset);
|
|
||||||
if(should_swap) {
|
|
||||||
swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
|
|
||||||
}
|
|
||||||
arch_offset += arch_size;
|
|
||||||
if(
|
|
||||||
arch.cputype == mhp->cputype &&
|
|
||||||
static_cast<cpu_subtype_t>(mhp->cpusubtype & ~CPU_SUBTYPE_MASK) == arch.cpusubtype
|
|
||||||
) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If this is reached... something went wrong. The cpu we're on wasn't found.
|
|
||||||
PANIC("Couldn't find appropriate architecture in fat Mach-O");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -56,7 +56,7 @@ namespace detail {
|
|||||||
if(it == cache.end()) {
|
if(it == cache.end()) {
|
||||||
// arguably it'd be better to release the lock while computing this, but also arguably it's good to not
|
// arguably it'd be better to release the lock while computing this, but also arguably it's good to not
|
||||||
// have two threads try to do the same computation
|
// have two threads try to do the same computation
|
||||||
auto base = macho_get_text_vmaddr(object_path);
|
auto base = mach_o(object_path).get_text_vmaddr();
|
||||||
cache.insert(it, {object_path, base});
|
cache.insert(it, {object_path, base});
|
||||||
return base;
|
return base;
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -126,7 +126,7 @@ namespace libdwarf {
|
|||||||
object_path += ".dSYM/Contents/Resources/DWARF/" + basename(object_path);
|
object_path += ".dSYM/Contents/Resources/DWARF/" + basename(object_path);
|
||||||
}
|
}
|
||||||
if(macho_is_fat(object_path)) {
|
if(macho_is_fat(object_path)) {
|
||||||
universal_number = get_fat_macho_index(object_path);
|
universal_number = mach_o(object_path).get_fat_index();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user