Refactor PE header parsing out of addr2line code

This commit is contained in:
Jeremy 2023-07-23 19:54:17 -04:00
parent d12cd313d3
commit 0e701903ed
No known key found for this signature in database
GPG Key ID: 19AA8270105E8EB4
4 changed files with 85 additions and 69 deletions

View File

@ -250,6 +250,14 @@ static_assert(n_digits(10) == 2, "n_digits utility producing the wrong result");
static_assert(n_digits(11) == 2, "n_digits utility producing the wrong result");
static_assert(n_digits(1024) == 4, "n_digits utility producing the wrong result");
// Reads one object of type T from obj_file, starting at the absolute byte offset `offset`.
//
// The enable_if constraint removes this overload from consideration unless T is a POD type,
// so only types that can be filled by a raw byte copy are accepted.
//
// Both the seek and the read are passed through internal_verify, with "fseek error" and
// "fread error" as the respective messages. The bytes are returned exactly as stored in the
// file; no byte swapping is done here.
template<typename T, typename std::enable_if<std::is_pod<T>::value, int>::type = 0>
T load_bytes(FILE* obj_file, off_t offset) {
T object;
// SEEK_SET: offset is measured from the start of the file
internal_verify(fseek(obj_file, offset, SEEK_SET) == 0, "fseek error");
// Request exactly one item of sizeof(T) bytes; anything other than 1 item read is a failure
internal_verify(fread(&object, sizeof(T), 1, obj_file) == 1, "fread error");
return object;
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif

View File

@ -6,6 +6,8 @@
#include <cstring>
#include <type_traits>
#include "common.hpp"
#include <mach-o/loader.h>
#include <mach-o/swap.h>
#include <mach-o/fat.h>
@ -13,15 +15,6 @@
// Based on https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
// and https://lowlevelbits.org/parsing-mach-o-files/
// Reads one object of type T from obj_file, starting at the absolute byte offset `offset`.
//
// T must be a POD type (enforced at compile time) because the object is filled by a raw
// byte copy from the file. The bytes are returned exactly as stored; no byte swapping is
// done here.
//
// If the seek fails or fewer than sizeof(T) bytes can be read, a value-initialized (zeroed)
// T is returned instead of an object with indeterminate contents.
template<typename T>
T load_bytes(FILE* obj_file, off_t offset) {
    static_assert(std::is_pod<T>::value, "Expected POD type");
    // SEEK_SET: offset is measured from the start of the file
    if(fseek(obj_file, offset, SEEK_SET) != 0) {
        return T();
    }
    T object = T();
    // Request exactly one item of sizeof(T) bytes; a short read leaves object partially
    // written, so discard it rather than hand back a half-filled value
    if(fread(&object, sizeof(T), 1, obj_file) != 1) {
        return T();
    }
    return object;
}
// Reports whether a Mach-O magic number denotes a 64-bit header, in either byte order.
static bool is_magic_64(uint32_t magic) {
    switch(magic) {
        case MH_MAGIC_64:
        case MH_CIGAM_64:
            return true;
        default:
            return false;
    }
}
@ -42,7 +35,7 @@ static bool should_swap_bytes(uint32_t magic) {
#error "Unknown CPU architecture"
#endif
static uintptr_t get_text_vmaddr_from_segments(FILE* obj_file, off_t offset, bool should_swap, uint32_t ncmds) {
static uintptr_t macho_get_text_vmaddr_from_segments(FILE* obj_file, off_t offset, bool should_swap, uint32_t ncmds) {
off_t actual_offset = offset;
for(uint32_t i = 0; i < ncmds; i++) {
load_command cmd = load_bytes<load_command>(obj_file, actual_offset);
@ -77,7 +70,7 @@ static uintptr_t get_text_vmaddr_from_segments(FILE* obj_file, off_t offset, boo
return 0;
}
static uintptr_t get_text_vmaddr_mach(FILE* obj_file, off_t offset, bool is_64, bool should_swap) {
static uintptr_t macho_get_text_vmaddr_mach(FILE* obj_file, off_t offset, bool is_64, bool should_swap) {
uint32_t ncmds;
off_t load_commands_offset = offset;
if(is_64) {
@ -107,10 +100,10 @@ static uintptr_t get_text_vmaddr_mach(FILE* obj_file, off_t offset, bool is_64,
ncmds = header.ncmds;
load_commands_offset += header_size;
}
return get_text_vmaddr_from_segments(obj_file, load_commands_offset, should_swap, ncmds);
return macho_get_text_vmaddr_from_segments(obj_file, load_commands_offset, should_swap, ncmds);
}
static uintptr_t get_text_vmaddr_fat(FILE* obj_file, bool should_swap) {
static uintptr_t macho_get_text_vmaddr_fat(FILE* obj_file, bool should_swap) {
size_t header_size = sizeof(fat_header);
size_t arch_size = sizeof(fat_arch);
fat_header header = load_bytes<fat_header>(obj_file, 0);
@ -127,7 +120,7 @@ static uintptr_t get_text_vmaddr_fat(FILE* obj_file, bool should_swap) {
off_t mach_header_offset = (off_t)arch.offset;
arch_offset += arch_size;
uint32_t magic = load_bytes<uint32_t>(obj_file, mach_header_offset);
text_vmaddr = get_text_vmaddr_mach(
text_vmaddr = macho_get_text_vmaddr_mach(
obj_file,
mach_header_offset,
is_magic_64(magic),
@ -141,16 +134,16 @@ static uintptr_t get_text_vmaddr_fat(FILE* obj_file, bool should_swap) {
return text_vmaddr;
}
static uintptr_t get_text_vmaddr(const char* path) {
static uintptr_t macho_get_text_vmaddr(const char* path) {
FILE* obj_file = fopen(path, "rb");
uint32_t magic = load_bytes<uint32_t>(obj_file, 0);
bool is_64 = is_magic_64(magic);
bool should_swap = should_swap_bytes(magic);
uintptr_t addr;
if(magic == FAT_MAGIC || magic == FAT_CIGAM) {
addr = get_text_vmaddr_fat(obj_file, should_swap);
addr = macho_get_text_vmaddr_fat(obj_file, should_swap);
} else {
addr = get_text_vmaddr_mach(obj_file, 0, is_64, should_swap);
addr = macho_get_text_vmaddr_mach(obj_file, 0, is_64, should_swap);
}
fclose(obj_file);
return addr;

65
src/platform/pe.hpp Normal file
View File

@ -0,0 +1,65 @@
#ifndef PE_HPP
#define PE_HPP
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <string>
#include "common.hpp"
#if IS_WINDOWS
#include <windows.h>
// Reads the ImageBase field from the PE optional header of the file at obj_path.
//
// The file is walked by hand: DOS header ("MZ") -> e_lfanew -> NT signature ("PE\0\0") ->
// file header -> optional header. Every read, seek, and format expectation is passed
// through internal_verify. Multi-byte fields are byte-swapped when the host is not
// little endian.
//
// NOTE(review): if internal_verify exits this function on failure (e.g. by throwing), the
// FILE handle is not closed on that path - confirm against internal_verify's definition.
static uintptr_t pe_get_module_image_base(const std::string& obj_path) {
    // PE header values are little endian
    bool do_swap = !is_little_endian();
    FILE* file = fopen(obj_path.c_str(), "rb");
    // Every call below dereferences the handle, so a failed open must be caught here
    internal_verify(file != nullptr);
    char magic[2];
    internal_verify(fread(magic, 1, 2, file) == 2); // file + 0x0
    internal_verify(memcmp(magic, "MZ", 2) == 0);
    DWORD e_lfanew;
    internal_verify(fseek(file, 0x3c, SEEK_SET) == 0);
    internal_verify(fread(&e_lfanew, sizeof(DWORD), 1, file) == 1); // file + 0x3c
    if(do_swap) e_lfanew = byteswap(e_lfanew);
    long nt_header_offset = e_lfanew;
    char signature[4];
    internal_verify(fseek(file, nt_header_offset, SEEK_SET) == 0);
    internal_verify(fread(signature, 1, 4, file) == 4); // NT header + 0x0
    internal_verify(memcmp(signature, "PE\0\0", 4) == 0);
    //WORD machine;
    //internal_verify(fseek(file, nt_header_offset + 4, SEEK_SET) == 0); // file header + 0x0
    //internal_verify(fread(&machine, sizeof(WORD), 1, file) == 1);
    WORD size_of_optional_header;
    internal_verify(fseek(file, nt_header_offset + 4 + 0x10, SEEK_SET) == 0); // file header + 0x10
    // The field is a 2-byte WORD; reading sizeof(DWORD) here would write past the variable
    internal_verify(fread(&size_of_optional_header, sizeof(WORD), 1, file) == 1);
    if(do_swap) size_of_optional_header = byteswap(size_of_optional_header);
    internal_verify(size_of_optional_header != 0);
    WORD optional_header_magic;
    internal_verify(fseek(file, nt_header_offset + 0x18, SEEK_SET) == 0); // optional header + 0x0
    // Also a 2-byte WORD, so read exactly sizeof(WORD)
    internal_verify(fread(&optional_header_magic, sizeof(WORD), 1, file) == 1);
    if(do_swap) optional_header_magic = byteswap(optional_header_magic);
    internal_verify(optional_header_magic == IMAGE_NT_OPTIONAL_HDR_MAGIC);
    uintptr_t image_base;
    if(optional_header_magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
        // 32 bit
        DWORD base;
        internal_verify(fseek(file, nt_header_offset + 0x18 + 0x1c, SEEK_SET) == 0); // optional header + 0x1c
        internal_verify(fread(&base, sizeof(DWORD), 1, file) == 1);
        if(do_swap) base = byteswap(base);
        image_base = base;
    } else {
        // 64 bit
        // I get an "error: 'QWORD' was not declared in this scope" for some reason when using QWORD
        unsigned __int64 base;
        internal_verify(fseek(file, nt_header_offset + 0x18 + 0x18, SEEK_SET) == 0); // optional header + 0x18
        internal_verify(fread(&base, sizeof(unsigned __int64), 1, file) == 1);
        if(do_swap) base = byteswap(base);
        image_base = base;
    }
    fclose(file);
    return image_base;
}
#endif
#endif

View File

@ -23,7 +23,7 @@
#include "../platform/mach-o.hpp"
#endif
#elif IS_WINDOWS
#include <windows.h>
#include "../platform/pe.hpp"
#endif
namespace cpptrace {
@ -156,7 +156,7 @@ namespace cpptrace {
if(it == cache.end()) {
// arguably it'd be better to release the lock while computing this, but also arguably it's good to not
// have two threads try to do the same computation
auto base = get_text_vmaddr(entry.obj_path.c_str());
auto base = macho_get_text_vmaddr(entry.obj_path.c_str());
cache.insert(it, {entry.obj_path, base});
return base;
} else {
@ -242,56 +242,6 @@ namespace cpptrace {
return output;
}
// Reads the ImageBase field from the PE optional header of the file at obj_path.
//
// The file is walked by hand: DOS header ("MZ") -> e_lfanew -> NT signature ("PE\0\0") ->
// file header -> optional header. Every read, seek, and format expectation is passed
// through internal_verify. Multi-byte fields are byte-swapped when the host is not
// little endian.
//
// NOTE(review): if internal_verify exits this function on failure (e.g. by throwing), the
// FILE handle is not closed on that path - confirm against internal_verify's definition.
uintptr_t pe_get_module_image_base(const std::string& obj_path) {
    // PE header values are little endian
    bool do_swap = !is_little_endian();
    FILE* file = fopen(obj_path.c_str(), "rb");
    // Every call below dereferences the handle, so a failed open must be caught here
    internal_verify(file != nullptr);
    char magic[2];
    internal_verify(fread(magic, 1, 2, file) == 2); // file + 0x0
    internal_verify(memcmp(magic, "MZ", 2) == 0);
    DWORD e_lfanew;
    internal_verify(fseek(file, 0x3c, SEEK_SET) == 0);
    internal_verify(fread(&e_lfanew, sizeof(DWORD), 1, file) == 1); // file + 0x3c
    if(do_swap) e_lfanew = byteswap(e_lfanew);
    long nt_header_offset = e_lfanew;
    char signature[4];
    internal_verify(fseek(file, nt_header_offset, SEEK_SET) == 0);
    internal_verify(fread(signature, 1, 4, file) == 4); // NT header + 0x0
    internal_verify(memcmp(signature, "PE\0\0", 4) == 0);
    //WORD machine;
    //internal_verify(fseek(file, nt_header_offset + 4, SEEK_SET) == 0); // file header + 0x0
    //internal_verify(fread(&machine, sizeof(WORD), 1, file) == 1);
    WORD size_of_optional_header;
    internal_verify(fseek(file, nt_header_offset + 4 + 0x10, SEEK_SET) == 0); // file header + 0x10
    // The field is a 2-byte WORD; reading sizeof(DWORD) here would write past the variable
    internal_verify(fread(&size_of_optional_header, sizeof(WORD), 1, file) == 1);
    if(do_swap) size_of_optional_header = byteswap(size_of_optional_header);
    internal_verify(size_of_optional_header != 0);
    WORD optional_header_magic;
    internal_verify(fseek(file, nt_header_offset + 0x18, SEEK_SET) == 0); // optional header + 0x0
    // Also a 2-byte WORD, so read exactly sizeof(WORD)
    internal_verify(fread(&optional_header_magic, sizeof(WORD), 1, file) == 1);
    if(do_swap) optional_header_magic = byteswap(optional_header_magic);
    internal_verify(optional_header_magic == IMAGE_NT_OPTIONAL_HDR_MAGIC);
    uintptr_t image_base;
    if(optional_header_magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
        // 32 bit
        DWORD base;
        internal_verify(fseek(file, nt_header_offset + 0x18 + 0x1c, SEEK_SET) == 0); // optional header + 0x1c
        internal_verify(fread(&base, sizeof(DWORD), 1, file) == 1);
        if(do_swap) base = byteswap(base);
        image_base = base;
    } else {
        // 64 bit
        // I get an "error: 'QWORD' was not declared in this scope" for some reason when using QWORD
        unsigned __int64 base;
        internal_verify(fseek(file, nt_header_offset + 0x18 + 0x18, SEEK_SET) == 0); // optional header + 0x18
        internal_verify(fread(&base, sizeof(unsigned __int64), 1, file) == 1);
        if(do_swap) base = byteswap(base);
        image_base = base;
    }
    fclose(file);
    return image_base;
}
uintptr_t get_module_image_base(const dlframe &entry) {
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);