1
0
Fork 0
tinycc/tccmacho.c

1610 lines
60 KiB
C
Raw Normal View History

/*
* Mach-O file handling for TCC
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "tcc.h"
/* In order to make life easy for us we are generating Mach-O files which
don't make use of some modern features, but which aren't entirely classic
either in that they do use some modern features. We're also only
generating 64-bit Mach-O files, and only native-endian ones at that.
In particular we're generating executables that don't make use of
DYLD_INFO for dynamic linking info, as that requires us to build a
trie of exported names. We're simply using classic symbol tables, which
are still supported by modern dyld.
But we do use LC_MAIN, which is a "modern" feature, in order to not have
to set up our own crt code. We're not using lazy linking, so even function
calls are resolved at startup.
NOTE(review): the code below defines dyld_info_command and emits
rebase/bind/lazy-bind opcodes and lazy stubs (see check_relocs and
do_bind_rebase), so the statements about DYLD_INFO and lazy linking
look out of date -- confirm and update. */
/* This Mach-O writer is only built for the x86-64 and ARM64 targets;
   stop the build early for anything else. */
#if !defined TCC_TARGET_X86_64 && !defined TCC_TARGET_ARM64
#error Platform not supported
#endif
/* Set DEBUG_MACHO to 1 to enable the dprintf() trace output in this file. */
#define DEBUG_MACHO 0
/* Debug printf: expands to a printf call guarded by DEBUG_MACHO, so the
   call is dead code when tracing is off.  Because it expands to a bare
   `if`, do not use it directly in front of an `else`. */
#define dprintf if (DEBUG_MACHO) printf
/* mach_header.filetype and mach_header.flags values used by this file. */
#define MH_EXECUTE              (0x2)
#define MH_DYLDLINK             (0x4)
#define MH_PIE                  (0x200000)

/* CPU type / subtype identifiers for mach_header and fat_arch.
   CPU_ARCH_ABI64 is OR-ed into a base CPU type to get its 64-bit variant. */
#define CPU_SUBTYPE_LIB64       (0x80000000)
#define CPU_SUBTYPE_X86_ALL     (3)
#define CPU_SUBTYPE_ARM64_ALL   (0)
#define CPU_ARCH_ABI64          (0x01000000)
#define CPU_TYPE_X86            (7)
#define CPU_TYPE_X86_64         (CPU_TYPE_X86 | CPU_ARCH_ABI64)
#define CPU_TYPE_ARM            (12)
#define CPU_TYPE_ARM64          (CPU_TYPE_ARM | CPU_ARCH_ABI64)
/* Header found at the very start of a fat (multi-architecture) file.
   It is followed by nfat_arch descriptor structs (presumably fat_arch
   records, defined next). */
struct fat_header {
uint32_t magic; /* FAT_MAGIC or FAT_MAGIC_64 */
uint32_t nfat_arch; /* number of structs that follow */
};
/* One per-architecture entry of a fat file: identifies the CPU and says
   where the object file for that CPU lives inside the fat file. */
struct fat_arch {
int cputype; /* cpu specifier (int) */
int cpusubtype; /* machine specifier (int) */
uint32_t offset; /* file offset to this object file */
uint32_t size; /* size of this object file */
uint32_t align; /* alignment as a power of 2 */
};
/* Magic numbers for fat_header.magic.  Each *_CIGAM value is the
   byte-swapped form of the matching *_MAGIC value, i.e. what a reader of
   the opposite endianness sees. */
#define FAT_MAGIC 0xcafebabe
#define FAT_CIGAM 0xbebafeca
#define FAT_MAGIC_64 0xcafebabf
#define FAT_CIGAM_64 0xbfbafeca
/* Mach-O file header (the part shared by the 32-bit and 64-bit layouts).
   It is immediately followed in the file by ncmds load commands occupying
   sizeofcmds bytes in total. */
struct mach_header {
    uint32_t magic;         /* mach magic number identifier */
    int cputype;            /* cpu specifier */
    int cpusubtype;         /* machine specifier */
    uint32_t filetype;      /* type of file */
    uint32_t ncmds;         /* number of load commands */
    uint32_t sizeofcmds;    /* the size of all the load commands */
    uint32_t flags;         /* flags */
};
/* 64-bit Mach-O file header: the common mach_header plus one reserved
   word of padding. */
struct mach_header_64 {
struct mach_header mh;
uint32_t reserved; /* reserved, pad to 64bit */
};
/* Constants for the magic field of the mach_header, for both the 32-bit
   and the 64-bit layouts.  The *_CIGAM values are the byte-swapped forms
   seen by a reader of the opposite endianness. */
#define MH_MAGIC 0xfeedface /* the mach magic number */
#define MH_CIGAM 0xcefaedfe /* NXSwapInt(MH_MAGIC) */
#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */
#define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */
/* Common prefix of every load command.  cmdsize covers the whole command
   including this prefix, so it is also the distance to the next command. */
struct load_command {
    uint32_t cmd;       /* type of load command */
    uint32_t cmdsize;   /* total size of command in bytes */
};
/* Load command types (load_command.cmd).  LC_REQ_DYLD is a flag bit that
   is OR-ed into the command numbers of the newer commands below. */
#define LC_REQ_DYLD         0x80000000
#define LC_SYMTAB           0x2
#define LC_DYSYMTAB         0xb
#define LC_LOAD_DYLIB       0xc
#define LC_ID_DYLIB         0xd
#define LC_LOAD_DYLINKER    0xe
#define LC_SEGMENT_64       0x19
#define LC_REEXPORT_DYLIB   (0x1f | LC_REQ_DYLD)
#define LC_DYLD_INFO_ONLY   (0x22 | LC_REQ_DYLD)
#define LC_MAIN             (0x28 | LC_REQ_DYLD)
/* VM protection bits as stored in segment commands. */
typedef int vm_prot_t;

/* LC_SEGMENT_64 load command.  In memory and in the file it is directly
   followed by nsects section_64 records, and cmdsize includes them. */
struct segment_command_64 { /* for 64-bit architectures */
    uint32_t cmd;           /* LC_SEGMENT_64 */
    uint32_t cmdsize;       /* includes sizeof section_64 structs */
    char segname[16];       /* segment name */
    uint64_t vmaddr;        /* memory address of this segment */
    uint64_t vmsize;        /* memory size of this segment */
    uint64_t fileoff;       /* file offset of this segment */
    uint64_t filesize;      /* amount to map from the file */
    vm_prot_t maxprot;      /* maximum VM protection */
    vm_prot_t initprot;     /* initial VM protection */
    uint32_t nsects;        /* number of sections in segment */
    uint32_t flags;         /* flags */
};
/* Description of one section inside an LC_SEGMENT_64 command.  Both name
   fields are fixed 16-byte arrays; a name that is exactly 16 characters
   long fills the array and is therefore not NUL-terminated. */
struct section_64 { /* for 64-bit architectures */
    char sectname[16];      /* name of this section */
    char segname[16];       /* segment this section goes in */
    uint64_t addr;          /* memory address of this section */
    uint64_t size;          /* size in bytes of this section */
    uint32_t offset;        /* file offset of this section */
    uint32_t align;         /* section alignment (power of 2) */
    uint32_t reloff;        /* file offset of relocation entries */
    uint32_t nreloc;        /* number of relocation entries */
    uint32_t flags;         /* flags (section type and attributes)*/
    uint32_t reserved1;     /* reserved (for offset or index) */
    uint32_t reserved2;     /* reserved (for count or sizeof) */
    uint32_t reserved3;     /* reserved */
};
/* section_64.flags: section type values (low bits). */
#define S_REGULAR                   0x0
#define S_ZEROFILL                  0x1
#define S_NON_LAZY_SYMBOL_POINTERS  0x6
#define S_LAZY_SYMBOL_POINTERS      0x7
#define S_SYMBOL_STUBS              0x8
#define S_MOD_INIT_FUNC_POINTERS    0x9
#define S_MOD_TERM_FUNC_POINTERS    0xa
/* section_64.flags: attribute bits, OR-ed with one of the types above. */
#define S_ATTR_PURE_INSTRUCTIONS    0x80000000
#define S_ATTR_SOME_INSTRUCTIONS    0x00000400
/* A string inside a load command, stored as the byte offset of the string
   from the start of that command (see add_dylib). */
typedef uint32_t lc_str;

/* Load command naming a dynamic library.  The path string follows the
   fixed part and is counted in cmdsize. */
struct dylib_command {
    uint32_t cmd;                   /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB,
                                       LC_REEXPORT_DYLIB */
    uint32_t cmdsize;               /* includes pathname string */
    lc_str name;                    /* library's path name */
    uint32_t timestamp;             /* library's build time stamp */
    uint32_t current_version;       /* library's current version number */
    uint32_t compatibility_version; /* library's compatibility vers number*/
};
struct dylinker_command {
2020-05-25 19:47:05 +03:00
uint32_t cmd; /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or
LC_DYLD_ENVIRONMENT */
uint32_t cmdsize; /* includes pathname string */
lc_str name; /* dynamic linker's path name */
};
/* LC_SYMTAB load command: locates the symbol table (an array of nlist
   entries) and its string table inside the file. */
struct symtab_command {
    uint32_t cmd;       /* LC_SYMTAB */
    uint32_t cmdsize;   /* sizeof(struct symtab_command) */
    uint32_t symoff;    /* symbol table offset */
    uint32_t nsyms;     /* number of symbol table entries */
    uint32_t stroff;    /* string table offset */
    uint32_t strsize;   /* string table size in bytes */
};
/* LC_DYSYMTAB load command: partitions the symbol table into local,
   externally defined and undefined ranges, and locates the auxiliary
   tables (indirect symbols, relocations, ...) used for dynamic linking. */
struct dysymtab_command {
    uint32_t cmd;           /* LC_DYSYMTAB */
    uint32_t cmdsize;       /* sizeof(struct dysymtab_command) */

    uint32_t ilocalsym;     /* index to local symbols */
    uint32_t nlocalsym;     /* number of local symbols */

    uint32_t iextdefsym;    /* index to externally defined symbols */
    uint32_t nextdefsym;    /* number of externally defined symbols */

    uint32_t iundefsym;     /* index to undefined symbols */
    uint32_t nundefsym;     /* number of undefined symbols */

    uint32_t tocoff;        /* file offset to table of contents */
    uint32_t ntoc;          /* number of entries in table of contents */

    uint32_t modtaboff;     /* file offset to module table */
    uint32_t nmodtab;       /* number of module table entries */

    uint32_t extrefsymoff;  /* offset to referenced symbol table */
    uint32_t nextrefsyms;   /* number of referenced symbol table entries */

    uint32_t indirectsymoff;/* file offset to the indirect symbol table */
    uint32_t nindirectsyms; /* number of indirect symbol table entries */

    uint32_t extreloff;     /* offset to external relocation entries */
    uint32_t nextrel;       /* number of external relocation entries */

    uint32_t locreloff;     /* offset to local relocation entries */
    uint32_t nlocrel;       /* number of local relocation entries */
};
/* Opcodes of the dyld binding byte stream.  The opcode lives in the high
   nibble; opcodes with an _IMM suffix carry an immediate in the low
   nibble. */
#define BIND_OPCODE_DONE                            0x00
#define BIND_OPCODE_SET_DYLIB_SPECIAL_IMM           0x30
#define BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM   0x40
#define BIND_OPCODE_SET_TYPE_IMM                    0x50
#define BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB     0x70
#define BIND_OPCODE_DO_BIND                         0x90

#define BIND_TYPE_POINTER                           1
/* Negative value, so it is parenthesised to stay a single operand in any
   expression; users mask it with 0xf to fit the immediate nibble. */
#define BIND_SPECIAL_DYLIB_FLAT_LOOKUP              (-2)

/* Opcodes of the dyld rebase byte stream (same nibble layout as above). */
#define REBASE_OPCODE_DONE                          0x00
#define REBASE_OPCODE_SET_TYPE_IMM                  0x10
#define REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB   0x20
#define REBASE_OPCODE_DO_REBASE_IMM_TIMES           0x50

#define REBASE_TYPE_POINTER                         1
/* LC_DYLD_INFO / LC_DYLD_INFO_ONLY load command: file offsets and sizes
   of the rebase, bind, weak-bind, lazy-bind and export data. */
struct dyld_info_command {
uint32_t cmd; /* LC_DYLD_INFO or LC_DYLD_INFO_ONLY */
uint32_t cmdsize; /* sizeof(struct dyld_info_command) */
uint32_t rebase_off; /* file offset to rebase info */
uint32_t rebase_size; /* size of rebase info */
uint32_t bind_off; /* file offset to binding info */
uint32_t bind_size; /* size of binding info */
uint32_t weak_bind_off; /* file offset to weak binding info */
uint32_t weak_bind_size; /* size of weak binding info */
uint32_t lazy_bind_off; /* file offset to lazy binding info */
uint32_t lazy_bind_size; /* size of lazy binding info */
uint32_t export_off; /* file offset to export info */
uint32_t export_size; /* size of export info */
};
/* Special indirect symbol table entry: check_relocs stores it instead of
   a symbol index for GOT slots that refer to a local symbol. */
#define INDIRECT_SYMBOL_LOCAL 0x80000000
/* LC_MAIN load command: gives the program entry point as an offset
   rather than as an address. */
struct entry_point_command {
uint32_t cmd; /* LC_MAIN only used in MH_EXECUTE filetypes */
uint32_t cmdsize; /* 24 */
uint64_t entryoff; /* file (__TEXT) offset of main() */
uint64_t stacksize;/* if not zero, initial stack size */
};
/* Section kinds.  Each value indexes both the skinfo[] table (segment,
   Mach-O flags and section name for that kind) and macho.sk_to_sect[].
   sk_last is not a kind; it is the number of kinds and sizes those
   arrays.  NOTE(review): the enumerator order presumably also fixes the
   order in which sections are laid out -- confirm against the layout
   code before reordering. */
enum skind {
sk_unknown = 0,
sk_discard,
sk_text,
sk_stubs,
sk_stub_helper,
sk_ro_data,
sk_uw_info,
sk_nl_ptr, // non-lazy pointers, aka GOT
sk_la_ptr, // lazy pointers
sk_init,
sk_fini,
sk_rw_data,
sk_stab,
sk_stab_str,
sk_debug_info,
sk_debug_abbrev,
sk_debug_line,
sk_debug_aranges,
sk_debug_str,
sk_debug_line_str,
sk_bss,
sk_linkedit,
sk_last
};
/* One 64-bit Mach-O symbol table entry.  While the table is being built
   in this file, n_value temporarily holds the ELF symbol index (set in
   create_symtab) until convert_symbol replaces it with the symbol value. */
struct nlist_64 {
uint32_t n_strx; /* index into the string table */
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
uint16_t n_desc; /* see <mach-o/stab.h> */
uint64_t n_value; /* value of this symbol (or stab offset) */
};
/* nlist_64.n_type values; N_EXT is a flag bit OR-ed into the type. */
#define N_UNDF      0x0
#define N_ABS       0x2
#define N_EXT       0x1
#define N_SECT      0xe

/* nlist_64.n_desc flag bits for weak symbols. */
#define N_WEAK_REF  0x0040
#define N_WEAK_DEF  0x0080
struct macho {
struct mach_header_64 mh;
2020-05-25 19:47:05 +03:00
int seg2lc[4], nseg;
struct load_command **lc;
2020-05-25 19:47:05 +03:00
struct entry_point_command *ep;
int nlc;
struct {
Section *s;
int machosect;
} sk_to_sect[sk_last];
int *elfsectomacho;
int *e2msym;
Section *rebase, *binding, *lazy_binding, *exports;
Section *symtab, *strtab, *wdata, *indirsyms;
Section *stubs, *stub_helper, *la_symbol_ptr;
int nr_plt, n_got;
struct dyld_info_command *dyldinfo;
int stubsym, helpsym, lasym, dyld_private, dyld_stub_binder;
uint32_t ilocal, iextdef, iundef;
int n_lazy_bind_rebase;
struct lazy_bind_rebase {
int section;
int bind;
int bind_offset;
int la_symbol_offset;
ElfW_Rel rel;
} *lazy_bind_rebase;
int n_bind;
struct bind {
int section;
ElfW_Rel rel;
} *bind;
};
/* Private section type, taken from the OS-specific ELF range, used for
   the sections this file creates to hold link-edit data. */
#define SHT_LINKEDIT (SHT_LOOS + 42)
/* Private section index, taken from the OS-specific ELF range. */
#define SHN_FROMDLL (SHN_LOOS + 2) /* Symbol is undefined, comes from a DLL */
/* Allocate a zero-filled load command of cmdsize bytes, set its cmd and
   cmdsize fields and append it to mo->lc[].
   Returns the new command; it stays owned by mo.  cmdsize must be at
   least sizeof(struct load_command), since the header fields are written
   unconditionally. */
static void * add_lc(struct macho *mo, uint32_t cmd, uint32_t cmdsize)
{
    struct load_command *lc = tcc_mallocz(cmdsize);

    lc->cmd = cmd;
    lc->cmdsize = cmdsize;
    mo->lc = tcc_realloc(mo->lc, sizeof(mo->lc[0]) * (mo->nlc + 1));
    mo->lc[mo->nlc++] = lc;
    return lc;
}
static struct segment_command_64 * add_segment(struct macho *mo, const char *name)
{
struct segment_command_64 *sc = add_lc(mo, LC_SEGMENT_64, sizeof(*sc));
strncpy(sc->segname, name, 16);
2020-05-25 19:47:05 +03:00
mo->seg2lc[mo->nseg++] = mo->nlc - 1;
return sc;
}
2020-05-25 19:47:05 +03:00
/* Return the segment command of segment number i (in add_segment order).
   No range check is done on i. */
static struct segment_command_64 * get_segment(struct macho *mo, int i)
{
    const int lc_index = mo->seg2lc[i];
    struct load_command *cmd = mo->lc[lc_index];

    return (struct segment_command_64 *) cmd;
}
/* Append a zeroed section_64 record called `name` to the segment command
   *_seg.  The command is reallocated to make room, so *_seg is updated to
   the (possibly moved) command; the caller must pass the address of the
   pointer that owns it.  The new section inherits the segment's name.
   Returns the index of the new section within the segment.
   `mo` is not used. */
static int add_section(struct macho *mo, struct segment_command_64 **_seg, const char *name)
{
    struct segment_command_64 *cmd = *_seg;
    const int index = cmd->nsects;
    struct section_64 *slot;

    /* Account for the new record before growing the allocation. */
    cmd->nsects++;
    cmd->cmdsize += sizeof(struct section_64);
    cmd = tcc_realloc(cmd, sizeof(struct segment_command_64)
                           + cmd->nsects * sizeof(struct section_64));

    /* Section records start right after the fixed segment header. */
    slot = (struct section_64 *)((char *)cmd + sizeof(struct segment_command_64));
    slot += index;
    memset(slot, 0, sizeof(struct section_64));
    /* Both names are fixed 16-byte fields, not necessarily terminated. */
    strncpy(slot->sectname, name, 16);
    strncpy(slot->segname, cmd->segname, 16);

    *_seg = cmd;
    return index;
}
static struct section_64 *get_section(struct segment_command_64 *seg, int i)
{
2020-05-25 19:47:05 +03:00
return (struct section_64*)((char*)seg + sizeof(*seg)) + i;
}
/* Append an LC_LOAD_DYLIB command for the library path `name`.
   The path is stored right after the fixed part of the command, and the
   command size is rounded up to a multiple of 8 bytes.
   Returns the new command (owned by mo). */
static void * add_dylib(struct macho *mo, char *name)
{
    struct dylib_command *lc;
    /* fixed part + string + NUL, rounded up to the next multiple of 8 */
    int sz = (sizeof(*lc) + strlen(name) + 1 + 7) & -8;

    lc = add_lc(mo, LC_LOAD_DYLIB, sz);
    lc->name = sizeof(*lc);     /* string offset from start of command */
    strcpy((char*)lc + lc->name, name);
    lc->timestamp = 2;
    lc->current_version = 1 << 16;
    lc->compatibility_version = 1 << 16;
    return lc;
}
/* Append `value` to `section` in ULEB128 form: 7 payload bits per byte,
   least significant group first, with bit 7 set on every byte except the
   last.  A value of 0 is written as a single 0 byte. */
static void write_uleb128(Section *section, uint64_t value)
{
    for (;;) {
        const unsigned char low7 = value & 0x7f;
        uint8_t *out = section_ptr_add(section, 1);

        value >>= 7;
        if (value == 0) {
            *out = low7;            /* final byte: no continuation bit */
            break;
        }
        *out = low7 | 0x80;         /* more groups follow */
    }
}
/* Replace the contents of .fini_array by a generated initializer.
   If .fini_array is not empty, a local function ___GLOBAL_init_65535 is
   emitted at the end of the text section; for every relocation of
   .fini_array it calls ___cxa_atexit(destructor, 0, &__mh_execute_header).
   Afterwards .fini_array and its relocations are emptied, the section
   loses SHF_ALLOC, and the generated function is registered through
   add_array() in .init_array. */
static void tcc_macho_add_destructor(TCCState *s1)
{
int init_sym, mh_execute_header, at_exit_sym;
Section *s;
ElfW_Rel *rel;
uint8_t *ptr;
s = find_section(s1, ".fini_array");
/* nothing to do when there are no destructors */
if (s->data_offset == 0)
return;
/* the generated function starts at the current end of the text section */
init_sym = put_elf_sym(s1->symtab, text_section->data_offset, 0,
ELFW(ST_INFO)(STB_LOCAL, STT_FUNC), 0,
text_section->sh_num, "___GLOBAL_init_65535");
/* absolute symbol passed as third argument of __cxa_atexit */
mh_execute_header = put_elf_sym(s1->symtab, 0x100000000ll, 0,
ELFW(ST_INFO)(STB_LOCAL, STT_OBJECT), 0,
SHN_ABS, "__mh_execute_header");
at_exit_sym = put_elf_sym(s1->symtab, 0, 0,
ELFW(ST_INFO)(STB_GLOBAL, STT_FUNC), 0,
SHN_UNDEF, "___cxa_atexit");
#ifdef TCC_TARGET_X86_64
/* prologue */
ptr = section_ptr_add(text_section, 4);
ptr[0] = 0x55; // pushq %rbp
ptr[1] = 0x48; // movq %rsp, %rbp
ptr[2] = 0x89;
ptr[3] = 0xe5;
/* one 26-byte call sequence per .fini_array entry; the relocation
   offsets below are counted back from the end of that sequence
   (26 - 23 = byte 3, 26 - 9 = byte 17, 26 - 4 = byte 22) */
for_each_elem(s->reloc, 0, rel, ElfW_Rel) {
int sym_index = ELFW(R_SYM)(rel->r_info);
ptr = section_ptr_add(text_section, 26);
ptr[0] = 0x48; // lea destructor(%rip),%rax
ptr[1] = 0x8d;
ptr[2] = 0x05;
put_elf_reloca(s1->symtab, text_section,
text_section->data_offset - 23,
R_X86_64_PC32, sym_index, -4);
ptr[7] = 0x48; // mov %rax,%rdi
ptr[8] = 0x89;
ptr[9] = 0xc7;
ptr[10] = 0x31; // xorl %ecx, %ecx
ptr[11] = 0xc9;
ptr[12] = 0x89; // movl %ecx, %esi
ptr[13] = 0xce;
ptr[14] = 0x48; // lea mh_execute_header(%rip),%rdx
ptr[15] = 0x8d;
ptr[16] = 0x15;
put_elf_reloca(s1->symtab, text_section,
text_section->data_offset - 9,
R_X86_64_PC32, mh_execute_header, -4);
ptr[21] = 0xe8; // call __cxa_atexit
put_elf_reloca(s1->symtab, text_section,
text_section->data_offset - 4,
R_X86_64_PLT32, at_exit_sym, -4);
}
/* epilogue */
ptr = section_ptr_add(text_section, 2);
ptr[0] = 0x5d; // pop %rbp
ptr[1] = 0xc3; // ret
#elif defined TCC_TARGET_ARM64
/* prologue */
ptr = section_ptr_add(text_section, 8);
write32le(ptr, 0xa9bf7bfd); // stp x29, x30, [sp, #-16]!
write32le(ptr + 4, 0x910003fd); // mov x29, sp
/* one 24-byte (six instruction) call sequence per .fini_array entry;
   relocation offsets are counted back from the end of that sequence */
for_each_elem(s->reloc, 0, rel, ElfW_Rel) {
int sym_index = ELFW(R_SYM)(rel->r_info);
ptr = section_ptr_add(text_section, 24);
put_elf_reloc(s1->symtab, text_section,
text_section->data_offset - 24,
R_AARCH64_ADR_PREL_PG_HI21, sym_index);
write32le(ptr, 0x90000000); // adrp x0, destructor@page
/* NOTE(review): an LDST8 low-12 relocation is applied to an `add`
   instruction here and below -- presumably because the immediate field
   is encoded the same way; confirm against the relocation code */
put_elf_reloc(s1->symtab, text_section,
text_section->data_offset - 20,
R_AARCH64_LDST8_ABS_LO12_NC, sym_index);
write32le(ptr + 4, 0x91000000); // add x0,x0,destructor@pageoff
write32le(ptr + 8, 0xd2800001); // mov x1, #0
put_elf_reloc(s1->symtab, text_section,
text_section->data_offset - 12,
R_AARCH64_ADR_PREL_PG_HI21, mh_execute_header);
write32le(ptr + 12, 0x90000002); // adrp x2, mh_execute_header@page
put_elf_reloc(s1->symtab, text_section,
text_section->data_offset - 8,
R_AARCH64_LDST8_ABS_LO12_NC, mh_execute_header);
write32le(ptr + 16, 0x91000042); // add x2,x2,mh_execute_header@pageoff
put_elf_reloc(s1->symtab, text_section,
text_section->data_offset - 4,
R_AARCH64_CALL26, at_exit_sym);
write32le(ptr + 20, 0x94000000); // bl __cxa_atexit
}
/* epilogue */
ptr = section_ptr_add(text_section, 8);
write32le(ptr, 0xa8c17bfd); // ldp x29, x30, [sp], #16
write32le(ptr + 4, 0xd65f03c0); // ret
#endif
/* drop the original destructor table and its relocations ... */
s->reloc->data_offset = s->data_offset = 0;
s->sh_flags &= ~SHF_ALLOC;
/* ... and run the generated function as an initializer instead */
add_array (s1, ".init_array", init_sym);
}
/* Scan all relocation sections and create the GOT slots, stubs, stub
   helpers and lazy pointers they require.

   - Creates the indirect symbol section (mo->indirsyms) and emits the
     common header of __stub_helper.
   - For every non-debug relocation against an undefined symbol, or of a
     type that always needs a GOT entry, allocates one GOT slot per symbol
     (attr->dyn_index is used as the "already done" flag) and remembers
     its indirect symbol table entry.  GOT slots of undefined symbols that
     are reached through a GOT-load relocation are recorded in mo->bind
     and the ELF relocation just added for the slot is removed again.
   - For every code relocation against an undefined symbol, creates (once
     per symbol) a stub, a stub helper entry and a lazy pointer, queues a
     lazy-bind record, and redirects the relocation to the stub.
   - Queues every R_DATA_PTR relocation as a rebase record.
   - Finally appends the GOT entries to the indirect symbol table,
     followed by a second copy of the stub entries, which were written at
     the start of the section.

   Fixes over the previous version: the misspelt "undefined local symbo"
   error message, and a memcpy() from a NULL source when no GOT slot was
   created. */
static void check_relocs(TCCState *s1, struct macho *mo)
{
    uint8_t *jmp;
    Section *s;
    ElfW_Rel *rel;
    ElfW(Sym) *sym;
    int i, type, gotplt_entry, sym_index, for_code;
    int sh_num, debug, bind_offset, la_symbol_offset;
    uint32_t *pi, *goti;
    struct sym_attr *attr;

    mo->indirsyms = new_section(s1, "LEINDIR", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);

    /* Common __stub_helper header: push the address of _dyld_private and
       jump to dyld_stub_binder through its GOT slot. */
#ifdef TCC_TARGET_X86_64
    jmp = section_ptr_add(mo->stub_helper, 16);
    jmp[0] = 0x4c; /* leaq _dyld_private(%rip), %r11 */
    jmp[1] = 0x8d;
    jmp[2] = 0x1d;
    put_elf_reloca(s1->symtab, mo->stub_helper, 3,
                   R_X86_64_PC32, mo->dyld_private, -4);
    jmp[7] = 0x41; /* pushq %r11 */
    jmp[8] = 0x53;
    jmp[9] = 0xff; /* jmpq *dyld_stub_binder@GOT(%rip) */
    jmp[10] = 0x25;
    put_elf_reloca(s1->symtab, mo->stub_helper, 11,
                   R_X86_64_GOTPCREL, mo->dyld_stub_binder, -4);
    jmp[15] = 0x90; /* nop */
#elif defined TCC_TARGET_ARM64
    jmp = section_ptr_add(mo->stub_helper, 24);
    put_elf_reloc(s1->symtab, mo->stub_helper, 0,
                  R_AARCH64_ADR_PREL_PG_HI21, mo->dyld_private);
    write32le(jmp, 0x90000011); // adrp x17, _dyld_private@page
    put_elf_reloc(s1->symtab, mo->stub_helper, 4,
                  R_AARCH64_LDST64_ABS_LO12_NC, mo->dyld_private);
    write32le(jmp + 4, 0x91000231); // add x17,x17,_dyld_private@pageoff
    write32le(jmp + 8, 0xa9bf47f0); // stp x16/x17, [sp, #-16]!
    put_elf_reloc(s1->symtab, mo->stub_helper, 12,
                  R_AARCH64_ADR_GOT_PAGE, mo->dyld_stub_binder);
    write32le(jmp + 12, 0x90000010); // adrp x16, dyld_stub_binder@page
    put_elf_reloc(s1->symtab, mo->stub_helper, 16,
                  R_AARCH64_LD64_GOT_LO12_NC, mo->dyld_stub_binder);
    write32le(jmp + 16, 0xf9400210); // ldr x16,[x16,dyld_stub_binder@pageoff]
    write32le(jmp + 20, 0xd61f0200); // br x16
#endif

    goti = NULL;
    mo->nr_plt = mo->n_got = 0;
    for (i = 1; i < s1->nb_sections; i++) {
        s = s1->sections[i];
        if (s->sh_type != SHT_RELX)
            continue;
        sh_num = s1->sections[s->sh_info]->sh_num;
        debug = sh_num >= s1->dwlo && sh_num < s1->dwhi;
        for_each_elem(s, 0, rel, ElfW_Rel) {
            type = ELFW(R_TYPE)(rel->r_info);
            gotplt_entry = gotplt_entry_type(type);
            for_code = code_reloc(type);
            /* We generate a non-lazy pointer for used undefined symbols
               and for defined symbols that must have a place for their
               address due to codegen (i.e. a reloc requiring a got slot). */
            sym_index = ELFW(R_SYM)(rel->r_info);
            sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
            if (!debug &&
                (sym->st_shndx == SHN_UNDEF
                 || gotplt_entry == ALWAYS_GOTPLT_ENTRY)) {
                attr = get_sym_attr(s1, sym_index, 1);
                if (!attr->dyn_index) {
                    attr->got_offset = s1->got->data_offset;
                    attr->plt_offset = -1;
                    attr->dyn_index = 1; /* used as flag */
                    section_ptr_add(s1->got, PTR_SIZE);
                    put_elf_reloc(s1->symtab, s1->got, attr->got_offset,
                                  R_DATA_PTR, sym_index);
                    goti = tcc_realloc(goti, (mo->n_got + 1) * sizeof(*goti));
                    if (ELFW(ST_BIND)(sym->st_info) == STB_LOCAL) {
                        if (sym->st_shndx == SHN_UNDEF)
                            tcc_error("undefined local symbol: '%s'",
                                      (char *) symtab_section->link->data + sym->st_name);
                        goti[mo->n_got++] = INDIRECT_SYMBOL_LOCAL;
                    } else {
                        goti[mo->n_got++] = mo->e2msym[sym_index];
                        if (sym->st_shndx == SHN_UNDEF
#ifdef TCC_TARGET_X86_64
                            && type == R_X86_64_GOTPCREL
#elif defined TCC_TARGET_ARM64
                            && type == R_AARCH64_ADR_GOT_PAGE
#endif
                            ) {
                            mo->bind =
                                tcc_realloc(mo->bind,
                                            (mo->n_bind + 1) *
                                            sizeof(struct bind));
                            mo->bind[mo->n_bind].section = s1->got->reloc->sh_info;
                            mo->bind[mo->n_bind].rel = *rel;
                            mo->bind[mo->n_bind].rel.r_offset = attr->got_offset;
                            mo->n_bind++;
                            /* dyld binds this slot, so drop the ELF
                               relocation that was just added for it */
                            s1->got->reloc->data_offset -= sizeof (ElfW_Rel);
                        }
                    }
                }
                if (for_code && sym->st_shndx == SHN_UNDEF) {
                    if (attr->plt_offset == -1) {
                        pi = section_ptr_add(mo->indirsyms, sizeof(*pi));
                        *pi = mo->e2msym[sym_index];
                        mo->nr_plt++;
                        attr->plt_offset = mo->stubs->data_offset;
                        /* NOTE(review): the `continue` below runs after
                           the indirect symbol entry, nr_plt and
                           plt_offset were already updated, so a first
                           reference of another code reloc type would
                           leave them without a matching stub -- confirm
                           that this cannot happen. */
#ifdef TCC_TARGET_X86_64
                        if (type != R_X86_64_PLT32)
                            continue;
                        /* __stubs */
                        jmp = section_ptr_add(mo->stubs, 6);
                        jmp[0] = 0xff; /* jmpq *__la_symbol_ptr(%rip) */
                        jmp[1] = 0x25;
                        put_elf_reloca(s1->symtab, mo->stubs,
                                       mo->stubs->data_offset - 4,
                                       R_X86_64_PC32, mo->lasym,
                                       mo->la_symbol_ptr->data_offset - 4);

                        /* __stub_helper */
                        bind_offset = mo->stub_helper->data_offset + 1;
                        jmp = section_ptr_add(mo->stub_helper, 10);
                        jmp[0] = 0x68; /* pushq $bind_offset */
                        jmp[5] = 0xe9; /* jmpq __stub_helper */
                        write32le(jmp + 6, -mo->stub_helper->data_offset);

                        /* __la_symbol_ptr */
                        la_symbol_offset = mo->la_symbol_ptr->data_offset;
                        put_elf_reloca(s1->symtab, mo->la_symbol_ptr,
                                       mo->la_symbol_ptr->data_offset,
                                       R_DATA_PTR, mo->helpsym,
                                       mo->stub_helper->data_offset - 10);
                        jmp = section_ptr_add(mo->la_symbol_ptr, PTR_SIZE);
#elif defined TCC_TARGET_ARM64
                        if (type != R_AARCH64_CALL26)
                            continue;
                        /* __stubs */
                        jmp = section_ptr_add(mo->stubs, 12);
                        put_elf_reloca(s1->symtab, mo->stubs,
                                       mo->stubs->data_offset - 12,
                                       R_AARCH64_ADR_PREL_PG_HI21, mo->lasym,
                                       mo->la_symbol_ptr->data_offset);
                        write32le(jmp, // adrp x16, __la_symbol_ptr@page
                                  0x90000010);
                        put_elf_reloca(s1->symtab, mo->stubs,
                                       mo->stubs->data_offset - 8,
                                       R_AARCH64_LDST64_ABS_LO12_NC, mo->lasym,
                                       mo->la_symbol_ptr->data_offset);
                        write32le(jmp + 4, // ldr x16,[x16, __la_symbol_ptr@pageoff]
                                  0xf9400210);
                        write32le(jmp + 8, // br x16
                                  0xd61f0200);

                        /* __stub_helper */
                        bind_offset = mo->stub_helper->data_offset + 8;
                        jmp = section_ptr_add(mo->stub_helper, 12);
                        write32le(jmp + 0, // ldr w16, l0
                                  0x18000050);
                        write32le(jmp + 4, // b stubHelperHeader
                                  0x14000000 +
                                  ((-(mo->stub_helper->data_offset - 8) / 4) &
                                   0x3ffffff));
                        write32le(jmp + 8, 0); // l0: .long bind_offset

                        /* __la_symbol_ptr */
                        la_symbol_offset = mo->la_symbol_ptr->data_offset;
                        put_elf_reloca(s1->symtab, mo->la_symbol_ptr,
                                       mo->la_symbol_ptr->data_offset,
                                       R_DATA_PTR, mo->helpsym,
                                       mo->stub_helper->data_offset - 12);
                        jmp = section_ptr_add(mo->la_symbol_ptr, PTR_SIZE);
#endif
                        /* queue the lazy-bind record for do_bind_rebase */
                        mo->lazy_bind_rebase =
                            tcc_realloc(mo->lazy_bind_rebase,
                                        (mo->n_lazy_bind_rebase + 1) *
                                        sizeof(struct lazy_bind_rebase));
                        mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].section =
                            mo->stub_helper->reloc->sh_info;
                        mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].bind = 1;
                        mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].bind_offset = bind_offset;
                        mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].la_symbol_offset = la_symbol_offset;
                        mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].rel = *rel;
                        mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].rel.r_offset =
                            attr->plt_offset;
                        mo->n_lazy_bind_rebase++;
                    }
                    /* make the relocation target the symbol's stub */
                    rel->r_info = ELFW(R_INFO)(mo->stubsym, type);
                    rel->r_addend += attr->plt_offset;
                }
            }
            if (type == R_DATA_PTR) {
                /* absolute pointer: queue a rebase record */
                mo->lazy_bind_rebase =
                    tcc_realloc(mo->lazy_bind_rebase,
                                (mo->n_lazy_bind_rebase + 1) *
                                sizeof(struct lazy_bind_rebase));
                mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].section = s->sh_info;
                mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].bind = 0;
                mo->lazy_bind_rebase[mo->n_lazy_bind_rebase].rel = *rel;
                mo->n_lazy_bind_rebase++;
            }
        }
    }

    /* Indirect symbol table layout: stub entries (written above), then
       the GOT entries, then the stub entries once more. */
    pi = section_ptr_add(mo->indirsyms, mo->n_got * sizeof(*pi));
    if (mo->n_got)      /* goti is still NULL when no slot was created */
        memcpy(pi, goti, mo->n_got * sizeof(*pi));
    pi = section_ptr_add(mo->indirsyms, mo->nr_plt * sizeof(*pi));
    memcpy(pi, mo->indirsyms->data, mo->nr_plt * sizeof(*pi));
    tcc_free(goti);
}
/* Validate the sorted Mach-O symbol table and find the group boundaries.
   Walks the table in Mach-O order (each entry's n_value still holds its
   ELF symbol index) and
   - stores in mo->ilocal / mo->iextdef / mo->iundef the index of the first
     local, externally defined and undefined symbol (left at -1 when the
     group is empty);
   - aborts through tcc_error() if the groups are not in that order;
   - rewrites undefined symbols that are weak or known to a loaded
     dynamic library to SHN_FROMDLL;
   - reports every other undefined symbol.
   Returns 0 on success and -1 if an undefined symbol was reported. */
static int check_symbols(TCCState *s1, struct macho *mo)
{
    int idx, limit;
    int status = 0;

    mo->ilocal = mo->iextdef = mo->iundef = -1;
    limit = symtab_section->data_offset / sizeof(ElfW(Sym));
    for (idx = 1; idx < limit; ++idx) {
        const int macho_index = idx - 1;
        struct nlist_64 *entry = (struct nlist_64 *)mo->symtab->data + macho_index;
        int elf_index = entry->n_value;
        ElfW(Sym) *sym = (ElfW(Sym) *)symtab_section->data + elf_index;
        const char *name = (char*)symtab_section->link->data + sym->st_name;
        unsigned type = ELFW(ST_TYPE)(sym->st_info);
        unsigned bind = ELFW(ST_BIND)(sym->st_info);
        unsigned vis = ELFW(ST_VISIBILITY)(sym->st_other);

        dprintf("%4d (%4d): %09lx %4d %4d %4d %3d %s\n",
                idx, elf_index, (long)sym->st_value,
                type, bind, vis, sym->st_shndx, name);

        if (bind == STB_LOCAL) {
            if (mo->ilocal == -1)
                mo->ilocal = macho_index;
            if (mo->iextdef != -1 || mo->iundef != -1)
                tcc_error("local syms after global ones");
            continue;
        }

        if (sym->st_shndx != SHN_UNDEF) {
            if (mo->iextdef == -1)
                mo->iextdef = macho_index;
            if (mo->iundef != -1)
                tcc_error("external defined symbol after undefined");
            continue;
        }

        /* Undefined external symbol. */
        if (mo->iundef == -1)
            mo->iundef = macho_index;
        if (bind == STB_WEAK
            || find_elf_sym(s1->dynsymtab_section, name)) {
            /* Mark the symbol as coming from a dylib so that
               relocate_syms doesn't complain. Normally bind_exe_dynsyms
               would do this check, and place the symbol into dynsym
               which is checked by relocate_syms. But Mach-O doesn't use
               bind_exe_dynsyms. */
            sym->st_shndx = SHN_FROMDLL;
        } else {
            tcc_error_noabort("undefined symbol '%s'", name);
            status = -1;
        }
    }
    return status;
}
/* Turn one provisional Mach-O symbol table entry into its final form.
   On entry pn->n_value holds the ELF symbol index and pn->n_strx the name
   offset (both set by create_symtab).  On return n_type, n_sect and
   n_desc are derived from the ELF symbol and n_value is the symbol's
   value.  Aborts through tcc_error() on symbol types or sections that
   cannot be represented. */
static void convert_symbol(TCCState *s1, struct macho *mo, struct nlist_64 *pn)
{
    struct nlist_64 n = *pn;
    ElfSym *sym = (ElfW(Sym) *)symtab_section->data + pn->n_value;
    const char *name = (char*)symtab_section->link->data + sym->st_name;

    /* Default type from the ELF symbol type; may be overridden below. */
    switch(ELFW(ST_TYPE)(sym->st_info)) {
    case STT_NOTYPE:
    case STT_OBJECT:
    case STT_FUNC:
    case STT_SECTION:
        n.n_type = N_SECT;
        break;
    case STT_FILE:
        n.n_type = N_ABS;
        break;
    default:
        tcc_error("unhandled ELF symbol type %d %s",
                  ELFW(ST_TYPE)(sym->st_info), name);
    }

    /* Section number, and type overrides for special section indices. */
    if (sym->st_shndx == SHN_UNDEF)
        tcc_error("should have been rewritten to SHN_FROMDLL: %s", name);
    else if (sym->st_shndx == SHN_FROMDLL)
        n.n_type = N_UNDF, n.n_sect = 0;
    else if (sym->st_shndx == SHN_ABS)
        n.n_type = N_ABS, n.n_sect = 0;
    else if (sym->st_shndx >= SHN_LORESERVE)
        tcc_error("unhandled ELF symbol section %d %s", sym->st_shndx, name);
    else if (!mo->elfsectomacho[sym->st_shndx]) {
        /* Unmapped sections are only tolerated inside the section number
           range [dwlo, dwhi); n_sect is left as it was in that case. */
        int sh_num = s1->sections[sym->st_shndx]->sh_num;
        if (sh_num < s1->dwlo || sh_num >= s1->dwhi)
            tcc_error("ELF section %d(%s) not mapped into Mach-O for symbol %s",
                      sym->st_shndx, s1->sections[sym->st_shndx]->name, name);
    }
    else
        n.n_sect = mo->elfsectomacho[sym->st_shndx];

    /* Binding: globals are external, weak symbols get the weak flags. */
    if (ELFW(ST_BIND)(sym->st_info) == STB_GLOBAL)
        n.n_type |= N_EXT;
    else if (ELFW(ST_BIND)(sym->st_info) == STB_WEAK)
        n.n_desc |= N_WEAK_REF | (n.n_type != N_UNDF ? N_WEAK_DEF : 0);

    n.n_strx = pn->n_strx;
    n.n_value = sym->st_value;
    *pn = n;
}
/* Finalize every entry of the Mach-O symbol table (see convert_symbol). */
static void convert_symbols(TCCState *s1, struct macho *mo)
{
    struct nlist_64 *pn;

    for_each_elem(mo->symtab, 0, pn, struct nlist_64)
        convert_symbol(s1, mo, pn);
}
/* Ordering function for the provisional Mach-O symbol table, in which
   n_value holds the ELF symbol index.  `arg` is the TCCState; it is bound
   to a local named s1, presumably because symtab_section is a macro that
   expands through s1.
   Returns <0, 0 or >0 like strcmp.  Distinct entries never compare equal,
   because the ELF index is the final tie-breaker; that keeps the result
   independent of the stability of the sort used. */
static int machosymcmp(const void *_a, const void *_b, void *arg)
{
    TCCState *s1 = arg;
    int ea = ((struct nlist_64 *)_a)->n_value;
    int eb = ((struct nlist_64 *)_b)->n_value;
    ElfSym *sa = (ElfSym *)symtab_section->data + ea;
    ElfSym *sb = (ElfSym *)symtab_section->data + eb;
    int r;

    /* locals, then defined externals, then undefined externals, the
       last two sections also by name, otherwise stable sort */
    r = (ELFW(ST_BIND)(sb->st_info) == STB_LOCAL)
        - (ELFW(ST_BIND)(sa->st_info) == STB_LOCAL);
    if (r)
        return r;
    r = (sa->st_shndx == SHN_UNDEF) - (sb->st_shndx == SHN_UNDEF);
    if (r)
        return r;
    if (ELFW(ST_BIND)(sa->st_info) != STB_LOCAL) {
        const char * na = (char*)symtab_section->link->data + sa->st_name;
        const char * nb = (char*)symtab_section->link->data + sb->st_name;
        r = strcmp(na, nb);
        if (r)
            return r;
    }
    return ea - eb;
}
/* cannot use qsort because code has to be reentrant */
/* In-place Shell sort of `nel` elements of `width` bytes each, starting
   at `base`.  `comp` is called as comp(a, b, arg) and must return a value
   greater than zero when element a has to come after element b; `arg` is
   handed through unchanged.  The gap sequence is 1, 4, 13, 40, ...
   (g -> 3g + 1).  The sort is not stable. */
static void tcc_qsort (void *base, size_t nel, size_t width,
                       int (*comp)(const void *, const void *, void *), void *arg)
{
    char *const first = base;
    const size_t total = width * nel;       /* array size in bytes */
    size_t gap = 1;

    /* Smallest gap of the sequence that is not below nel ... */
    while (gap < nel)
        gap = gap * 3 + 1;

    /* ... then walk the sequence downwards; the division discards the
       "+ 1" again, so every step yields the previous sequence member. */
    for (gap /= 3; gap != 0; gap /= 3) {
        const size_t stride = width * gap;  /* gap in bytes */
        size_t hi;

        for (hi = stride; hi < total; hi += width) {
            size_t lo = hi - stride;

            /* Sift the element towards the front in steps of one gap. */
            for (;;) {
                char *left = first + lo;
                char *right = left + stride;
                size_t n;

                if (comp(left, right, arg) <= 0)
                    break;
                for (n = 0; n < width; n++) {
                    const char keep = left[n];
                    left[n] = right[n];
                    right[n] = keep;
                }
                if (lo < stride)
                    break;
                lo -= stride;
            }
        }
    }
}
/* Create the sections and helper symbols needed for dynamic linking and
   build the provisional, sorted Mach-O symbol table.

   After this call
   - mo->stubs, mo->stub_helper, s1->got, mo->la_symbol_ptr and the
     link-edit sections (rebase, binding, lazy_binding, exports, symtab,
     strtab) exist;
   - mo->symtab holds one nlist_64 per ELF symbol (ELF symbol 0 is
     skipped), sorted with machosymcmp, with n_strx set and n_value
     holding the ELF symbol index (finalized later by convert_symbol);
   - mo->e2msym maps an ELF symbol index to its Mach-O symbol index
     (entry 0 is -1). */
static void create_symtab(TCCState *s1, struct macho *mo)
{
    int sym_index, sym_end;
    struct nlist_64 *pn;

    /* Stub creation belongs to check_relocs, but we need to create
       the symbol now, so its included in the sorting. */
    mo->stubs = new_section(s1, "__stubs", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR);
    mo->stub_helper = new_section(s1, "__stub_helper", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR);
    s1->got = new_section(s1, ".got", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE);
    mo->la_symbol_ptr = new_section(s1, "__la_symbol_ptr", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE);
    mo->stubsym = put_elf_sym(s1->symtab, 0, 0,
                              ELFW(ST_INFO)(STB_LOCAL, STT_SECTION), 0,
                              mo->stubs->sh_num, ".__stubs");
    mo->helpsym = put_elf_sym(s1->symtab, 0, 0,
                              ELFW(ST_INFO)(STB_LOCAL, STT_SECTION), 0,
                              mo->stub_helper->sh_num, ".__stub_helper");
    mo->lasym = put_elf_sym(s1->symtab, 0, 0,
                            ELFW(ST_INFO)(STB_LOCAL, STT_SECTION), 0,
                            mo->la_symbol_ptr->sh_num, ".__la_symbol_ptr");

    /* Pointer-sized, pointer-aligned private slot in the data section
       whose address the stub helper header loads. */
    section_ptr_add(data_section, -data_section->data_offset & (PTR_SIZE - 1));
    mo->dyld_private = put_elf_sym(s1->symtab, data_section->data_offset, PTR_SIZE,
                                   ELFW(ST_INFO)(STB_LOCAL, STT_OBJECT), 0,
                                   data_section->sh_num, ".__dyld_private");
    section_ptr_add(data_section, PTR_SIZE);
    mo->dyld_stub_binder = put_elf_sym(s1->symtab, 0, 0,
                                       ELFW(ST_INFO)(STB_GLOBAL, STT_OBJECT), 0,
                                       SHN_UNDEF, "dyld_stub_binder");

    mo->rebase = new_section(s1, "REBASE", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
    mo->binding = new_section(s1, "BINDING", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
    mo->lazy_binding = new_section(s1, "LAZY_BINDING", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
    mo->exports = new_section(s1, "EXPORT", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);

    mo->symtab = new_section(s1, "LESYMTAB", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
    mo->strtab = new_section(s1, "LESTRTAB", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
    put_elf_str(mo->strtab, " "); /* Mach-O starts strtab with a space */

    /* One provisional entry per ELF symbol, skipping ELF symbol 0. */
    sym_end = symtab_section->data_offset / sizeof(ElfW(Sym));
    pn = section_ptr_add(mo->symtab, sizeof(*pn) * (sym_end - 1));
    for (sym_index = 1; sym_index < sym_end; ++sym_index) {
        ElfW(Sym) *sym = (ElfW(Sym) *)symtab_section->data + sym_index;
        const char *name = (char*)symtab_section->link->data + sym->st_name;
        pn[sym_index - 1].n_strx = put_elf_str(mo->strtab, name);
        pn[sym_index - 1].n_value = sym_index;
    }
    tcc_qsort(pn, sym_end - 1, sizeof(*pn), machosymcmp, s1);

    /* Invert the sorted order: ELF symbol index -> Mach-O symbol index. */
    mo->e2msym = tcc_malloc(sym_end * sizeof(*mo->e2msym));
    mo->e2msym[0] = -1;
    for (sym_index = 1; sym_index < sym_end; ++sym_index) {
        mo->e2msym[pn[sym_index - 1].n_value] = sym_index - 1;
    }
}
/* Per section kind: the segment the section is placed in, its Mach-O
   section flags and its Mach-O section name.  The table is indexed by
   enum skind; initializers are positional, so the entries must stay in
   enumerator order (the commented-out designators name each entry).
   NOTE(review): seg is presumably a segment number as used by
   get_segment() (0 for kinds that are not emitted, 1 = text, 2 = data,
   3 = link-edit) -- confirm against the segment creation code. */
const struct {
int seg;
uint32_t flags;
const char *name;
} skinfo[sk_last] = {
/*[sk_unknown] =*/ { 0 },
/*[sk_discard] =*/ { 0 },
/*[sk_text] =*/ { 1, S_REGULAR | S_ATTR_PURE_INSTRUCTIONS
| S_ATTR_SOME_INSTRUCTIONS, "__text" },
/*[sk_stubs] =*/ { 1, S_REGULAR | S_ATTR_PURE_INSTRUCTIONS | S_SYMBOL_STUBS
| S_ATTR_SOME_INSTRUCTIONS , "__stubs" },
/*[sk_stub_helper] =*/ { 1, S_REGULAR | S_ATTR_PURE_INSTRUCTIONS
| S_ATTR_SOME_INSTRUCTIONS , "__stub_helper" },
/*[sk_ro_data] =*/ { 1, S_REGULAR, "__rodata" },
/*[sk_uw_info] =*/ { 0 },
/*[sk_nl_ptr] =*/ { 2, S_NON_LAZY_SYMBOL_POINTERS, "__got" },
/*[sk_la_ptr] =*/ { 2, S_LAZY_SYMBOL_POINTERS, "__la_symbol_ptr" },
/*[sk_init] =*/ { 2, S_MOD_INIT_FUNC_POINTERS, "__mod_init_func" },
/*[sk_fini] =*/ { 2, S_MOD_TERM_FUNC_POINTERS, "__mod_term_func" },
/*[sk_rw_data] =*/ { 2, S_REGULAR, "__data" },
/*[sk_stab] =*/ { 2, S_REGULAR, "__stab" },
/*[sk_stab_str] =*/ { 2, S_REGULAR, "__stab_str" },
/*[sk_debug_info] =*/ { 2, S_REGULAR, "__debug_info" },
/*[sk_debug_abbrev] =*/ { 2, S_REGULAR, "__debug_abbrev" },
/*[sk_debug_line] =*/ { 2, S_REGULAR, "__debug_line" },
/*[sk_debug_aranges] =*/ { 2, S_REGULAR, "__debug_aranges" },
/*[sk_debug_str] =*/ { 2, S_REGULAR, "__debug_str" },
/*[sk_debug_line_str] =*/ { 2, S_REGULAR, "__debug_line_str" },
/*[sk_bss] =*/ { 2, S_ZEROFILL, "__bss" },
/*[sk_linkedit] =*/ { 3, S_REGULAR, NULL },
};
/* Emit a "set segment and offset" opcode into a rebase/bind opcode stream.

   mo      Mach-O output state; its segments are searched for ADDR.
   addr    address used to pick the segment.
   ptr     where the opcode byte is stored; the segment index is OR-ed
           into the low bits of OPCODE.
   opcode  the *_SET_SEGMENT_AND_OFFSET_ULEB opcode to emit.
   sec     section receiving the ULEB128-encoded offset that follows.
   offset  absolute address; encoded relative to the chosen segment's
           vmaddr.

   Only segments 1 .. nseg-2 are candidates.  When none contains ADDR the
   index ends up as nseg-1 while the offset is still taken relative to the
   last segment examined. */
static void set_segment_and_offset(struct macho *mo, addr_t addr,
                                   uint8_t *ptr, int opcode,
                                   Section *sec, addr_t offset)
{
    struct segment_command_64 *cand = NULL;
    int segno = 1;

    while (segno < mo->nseg - 1) {
        cand = get_segment(mo, segno);
        if (addr >= cand->vmaddr && addr < (cand->vmaddr + cand->vmsize))
            break;
        segno++;
    }

    /* Store the opcode byte before appending the operand.
       NOTE(review): callers obtain PTR from SEC itself, and write_uleb128
       presumably grows SEC, so PTR may not survive the call - confirm. */
    *ptr = opcode | segno;
    write_uleb128(sec, offset - cand->vmaddr);
}
/* Generate the dyld opcode streams from the relocations recorded in MO:
     - mo->lazy_binding : one self-contained bind program per lazily bound
                          symbol; its start offset is patched into the
                          matching stub-helper entry,
     - mo->rebase       : one rebase record per non-bind entry of
                          mo->lazy_bind_rebase,
     - mo->binding      : one bind record per entry of mo->bind.
   All symbols are bound with flat-namespace lookup.  The two relocation
   arrays are freed before returning. */
static void do_bind_rebase(TCCState *s1, struct macho *mo)
{
    int i;
    uint8_t *out;
    ElfW(Sym) *sym;
    char *name;
    size_t len;

    for (i = 0; i < mo->n_lazy_bind_rebase; i++) {
        int sym_index = ELFW(R_SYM)(mo->lazy_bind_rebase[i].rel.r_info);
        Section *s = s1->sections[mo->lazy_bind_rebase[i].section];

        sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
        name = (char *) symtab_section->link->data + sym->st_name;

        if (!mo->lazy_bind_rebase[i].bind) {
            /* Plain rebase: type byte, segment/offset, then "rebase once". */
            out = section_ptr_add(mo->rebase, 2);
            out[0] = REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER;
            set_segment_and_offset(mo, s->sh_addr, out + 1,
                                   REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB,
                                   mo->rebase,
                                   mo->lazy_bind_rebase[i].rel.r_offset +
                                   s->sh_addr);
            out = section_ptr_add(mo->rebase, 1);
            *out = REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1;
            continue;
        }

        /* Lazy bind: record where this symbol's bind program starts. */
        write32le(mo->stub_helper->data +
                  mo->lazy_bind_rebase[i].bind_offset,
                  mo->lazy_binding->data_offset);

        out = section_ptr_add(mo->lazy_binding, 1);
        set_segment_and_offset(mo, mo->la_symbol_ptr->sh_addr, out,
                               BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB,
                               mo->lazy_binding,
                               mo->la_symbol_ptr->sh_addr +
                               mo->lazy_bind_rebase[i].la_symbol_offset);

        /* 2 opcode bytes + name + NUL + DO_BIND + DONE == 5 + len */
        len = strlen(name);
        out = section_ptr_add(mo->lazy_binding, 5 + len);
        out[0] = BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
                 (BIND_SPECIAL_DYLIB_FLAT_LOOKUP & 0xf);
        out[1] = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0;
        memcpy(out + 2, name, len + 1);
        out[len + 3] = BIND_OPCODE_DO_BIND;
        out[len + 4] = BIND_OPCODE_DONE;
    }

    for (i = 0; i < mo->n_bind; i++) {
        int sym_index = ELFW(R_SYM)(mo->bind[i].rel.r_info);
        Section *s = s1->sections[mo->bind[i].section];

        sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
        name = (char *) symtab_section->link->data + sym->st_name;

        /* 2 opcode bytes + name + NUL + type byte + segment byte == 5 + len;
           the segment byte is filled in by set_segment_and_offset. */
        len = strlen(name);
        out = section_ptr_add(mo->binding, 5 + len);
        out[0] = BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
                 (BIND_SPECIAL_DYLIB_FLAT_LOOKUP & 0xf);
        out[1] = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0;
        memcpy(out + 2, name, len + 1);
        out[len + 3] = BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER;
        set_segment_and_offset(mo, s->sh_addr, out + len + 4,
                               BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB,
                               mo->binding,
                               mo->bind[i].rel.r_offset + s->sh_addr);
        out = section_ptr_add(mo->binding, 1);
        *out = BIND_OPCODE_DO_BIND;
    }

    /* Terminate the rebase stream only if anything was emitted into it. */
    if (mo->rebase->data_offset) {
        out = section_ptr_add(mo->rebase, 1);
        *out = REBASE_OPCODE_DONE;
    }

    tcc_free(mo->lazy_bind_rebase);
    tcc_free(mo->bind);
}
/* Build the Mach-O skeleton for the output image and lay out all sections.

   Steps, in order:
     1. create the four segments (__PAGEZERO, __TEXT, __DATA, __LINKEDIT)
        and the LC_MAIN, LC_LOAD_DYLINKER, LC_DYLD_INFO_ONLY, LC_SYMTAB,
        LC_DYSYMTAB and per-dylib load commands;
     2. classify every ELF section into a section kind (sk_*) and chain the
        sections of each kind through Section.prev;
     3. walk the kinds in skinfo order, assigning virtual addresses and file
        offsets, creating Mach-O section headers where skinfo names one, and
        recording the ELF-section -> Mach-O-section-number mapping in
        mo->elfsectomacho;
     4. fill in the symtab, dysymtab and dyld-info load commands from the
        final section offsets.

   do_bind_rebase() runs right before the __LINKEDIT kind is laid out, so
   the opcode streams it produces are sized before they receive offsets. */
static void collect_sections(TCCState *s1, struct macho *mo)
{
    int i, sk, numsec;
    uint64_t curaddr, fileofs;
    Section *s;
    struct segment_command_64 *seg = NULL;
    struct dylinker_command *dyldlc;
    struct symtab_command *symlc;
    struct dysymtab_command *dysymlc;
    char *str;

    seg = add_segment(mo, "__PAGEZERO");
    seg->vmsize = (uint64_t)1 << 32;

    seg = add_segment(mo, "__TEXT");
    seg->vmaddr = (uint64_t)1 << 32;
    seg->maxprot = 7;  // rwx
    seg->initprot = 5; // r-x

    /* vmaddr == -1 marks a segment whose address is assigned during layout */
    seg = add_segment(mo, "__DATA");
    seg->vmaddr = -1;
    seg->maxprot = 7;  // rwx
    seg->initprot = 3; // rw-

    seg = add_segment(mo, "__LINKEDIT");
    seg->vmaddr = -1;
    seg->maxprot = 7;  // rwx
    seg->initprot = 1; // r--

    mo->ep = add_lc(mo, LC_MAIN, sizeof(*mo->ep));
    /* placeholder; macho_output_file stores the real offset of "main" */
    mo->ep->entryoff = 4096;

    /* command size: struct + path + NUL, rounded up to a multiple of 8 */
    i = (sizeof(*dyldlc) + strlen("/usr/lib/dyld") + 1 + 7) &-8;
    dyldlc = add_lc(mo, LC_LOAD_DYLINKER, i);
    dyldlc->name = sizeof(*dyldlc);
    str = (char*)dyldlc + dyldlc->name;
    strcpy(str, "/usr/lib/dyld");

    mo->dyldinfo = add_lc(mo, LC_DYLD_INFO_ONLY, sizeof(*mo->dyldinfo));

    symlc = add_lc(mo, LC_SYMTAB, sizeof(*symlc));
    dysymlc = add_lc(mo, LC_DYSYMTAB, sizeof(*dysymlc));

    /* only directly referenced libraries (level 0) get a load command */
    for(i = 0; i < s1->nb_loaded_dlls; i++) {
        DLLReference *dllref = s1->loaded_dlls[i];
        if (dllref->level == 0)
            add_dylib(mo, dllref->name);
    }

    /* dyld requires a writable segment with classic Mach-O, but it ignores
       zero-sized segments for this, so force to have some data.  */
    section_ptr_add(data_section, 1);

    /* Classify sections.  Walking from the highest index down while
       prepending to each kind's list leaves every list in ascending
       section order. */
    memset (mo->sk_to_sect, 0, sizeof(mo->sk_to_sect));
    for (i = s1->nb_sections; i-- > 1;) {
        int type, flags;
        s = s1->sections[i];
        type = s->sh_type;
        flags = s->sh_flags;
        sk = sk_unknown;
        if (flags & SHF_ALLOC) {
            switch (type) {
            default:           sk = sk_unknown; break;
            case SHT_INIT_ARRAY: sk = sk_init; break;
            case SHT_FINI_ARRAY: sk = sk_fini; break;
            case SHT_NOBITS:   sk = sk_bss; break;
            case SHT_SYMTAB:   sk = sk_discard; break;
            case SHT_STRTAB:
                if (s == stabstr_section)
                    sk = sk_stab_str;
                else
                    sk = sk_discard;
                break;
            case SHT_RELX:     sk = sk_discard; break;
            case SHT_LINKEDIT: sk = sk_linkedit; break;
            case SHT_PROGBITS:
                if (s == mo->stubs)
                    sk = sk_stubs;
                else if (s == mo->stub_helper)
                    sk = sk_stub_helper;
                else if (s == s1->got)
                    sk = sk_nl_ptr;
                else if (s == mo->la_symbol_ptr)
                    sk = sk_la_ptr;
                else if (s == stab_section)
                    sk = sk_stab;
                else if (s == dwarf_info_section)
                    sk = sk_debug_info;
                else if (s == dwarf_abbrev_section)
                    sk = sk_debug_abbrev;
                else if (s == dwarf_line_section)
                    sk = sk_debug_line;
                else if (s == dwarf_aranges_section)
                    sk = sk_debug_aranges;
                else if (s == dwarf_str_section)
                    sk = sk_debug_str;
                else if (s == dwarf_line_str_section)
                    sk = sk_debug_line_str;
                else if (flags & SHF_EXECINSTR)
                    sk = sk_text;
                else if (flags & SHF_WRITE)
                    sk = sk_rw_data;
                else
                    sk = sk_ro_data;
                break;
            }
        } else
            sk = sk_discard;
        s->prev = mo->sk_to_sect[sk].s;
        mo->sk_to_sect[sk].s = s;
    }

    fileofs = 4096;  /* leave space for mach-o headers */
    curaddr = get_segment(mo, 1)->vmaddr;
    curaddr += 4096;
    seg = NULL;
    numsec = 0;
    mo->elfsectomacho = tcc_mallocz(sizeof(*mo->elfsectomacho) * s1->nb_sections);
    for (sk = sk_unknown; sk < sk_last; sk++) {
        struct section_64 *sec = NULL;
#define SEG_PAGE_SIZE 16384
        /* the opcode streams live in __LINKEDIT; create them before that
           kind is measured and placed */
        if (sk == sk_linkedit)
            do_bind_rebase(s1, mo);
        /* Close off what has been laid out so far: round the current
           segment up to SEG_PAGE_SIZE and continue from its end. */
        if (seg) {
            seg->vmsize = (curaddr - seg->vmaddr + SEG_PAGE_SIZE - 1) & -SEG_PAGE_SIZE;
            seg->filesize = (fileofs - seg->fileoff + SEG_PAGE_SIZE - 1) & -SEG_PAGE_SIZE;
            curaddr = seg->vmaddr + seg->vmsize;
            fileofs = seg->fileoff + seg->filesize;
        }
        if (skinfo[sk].seg && mo->sk_to_sect[sk].s) {
            uint64_t al = 0;
            int si;
            seg = get_segment(mo, skinfo[sk].seg);
            if (skinfo[sk].name) {
                si = add_section(mo, &seg, skinfo[sk].name);
                numsec++;
                /* add_section may have replaced the segment command
                   (it takes &seg), so re-register the pointer */
                mo->lc[mo->seg2lc[skinfo[sk].seg]] = (struct load_command*)seg;
                mo->sk_to_sect[sk].machosect = si;
                sec = get_section(seg, si);
                sec->flags = skinfo[sk].flags;
                /* reserved2 of a stubs section holds the size of one stub */
                if (sk == sk_stubs)
#ifdef TCC_TARGET_X86_64
                    sec->reserved2 = 6;
#elif defined TCC_TARGET_ARM64
                    sec->reserved2 = 12;
#endif
                /* reserved1 is the section's first index into the indirect
                   symbol table */
                if (sk == sk_nl_ptr)
                    sec->reserved1 = mo->nr_plt;
                if (sk == sk_la_ptr)
                    sec->reserved1 = mo->nr_plt + mo->n_got;
            }
            /* first use of this segment: give it an address and offset */
            if (seg->vmaddr == -1) {
                curaddr = (curaddr + 4095) & -4096;
                seg->vmaddr = curaddr;
                fileofs = (fileofs + 4095) & -4096;
                seg->fileoff = fileofs;
            }

            /* largest alignment (as log2) among this kind's sections;
               also freeze each section's size */
            for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
                int a = exact_log2p1(s->sh_addralign);
                if (a && al < (a - 1))
                    al = a - 1;
                s->sh_size = s->data_offset;
            }
            if (sec)
                sec->align = al;
            al = 1ULL << al;
            if (al > 4096) {
                tcc_warning("alignment > 4096");
                /* sec is NULL for kinds without a section header
                   (skinfo name == NULL), so only touch it when present */
                if (sec)
                    sec->align = 12;
                al = 4096;
            }
            curaddr = (curaddr + al - 1) & -al;
            fileofs = (fileofs + al - 1) & -al;
            if (sec) {
                sec->addr = curaddr;
                sec->offset = fileofs;
            }
            for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
                al = s->sh_addralign;
                curaddr = (curaddr + al - 1) & -al;
                dprintf("curaddr now 0x%lx\n", (long)curaddr);
                s->sh_addr = curaddr;
                curaddr += s->sh_size;
                /* NOBITS sections occupy address space but no file space */
                if (s->sh_type != SHT_NOBITS) {
                    fileofs = (fileofs + al - 1) & -al;
                    s->sh_offset = fileofs;
                    fileofs += s->sh_size;
                    dprintf("fileofs now %ld\n", (long)fileofs);
                }
                /* numsec was incremented above, so numbering starts at 1 */
                if (sec)
                    mo->elfsectomacho[s->sh_num] = numsec;
            }
            if (sec)
                sec->size = curaddr - sec->addr;
        }
        if (DEBUG_MACHO)
            for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
                int type = s->sh_type;
                int flags = s->sh_flags;
                printf("%d section %-16s %-10s %09lx %04x %02d %s,%s,%s\n",
                       sk,
                       s->name,
                       type == SHT_PROGBITS ? "progbits" :
                       type == SHT_NOBITS ? "nobits" :
                       type == SHT_SYMTAB ? "symtab" :
                       type == SHT_STRTAB ? "strtab" :
                       type == SHT_INIT_ARRAY ? "init" :
                       type == SHT_FINI_ARRAY ? "fini" :
                       type == SHT_RELX ? "rel" : "???",
                       (long)s->sh_addr,
                       (unsigned)s->data_offset,
                       s->sh_addralign,
                       flags & SHF_ALLOC ? "alloc" : "",
                       flags & SHF_WRITE ? "write" : "",
                       flags & SHF_EXECINSTR ? "exec" : ""
                      );
            }
    }
    /* the last segment keeps its exact (unrounded) size */
    if (seg) {
        seg->vmsize = curaddr - seg->vmaddr;
        seg->filesize = fileofs - seg->fileoff;
    }

    /* Fill symtab info */
    symlc->symoff = mo->symtab->sh_offset;
    symlc->nsyms = mo->symtab->data_offset / sizeof(struct nlist_64);
    symlc->stroff = mo->strtab->sh_offset;
    symlc->strsize = mo->strtab->data_offset;

    /* A start index of -1 means "no symbol of that class"; such a class
       gets an empty range starting where the next class starts. */
    dysymlc->iundefsym = mo->iundef == -1 ? symlc->nsyms : mo->iundef;
    dysymlc->iextdefsym = mo->iextdef == -1 ? dysymlc->iundefsym : mo->iextdef;
    dysymlc->ilocalsym = mo->ilocal == -1 ? dysymlc->iextdefsym : mo->ilocal;
    dysymlc->nlocalsym = dysymlc->iextdefsym - dysymlc->ilocalsym;
    dysymlc->nextdefsym = dysymlc->iundefsym - dysymlc->iextdefsym;
    dysymlc->nundefsym = symlc->nsyms - dysymlc->iundefsym;
    dysymlc->indirectsymoff = mo->indirsyms->sh_offset;
    dysymlc->nindirectsyms = mo->indirsyms->data_offset / sizeof(uint32_t);

    /* dyld info: only non-empty streams get an offset/size pair */
    if (mo->rebase->data_offset) {
        mo->dyldinfo->rebase_off = mo->rebase->sh_offset;
        mo->dyldinfo->rebase_size = mo->rebase->data_offset;
    }
    if (mo->binding->data_offset) {
        mo->dyldinfo->bind_off = mo->binding->sh_offset;
        mo->dyldinfo->bind_size = mo->binding->data_offset;
    }
    if (mo->lazy_binding->data_offset) {
        mo->dyldinfo->lazy_bind_off = mo->lazy_binding->sh_offset;
        mo->dyldinfo->lazy_bind_size = mo->lazy_binding->data_offset;
    }
    if (mo->exports->data_offset) {
        mo->dyldinfo->export_off = mo->exports->sh_offset;
        mo->dyldinfo->export_size = mo->exports->data_offset;
    }
}
/* Write the laid-out image to FP: the Mach-O header, every load command,
   then the contents of all sections in skinfo order.  Gaps between the
   current position and a section's sh_offset are filled with zero bytes.
   SHT_NOBITS sections and kinds without a segment contribute no bytes.
   Write errors are not reported here; callers can inspect the stream. */
static void macho_write(TCCState *s1, struct macho *mo, FILE *fp)
{
    int i, sk;
    uint64_t fileofs = 0;
    Section *s;

    mo->mh.mh.magic = MH_MAGIC_64;
#ifdef TCC_TARGET_X86_64
    mo->mh.mh.cputype = CPU_TYPE_X86_64;
    mo->mh.mh.cpusubtype = CPU_SUBTYPE_LIB64 | CPU_SUBTYPE_X86_ALL;
#elif defined TCC_TARGET_ARM64
    mo->mh.mh.cputype = CPU_TYPE_ARM64;
    mo->mh.mh.cpusubtype = CPU_SUBTYPE_ARM64_ALL;
#endif
    mo->mh.mh.filetype = MH_EXECUTE;
    mo->mh.mh.flags = MH_DYLDLINK | MH_PIE;
    mo->mh.mh.ncmds = mo->nlc;
    mo->mh.mh.sizeofcmds = 0;
    for (i = 0; i < mo->nlc; i++)
        mo->mh.mh.sizeofcmds += mo->lc[i]->cmdsize;

    fwrite(&mo->mh, 1, sizeof(mo->mh), fp);
    fileofs += sizeof(mo->mh);
    for (i = 0; i < mo->nlc; i++) {
        fwrite(mo->lc[i], 1, mo->lc[i]->cmdsize, fp);
        fileofs += mo->lc[i]->cmdsize;
    }

    for (sk = sk_unknown; sk < sk_last; sk++) {
        //struct segment_command_64 *seg;
        if (!skinfo[sk].seg || !mo->sk_to_sect[sk].s)
            continue;
        /*seg =*/ get_segment(mo, skinfo[sk].seg);
        for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
            if (s->sh_type != SHT_NOBITS) {
                /* pad up to the offset chosen during layout */
                while (fileofs < s->sh_offset)
                    fputc(0, fp), fileofs++;
                if (s->sh_size) {
                    fwrite(s->data, 1, s->sh_size, fp);
                    fileofs += s->sh_size;
                }
            }
        }
    }
}
/* Link the current state of S1 into a Mach-O file called FILENAME.

   Any existing file of that name is removed first.  The new file is created
   with mode 0666 for object output and 0777 otherwise.

   Returns 0 on success and -1 on failure: the file could not be created,
   symbol checking failed, errors were recorded while resolving symbols, or
   the image could not be written completely. */
ST_FUNC int macho_output_file(TCCState *s1, const char *filename)
{
    int fd, mode, file_type;
    FILE *fp;
    int i, ret = -1;
    struct macho mo;

    (void)memset(&mo, 0, sizeof(mo));

    file_type = s1->output_type;
    if (file_type == TCC_OUTPUT_OBJ)
        mode = 0666;
    else
        mode = 0777;
    unlink(filename);
    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, mode);
    if (fd < 0 || (fp = fdopen(fd, "wb")) == NULL) {
        tcc_error_noabort("could not write '%s': %s", filename, strerror(errno));
        /* fdopen failed after open succeeded: don't leak the descriptor */
        if (fd >= 0)
            close(fd);
        return -1;
    }
    if (s1->verbose)
        printf("<- %s\n", filename);

    tcc_add_runtime(s1);
    tcc_macho_add_destructor(s1);
    resolve_common_syms(s1);
    create_symtab(s1, &mo);
    check_relocs(s1, &mo);
    ret = check_symbols(s1, &mo);
    if (!ret) {
        collect_sections(s1, &mo);
        relocate_syms(s1, s1->symtab, 0);
        /* LC_MAIN wants the entry point relative to the start of segment 1 */
        mo.ep->entryoff = get_sym_addr(s1, "main", 1, 1)
                        - get_segment(&mo, 1)->vmaddr;
        if (s1->nb_errors) {
            /* nothing gets written, so do not report success */
            ret = -1;
            goto do_ret;
        }
        relocate_sections(s1);
        convert_symbols(s1, &mo);
        macho_write(s1, &mo, fp);
        /* macho_write cannot report failures itself */
        if (ferror(fp)) {
            tcc_error_noabort("could not write '%s'", filename);
            ret = -1;
        }
    }

 do_ret:
    for (i = 0; i < mo.nlc; i++)
        tcc_free(mo.lc[i]);
    tcc_free(mo.lc);
    tcc_free(mo.elfsectomacho);
    tcc_free(mo.e2msym);

    /* fclose flushes buffered data, so a failure here is a write failure.
       It is always called; the message is only issued for a first error. */
    if (fclose(fp) != 0 && !ret) {
        tcc_error_noabort("could not write '%s': %s", filename, strerror(errno));
        ret = -1;
    }
    return ret;
}
/* Return X with its four bytes in reverse order (32-bit endianness swap). */
static uint32_t macho_swap32(uint32_t x)
{
    return (x >> 24) | (x << 24) | ((x >> 8) & 0xff00) | ((x & 0xff00) << 8);
}
/* SWAP(x): byte-swap the 32-bit value x when the local variable 'swap' is
   non-zero, otherwise yield x unchanged.  Requires 'swap' to be in scope. */
#define SWAP(x) (swap ? macho_swap32(x) : (x))
/* Helpers for scanning .tbd text in place.  All of them operate on a local
   'char *pos' cursor that must be in scope at the point of use. */
/* Move pos just past the next occurrence of string s.  Sets pos to NULL when
   s is not found; the value of the expression is the new pos. */
#define tbd_parse_movepast(s) \
(pos = (pos = strstr(pos, s)) ? pos + strlen(s) : NULL)
/* Move pos to the next character contained in cs (NULL when there is none). */
#define tbd_parse_movetoany(cs) (pos = strpbrk(pos, cs))
/* Skip spaces and newlines only; other whitespace is not skipped. */
#define tbd_parse_skipws while (*pos && (*pos==' '||*pos=='\n')) ++pos
/* The next two expand to a bare 'if' statement: use them only as complete
   statements, never directly in front of an 'else'. */
/* If pos is at a single or double quote, overwrite it with NUL and advance. */
#define tbd_parse_tramplequote if(*pos=='\''||*pos=='"') tbd_parse_trample
/* If pos is at a space, overwrite it with NUL and advance. */
#define tbd_parse_tramplespace if(*pos==' ') tbd_parse_trample
/* Unconditionally overwrite the current character with NUL and advance,
   terminating the token that ends here. */
#define tbd_parse_trample *pos++=0
#ifdef TCC_IS_NATIVE
/* Looks for the active developer SDK set by xcode-select (or the default
   one set during installation) and adds its usr/lib directory to the
   library search path of S.

   The SDK root is obtained from xcselect_host_sdk_path() in
   libxcselect.dylib.  Everything after "SDKs/MacOSX" in the returned path
   is replaced by ".sdk/usr/lib".  When the library or the function is
   unavailable, or the path has no such component, two well-known default
   SDK locations are added instead. */
ST_FUNC void tcc_add_macos_sdkpath(TCCState* s)
{
    static const char marker[] = "SDKs/MacOSX";
    char *sdkroot = NULL, *pos = NULL;
    void* xcs = dlopen("libxcselect.dylib", RTLD_GLOBAL | RTLD_LAZY);
    CString path;
    int (*f)(unsigned int, char**) = NULL;

    /* only look the symbol up when the library could actually be loaded */
    if (xcs)
        f = dlsym(xcs, "xcselect_host_sdk_path");
    cstr_new(&path);
    if (f) f(1, &sdkroot);
    if (sdkroot)
        pos = strstr(sdkroot, marker);
    if (pos)
        cstr_printf(&path, "%.*s.sdk/usr/lib",
                    (int)(pos - sdkroot + (sizeof(marker) - 1)), sdkroot);
    /* must use free from libc directly */
#pragma push_macro("free")
#undef free
    free(sdkroot);
#pragma pop_macro("free")
    /* the function pointer is not used past this point */
    if (xcs)
        dlclose(xcs);
    if (path.size)
        tcc_add_library_path(s, (char*)path.data);
    else
        tcc_add_library_path(s,
            "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib"
            ":" "/Applications/Xcode.app/Developer/SDKs/MacOSX.sdk/usr/lib"
            );
    cstr_free(&path);
}
/* Return the install name recorded in the .tbd file FILENAME.

   The value following "install-name: " is extracted, with surrounding
   quotes removed.  On success the result is a fresh copy made with
   tcc_strdup.  When the file cannot be opened or contains no install name,
   FILENAME itself is returned, so the caller cannot tell from the pointer
   alone whether it owns the result. */
ST_FUNC const char* macho_tbd_soname(const char* filename) {
    char *soname, *data, *pos;
    const char *ret = filename;

    int fd = open(filename,O_RDONLY);
    if (fd<0) return ret;
    pos = data = tcc_load_text(fd);
    /* The text is in memory now and the descriptor is not used again.
       NOTE(review): assumes tcc_load_text leaves fd open, as the other
       loaders in this file are handed descriptors that their callers
       close afterwards - confirm. */
    close(fd);
    if (!tbd_parse_movepast("install-name: ")) goto the_end;
    tbd_parse_skipws;
    tbd_parse_tramplequote;
    soname = pos;
    if (!tbd_parse_movetoany("\n \"'")) goto the_end;
    tbd_parse_trample;
    ret = tcc_strdup(soname);
the_end:
    tcc_free(data);
    return ret;
}
#endif /* TCC_IS_NATIVE */
/* Load a text-based dylib stub (.tbd) from FD.

   The install name is registered as a DLL reference at level LEV.  Unless
   that library had already been seen, every name found in a "symbols: [...]"
   list is added to s1->dynsymtab_section as an undefined global symbol.
   The text is tokenised in place by overwriting separators with NUL.

   Returns 0 when an install name was found, -1 otherwise.  FD is left open
   for the caller. */
ST_FUNC int macho_load_tbd(TCCState* s1, int fd, const char* filename, int lev)
{
    char *soname, *data, *pos;
    int ret = -1;

    pos = data = tcc_load_text(fd);
    if (!tbd_parse_movepast("install-name: ")) goto the_end;
    tbd_parse_skipws;
    tbd_parse_tramplequote;
    soname = pos;
    if (!tbd_parse_movetoany("\n \"'")) goto the_end;
    tbd_parse_trample;
    ret = 0;
    if (tcc_add_dllref(s1, soname, lev)->found)
        goto the_end;

    while(pos) {
        char* sym = NULL;
        int cont = 1;

        if (!tbd_parse_movepast("symbols: ")) break;
        if (!tbd_parse_movepast("[")) break;
        while (cont) {
            tbd_parse_skipws;
            tbd_parse_tramplequote;
            sym = pos;
            if (!tbd_parse_movetoany(",] \"'")) break;
            tbd_parse_tramplequote;
            tbd_parse_tramplespace;
            tbd_parse_skipws;
            if (*pos == 0) {
                /* Reached the end of the text.  Stay on the terminator:
                   stepping over it would make the outer loop search
                   memory beyond the buffer. */
                cont = 0;
            } else {
                if (*pos == ']')
                    cont = 0;
                tbd_parse_trample;
            }
            /* an empty list ("[ ]") or a trailing comma yields an empty
               name, which is not a symbol */
            if (*sym)
                set_elf_sym(s1->dynsymtab_section, 0, 0,
                    ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE), 0, SHN_UNDEF, sym);
        }
    }

the_end:
    tcc_free(data);
    return ret;
}
/* Load the exported symbols of the Mach-O dynamic library open on FD.

   Fat (multi-architecture) files are searched for the slice matching the
   target CPU; 64-bit fat files are rejected with a warning.  In the
   selected 64-bit image the load commands provide:
     LC_SYMTAB          - symbol and string tables,
     LC_DYSYMTAB        - index range of the externally defined symbols,
     LC_ID_DYLIB        - install name, used instead of FILENAME,
     LC_REEXPORT_DYLIB  - further libraries, loaded recursively at LEV + 1.
   The install name is registered as a DLL reference at level LEV; unless
   the library had been seen before, each exported symbol is added to
   s1->dynsymtab_section as an undefined global.

   Returns -1 when the file is too short, has no slice for the target, or
   is not a 64-bit Mach-O image; 0 otherwise.  FD is left open for the
   caller. */
ST_FUNC int macho_load_dll(TCCState * s1, int fd, const char* filename, int lev)
{
    unsigned char buf[sizeof(struct mach_header_64)];
    void *buf2;
    uint32_t machofs = 0;
    struct fat_header fh;
    struct mach_header mh;
    struct load_command *lc;
    int i, swap = 0;
    const char *soname = filename;
    struct nlist_64 *symtab = 0;
    uint32_t nsyms = 0;
    char *strtab = 0;
    uint32_t strsize = 0;
    uint32_t iextdef = 0;
    uint32_t nextdef = 0;
    uint32_t lcofs, symidx, symend;

  again:
    if (full_read(fd, buf, sizeof(buf)) != sizeof(buf))
        return -1;
    memcpy(&fh, buf, sizeof(fh));
    if (fh.magic == FAT_MAGIC || fh.magic == FAT_CIGAM) {
        struct fat_arch *fa;
        uint32_t nfat, a;

        /* Decide on byte swapping first: the architecture count is stored
           in the same byte order as the magic and must be converted before
           it is used to size the read. */
        swap = fh.magic == FAT_CIGAM;
        nfat = SWAP(fh.nfat_arch);
        fa = load_data(fd, sizeof(fh), nfat * sizeof(*fa));
        for (a = 0; a < nfat; a++)
#ifdef TCC_TARGET_X86_64
            if (SWAP(fa[a].cputype) == CPU_TYPE_X86_64
                && SWAP(fa[a].cpusubtype) == CPU_SUBTYPE_X86_ALL)
#elif defined TCC_TARGET_ARM64
            if (SWAP(fa[a].cputype) == CPU_TYPE_ARM64
                && SWAP(fa[a].cpusubtype) == CPU_SUBTYPE_ARM64_ALL)
#endif
                break;
        if (a == nfat) {
            tcc_free(fa);
            return -1;
        }
        machofs = SWAP(fa[a].offset);
        tcc_free(fa);
        /* restart header parsing at the selected slice */
        lseek(fd, machofs, SEEK_SET);
        goto again;
    } else if (fh.magic == FAT_MAGIC_64 || fh.magic == FAT_CIGAM_64) {
        tcc_warning("%s: Mach-O fat 64bit files of type 0x%x not handled",
                    filename, fh.magic);
        return -1;
    }

    memcpy(&mh, buf, sizeof(mh));
    if (mh.magic != MH_MAGIC_64)
        return -1;
    dprintf("found Mach-O at %d\n", machofs);
    buf2 = load_data(fd, machofs + sizeof(struct mach_header_64), mh.sizeofcmds);
    lcofs = 0;
    for (i = 0; (uint32_t)i < mh.ncmds; i++) {
        /* Never step outside the load-command area: each command needs a
           complete header and a size that moves forward without leaving
           the buffer (a size of 0 would loop forever). */
        if (mh.sizeofcmds - lcofs < sizeof(*lc))
            break;
        lc = (struct load_command*) ((char*)buf2 + lcofs);
        if (lc->cmdsize < sizeof(*lc) || lc->cmdsize > mh.sizeofcmds - lcofs) {
            tcc_warning("%s: malformed load command %d", filename, i);
            break;
        }
        dprintf("lc %2d: 0x%08x\n", i, lc->cmd);
        switch (lc->cmd) {
        case LC_SYMTAB:
        {
            struct symtab_command *sc = (struct symtab_command*)lc;
            /* drop tables from an earlier LC_SYMTAB instead of leaking */
            tcc_free(symtab);
            tcc_free(strtab);
            nsyms = sc->nsyms;
            symtab = load_data(fd, machofs + sc->symoff, nsyms * sizeof(*symtab));
            strsize = sc->strsize;
            strtab = load_data(fd, machofs + sc->stroff, strsize);
            break;
        }
        case LC_ID_DYLIB:
        {
            struct dylib_command *dc = (struct dylib_command*)lc;
            soname = (char*)lc + dc->name;
            dprintf(" ID_DYLIB %d 0x%x 0x%x %s\n",
                    dc->timestamp, dc->current_version,
                    dc->compatibility_version, soname);
            break;
        }
        case LC_REEXPORT_DYLIB:
        {
            struct dylib_command *dc = (struct dylib_command*)lc;
            char *name = (char*)lc + dc->name;
            int subfd = open(name, O_RDONLY | O_BINARY);
            dprintf(" REEXPORT %s\n", name);
            if (subfd < 0)
                tcc_warning("can't open %s (reexported from %s)", name, filename);
            else {
                /* Hopefully the REEXPORTs never form a cycle, we don't check
                   for that!  */
                macho_load_dll(s1, subfd, name, lev + 1);
                close(subfd);
            }
            break;
        }
        case LC_DYSYMTAB:
        {
            struct dysymtab_command *dc = (struct dysymtab_command*)lc;
            iextdef = dc->iextdefsym;
            nextdef = dc->nextdefsym;
            break;
        }
        }
        lcofs += lc->cmdsize;
    }

    if (tcc_add_dllref(s1, soname, lev)->found)
        goto the_end;

    if (!nsyms || !nextdef)
        tcc_warning("%s doesn't export any symbols?", filename);

    /* Clamp the exported range to the symbols actually loaded; this also
       covers a missing LC_SYMTAB (nsyms == 0) and index overflow. */
    symend = iextdef + nextdef;
    if (symend < iextdef || symend > nsyms)
        symend = nsyms;

    //dprintf("symbols (all):\n");
    dprintf("symbols (exported):\n");
    dprintf("    n: typ sec   desc              value name\n");
    //for (i = 0; i < nsyms; i++) {
    for (symidx = iextdef; symidx < symend; symidx++) {
        struct nlist_64 *sym = symtab + symidx;
        /* ignore names that start outside the string table */
        if (sym->n_strx >= strsize)
            continue;
        dprintf("%5d: %3d %3d 0x%04x 0x%016lx %s\n",
                (int)symidx, sym->n_type, sym->n_sect, sym->n_desc,
                (long)sym->n_value, strtab + sym->n_strx);
        set_elf_sym(s1->dynsymtab_section, 0, 0,
                    ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE),
                    0, SHN_UNDEF, strtab + sym->n_strx);
    }

  the_end:
    tcc_free(strtab);
    tcc_free(symtab);
    tcc_free(buf2);
    return 0;
}