Why does LC_SYMTAB have invalid stroff/strsize but only for some loaded images?

筅森魡賤 提交于 2021-02-19 03:10:23

问题


I wrote the below program to iterate over all images in memory and dump their string tables.

#include <mach-o/dyld.h>
#include <stdio.h>
#include <string.h>

/*
 * Walks every image dyld has loaded into this process and, for each 64-bit
 * image, prints the stroff/strsize fields of its LC_SYMTAB command followed by
 * every non-empty entry of the string table those fields describe.
 *
 * NOTE: the string table is located by adding stroff directly to the image's
 * load address. String tables reported as larger than 100 MiB are not dumped.
 */
int main(int argc, char** argv) {
    const uint32_t imageTotal = _dyld_image_count();
    for (uint32_t img = 0; img < imageTotal; img++) {
        printf("IMAGE[%u]=%s\n", img, _dyld_get_image_name(img));

        const struct mach_header* rawHeader = _dyld_get_image_header(img);
        if (rawHeader->magic == MH_MAGIC_64) {
            struct mach_header_64* hdr = (struct mach_header_64*)rawHeader;
            /* Load commands start immediately after the 64-bit header. */
            char *cursor = (char *)hdr + sizeof(struct mach_header_64);

            for (uint32_t remaining = hdr->ncmds; remaining > 0; remaining--) {
                struct load_command *command = (struct load_command *)cursor;
                /* Advance first so every path below moves on to the next command. */
                cursor += command->cmdsize;

                if (command->cmd == LC_SYMTAB) {
                    struct symtab_command* st = (struct symtab_command*)command;
                    printf("\t\tLC_SYMTAB.stroff=%u\n", st->stroff);
                    printf("\t\tLC_SYMTAB.strsize=%u\n", st->strsize);

                    if (st->strsize > 100*1024*1024) {
                        printf("\t\tHUH? Don't believe string table is over 100MiB in size!\n");
                    } else {
                        char *strings = (char *)hdr + st->stroff;
                        uint32_t pos = 0;
                        while (pos < st->strsize) {
                            char *entry = strings + pos;
                            if (*entry != '\0')
                                printf("\t\tSTR[%u]=\"%s\"\n", pos, entry);
                            /* Step over this entry and its terminating NUL. */
                            pos += strlen(entry) + 1;
                        }
                    }
                }
            }
        }
    }
    return 0;
}

It seems to randomly work for some images, but for others the stroff/strsize have nonsensical values:

        LC_SYMTAB.stroff=1266154560
        LC_SYMTAB.strsize=143767728

It seems to always be the same two magic values, but I'm not sure if this is system-dependent in some way or if other people will get the same specific values.

If I comment out the check for strsize being over 100MiB, then printing the string table segfaults.

Most images seem to have this problem, but some don't. When I run it, I get the issue for 29 images out of 38.

I can't observe any pattern as to which do and which won't. What is going on here?

If it is relevant, I am testing on macOS 10.14.6 and compiling with Apple LLVM version 10.0.1 (clang-1001.0.46.4).


回答1:


As you already worked out, those are from the dyld_shared_cache. And the 0x80000000 flag is indeed documented, in the headers shipped with Xcode or any semi-recent XNU source:

#define MH_DYLIB_IN_CACHE 0x80000000    /* Only for use on dylibs. When this bit
                                           is set, the dylib is part of the dyld
                                           shared cache, rather than loose in
                                           the filesystem. */

As you've also discovered, the stroff/strsize values do not yield usable results when added to the dyld_shared_cache base. That is because they are file offsets, not memory offsets. This is true for all Mach-O files; it just happens that in non-cached binaries the segments often sit at the same relative positions in the file as they do in memory. That is definitely not the case for the shared cache.

To translate the file offset into a memory address, you'll have to parse the segments in the shared cache header. You can find struct definitions in the dyld source.




回答2:


Here's a program which prints out the contents of the string table of the dyld shared cache.

My original program in the question can be enhanced to skip dumping string table of images with MH_DYLIB_IN_CACHE set, and combined with this program to dump the shared cache string table. (All images in the shared cache share the same string table.)

#include <mach-o/dyld.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

// Declared by hand here; presumably this dyld entry point is not exposed by
// <mach-o/dyld.h> (TODO confirm against the SDK in use).
// The returned pointer is used below as the start of the in-memory shared
// cache (i.e. its header); cacheLen is an out-parameter filled in by dyld.
const void* _dyld_get_shared_cache_range(size_t* cacheLen);

// Leading fields of the header found at the very start of the dyld shared
// cache. Only the fields needed to locate the mapping table are declared.
struct dyld_cache_header {
        // 16-byte identification field; not inspected by this program.
        char            magic[16];
        // Byte offset, from the start of this header, of the array of
        // struct dyld_cache_mapping_info entries.
        uint32_t        mappingOffset;
        // Number of entries in that array.
        uint32_t        mappingCount;
        // Omitted remaining fields, not relevant to this task
};

// One entry of the shared cache mapping table: describes how a contiguous
// range of the cache file corresponds to a range of addresses.
struct dyld_cache_mapping_info {
    // Address of the start of this mapping. translateOffset() only ever uses
    // it relative to the first mapping's address.
    uint64_t    address;
    // Length of the mapping in bytes.
    uint64_t    size;
    // Offset within the cache file at which this mapping's contents begin.
    uint64_t    fileOffset;
    // Presumably the maximum and initial VM protections of the mapping;
    // unused by this program (TODO confirm against the dyld source).
    uint32_t    maxProt;
    uint32_t    initProt;
};

// mach_header flag bit marking a dylib as part of the dyld shared cache
// rather than loose in the filesystem. Defined here only as a fallback for
// SDK headers that do not provide it.
#ifndef MH_DYLIB_IN_CACHE
#       define MH_DYLIB_IN_CACHE 0x80000000
#endif

// Returns the header of the first loaded 64-bit image whose flags carry
// MH_DYLIB_IN_CACHE, or NULL when no loaded image has that flag set.
// The caller only needs some image from the shared cache, so the first
// match is as good as any other.
const struct mach_header_64* findSharedCacheDyldImage(void) {
        const uint32_t imageTotal = _dyld_image_count();
        uint32_t idx = 0;
        while (idx < imageTotal) {
                const struct mach_header* candidate = _dyld_get_image_header(idx);
                idx++;
                if (candidate->magic == MH_MAGIC_64) {
                        const struct mach_header_64* candidate64 = (const struct mach_header_64*)candidate;
                        if ((candidate64->flags & MH_DYLIB_IN_CACHE) != 0)
                                return candidate64;
                }
        }
        return NULL;
}

// Finds the first load command of type `cmd` in a 64-bit Mach-O image.
//
// header64: in-memory header of the image to search; may be NULL.
// cmd:      load command type to look for (e.g. LC_SYMTAB).
//
// Returns a pointer to the matching load command inside the image, or NULL
// if header64 is NULL or the image has no command of that type.
const struct load_command* findFirstLoadCommand(const struct mach_header_64* header64, uint32_t cmd) {
        // Callers may hand over the result of a failed image lookup directly.
        if (header64 == NULL)
                return NULL;

        // Load commands start immediately after the header. Byte-wise
        // arithmetic is done on a char pointer (void* arithmetic is a GNU
        // extension) and const is kept throughout.
        const char *ptr = (const char *)header64 + sizeof(struct mach_header_64);
        for (uint32_t j = 0; j < header64->ncmds; j++) {
                const struct load_command *lc = (const struct load_command *)ptr;
                if (lc->cmd == cmd)
                        return lc;
                // Each command records its own size, which is the stride to the next one.
                ptr += lc->cmdsize;
        }
        return NULL;
}

// Translates a shared cache file offset to a memory address.
//
// cache:  start of the in-memory shared cache, viewed as its header; may be NULL.
// offset: offset within the shared cache file (e.g. LC_SYMTAB.stroff of a
//         cached image).
//
// Returns the address in this process that corresponds to `offset`, or NULL
// if cache is NULL or no entry of the mapping table covers the offset.
void *translateOffset(const struct dyld_cache_header *cache, uint64_t offset) {
        if (cache == NULL)
                return NULL;

        // The mapping table lives mappingOffset bytes past the start of the header.
        const struct dyld_cache_mapping_info *mappings =
                (const struct dyld_cache_mapping_info *)((const char *)cache + cache->mappingOffset);

        for (uint32_t i = 0; i < cache->mappingCount; i++) {
                if (offset < mappings[i].fileOffset)
                        continue;
                // Compare the distance into the mapping against its size instead of
                // computing fileOffset + size, which could wrap around.
                uint64_t delta = offset - mappings[i].fileOffset;
                if (delta >= mappings[i].size)
                        continue;
                // Mapping addresses are only used relative to the first mapping, which
                // is taken to begin where `cache` points (as in the original formula).
                uint64_t fromCacheStart = mappings[i].address - mappings[0].address + delta;
                return (void *)((uintptr_t)cache + (uintptr_t)fromCacheStart);
        }
        return NULL;
}

// Prints every non-empty entry of the string table of the dyld shared cache.
//
// Steps: obtain the in-memory shared cache, pick any loaded image that lives
// in it, read that image's LC_SYMTAB, translate its stroff (a file offset)
// into a memory address, then walk the NUL-separated strings.
//
// Returns 0 on success, 1 if any of the lookups fails.
int main(int argc, char** argv) {
        (void)argc;
        (void)argv;

        size_t cacheLen = 0;
        const struct dyld_cache_header *cache = _dyld_get_shared_cache_range(&cacheLen);
        if (cache == NULL) {
                fprintf(stderr, "error: no dyld shared cache found in this process\n");
                return 1;
        }

        const struct mach_header_64* sharedCacheDyldImage = findSharedCacheDyldImage();
        if (sharedCacheDyldImage == NULL) {
                fprintf(stderr, "error: no loaded image has MH_DYLIB_IN_CACHE set\n");
                return 1;
        }

        const struct symtab_command* symtab = (const struct symtab_command*)findFirstLoadCommand(sharedCacheDyldImage,LC_SYMTAB);
        if (symtab == NULL) {
                fprintf(stderr, "error: shared cache image has no LC_SYMTAB\n");
                return 1;
        }

        // A char pointer (not void*) so the table can be indexed portably.
        const char *stringTbl = translateOffset(cache, symtab->stroff);
        if (stringTbl == NULL) {
                fprintf(stderr, "error: LC_SYMTAB.stroff is not covered by any cache mapping\n");
                return 1;
        }

        uint32_t off = 0;
        while (off < symtab->strsize) {
                const char *e = stringTbl + off;
                uint32_t remaining = symtab->strsize - off;
                // Look for the terminator only inside the table, so an unterminated
                // final entry cannot make us read past strsize.
                const char *nul = memchr(e, '\0', remaining);
                if (nul == NULL) {
                        printf("STR[%" PRIu32 "]=\"", off);
                        fwrite(e, 1, remaining, stdout);
                        printf("\"\n");
                        break;
                }
                if (e[0] != 0)
                        printf("STR[%" PRIu32 "]=\"%s\"\n", off, e);
                // Step over this entry and its terminating NUL.
                off += (uint32_t)(nul - e) + 1;
        }
        return 0;
}


来源:https://stackoverflow.com/questions/65248584/why-does-lc-symtab-have-invalid-stroff-strsize-but-only-for-some-loaded-images

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!