Mapping non-contiguous blocks from a file into contiguous memory addresses

ε祈祈猫儿з 提交于 2019-12-04 00:22:46

Your question is a little confusing. From what I understood, this code will do what you need:

#define PAGESIZE 4096

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <errno.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>

struct StoredObject
{
    int IntVal;
    char StrVal[25];
};

int main(int argc, char **argv)
{
    int fd = open("mmapfile", O_RDWR | O_CREAT | O_TRUNC, (mode_t) 0600);
    //Set the file to the size of our data (2 pages)
    lseek(fd, PAGESIZE*2 - 1, SEEK_SET);
    write(fd, "", 1); //The final byte

    unsigned char *mapPtr = (unsigned char *) mmap(0, PAGESIZE * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    struct StoredObject controlObject;
    controlObject.IntVal = 12;
    strcpy(controlObject.StrVal, "Mary had a little lamb.\n");

    struct StoredObject *mary1;
    mary1 = (struct StoredObject *)(mapPtr + PAGESIZE - 4); //Will fall on the boundary between first and second page
    memcpy(mary1, &controlObject, sizeof(StoredObject));

    printf("%d, %s", mary1->IntVal, mary1->StrVal);
    //Should print "12, Mary had a little lamb.\n"

    struct StoredObject *john1;
    john1 = mary1 + 1; //Comes immediately after mary1 in memory; will start and end in the second page
    memcpy(john1, &controlObject, sizeof(StoredObject));

    john1->IntVal = 42;
    strcpy(john1->StrVal, "John had a little lamb.\n");

    printf("%d, %s", john1->IntVal, john1->StrVal);
    //Should print "12, Mary had a little lamb.\n"

    //Make sure the data's on the disk, as this is the initial, "read-only" data
    msync(mapPtr, PAGESIZE * 2, MS_SYNC);

    //This is the inital data set, now in memory, loaded across two pages
    //At this point, someone could be reading from there. We don't know or care.
    //We want to modify john1, but don't want to write over the existing data
    //Easy as pie.

    //This is the shadow map. COW-like optimization will take place: 
    //we'll map the entire address space from the shared source, then overlap with a new map to modify
    //This is mapped anywhere, letting the system decide what address we'll be using for the new data pointer
    unsigned char *mapPtr2 = (unsigned char *) mmap(0, PAGESIZE * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    //Map the second page on top of the first mapping; this is the one that we're modifying. It is *not* backed by disk
    unsigned char *temp = (unsigned char *) mmap(mapPtr2 + PAGESIZE, PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_ANON, 0, 0);
    if (temp == MAP_FAILED)
    {
        printf("Fixed map failed. %s", strerror(errno));
    }
    assert(temp == mapPtr2 + PAGESIZE);

    //Make a copy of the old data that will later be changed
    memcpy(mapPtr2 + PAGESIZE, mapPtr + PAGESIZE, PAGESIZE);

    //The two address spaces should still be identical until this point
    assert(memcmp(mapPtr, mapPtr2, PAGESIZE * 2) == 0);

    //We can now make our changes to the second page as needed
    struct StoredObject *mary2 = (struct StoredObject *)(((unsigned char *)mary1 - mapPtr) + mapPtr2);
    struct StoredObject *john2 = (struct StoredObject *)(((unsigned char *)john1 - mapPtr) + mapPtr2);

    john2->IntVal = 52;
    strcpy(john2->StrVal, "Mike had a little lamb.\n");

    //Test that everything worked OK
    assert(memcmp(mary1, mary2, sizeof(struct StoredObject)) == 0);
    printf("%d, %s", john2->IntVal, john2->StrVal);
    //Should print "52, Mike had a little lamb.\n"

    //Now assume our garbage collection routine has detected that no one is using the original copy of the data
    munmap(mapPtr, PAGESIZE * 2);

    mapPtr = mapPtr2;

    //Now we're done with all our work and want to completely clean up
    munmap(mapPtr2, PAGESIZE * 2);

    close(fd);

    return 0;
}

My modified answer should address your safety concerns. Only use MAP_FIXED on the second mmap call (like I have above). The cool thing about MAP_FIXED is that it lets you overwrite an existing mmap address section. It'll unload the range you're overlapping and replace it with your new mapped content:

 MAP_FIXED
              [...] If the memory
              region specified by addr and len overlaps pages of any existing
              mapping(s), then the overlapped part of the existing mapping(s) will be
              discarded. [...]

This way, you let the OS take care of finding a contiguous memory block of hundreds of megs for you (never call MAP_FIXED on address you don't know for sure isn't available). Then you call MAP_FIXED on a subsection of that now-mapped huge space with the data that you will be modifying. Tada.


On Windows, something like this should work (I'm on a Mac at the moment, so untested):

int main(int argc, char **argv)
{
    HANDLE hFile = CreateFile(L"mmapfile", GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    //Set the file to the size of our data (2 pages)
    SetFilePointer(hFile, PAGESIZE*2 - 1, 0, FILE_BEGIN);
    DWORD bytesWritten = -1;
    WriteFile(hFile, "", 1, &bytesWritten, NULL);

    HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, PAGESIZE * 2, NULL);
    unsigned char *mapPtr = (unsigned char *) MapViewOfFile(hMap, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, PAGESIZE * 2);

    struct StoredObject controlObject;
    controlObject.IntVal = 12;
    strcpy(controlObject.StrVal, "Mary had a little lamb.\n");

    struct StoredObject *mary1;
    mary1 = (struct StoredObject *)(mapPtr + PAGESIZE - 4); //Will fall on the boundary between first and second page
    memcpy(mary1, &controlObject, sizeof(StoredObject));

    printf("%d, %s", mary1->IntVal, mary1->StrVal);
    //Should print "12, Mary had a little lamb.\n"

    struct StoredObject *john1;
    john1 = mary1 + 1; //Comes immediately after mary1 in memory; will start and end in the second page
    memcpy(john1, &controlObject, sizeof(StoredObject));

    john1->IntVal = 42;
    strcpy(john1->StrVal, "John had a little lamb.\n");

    printf("%d, %s", john1->IntVal, john1->StrVal);
    //Should print "12, Mary had a little lamb.\n"

    //Make sure the data's on the disk, as this is the initial, "read-only" data
    //msync(mapPtr, PAGESIZE * 2, MS_SYNC);

    //This is the inital data set, now in memory, loaded across two pages
    //At this point, someone could be reading from there. We don't know or care.
    //We want to modify john1, but don't want to write over the existing data
    //Easy as pie.

    //This is the shadow map. COW-like optimization will take place: 
    //we'll map the entire address space from the shared source, then overlap with a new map to modify
    //This is mapped anywhere, letting the system decide what address we'll be using for the new data pointer
    unsigned char *reservedMem = (unsigned char *) VirtualAlloc(NULL, PAGESIZE * 2, MEM_RESERVE, PAGE_READWRITE);
    HANDLE hMap2 = CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, PAGESIZE, NULL);
    unsigned char *mapPtr2 = (unsigned char *) MapViewOfFileEx(hMap2, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, PAGESIZE, reservedMem);

    //Map the second page on top of the first mapping; this is the one that we're modifying. It is *not* backed by disk
    unsigned char *temp = (unsigned char *) MapViewOfFileEx(hMap2, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, PAGESIZE, reservedMem + PAGESIZE);
    if (temp == NULL)
    {
        printf("Fixed map failed. 0x%x\n", GetLastError());
        return -1;
    }
    assert(temp == mapPtr2 + PAGESIZE);

    //Make a copy of the old data that will later be changed
    memcpy(mapPtr2 + PAGESIZE, mapPtr + PAGESIZE, PAGESIZE);

    //The two address spaces should still be identical until this point
    assert(memcmp(mapPtr, mapPtr2, PAGESIZE * 2) == 0);

    //We can now make our changes to the second page as needed
    struct StoredObject *mary2 = (struct StoredObject *)(((unsigned char *)mary1 - mapPtr) + mapPtr2);
    struct StoredObject *john2 = (struct StoredObject *)(((unsigned char *)john1 - mapPtr) + mapPtr2);

    john2->IntVal = 52;
    strcpy(john2->StrVal, "Mike had a little lamb.\n");

    //Test that everything worked OK
    assert(memcmp(mary1, mary2, sizeof(struct StoredObject)) == 0);
    printf("%d, %s", john2->IntVal, john2->StrVal);
    //Should print "52, Mike had a little lamb.\n"

    //Now assume our garbage collection routine has detected that no one is using the original copy of the data
    //munmap(mapPtr, PAGESIZE * 2);

    mapPtr = mapPtr2;

    //Now we're done with all our work and want to completely clean up
    //munmap(mapPtr2, PAGESIZE * 2);

    //close(fd);

    return 0;
}

but I'm unclear how I should reserve address space in order to do this safely

That's going to vary by OS, but a little digging on msdn for mmap (I started with "xp mmap" on the msdn search) shows Microsoft have their usual VerboseAndHelpfullyCapitalizedNames for (the many) functions that implement pieces of mmap. Both the file- and anonymous- mappers can handle fixed-address requests just the same as any POSIX-2001 system can, i.e. if something else in your address space is talking to the kernel, you get to sort it out. No way I'm going to touch "safely", there's no such thing as "safely" with code you're wanting to port to unspecified platforms. You're going to have to build your own pool of pre-mapped anonymous memory that you can unmap and parcel out later under your own control.

I tested the windows code from @Mahmoud, well actually I tested the following similar code, and it doesn't work (the Linux code works.) If you uncomment VirtualFree, it will work. As mentioned in my comment above, on windows you can reserve the address space with VirtualAlloc, but you can't use MapViewOfFileEx with an already mapped address, so you need to VirtualFree it first. Then there's a race condition where another thread can grab the memory address before you do, so you have to do everything in a loop, e.g. try up to 1000 times and then give up.

package main

import (
    "fmt"
    "os"
    "syscall"
)

func main() {
    const size = 1024 * 1024

    file, err := os.Create("foo.dat")
    if err != nil {
        panic(err)
    }

    if err := file.Truncate(size); err != nil {
        panic(err)
    }

    const MEM_COMMIT = 0x1000

    addr, err := virtualAlloc(0, size, MEM_COMMIT, protReadWrite)
    if err != nil {
        panic(err)
    }

    fd, err := syscall.CreateFileMapping(
        syscall.Handle(file.Fd()),
        nil,
        uint32(protReadWrite),
        0,
        uint32(size),
        nil,
    )

    //if err := virtualFree(addr); err != nil {
    //  panic(err)
    //}

    base, err := mapViewOfFileEx(fd, syscall.FILE_MAP_READ|syscall.FILE_MAP_WRITE, 0, 0, size, addr)
    if base == 0 {
        panic("mapViewOfFileEx returned 0")
    }
    if err != nil {
        panic(err)
    }

    fmt.Println("success!")
}

type memProtect uint32

const (
    protReadOnly  memProtect = 0x02
    protReadWrite memProtect = 0x04
    protExecute   memProtect = 0x20
    protAll       memProtect = 0x40
)

var (
    modkernel32         = syscall.MustLoadDLL("kernel32.dll")
    procMapViewOfFileEx = modkernel32.MustFindProc("MapViewOfFileEx")
    procVirtualAlloc    = modkernel32.MustFindProc("VirtualAlloc")
    procVirtualFree     = modkernel32.MustFindProc("VirtualFree")
    procVirtualProtect  = modkernel32.MustFindProc("VirtualProtect")
)

func mapViewOfFileEx(handle syscall.Handle, prot memProtect, offsetHigh uint32, offsetLow uint32, length uintptr, target uintptr) (addr uintptr, err error) {
    r0, _, e1 := syscall.Syscall6(procMapViewOfFileEx.Addr(), 6, uintptr(handle), uintptr(prot), uintptr(offsetHigh), uintptr(offsetLow), length, target)
    addr = uintptr(r0)
    if addr == 0 {
        if e1 != 0 {
            err = error(e1)
        } else {
            err = syscall.EINVAL
        }
    }
    return addr, nil
}

func virtualAlloc(addr, size uintptr, allocType uint32, prot memProtect) (mem uintptr, err error) {
    r0, _, e1 := syscall.Syscall6(procVirtualAlloc.Addr(), 4, addr, size, uintptr(allocType), uintptr(prot), 0, 0)
    mem = uintptr(r0)
    if e1 != 0 {
        return 0, error(e1)
    }
    return mem, nil
}

func virtualFree(addr uintptr) error {
    const MEM_RELEASE = 0x8000
    _, _, e1 := syscall.Syscall(procVirtualFree.Addr(), 3, addr, 0, MEM_RELEASE)
    if e1 != 0 {
        return error(e1)
    }
    return nil
}
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!