可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
Let's say the buffer is allocated using a page based scheme. One way to implement mmap would be to use remap_pfn_range but LDD3 says this does not work for conventional memory. It appears we can work around this by marking the page(s) reserved using SetPageReserved so that it gets locked in memory. But isn't all kernel memory already non-swappable i.e. already reserved? Why the need to set the reserved bit explicitly?
Does this have something to do with pages allocated from HIGH_MEM?
回答1:
The simplest way to map a set of pages from the kernel in your mmap method is to use the fault handler to map the pages. Basically you end up with something like:
/*
 * mmap handler: attaches the VMA operations table to the new mapping so
 * that pages are supplied through its fault handler.
 * Always returns 0.
 */
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &my_vm_ops;

	return 0;
}

/* File operations table wiring my_mmap in as the mmap method. */
static const struct file_operations my_fops = {
	.owner  = THIS_MODULE,
	.llseek = no_llseek,
	.open   = nonseekable_open,
	.mmap   = my_mmap,
};
(where the other file operations are whatever your module needs). Also, in my_mmap you do whatever range checking etc. is needed to validate the mmap parameters.
Then the vm_ops look like:
static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { vmf->page = my_page_at_index(vmf->pgoff); get_page(vmf->page); return 0; } static const struct vm_operations_struct my_vm_ops = { .fault = my_fault }
where you just need to figure out for a given vma / vmf passed to your fault function which page to map into userspace. This depends on exactly how your module works. For example, if you did
my_buf = vmalloc_user(MY_BUF_SIZE);
then the page you use would be something like
vmalloc_to_page(my_buf + (vmf->pgoff << PAGE_SHIFT))
But you could easily create an array and allocate a page for each entry, use kmalloc, whatever.
[just noticed that my_fault is a slightly amusing name for a function]
回答2:
Minimal runnable example and userland test
Kernel module:
#include /* copy_from_user */ #include #include #include #include /* min */ #include #include #include #include static const char *filename = "lkmc_mmap"; enum { BUFFER_SIZE = 4 }; struct mmap_info { char *data; }; /* After unmap. */ static void vm_close(struct vm_area_struct *vma) { pr_info("vm_close\n"); } /* First page access. */ static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page; struct mmap_info *info; pr_info("vm_fault\n"); info = (struct mmap_info *)vma->vm_private_data; if (info->data) { page = virt_to_page(info->data); get_page(page); vmf->page = page; } return 0; } /* Aftr mmap. TODO vs mmap, when can this happen at a different time than mmap? */ static void vm_open(struct vm_area_struct *vma) { pr_info("vm_open\n"); } static struct vm_operations_struct vm_ops = { .close = vm_close, .fault = vm_fault, .open = vm_open, }; static int mmap(struct file *filp, struct vm_area_struct *vma) { pr_info("mmap\n"); vma->vm_ops = &vm_ops; vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = filp->private_data; vm_open(vma); return 0; } static int open(struct inode *inode, struct file *filp) { struct mmap_info *info; pr_info("open\n"); info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL); pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info)); info->data = (char *)get_zeroed_page(GFP_KERNEL); memcpy(info->data, "asdf", BUFFER_SIZE); filp->private_data = info; return 0; } static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off) { struct mmap_info *info; int ret; pr_info("read\n"); info = filp->private_data; ret = min(len, (size_t)BUFFER_SIZE); if (copy_to_user(buf, info->data, ret)) { ret = -EFAULT; } return ret; } static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off) { struct mmap_info *info; pr_info("write\n"); info = filp->private_data; if (copy_from_user(info->data, buf, min(len, (size_t)BUFFER_SIZE))) { return -EFAULT; } 
else { return len; } } static int release(struct inode *inode, struct file *filp) { struct mmap_info *info; pr_info("release\n"); info = filp->private_data; free_page((unsigned long)info->data); kfree(info); filp->private_data = NULL; return 0; } static const struct file_operations fops = { .mmap = mmap, .open = open, .release = release, .read = read, .write = write, }; static int myinit(void) { proc_create(filename, 0, NULL, &fops); return 0; } static void myexit(void) { remove_proc_entry(filename, NULL); } module_init(myinit) module_exit(myexit) MODULE_LICENSE("GPL");
Userland test:
#define _XOPEN_SOURCE 700 #include #include #include #include #include /* uintmax_t */ #include #include #include /* sysconf */ #include "common.h" /* virt_to_phys_user */ enum { BUFFER_SIZE = 4 }; int main(int argc, char **argv) { int fd; long page_size; char *address1, *address2; char buf[BUFFER_SIZE]; uintptr_t paddr; if (argc \n", argv[0]); return EXIT_FAILURE; } page_size = sysconf(_SC_PAGE_SIZE); printf("open pathname = %s\n", argv[1]); fd = open(argv[1], O_RDWR | O_SYNC); if (fd
回答3:
Though the pages are reserved via a kernel driver, they are meant to be accessed from user space. As a result, the PTEs (page table entries) do not know whether the pfn belongs to user space or kernel space (even though the pages are allocated by a kernel driver).
This is why they are marked with SetPageReserved.