From: Alban Crequy - PROCESS_VM_PIDFD: refer to the remote process via PID file descriptor instead of PID. Such a file descriptor can be obtained with pidfd_open(2). - PROCESS_VM_NOWAIT: do not block on IO if the memory access causes a page fault. If a given flag is unsupported, the syscall returns the error EINVAL without checking the buffers. This gives a way to userspace to detect whether the current kernel supports a specific flag: process_vm_readv(pid, NULL, 1, NULL, 1, PROCESS_VM_PIDFD) -> EINVAL if the kernel does not support the flag PROCESS_VM_PIDFD (before this patch) -> EFAULT if the kernel supports the flag (after this patch) Signed-off-by: Alban Crequy --- MAINTAINERS | 1 + include/uapi/linux/process_vm.h | 9 +++++++++ mm/process_vm_access.c | 20 +++++++++++++++----- 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 include/uapi/linux/process_vm.h diff --git a/MAINTAINERS b/MAINTAINERS index e64b94e6b5a9..91b4647cf761 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16272,6 +16272,7 @@ F: include/linux/pgtable.h F: include/linux/ptdump.h F: include/linux/vmpressure.h F: include/linux/vmstat.h +F: include/uapi/linux/process_vm.h F: kernel/fork.c F: mm/Kconfig F: mm/debug.c diff --git a/include/uapi/linux/process_vm.h b/include/uapi/linux/process_vm.h new file mode 100644 index 000000000000..4168e09f3f4e --- /dev/null +++ b/include/uapi/linux/process_vm.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_PROCESS_VM_H +#define _UAPI_LINUX_PROCESS_VM_H + +/* Flags for process_vm_readv/process_vm_writev */ +#define PROCESS_VM_PIDFD (1UL << 0) +#define PROCESS_VM_NOWAIT (1UL << 1) + +#endif /* _UAPI_LINUX_PROCESS_VM_H */ diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 656d3e88755b..b5eac870ef24 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -14,6 +14,7 @@ #include #include #include +#include /** * process_vm_rw_pages - read/write pages from task specified @@ -68,6 +69,7 @@ static int process_vm_rw_pages(struct page **pages, * @mm: mm for task * @task: task to read/write from * @vm_write: 0 means copy from, 1 means copy to + * @pvm_flags: PROCESS_VM_* flags * Returns 0 on success or on failure error code */ static int process_vm_rw_single_vec(unsigned long addr, @@ -76,7 +78,8 @@ static int process_vm_rw_single_vec(unsigned long addr, struct page **process_pages, struct mm_struct *mm, struct task_struct *task, - int vm_write) + int vm_write, + unsigned int pvm_flags) { unsigned long pa = addr & PAGE_MASK; unsigned long start_offset = addr - pa; @@ -91,6 +94,8 @@ static int process_vm_rw_single_vec(unsigned long addr, if (vm_write) flags |= FOLL_WRITE; + if (pvm_flags & PROCESS_VM_NOWAIT) + flags |= FOLL_NOWAIT; while (!rc && nr_pages && iov_iter_count(iter)) { int pinned_pages = min_t(unsigned long, nr_pages, PVM_MAX_USER_PAGES); @@ -141,7 +146,7 @@ static int process_vm_rw_single_vec(unsigned long addr, * @iter: where to copy to/from locally * @rvec: iovec array specifying where to copy to/from in the other process * @riovcnt: size of rvec array - * @flags: currently unused + * @flags: process_vm_readv/writev flags * @vm_write: 0 if reading from other process, 1 if writing to other process * * Returns the number of bytes read/written or error code. May @@ -163,6 +168,7 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter, unsigned long nr_pages_iov; ssize_t iov_len; size_t total_len = iov_iter_count(iter); + unsigned int f_flags; /* * Work out how many pages of struct pages we're going to need @@ -194,7 +200,11 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter, } /* Get process information */ - task = find_get_task_by_vpid(pid); + if (flags & PROCESS_VM_PIDFD) + task = pidfd_get_task(pid, &f_flags); + else + task = find_get_task_by_vpid(pid); + if (!task) { rc = -ESRCH; goto free_proc_pages; @@ -215,7 +225,7 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter, for (i = 0; i < riovcnt && iov_iter_count(iter) && !rc; i++) rc = process_vm_rw_single_vec( (unsigned long)rvec[i].iov_base, rvec[i].iov_len, - iter, process_pages, mm, task, vm_write); + iter, process_pages, mm, task, vm_write, flags); /* copied = space before - space after */ total_len -= iov_iter_count(iter); @@ -266,7 +276,7 @@ static ssize_t process_vm_rw(pid_t pid, ssize_t rc; int dir = vm_write ? ITER_SOURCE : ITER_DEST; - if (flags != 0) + if (flags & ~(PROCESS_VM_NOWAIT | PROCESS_VM_PIDFD)) return -EINVAL; /* Check iovecs */ -- 2.45.0