From: Josh Poimboeuf

In preparation for unwinding user space stacks with sframe, add basic
sframe compile infrastructure and support for reading the .sframe
section header.

sframe_add_section() reads the header and unconditionally returns an
error, so it's not very useful yet. A subsequent patch will improve
that.

Link: https://lore.kernel.org/all/f27e8463783febfa0dabb0432a3dd6be8ad98412.1737511963.git.jpoimboe@kernel.org/

[ Jens Remus: Add support for PC-relative FDE function start address.
  Clean up includes and indentation. ]

Cc: Masami Hiramatsu
Cc: Mathieu Desnoyers
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: Arnaldo Carvalho de Melo
Cc: Namhyung Kim
Cc: Thomas Gleixner
Cc: Andrii Nakryiko
Cc: Indu Bhagat
Cc: "Jose E. Marchesi"
Cc: Beau Belgrave
Cc: Jens Remus
Cc: Linus Torvalds
Cc: Andrew Morton
Cc: Florian Weimer
Cc: Sam James
Cc: Kees Cook
Cc: "Carlos O'Donell"
Signed-off-by: Josh Poimboeuf
Signed-off-by: Steven Rostedt (Google)
Signed-off-by: Jens Remus
---

Notes (jremus):
    Changes in v12:
    - Move include of linux/unwind_user_types.h to "unwind_user/sframe:
      Add support for reading .sframe contents".
    - Move include of linux/mm_types.h and variable sframe_mt to
      "unwind_user/sframe: Store sframe section data in per-mm maple
      tree".
    - Fix indentation of struct sframe_fde field padding.

    Changes in v11:
    - Support for SFrame V2 PC-relative FDE function start address.

MAINTAINERS | 1 + arch/Kconfig | 3 + include/linux/sframe.h | 37 +++++++++++ kernel/unwind/Makefile | 3 +- kernel/unwind/sframe.c | 136 +++++++++++++++++++++++++++++++++++++++++ kernel/unwind/sframe.h | 72 ++++++++++++++++++++++ 6 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 include/linux/sframe.h create mode 100644 kernel/unwind/sframe.c create mode 100644 kernel/unwind/sframe.h diff --git a/MAINTAINERS b/MAINTAINERS index b0569f6fc48f..eb65bf7bd10d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26811,6 +26811,7 @@ USERSPACE STACK UNWINDING M: Josh Poimboeuf M: Steven Rostedt S: Maintained +F: include/linux/sframe.h F: include/linux/unwind*.h F: kernel/unwind/ diff --git a/arch/Kconfig b/arch/Kconfig index 61130b88964b..cdb0e72773be 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -489,6 +489,9 @@ config HAVE_UNWIND_USER_FP bool select UNWIND_USER +config HAVE_UNWIND_USER_SFRAME + bool + config HAVE_PERF_REGS bool help diff --git a/include/linux/sframe.h b/include/linux/sframe.h new file mode 100644 index 000000000000..0642595534f9 --- /dev/null +++ b/include/linux/sframe.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SFRAME_H +#define _LINUX_SFRAME_H + +#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME + +struct sframe_section { + unsigned long sframe_start; + unsigned long sframe_end; + unsigned long text_start; + unsigned long text_end; + + unsigned long fdes_start; + unsigned long fres_start; + unsigned long fres_end; + unsigned int num_fdes; + + signed char ra_off; + signed char fp_off; +}; + +extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, + unsigned long text_start, unsigned long text_end); +extern int sframe_remove_section(unsigned long sframe_addr); + +#else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */ + +static inline int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, + unsigned long text_start, unsigned long text_end) +{ + return -ENOSYS; +} +static inline int 
sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; } + +#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */ + +#endif /* _LINUX_SFRAME_H */ diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile index eae37bea54fd..146038165865 100644 --- a/kernel/unwind/Makefile +++ b/kernel/unwind/Makefile @@ -1 +1,2 @@ - obj-$(CONFIG_UNWIND_USER) += user.o deferred.o + obj-$(CONFIG_UNWIND_USER) += user.o deferred.o + obj-$(CONFIG_HAVE_UNWIND_USER_SFRAME) += sframe.o diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c new file mode 100644 index 000000000000..26bb16f76a8d --- /dev/null +++ b/kernel/unwind/sframe.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Userspace sframe access functions + */ + +#define pr_fmt(fmt) "sframe: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sframe.h" + +#define dbg(fmt, ...) \ + pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) + +static void free_section(struct sframe_section *sec) +{ + kfree(sec); +} + +static int sframe_read_header(struct sframe_section *sec) +{ + unsigned long header_end, fdes_start, fdes_end, fres_start, fres_end; + struct sframe_header shdr; + unsigned int num_fdes; + + if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) { + dbg("header usercopy failed\n"); + return -EFAULT; + } + + if (shdr.preamble.magic != SFRAME_MAGIC || + shdr.preamble.version != SFRAME_VERSION_2 || + !(shdr.preamble.flags & SFRAME_F_FDE_SORTED) || + !(shdr.preamble.flags & SFRAME_F_FDE_FUNC_START_PCREL) || + shdr.auxhdr_len) { + dbg("bad/unsupported sframe header\n"); + return -EINVAL; + } + + if (!shdr.num_fdes || !shdr.num_fres) { + dbg("no fde/fre entries\n"); + return -EINVAL; + } + + header_end = sec->sframe_start + SFRAME_HEADER_SIZE(shdr); + if (header_end >= sec->sframe_end) { + dbg("header doesn't fit in section\n"); + return -EINVAL; + } + + num_fdes = shdr.num_fdes; + fdes_start = header_end + 
shdr.fdes_off; + fdes_end = fdes_start + (num_fdes * sizeof(struct sframe_fde)); + + fres_start = header_end + shdr.fres_off; + fres_end = fres_start + shdr.fre_len; + + if (fres_start < fdes_end || fres_end > sec->sframe_end) { + dbg("inconsistent fde/fre offsets\n"); + return -EINVAL; + } + + sec->num_fdes = num_fdes; + sec->fdes_start = fdes_start; + sec->fres_start = fres_start; + sec->fres_end = fres_end; + + sec->ra_off = shdr.cfa_fixed_ra_offset; + sec->fp_off = shdr.cfa_fixed_fp_offset; + + return 0; +} + +int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, + unsigned long text_start, unsigned long text_end) +{ + struct vm_area_struct *sframe_vma, *text_vma; + struct mm_struct *mm = current->mm; + struct sframe_section *sec; + int ret; + + if (!sframe_start || !sframe_end || !text_start || !text_end) { + dbg("zero-length sframe/text address\n"); + return -EINVAL; + } + + scoped_guard(mmap_read_lock, mm) { + sframe_vma = vma_lookup(mm, sframe_start); + if (!sframe_vma || sframe_end > sframe_vma->vm_end) { + dbg("bad sframe address (0x%lx - 0x%lx)\n", + sframe_start, sframe_end); + return -EINVAL; + } + + text_vma = vma_lookup(mm, text_start); + if (!text_vma || + !(text_vma->vm_flags & VM_EXEC) || + text_end > text_vma->vm_end) { + dbg("bad text address (0x%lx - 0x%lx)\n", + text_start, text_end); + return -EINVAL; + } + } + + sec = kzalloc(sizeof(*sec), GFP_KERNEL); + if (!sec) + return -ENOMEM; + + sec->sframe_start = sframe_start; + sec->sframe_end = sframe_end; + sec->text_start = text_start; + sec->text_end = text_end; + + ret = sframe_read_header(sec); + if (ret) + goto err_free; + + /* TODO nowhere to store it yet - just free it and return an error */ + ret = -ENOSYS; + +err_free: + free_section(sec); + return ret; +} + +int sframe_remove_section(unsigned long sframe_start) +{ + return -ENOSYS; +} diff --git a/kernel/unwind/sframe.h b/kernel/unwind/sframe.h new file mode 100644 index 000000000000..69ce0d5b9694 --- /dev/null 
+++ b/kernel/unwind/sframe.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * From https://www.sourceware.org/binutils/docs/sframe-spec.html + */ +#ifndef _SFRAME_H +#define _SFRAME_H + +#include + +#define SFRAME_VERSION_1 1 +#define SFRAME_VERSION_2 2 +#define SFRAME_MAGIC 0xdee2 + +#define SFRAME_F_FDE_SORTED 0x1 +#define SFRAME_F_FRAME_POINTER 0x2 +#define SFRAME_F_FDE_FUNC_START_PCREL 0x4 + +#define SFRAME_ABI_AARCH64_ENDIAN_BIG 1 +#define SFRAME_ABI_AARCH64_ENDIAN_LITTLE 2 +#define SFRAME_ABI_AMD64_ENDIAN_LITTLE 3 + +#define SFRAME_FDE_TYPE_PCINC 0 +#define SFRAME_FDE_TYPE_PCMASK 1 + +struct sframe_preamble { + u16 magic; + u8 version; + u8 flags; +} __packed; + +struct sframe_header { + struct sframe_preamble preamble; + u8 abi_arch; + s8 cfa_fixed_fp_offset; + s8 cfa_fixed_ra_offset; + u8 auxhdr_len; + u32 num_fdes; + u32 num_fres; + u32 fre_len; + u32 fdes_off; + u32 fres_off; +} __packed; + +#define SFRAME_HEADER_SIZE(header) \ + ((sizeof(struct sframe_header) + header.auxhdr_len)) + +#define SFRAME_AARCH64_PAUTH_KEY_A 0 +#define SFRAME_AARCH64_PAUTH_KEY_B 1 + +struct sframe_fde { + s32 start_addr; + u32 func_size; + u32 fres_off; + u32 fres_num; + u8 info; + u8 rep_size; + u16 padding; +} __packed; + +#define SFRAME_FUNC_FRE_TYPE(data) (data & 0xf) +#define SFRAME_FUNC_FDE_TYPE(data) ((data >> 4) & 0x1) +#define SFRAME_FUNC_PAUTH_KEY(data) ((data >> 5) & 0x1) + +#define SFRAME_BASE_REG_FP 0 +#define SFRAME_BASE_REG_SP 1 + +#define SFRAME_FRE_CFA_BASE_REG_ID(data) (data & 0x1) +#define SFRAME_FRE_OFFSET_COUNT(data) ((data >> 1) & 0xf) +#define SFRAME_FRE_OFFSET_SIZE(data) ((data >> 5) & 0x3) +#define SFRAME_FRE_MANGLED_RA_P(data) ((data >> 7) & 0x1) + +#endif /* _SFRAME_H */ -- 2.48.1 From: Josh Poimboeuf Associate an .sframe section with its mm by adding it to a per-mm maple tree which is indexed by the corresponding text address range. A single .sframe section can be associated with multiple text ranges. 
[ Jens Remus: Minor cleanups. Reword commit subject/message. ]

Cc: Thomas Gleixner
Cc: Masami Hiramatsu
Cc: Mathieu Desnoyers
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: Arnaldo Carvalho de Melo
Cc: Namhyung Kim
Cc: Andrii Nakryiko
Cc: Indu Bhagat
Cc: "Jose E. Marchesi"
Cc: Beau Belgrave
Cc: Jens Remus
Cc: Linus Torvalds
Cc: Florian Weimer
Cc: Sam James
Cc: Kees Cook
Cc: "Carlos O'Donell"
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: "H. Peter Anvin"
Cc: Andrew Morton
Cc: David Hildenbrand
Cc: Lorenzo Stoakes
Cc: "Liam R. Howlett"
Cc: Vlastimil Babka
Cc: Mike Rapoport
Cc: Suren Baghdasaryan
Cc: Michal Hocko
Cc: x86@kernel.org
Cc: linux-mm@kvack.org
Signed-off-by: Josh Poimboeuf
Signed-off-by: Steven Rostedt (Google)
Signed-off-by: Jens Remus
---

Notes (jremus):
    Changes in v12:
    - Add include of linux/mm_types.h and variable sframe_mt from
      "unwind_user/sframe: Add support for reading .sframe headers".
    - Reword commit subject/message to consistently use ".sframe" when
      referring to the section.

arch/x86/include/asm/mmu.h | 2 +- include/linux/mm_types.h | 3 ++ include/linux/sframe.h | 15 ++++++++++ kernel/fork.c | 10 +++++++ kernel/unwind/sframe.c | 56 ++++++++++++++++++++++++++++++++++++-- mm/init-mm.c | 2 ++ 6 files changed, 84 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 0fe9c569d171..227a32899a59 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -87,7 +87,7 @@ typedef struct { .context = { \ .ctx_id = 1, \ .lock = __MUTEX_INITIALIZER(mm.context.lock), \ - } + }, void leave_mm(void); #define leave_mm leave_mm diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 90e5790c318f..01e2cffbd84e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1227,6 +1227,9 @@ struct mm_struct { #ifdef CONFIG_MM_ID mm_id_t mm_id; #endif /* CONFIG_MM_ID */ +#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME + struct maple_tree sframe_mt; +#endif } __randomize_layout; /* diff --git a/include/linux/sframe.h b/include/linux/sframe.h index 0642595534f9..7ea6a97ed8af 100644 --- a/include/linux/sframe.h +++ b/include/linux/sframe.h @@ -2,6 +2,8 @@ #ifndef _LINUX_SFRAME_H #define _LINUX_SFRAME_H +#include + #ifdef CONFIG_HAVE_UNWIND_USER_SFRAME struct sframe_section { @@ -19,18 +21,31 @@ struct sframe_section { signed char fp_off; }; +#define INIT_MM_SFRAME .sframe_mt = MTREE_INIT(sframe_mt, 0), +extern void sframe_free_mm(struct mm_struct *mm); + extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, unsigned long text_start, unsigned long text_end); extern int sframe_remove_section(unsigned long sframe_addr); +static inline bool current_has_sframe(void) +{ + struct mm_struct *mm = current->mm; + + return mm && !mtree_empty(&mm->sframe_mt); +} + #else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */ +#define INIT_MM_SFRAME +static inline void sframe_free_mm(struct mm_struct *mm) {} static inline int sframe_add_section(unsigned long sframe_start, 
unsigned long sframe_end, unsigned long text_start, unsigned long text_end) { return -ENOSYS; } static inline int sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; } +static inline bool current_has_sframe(void) { return false; } #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */ diff --git a/kernel/fork.c b/kernel/fork.c index 3da0f08615a9..15c24411dd7d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,7 @@ #include #include #include +#include #include #include @@ -689,6 +690,7 @@ void __mmdrop(struct mm_struct *mm) mm_pasid_drop(mm); mm_destroy_cid(mm); percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS); + sframe_free_mm(mm); free_mm(mm); } @@ -1027,6 +1029,13 @@ static void mmap_init_lock(struct mm_struct *mm) #endif } +static void mm_init_sframe(struct mm_struct *mm) +{ +#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME + mt_init(&mm->sframe_mt); +#endif +} + static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, struct user_namespace *user_ns) { @@ -1055,6 +1064,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->pmd_huge_pte = NULL; #endif mm_init_uprobes_state(mm); + mm_init_sframe(mm); hugetlb_count_init(mm); mm_flags_clear_all(mm); diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 26bb16f76a8d..149ce70e4229 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -81,6 +81,7 @@ static int sframe_read_header(struct sframe_section *sec) int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, unsigned long text_start, unsigned long text_end) { + struct maple_tree *sframe_mt = ¤t->mm->sframe_mt; struct vm_area_struct *sframe_vma, *text_vma; struct mm_struct *mm = current->mm; struct sframe_section *sec; @@ -122,15 +123,64 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, if (ret) goto err_free; - /* TODO nowhere to store it yet - just free it and return an error */ - ret = -ENOSYS; + ret = 
mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec, GFP_KERNEL); + if (ret) { + dbg("mtree_insert_range failed: text=%lx-%lx\n", + sec->text_start, sec->text_end); + goto err_free; + } + + return 0; err_free: free_section(sec); return ret; } +static int __sframe_remove_section(struct mm_struct *mm, + struct sframe_section *sec) +{ + if (!mtree_erase(&mm->sframe_mt, sec->text_start)) { + dbg("mtree_erase failed: text=%lx\n", sec->text_start); + return -EINVAL; + } + + free_section(sec); + + return 0; +} + int sframe_remove_section(unsigned long sframe_start) { - return -ENOSYS; + struct mm_struct *mm = current->mm; + struct sframe_section *sec; + unsigned long index = 0; + bool found = false; + int ret = 0; + + mt_for_each(&mm->sframe_mt, sec, index, ULONG_MAX) { + if (sec->sframe_start == sframe_start) { + found = true; + ret |= __sframe_remove_section(mm, sec); + } + } + + if (!found || ret) + return -EINVAL; + + return 0; +} + +void sframe_free_mm(struct mm_struct *mm) +{ + struct sframe_section *sec; + unsigned long index = 0; + + if (!mm) + return; + + mt_for_each(&mm->sframe_mt, sec, index, ULONG_MAX) + free_section(sec); + + mtree_destroy(&mm->sframe_mt); } diff --git a/mm/init-mm.c b/mm/init-mm.c index 4600e7605cab..b32fcf167cc2 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #ifndef INIT_MM_CONTEXT @@ -46,6 +47,7 @@ struct mm_struct init_mm = { .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, INIT_MM_CONTEXT(init_mm) + INIT_MM_SFRAME }; void setup_initial_init_mm(void *start_code, void *end_code, -- 2.48.1 From: Josh Poimboeuf Add an x86 implementation of unsafe_copy_from_user() similar to the existing unsafe_copy_to_user(). Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. 
Marchesi"
Cc: Beau Belgrave
Cc: Jens Remus
Cc: Linus Torvalds
Cc: Andrew Morton
Cc: Florian Weimer
Cc: Sam James
Cc: Kees Cook
Cc: "Carlos O'Donell"
Signed-off-by: Josh Poimboeuf
Signed-off-by: Steven Rostedt (Google)
Signed-off-by: Jens Remus
---
 arch/x86/include/asm/uaccess.h | 39 +++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 367297b188c3..dfe143235967 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -598,7 +598,7 @@ _label:									\
  * We want the unsafe accessors to always be inlined and use
  * the error labels - thus the macro games.
  */
-#define unsafe_copy_loop(dst, src, len, type, label)				\
+#define unsafe_copy_to_user_loop(dst, src, len, type, label)			\
 	while (len >= sizeof(type)) {						\
 		unsafe_put_user(*(type *)(src),(type __user *)(dst),label);	\
 		dst += sizeof(type);						\
@@ -606,15 +606,34 @@ _label:									\
 		len -= sizeof(type);						\
 	}
 
-#define unsafe_copy_to_user(_dst,_src,_len,label)			\
-do {									\
-	char __user *__ucu_dst = (_dst);				\
-	const char *__ucu_src = (_src);					\
-	size_t __ucu_len = (_len);					\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
-	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
+#define unsafe_copy_to_user(_dst, _src, _len, label)				\
+do {										\
+	void __user *__ucu_dst = (_dst);					\
+	const void *__ucu_src = (_src);						\
+	size_t __ucu_len = (_len);						\
+	unsafe_copy_to_user_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label);	\
+	unsafe_copy_to_user_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label);	\
+	unsafe_copy_to_user_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label);	\
+	unsafe_copy_to_user_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label);	\
+} while (0)
+
+#define unsafe_copy_from_user_loop(dst, src, len, type, label)			\
+	while (len >= sizeof(type)) {						\
+		unsafe_get_user(*(type *)(dst), (type __user *)(src), label);	\
+		dst += sizeof(type);						\
+		src += sizeof(type);						\
+		len -= sizeof(type);						\
+	}
+
+#define unsafe_copy_from_user(_dst, _src, _len, label)				\
+do {										\
+	void *__ucfu_dst = (_dst);						\
+	const void __user *__ucfu_src = (_src);					\
+	size_t __ucfu_len = (_len);						\
+	unsafe_copy_from_user_loop(__ucfu_dst, __ucfu_src, __ucfu_len, u64, label); \
+	unsafe_copy_from_user_loop(__ucfu_dst, __ucfu_src, __ucfu_len, u32, label); \
+	unsafe_copy_from_user_loop(__ucfu_dst, __ucfu_src, __ucfu_len, u16, label); \
+	unsafe_copy_from_user_loop(__ucfu_dst, __ucfu_src, __ucfu_len, u8, label); \
 } while (0)
 
 #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-- 
2.48.1

From: Josh Poimboeuf

In preparation for using sframe to unwind user space stacks, add an
sframe_find() interface for finding the sframe information associated
with a given text address.

For performance, use user_read_access_begin() and the corresponding
unsafe_*() accessors. Note that use of pr_debug() in uaccess-enabled
regions would break noinstr validation, so there aren't any debug
messages yet. That will be added in a subsequent commit.

Link: https://lore.kernel.org/all/77c0d1ec143bf2a53d66c4ecb190e7e0a576fbfd.1737511963.git.jpoimboe@kernel.org/
Link: https://lore.kernel.org/all/b35ca3a3-8de5-4d32-8d30-d4e562f6b0de@linux.ibm.com/

[ Jens Remus: Add support for PC-relative FDE function start address.
  Simplify logic by using an internal SFrame FDE representation, whose
  FDE function start address field is an address instead of a
  PC-relative offset (from FDE). Rename struct sframe_fre to
  sframe_fre_internal to align with struct sframe_fde_internal.
  Clean up includes. Fix checkpatch errors "spaces required around
  that ':'". ]

Cc: Masami Hiramatsu
Cc: Mathieu Desnoyers
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: Arnaldo Carvalho de Melo
Cc: Namhyung Kim
Cc: Thomas Gleixner
Cc: Andrii Nakryiko
Cc: Indu Bhagat
Cc: "Jose E. 
Marchesi"
Cc: Beau Belgrave
Cc: Jens Remus
Cc: Linus Torvalds
Cc: Andrew Morton
Cc: Florian Weimer
Cc: Sam James
Cc: Kees Cook
Cc: "Carlos O'Donell"
Signed-off-by: Josh Poimboeuf
Signed-off-by: Steven Rostedt (Google)
Signed-off-by: Jens Remus
---

Notes (jremus):
    Changes in v12:
    - Simplify logic by using an internal SFrame FDE representation,
      whose FDE function start address field is an address instead of
      a PC-relative offset (from FDE).
    - Rename struct sframe_fre to sframe_fre_internal to align with
      struct sframe_fde_internal.
    - Add include of linux/unwind_user_types.h from "unwind_user/sframe:
      Add support for reading .sframe headers".
    - Fix checkpatch errors "spaces required around that ':'".

    Changes in v11:
    - Support for SFrame V2 PC-relative FDE function start address.

 include/linux/sframe.h       |   6 +
 kernel/unwind/sframe.c       | 334 ++++++++++++++++++++++++++++++++++-
 kernel/unwind/sframe_debug.h |  35 ++++
 3 files changed, 371 insertions(+), 4 deletions(-)
 create mode 100644 kernel/unwind/sframe_debug.h

diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 7ea6a97ed8af..9a72209696f9 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -3,10 +3,14 @@
 #define _LINUX_SFRAME_H
 
 #include
+#include
+#include
 
 #ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
 
 struct sframe_section {
+	struct rcu_head	rcu;
+
 	unsigned long	sframe_start;
 	unsigned long	sframe_end;
 	unsigned long	text_start;
@@ -27,6 +31,7 @@ extern void sframe_free_mm(struct mm_struct *mm);
 extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
 			      unsigned long text_start, unsigned long text_end);
 extern int sframe_remove_section(unsigned long sframe_addr);
+extern int sframe_find(unsigned long ip, struct unwind_user_frame *frame);
 
 static inline bool current_has_sframe(void)
 {
@@ -45,6 +50,7 @@ static inline int sframe_add_section(unsigned long sframe_start, unsigned long s
 	return -ENOSYS;
 }
 static inline int sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; }
+static inline int sframe_find(unsigned long ip, struct unwind_user_frame *frame) { return -ENOSYS; }
 static inline bool current_has_sframe(void) { return false; }
 
 #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index 149ce70e4229..d4ef825b1cbc 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -15,9 +15,326 @@
 #include
 
 #include "sframe.h"
+#include "sframe_debug.h"
+
+struct sframe_fde_internal {
+	unsigned long	func_start_addr;
+	u32		func_size;
+	u32		fres_off;
+	u32		fres_num;
+	u8		info;
+	u8		rep_size;
+};
+
+struct sframe_fre_internal {
+	unsigned int	size;
+	u32		ip_off;
+	s32		cfa_off;
+	s32		ra_off;
+	s32		fp_off;
+	u8		info;
+};
+
+DEFINE_STATIC_SRCU(sframe_srcu);
+
+static __always_inline unsigned char fre_type_to_size(unsigned char fre_type)
+{
+	if (fre_type > 2)
+		return 0;
+	return 1 << fre_type;
+}
+
+static __always_inline unsigned char offset_size_enum_to_size(unsigned char off_size)
+{
+	if (off_size > 2)
+		return 0;
+	return 1 << off_size;
+}
+
+static __always_inline int __read_fde(struct sframe_section *sec,
+				      unsigned int fde_num,
+				      struct sframe_fde_internal *fde)
+{
+	unsigned long fde_addr, func_addr;
+	struct sframe_fde _fde;
+
+	fde_addr = sec->fdes_start + (fde_num * sizeof(struct sframe_fde));
+	unsafe_copy_from_user(&_fde, (void __user *)fde_addr,
+			      sizeof(struct sframe_fde), Efault);
+
+	func_addr = fde_addr + _fde.start_addr;
+	if (func_addr < sec->text_start || func_addr >= sec->text_end)
+		return -EINVAL;
+
+	fde->func_start_addr	= func_addr;
+	fde->func_size		= _fde.func_size;
+	fde->fres_off		= _fde.fres_off;
+	fde->fres_num		= _fde.fres_num;
+	fde->info		= _fde.info;
+	fde->rep_size		= _fde.rep_size;
+
+	return 0;
+
+Efault:
+	return -EFAULT;
+}
+
+static __always_inline int __find_fde(struct sframe_section *sec,
+				      unsigned long ip,
+				      struct sframe_fde_internal *fde)
+{
+	unsigned long func_addr_low = 0, func_addr_high = ULONG_MAX;
+	struct sframe_fde __user *first, *low, *high, *found = NULL;
+	int ret;
+
+	first = (void __user *)sec->fdes_start;
+	low = first;
+	high = first + sec->num_fdes - 1;
+
+	while (low <= high) {
+		struct sframe_fde __user *mid;
+		s32 func_off;
+		unsigned long func_addr;
+
+		mid = low + ((high - low) / 2);
+
+		unsafe_get_user(func_off, (s32 __user *)mid, Efault);
+		func_addr = (unsigned long)mid + func_off;
+
+		if (ip >= func_addr) {
+			if (func_addr < func_addr_low)
+				return -EFAULT;
+
+			func_addr_low = func_addr;
+
+			found = mid;
+			low = mid + 1;
+		} else {
+			if (func_addr > func_addr_high)
+				return -EFAULT;
+
+			func_addr_high = func_addr;
+
+			high = mid - 1;
+		}
+	}
+
+	if (!found)
+		return -EINVAL;
+
+	ret = __read_fde(sec, found - first, fde);
+	if (ret)
+		return ret;
+
+	/* make sure it's not in a gap */
+	if (ip < fde->func_start_addr ||
+	    ip >= fde->func_start_addr + fde->func_size)
+		return -EINVAL;
+
+	return 0;
+
+Efault:
+	return -EFAULT;
+}
+
+#define ____UNSAFE_GET_USER_INC(to, from, type, label)			\
+({									\
+	type __to;							\
+	unsafe_get_user(__to, (type __user *)from, label);		\
+	from += sizeof(__to);						\
+	to = __to;							\
+})
+
+#define __UNSAFE_GET_USER_INC(to, from, size, label, u_or_s)		\
+({									\
+	switch (size) {							\
+	case 1:								\
+		____UNSAFE_GET_USER_INC(to, from, u_or_s##8, label);	\
+		break;							\
+	case 2:								\
+		____UNSAFE_GET_USER_INC(to, from, u_or_s##16, label);	\
+		break;							\
+	case 4:								\
+		____UNSAFE_GET_USER_INC(to, from, u_or_s##32, label);	\
+		break;							\
+	default:							\
+		return -EFAULT;						\
+	}								\
+})
+
+#define UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label)		\
+	__UNSAFE_GET_USER_INC(to, from, size, label, u)
+
+#define UNSAFE_GET_USER_SIGNED_INC(to, from, size, label)		\
+	__UNSAFE_GET_USER_INC(to, from, size, label, s)
+
+#define UNSAFE_GET_USER_INC(to, from, size, label)				\
+	_Generic(to,								\
+		 u8 :	UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label),	\
+		 u16 :	UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label),	\
+		 u32 :	UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label),	\
+		 s8 :	UNSAFE_GET_USER_SIGNED_INC(to, from, size, label),	\
+		 s16 :	UNSAFE_GET_USER_SIGNED_INC(to, from, size, label),	\
+		 s32 :	UNSAFE_GET_USER_SIGNED_INC(to, from, size, label))
+
+static __always_inline int __read_fre(struct sframe_section *sec,
+				      struct sframe_fde_internal *fde,
+				      unsigned long fre_addr,
+				      struct sframe_fre_internal *fre)
+{
+	unsigned char fde_type = SFRAME_FUNC_FDE_TYPE(fde->info);
+	unsigned char fre_type = SFRAME_FUNC_FRE_TYPE(fde->info);
+	unsigned char offset_count, offset_size;
+	s32 cfa_off, ra_off, fp_off;
+	unsigned long cur = fre_addr;
+	unsigned char addr_size;
+	u32 ip_off;
+	u8 info;
+
+	addr_size = fre_type_to_size(fre_type);
+	if (!addr_size)
+		return -EFAULT;
+
+	if (fre_addr + addr_size + 1 > sec->fres_end)
+		return -EFAULT;
+
+	UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault);
+	if (fde_type == SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size)
+		return -EFAULT;
+
+	UNSAFE_GET_USER_INC(info, cur, 1, Efault);
+	offset_count = SFRAME_FRE_OFFSET_COUNT(info);
+	offset_size  = offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(info));
+	if (!offset_count || !offset_size)
+		return -EFAULT;
+
+	if (cur + (offset_count * offset_size) > sec->fres_end)
+		return -EFAULT;
+
+	fre->size = addr_size + 1 + (offset_count * offset_size);
+
+	UNSAFE_GET_USER_INC(cfa_off, cur, offset_size, Efault);
+	offset_count--;
+
+	ra_off = sec->ra_off;
+	if (!ra_off) {
+		if (!offset_count--)
+			return -EFAULT;
+
+		UNSAFE_GET_USER_INC(ra_off, cur, offset_size, Efault);
+	}
 
-#define dbg(fmt, ...)							\
-	pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__)
+	fp_off = sec->fp_off;
+	if (!fp_off && offset_count) {
+		offset_count--;
+		UNSAFE_GET_USER_INC(fp_off, cur, offset_size, Efault);
+	}
+
+	if (offset_count)
+		return -EFAULT;
+
+	fre->ip_off		= ip_off;
+	fre->cfa_off		= cfa_off;
+	fre->ra_off		= ra_off;
+	fre->fp_off		= fp_off;
+	fre->info		= info;
+
+	return 0;
+
+Efault:
+	return -EFAULT;
+}
+
+static __always_inline int __find_fre(struct sframe_section *sec,
+				      struct sframe_fde_internal *fde,
+				      unsigned long ip,
+				      struct unwind_user_frame *frame)
+{
+	unsigned char fde_type = SFRAME_FUNC_FDE_TYPE(fde->info);
+	struct sframe_fre_internal *fre, *prev_fre = NULL;
+	struct sframe_fre_internal fres[2];
+	unsigned long fre_addr;
+	bool which = false;
+	unsigned int i;
+	u32 ip_off;
+
+	ip_off = ip - fde->func_start_addr;
+
+	if (fde_type == SFRAME_FDE_TYPE_PCMASK) {
+		/* rep_size is read from user space; never divide by zero */
+		if (!fde->rep_size)
+			return -EFAULT;
+		ip_off %= fde->rep_size;
+	}
+
+	fre_addr = sec->fres_start + fde->fres_off;
+
+	for (i = 0; i < fde->fres_num; i++) {
+		int ret;
+
+		/*
+		 * Alternate between the two fres[] entries for 'fre' and
+		 * 'prev_fre'.
+		 */
+		fre = which ? fres : fres + 1;
+		which = !which;
+
+		ret = __read_fre(sec, fde, fre_addr, fre);
+		if (ret)
+			return ret;
+
+		fre_addr += fre->size;
+
+		if (prev_fre && fre->ip_off <= prev_fre->ip_off)
+			return -EFAULT;
+
+		if (fre->ip_off > ip_off)
+			break;
+
+		prev_fre = fre;
+	}
+
+	if (!prev_fre)
+		return -EINVAL;
+	fre = prev_fre;
+
+	frame->cfa_off = fre->cfa_off;
+	frame->ra_off  = fre->ra_off;
+	frame->fp_off  = fre->fp_off;
+	frame->use_fp  = SFRAME_FRE_CFA_BASE_REG_ID(fre->info) == SFRAME_BASE_REG_FP;
+
+	return 0;
+}
+
+int sframe_find(unsigned long ip, struct unwind_user_frame *frame)
+{
+	struct mm_struct *mm = current->mm;
+	struct sframe_section *sec;
+	struct sframe_fde_internal fde;
+	int ret;
+
+	if (!mm)
+		return -EINVAL;
+
+	guard(srcu)(&sframe_srcu);
+
+	sec = mtree_load(&mm->sframe_mt, ip);
+	if (!sec)
+		return -EINVAL;
+
+	if (!user_read_access_begin((void __user *)sec->sframe_start,
+				    sec->sframe_end - sec->sframe_start))
+		return -EFAULT;
+
+	ret = __find_fde(sec, ip, &fde);
+	if (ret)
+		goto end;
+
+	ret = __find_fre(sec, &fde, ip, frame);
+end:
+	user_read_access_end();
+	return ret;
+}
 
 static void free_section(struct sframe_section *sec)
 {
@@ -120,8 +437,10 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
 	sec->text_end		= text_end;
 
 	ret = sframe_read_header(sec);
-	if (ret)
+	if (ret) {
+		dbg_print_header(sec);
 		goto err_free;
+	}
 
 	ret = mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec, GFP_KERNEL);
 	if (ret) {
@@ -137,6 +456,13 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
 	return ret;
 }
 
+static void sframe_free_srcu(struct rcu_head *rcu)
+{
+	struct sframe_section *sec = container_of(rcu, struct sframe_section, rcu);
+
+	free_section(sec);
+}
+
 static int __sframe_remove_section(struct mm_struct *mm,
 				   struct sframe_section *sec)
 {
@@ -145,7 +471,7 @@ static int __sframe_remove_section(struct mm_struct *mm,
 		return -EINVAL;
 	}
 
-	free_section(sec);
+	call_srcu(&sframe_srcu, &sec->rcu, sframe_free_srcu);
 
 	return 0;
 }
diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h
new file mode 100644
index 000000000000..055c8c8fae24
--- /dev/null
+++ b/kernel/unwind/sframe_debug.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SFRAME_DEBUG_H
+#define _SFRAME_DEBUG_H
+
+#include
+#include "sframe.h"
+
+#ifdef CONFIG_DYNAMIC_DEBUG
+
+#define dbg(fmt, ...)							\
+	pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__)
+
+static __always_inline void dbg_print_header(struct sframe_section *sec)
+{
+	unsigned long fdes_end;
+
+	fdes_end = sec->fdes_start + (sec->num_fdes * sizeof(struct sframe_fde));
+
+	dbg("SEC: sframe:0x%lx-0x%lx text:0x%lx-0x%lx "
+	    "fdes:0x%lx-0x%lx fres:0x%lx-0x%lx "
+	    "ra_off:%d fp_off:%d\n",
+	    sec->sframe_start, sec->sframe_end, sec->text_start, sec->text_end,
+	    sec->fdes_start, fdes_end, sec->fres_start, sec->fres_end,
+	    sec->ra_off, sec->fp_off);
+}
+
+#else /* !CONFIG_DYNAMIC_DEBUG */
+
+#define dbg(args...)			no_printk(args)
+
+static inline void dbg_print_header(struct sframe_section *sec) {}
+
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+#endif /* _SFRAME_DEBUG_H */
-- 
2.48.1

From: Josh Poimboeuf

When loading an ELF executable, automatically detect an .sframe section
and associate it with the mm_struct.

[ Jens Remus: Fix checkpatch warning "braces {} are not necessary for
  single statement blocks". ]

Cc: linux-mm@kvack.org
Cc: Masami Hiramatsu
Cc: Mathieu Desnoyers
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: Arnaldo Carvalho de Melo
Cc: Namhyung Kim
Cc: Thomas Gleixner
Cc: Andrii Nakryiko
Cc: Indu Bhagat
Cc: "Jose E. 
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Jens Remus --- Notes (jremus): Changes in v12: - Fix checkpatch warning "braces {} are not necessary for single statement blocks". fs/binfmt_elf.c | 48 +++++++++++++++++++++++++++++++++++++--- include/uapi/linux/elf.h | 1 + 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3eb734c192e9..fc6ecb4d239e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -637,6 +638,21 @@ static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state, return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp); } +static void elf_add_sframe(struct elf_phdr *text, struct elf_phdr *sframe, + unsigned long base_addr) +{ + unsigned long sframe_start, sframe_end, text_start, text_end; + + sframe_start = base_addr + sframe->p_vaddr; + sframe_end = sframe_start + sframe->p_memsz; + + text_start = base_addr + text->p_vaddr; + text_end = text_start + text->p_memsz; + + /* Ignore return value, sframe section isn't critical */ + sframe_add_section(sframe_start, sframe_end, text_start, text_end); +} + /* This is much more generalized than the library routine read function, so we keep this separate. 
Technically the library read function is only provided so that we can read a.out libraries that have @@ -647,7 +663,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, unsigned long no_base, struct elf_phdr *interp_elf_phdata, struct arch_elf_state *arch_state) { - struct elf_phdr *eppnt; + struct elf_phdr *eppnt, *sframe_phdr = NULL; unsigned long load_addr = 0; int load_addr_set = 0; unsigned long error = ~0UL; @@ -673,7 +689,8 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, eppnt = interp_elf_phdata; for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { - if (eppnt->p_type == PT_LOAD) { + switch (eppnt->p_type) { + case PT_LOAD: { int elf_type = MAP_PRIVATE; int elf_prot = make_prot(eppnt->p_flags, arch_state, true, true); @@ -712,6 +729,19 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, error = -ENOMEM; goto out; } + break; + } + case PT_GNU_SFRAME: + sframe_phdr = eppnt; + break; + } + } + + if (sframe_phdr) { + eppnt = interp_elf_phdata; + for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { + if (eppnt->p_flags & PF_X) + elf_add_sframe(eppnt, sframe_phdr, load_addr); } } @@ -836,7 +866,7 @@ static int load_elf_binary(struct linux_binprm *bprm) int first_pt_load = 1; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; - struct elf_phdr *elf_property_phdata = NULL; + struct elf_phdr *elf_property_phdata = NULL, *sframe_phdr = NULL; unsigned long elf_brk; bool brk_moved = false; int retval, i; @@ -945,6 +975,10 @@ static int load_elf_binary(struct linux_binprm *bprm) executable_stack = EXSTACK_DISABLE_X; break; + case PT_GNU_SFRAME: + sframe_phdr = elf_ppnt; + break; + case PT_LOPROC ... 
PT_HIPROC: retval = arch_elf_pt_proc(elf_ex, elf_ppnt, bprm->file, false, @@ -1242,6 +1276,14 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_brk = k; } + if (sframe_phdr) { + for (i = 0, elf_ppnt = elf_phdata; + i < elf_ex->e_phnum; i++, elf_ppnt++) { + if ((elf_ppnt->p_flags & PF_X)) + elf_add_sframe(elf_ppnt, sframe_phdr, load_bias); + } + } + e_entry = elf_ex->e_entry + load_bias; phdr_addr += load_bias; elf_brk += load_bias; diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 819ded2d39de..92c16c94fca8 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -41,6 +41,7 @@ typedef __u16 Elf64_Versym; #define PT_GNU_STACK (PT_LOOS + 0x474e551) #define PT_GNU_RELRO (PT_LOOS + 0x474e552) #define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) +#define PT_GNU_SFRAME (PT_LOOS + 0x474e554) /* ARM MTE memory tag segment type */ -- 2.48.1 From: Josh Poimboeuf Now that the sframe infrastructure is fully in place, make it work by hooking it up to the unwind_user interface. [ Jens Remus: Remove unused pt_regs from unwind_user_next_common() and its callers. Simplify unwind_user_next_sframe(). ] Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Jens Remus --- Notes (jremus): Changes in v12: - Remove unused pt_regs from unwind_user_next_common() and its callers. (Peter) - Simplify unwind_user_next_sframe(). 
(Peter) arch/Kconfig | 1 + include/linux/unwind_user_types.h | 4 +++- kernel/unwind/user.c | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index cdb0e72773be..ab1941ef1411 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -491,6 +491,7 @@ config HAVE_UNWIND_USER_FP config HAVE_UNWIND_USER_SFRAME bool + select UNWIND_USER config HAVE_PERF_REGS bool diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h index 412729a269bc..43e4b160883f 100644 --- a/include/linux/unwind_user_types.h +++ b/include/linux/unwind_user_types.h @@ -9,7 +9,8 @@ * available. */ enum unwind_user_type_bits { - UNWIND_USER_TYPE_FP_BIT = 0, + UNWIND_USER_TYPE_SFRAME_BIT = 0, + UNWIND_USER_TYPE_FP_BIT = 1, NR_UNWIND_USER_TYPE_BITS, }; @@ -17,6 +18,7 @@ enum unwind_user_type_bits { enum unwind_user_type { /* Type "none" for the start of stack walk iteration. */ UNWIND_USER_TYPE_NONE = 0, + UNWIND_USER_TYPE_SFRAME = BIT(UNWIND_USER_TYPE_SFRAME_BIT), UNWIND_USER_TYPE_FP = BIT(UNWIND_USER_TYPE_FP_BIT), }; diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c index 39e270789444..7644ab9f6a61 100644 --- a/kernel/unwind/user.c +++ b/kernel/unwind/user.c @@ -7,6 +7,7 @@ #include #include #include +#include #define for_each_user_frame(state) \ for (unwind_user_start(state); !(state)->done; unwind_user_next(state)) @@ -86,6 +87,16 @@ static int unwind_user_next_fp(struct unwind_user_state *state) #endif } +static int unwind_user_next_sframe(struct unwind_user_state *state) +{ + struct unwind_user_frame frame; + + /* sframe expects the frame to be local storage */ + if (sframe_find(state->ip, &frame)) + return -ENOENT; + return unwind_user_next_common(state, &frame); +} + static int unwind_user_next(struct unwind_user_state *state) { unsigned long iter_mask = state->available_types; @@ -99,6 +110,16 @@ static int unwind_user_next(struct unwind_user_state *state) state->current_type = type; switch (type) { + 
case UNWIND_USER_TYPE_SFRAME: + switch (unwind_user_next_sframe(state)) { + case 0: + return 0; + case -ENOENT: + continue; /* Try next method. */ + default: + state->done = true; + } + break; case UNWIND_USER_TYPE_FP: if (!unwind_user_next_fp(state)) return 0; @@ -127,6 +148,8 @@ static int unwind_user_start(struct unwind_user_state *state) return -EINVAL; } + if (current_has_sframe()) + state->available_types |= UNWIND_USER_TYPE_SFRAME; if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) state->available_types |= UNWIND_USER_TYPE_FP; -- 2.48.1 Add an indication for an outermost frame to the unwind user frame structure and stop unwinding when reaching an outermost frame. This will be used by unwind user sframe, as SFrame may represent an undefined return address as indication for an outermost frame. Cc: Steven Rostedt Cc: Josh Poimboeuf Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Jens Remus --- Notes (jremus): Changes in v12: - Adjust to Peter's tip perf/core commit ae25884ad749 ("unwind_user/x86: Teach FP unwind about start of function"). Changes in v11: - New patch. 
arch/x86/include/asm/unwind_user.h | 6 ++++-- include/linux/unwind_user_types.h | 1 + kernel/unwind/user.c | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h index 12064284bc4e..f9a1c460150d 100644 --- a/arch/x86/include/asm/unwind_user.h +++ b/arch/x86/include/asm/unwind_user.h @@ -11,13 +11,15 @@ .cfa_off = 2*(ws), \ .ra_off = -1*(ws), \ .fp_off = -2*(ws), \ - .use_fp = true, + .use_fp = true, \ + .outermost = false, #define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \ .cfa_off = 1*(ws), \ .ra_off = -1*(ws), \ .fp_off = 0, \ - .use_fp = false, + .use_fp = false, \ + .outermost = false, static inline int unwind_user_word_size(struct pt_regs *regs) { diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h index 43e4b160883f..616cc5ee4586 100644 --- a/include/linux/unwind_user_types.h +++ b/include/linux/unwind_user_types.h @@ -32,6 +32,7 @@ struct unwind_user_frame { s32 ra_off; s32 fp_off; bool use_fp; + bool outermost; }; struct unwind_user_state { diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c index 7644ab9f6a61..d053295b1f7e 100644 --- a/kernel/unwind/user.c +++ b/kernel/unwind/user.c @@ -32,6 +32,12 @@ static int unwind_user_next_common(struct unwind_user_state *state, { unsigned long cfa, fp, ra; + /* Stop unwinding when reaching an outermost frame. */ + if (frame->outermost) { + state->done = true; + return 0; + } + if (frame->use_fp) { if (state->fp < state->sp) return -EINVAL; -- 2.48.1 SFrame may represent an undefined return address (RA) as SFrame FRE without any offsets as indication for an outermost frame. Cc: Steven Rostedt Cc: Josh Poimboeuf Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. 
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Jens Remus --- Notes (jremus): Changes in v11: - New patch. kernel/unwind/sframe.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index d4ef825b1cbc..1e877c3e5417 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -33,6 +33,7 @@ struct sframe_fre_internal { s32 ra_off; s32 fp_off; u8 info; + bool ra_undefined; }; DEFINE_STATIC_SRCU(sframe_srcu); @@ -187,6 +188,7 @@ static __always_inline int __read_fre(struct sframe_section *sec, unsigned char offset_count, offset_size; s32 cfa_off, ra_off, fp_off; unsigned long cur = fre_addr; + bool ra_undefined = false; unsigned char addr_size; u32 ip_off; u8 info; @@ -205,7 +207,7 @@ static __always_inline int __read_fre(struct sframe_section *sec, UNSAFE_GET_USER_INC(info, cur, 1, Efault); offset_count = SFRAME_FRE_OFFSET_COUNT(info); offset_size = offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(info)); - if (!offset_count || !offset_size) + if (!offset_size) return -EFAULT; if (cur + (offset_count * offset_size) > sec->fres_end) @@ -213,6 +215,14 @@ static __always_inline int __read_fre(struct sframe_section *sec, fre->size = addr_size + 1 + (offset_count * offset_size); + if (!offset_count) { + cfa_off = 0; + ra_off = 0; + fp_off = 0; + ra_undefined = true; + goto done; + } + UNSAFE_GET_USER_INC(cfa_off, cur, offset_size, Efault); offset_count--; @@ -233,11 +243,13 @@ static __always_inline int __read_fre(struct sframe_section *sec, if (offset_count) return -EFAULT; +done: fre->ip_off = ip_off; fre->cfa_off = cfa_off; fre->ra_off = ra_off; fre->fp_off = fp_off; fre->info = info; + fre->ra_undefined = ra_undefined; return 0; @@ -298,6 +310,7 @@ static __always_inline int __find_fre(struct sframe_section *sec, frame->ra_off = fre->ra_off; frame->fp_off = fre->fp_off; 
frame->use_fp = SFRAME_FRE_CFA_BASE_REG_ID(fre->info) == SFRAME_BASE_REG_FP; + frame->outermost = fre->ra_undefined; return 0; } -- 2.48.1 From: Josh Poimboeuf The x86 SFrame V2 implementation works fairly well, starting with binutils 2.45 with PC-relative SFrame FDE function start address encoding (though some issues are getting fixed in later versions). Enable it. [ Jens Remus: Reword commit message for SFrame V2 PC-relative FDE function start address encoding, starting with binutils 2.45. ] Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Jens Remus --- Notes (jremus): Changes in v12: - Reword commit message for SFrame V2 PC-relative FDE function start address encoding, starting with binutils 2.45. arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c4c21e8d0772..0e9a9c5ca0bc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -299,6 +299,7 @@ config X86 select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_UNWIND_USER_FP if X86_64 + select HAVE_UNWIND_USER_SFRAME if X86_64 select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO select VDSO_GETRANDOM if X86_64 -- 2.48.1 From: Josh Poimboeuf To avoid continued attempted use of a bad .sframe section, remove it on demand when the first sign of corruption is detected. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. 
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Jens Remus --- kernel/unwind/sframe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 1e877c3e5417..61340ee524c2 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -342,6 +342,10 @@ int sframe_find(unsigned long ip, struct unwind_user_frame *frame) ret = __find_fre(sec, &fde, ip, frame); end: user_read_access_end(); + + if (ret == -EFAULT) + WARN_ON_ONCE(sframe_remove_section(sec->sframe_start)); + return ret; } -- 2.48.1 From: Josh Poimboeuf When debugging sframe issues, the error messages aren't all that helpful without knowing what file a corresponding .sframe section belongs to. Prefix debug output strings with the file name. [ Jens Remus: Fix checkpatch error "space prohibited before that close parenthesis ')'". ] Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Jens Remus --- Notes (jremus): Changes in v12: - Fix checkpatch error "space prohibited before that close parenthesis ')'". 
include/linux/sframe.h | 4 +++- kernel/unwind/sframe.c | 23 ++++++++++-------- kernel/unwind/sframe_debug.h | 45 +++++++++++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/include/linux/sframe.h b/include/linux/sframe.h index 9a72209696f9..b79c5ec09229 100644 --- a/include/linux/sframe.h +++ b/include/linux/sframe.h @@ -10,7 +10,9 @@ struct sframe_section { struct rcu_head rcu; - +#ifdef CONFIG_DYNAMIC_DEBUG + const char *filename; +#endif unsigned long sframe_start; unsigned long sframe_end; unsigned long text_start; diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 61340ee524c2..7d3e286c1b23 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -343,14 +343,17 @@ int sframe_find(unsigned long ip, struct unwind_user_frame *frame) end: user_read_access_end(); - if (ret == -EFAULT) + if (ret == -EFAULT) { + dbg_sec("removing bad .sframe section\n"); WARN_ON_ONCE(sframe_remove_section(sec->sframe_start)); + } return ret; } static void free_section(struct sframe_section *sec) { + dbg_free(sec); kfree(sec); } @@ -361,7 +364,7 @@ static int sframe_read_header(struct sframe_section *sec) unsigned int num_fdes; if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) { - dbg("header usercopy failed\n"); + dbg_sec("header usercopy failed\n"); return -EFAULT; } @@ -370,18 +373,18 @@ static int sframe_read_header(struct sframe_section *sec) !(shdr.preamble.flags & SFRAME_F_FDE_SORTED) || !(shdr.preamble.flags & SFRAME_F_FDE_FUNC_START_PCREL) || shdr.auxhdr_len) { - dbg("bad/unsupported sframe header\n"); + dbg_sec("bad/unsupported sframe header\n"); return -EINVAL; } if (!shdr.num_fdes || !shdr.num_fres) { - dbg("no fde/fre entries\n"); + dbg_sec("no fde/fre entries\n"); return -EINVAL; } header_end = sec->sframe_start + SFRAME_HEADER_SIZE(shdr); if (header_end >= sec->sframe_end) { - dbg("header doesn't fit in section\n"); + dbg_sec("header doesn't fit in section\n"); return -EINVAL; 
} @@ -393,7 +396,7 @@ static int sframe_read_header(struct sframe_section *sec) fres_end = fres_start + shdr.fre_len; if (fres_start < fdes_end || fres_end > sec->sframe_end) { - dbg("inconsistent fde/fre offsets\n"); + dbg_sec("inconsistent fde/fre offsets\n"); return -EINVAL; } @@ -449,6 +452,8 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, sec->text_start = text_start; sec->text_end = text_end; + dbg_init(sec); + ret = sframe_read_header(sec); if (ret) { dbg_print_header(sec); @@ -457,8 +462,8 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, ret = mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec, GFP_KERNEL); if (ret) { - dbg("mtree_insert_range failed: text=%lx-%lx\n", - sec->text_start, sec->text_end); + dbg_sec("mtree_insert_range failed: text=%lx-%lx\n", + sec->text_start, sec->text_end); goto err_free; } @@ -480,7 +485,7 @@ static int __sframe_remove_section(struct mm_struct *mm, struct sframe_section *sec) { if (!mtree_erase(&mm->sframe_mt, sec->text_start)) { - dbg("mtree_erase failed: text=%lx\n", sec->text_start); + dbg_sec("mtree_erase failed: text=%lx\n", sec->text_start); return -EINVAL; } diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h index 055c8c8fae24..cd3e95e3961f 100644 --- a/kernel/unwind/sframe_debug.h +++ b/kernel/unwind/sframe_debug.h @@ -10,26 +10,59 @@ #define dbg(fmt, ...) \ pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) +#define dbg_sec(fmt, ...) 
\ + dbg("%s: " fmt, sec->filename, ##__VA_ARGS__) + static __always_inline void dbg_print_header(struct sframe_section *sec) { unsigned long fdes_end; fdes_end = sec->fdes_start + (sec->num_fdes * sizeof(struct sframe_fde)); - dbg("SEC: sframe:0x%lx-0x%lx text:0x%lx-0x%lx " - "fdes:0x%lx-0x%lx fres:0x%lx-0x%lx " - "ra_off:%d fp_off:%d\n", - sec->sframe_start, sec->sframe_end, sec->text_start, sec->text_end, - sec->fdes_start, fdes_end, sec->fres_start, sec->fres_end, - sec->ra_off, sec->fp_off); + dbg_sec("SEC: sframe:0x%lx-0x%lx text:0x%lx-0x%lx " + "fdes:0x%lx-0x%lx fres:0x%lx-0x%lx " + "ra_off:%d fp_off:%d\n", + sec->sframe_start, sec->sframe_end, sec->text_start, sec->text_end, + sec->fdes_start, fdes_end, sec->fres_start, sec->fres_end, + sec->ra_off, sec->fp_off); +} + +static inline void dbg_init(struct sframe_section *sec) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + guard(mmap_read_lock)(mm); + vma = vma_lookup(mm, sec->sframe_start); + if (!vma) + sec->filename = kstrdup("(vma gone???)", GFP_KERNEL); + else if (vma->vm_file) + sec->filename = kstrdup_quotable_file(vma->vm_file, GFP_KERNEL); + else if (vma->vm_ops && vma->vm_ops->name) + sec->filename = kstrdup(vma->vm_ops->name(vma), GFP_KERNEL); + else if (arch_vma_name(vma)) + sec->filename = kstrdup(arch_vma_name(vma), GFP_KERNEL); + else if (!vma->vm_mm) + sec->filename = kstrdup("(vdso)", GFP_KERNEL); + else + sec->filename = kstrdup("(anonymous)", GFP_KERNEL); +} + +static inline void dbg_free(struct sframe_section *sec) +{ + kfree(sec->filename); } #else /* !CONFIG_DYNAMIC_DEBUG */ #define dbg(args...) no_printk(args) +#define dbg_sec(args...) 
no_printk(args) static inline void dbg_print_header(struct sframe_section *sec) {} +static inline void dbg_init(struct sframe_section *sec) {} +static inline void dbg_free(struct sframe_section *sec) {} + #endif /* !CONFIG_DYNAMIC_DEBUG */ #endif /* _SFRAME_DEBUG_H */ -- 2.48.1 From: Josh Poimboeuf Add a debug feature to validate all .sframe sections when first loading the file rather than on demand. [ Jens Remus: Add support for PC-relative FDE function start address. Adjust to rename of struct sframe_fre to sframe_fre_internal. ] Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Jens Remus --- Notes (jremus): Changes in v12: - Adjust to use internal SFrame FDE representation. - Adjust to rename of struct sframe_fre to sframe_fre_internal. Changes in v11: - Support for SFrame V2 PC-relative FDE function start address. arch/Kconfig | 19 +++++++++ kernel/unwind/sframe.c | 97 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index ab1941ef1411..06c4f909398c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -493,6 +493,25 @@ config HAVE_UNWIND_USER_SFRAME bool select UNWIND_USER +config SFRAME_VALIDATION + bool "Enable .sframe section debugging" + depends on HAVE_UNWIND_USER_SFRAME + depends on DYNAMIC_DEBUG + help + When adding an .sframe section for a task, validate the entire + section immediately rather than on demand. + + This is a debug feature which is helpful for rooting out .sframe + section issues. 
If the .sframe section is corrupt, it will fail to + load immediately, with more information provided in dynamic printks. + + This has a significant page cache footprint due to its reading of the + entire .sframe section for every loaded executable and shared + library. Also, it's done for all processes, even those which don't + get stack traced by the kernel. Not recommended for general use. + + If unsure, say N. + config HAVE_PERF_REGS bool help diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 7d3e286c1b23..6465e7a315bc 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -351,6 +351,99 @@ int sframe_find(unsigned long ip, struct unwind_user_frame *frame) return ret; } +#ifdef CONFIG_SFRAME_VALIDATION + +static int safe_read_fde(struct sframe_section *sec, + unsigned int fde_num, struct sframe_fde_internal *fde) +{ + int ret; + + if (!user_read_access_begin((void __user *)sec->sframe_start, + sec->sframe_end - sec->sframe_start)) + return -EFAULT; + ret = __read_fde(sec, fde_num, fde); + user_read_access_end(); + return ret; +} + +static int safe_read_fre(struct sframe_section *sec, + struct sframe_fde_internal *fde, + unsigned long fre_addr, + struct sframe_fre_internal *fre) +{ + int ret; + + if (!user_read_access_begin((void __user *)sec->sframe_start, + sec->sframe_end - sec->sframe_start)) + return -EFAULT; + ret = __read_fre(sec, fde, fre_addr, fre); + user_read_access_end(); + return ret; +} + +static int sframe_validate_section(struct sframe_section *sec) +{ + unsigned long prev_ip = 0; + unsigned int i; + + for (i = 0; i < sec->num_fdes; i++) { + struct sframe_fre_internal *fre, *prev_fre = NULL; + unsigned long ip, fre_addr; + struct sframe_fde_internal fde; + struct sframe_fre_internal fres[2]; + bool which = false; + unsigned int j; + int ret; + + ret = safe_read_fde(sec, i, &fde); + if (ret) + return ret; + + ip = fde.func_start_addr; + if (ip <= prev_ip) { + dbg_sec("fde %u not sorted\n", i); + return -EFAULT; + } + 
prev_ip = ip; + + fre_addr = sec->fres_start + fde.fres_off; + for (j = 0; j < fde.fres_num; j++) { + int ret; + + fre = which ? fres : fres + 1; + which = !which; + + ret = safe_read_fre(sec, &fde, fre_addr, fre); + if (ret) { + dbg_sec("fde %u: __read_fre(%u) failed\n", i, j); + dbg_sec("FDE: func_start_addr:0x%lx func_size:0x%x fres_off:0x%x fres_num:%d info:%u rep_size:%u\n", + fde.func_start_addr, fde.func_size, + fde.fres_off, fde.fres_num, + fde.info, fde.rep_size); + return ret; + } + + fre_addr += fre->size; + + if (prev_fre && fre->ip_off <= prev_fre->ip_off) { + dbg_sec("fde %u: fre %u not sorted\n", i, j); + return -EFAULT; + } + + prev_fre = fre; + } + } + + return 0; +} + +#else /* !CONFIG_SFRAME_VALIDATION */ + +static int sframe_validate_section(struct sframe_section *sec) { return 0; } + +#endif /* !CONFIG_SFRAME_VALIDATION */ + + static void free_section(struct sframe_section *sec) { dbg_free(sec); @@ -460,6 +553,10 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, goto err_free; } + ret = sframe_validate_section(sec); + if (ret) + goto err_free; + ret = mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec, GFP_KERNEL); if (ret) { dbg_sec("mtree_insert_range failed: text=%lx-%lx\n", -- 2.48.1 From: Josh Poimboeuf The kernel doesn't have direct visibility to the ELF contents of shared libraries. Add some prctl() interfaces which allow glibc to tell the kernel where to find .sframe sections. [ This adds an interface for prctl() for testing loading of sframes for libraries. But this interface should really be a system call. This patch is for testing purposes only and should not be applied to mainline. ] Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Andrii Nakryiko Cc: Indu Bhagat Cc: "Jose E. 
Marchesi" Cc: Beau Belgrave Cc: Jens Remus Cc: Linus Torvalds Cc: Andrew Morton Cc: Florian Weimer Cc: Sam James Cc: Kees Cook Cc: "Carlos O'Donell" Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Jens Remus --- include/uapi/linux/prctl.h | 6 +++++- kernel/sys.c | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 51c4e8c82b1e..91d2c9eb3aa3 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -368,7 +368,7 @@ struct prctl_mm_map { * configuration. All bits may be locked via this call, including * undefined bits. */ -#define PR_LOCK_SHADOW_STACK_STATUS 76 +#define PR_LOCK_SHADOW_STACK_STATUS 76 /* * Controls the mode of timer_create() for CRIU restore operations. @@ -386,4 +386,8 @@ struct prctl_mm_map { # define PR_FUTEX_HASH_SET_SLOTS 1 # define PR_FUTEX_HASH_GET_SLOTS 2 +/* SFRAME management */ +#define PR_ADD_SFRAME 79 +#define PR_REMOVE_SFRAME 80 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 8b58eece4e58..ea088b643eb4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -65,6 +65,7 @@ #include #include #include +#include #include @@ -2868,6 +2869,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_FUTEX_HASH: error = futex_hash_prctl(arg2, arg3, arg4); break; + case PR_ADD_SFRAME: + error = sframe_add_section(arg2, arg3, arg4, arg5); + break; + case PR_REMOVE_SFRAME: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = sframe_remove_section(arg2); + break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL; -- 2.48.1