Nested uses of kpkeys guards are about to be introduced, which means that kpkeys_set_level() may not actually need to change the value of POR_EL1. Since updating POR_EL1 requires an expensive ISB, let's skip the write if the value is unchanged, by returning KPKEYS_PKEY_REG_INVAL. This will cause the matching kpkeys_restore_pkey_reg() call to bail out without calling arch_kpkeys_restore_pkey_reg(). Signed-off-by: Kevin Brodsky --- arch/arm64/include/asm/kpkeys.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/kpkeys.h b/arch/arm64/include/asm/kpkeys.h index 64d6e22740ec..70e21df84252 100644 --- a/arch/arm64/include/asm/kpkeys.h +++ b/arch/arm64/include/asm/kpkeys.h @@ -43,6 +43,9 @@ static __always_inline int arch_kpkeys_set_level(int level) u64 prev_por = read_sysreg_s(SYS_POR_EL1); u64 new_por = por_set_kpkeys_level(prev_por, level); + if (new_por == prev_por) + return KPKEYS_PKEY_REG_INVAL; + __kpkeys_set_pkey_reg_nosync(new_por); isb(); -- 2.47.0 Highly privileged components, such as allocators, may require write access to arbitrary data. To that end, introduce a kpkeys level that grants write access to all kpkeys. Signed-off-by: Kevin Brodsky --- arch/arm64/include/asm/kpkeys.h | 4 +++- include/linux/kpkeys.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kpkeys.h b/arch/arm64/include/asm/kpkeys.h index 70e21df84252..ded5d6e988dc 100644 --- a/arch/arm64/include/asm/kpkeys.h +++ b/arch/arm64/include/asm/kpkeys.h @@ -28,7 +28,9 @@ static inline u64 por_set_kpkeys_level(u64 por, int level) { por = por_elx_set_pkey_perms(por, KPKEYS_PKEY_DEFAULT, POE_RWX); por = por_elx_set_pkey_perms(por, KPKEYS_PKEY_PGTABLES, - level == KPKEYS_LVL_PGTABLES ? POE_RW : POE_R); + level == KPKEYS_LVL_PGTABLES || + level == KPKEYS_LVL_UNRESTRICTED + ? 
POE_RW : POE_R); return por; } diff --git a/include/linux/kpkeys.h b/include/linux/kpkeys.h index 5f4b096374ba..48f240bea8e1 100644 --- a/include/linux/kpkeys.h +++ b/include/linux/kpkeys.h @@ -10,9 +10,10 @@ struct folio; #define KPKEYS_LVL_DEFAULT 0 #define KPKEYS_LVL_PGTABLES 1 +#define KPKEYS_LVL_UNRESTRICTED 2 #define KPKEYS_LVL_MIN KPKEYS_LVL_DEFAULT -#define KPKEYS_LVL_MAX KPKEYS_LVL_PGTABLES +#define KPKEYS_LVL_MAX KPKEYS_LVL_UNRESTRICTED #define __KPKEYS_GUARD(name, set_level, restore_pkey_reg, set_arg, ...) \ __DEFINE_CLASS_IS_CONDITIONAL(name, false); \ -- 2.47.0 Introduce the SLAB_SET_PKEY flag to request a kmem_cache whose slabs are mapped with a non-default pkey, if kernel pkeys (kpkeys) are supported. The pkey to be used is specified via a new pkey field in struct kmem_cache_args. The setting/resetting of the pkey is done directly at the slab level (allocate_slab/__free_slab) to avoid having to propagate the pkey value down to the page level. Memory mapped with a non-default pkey cannot be written to at the default kpkeys level. This is handled by switching to the unrestricted kpkeys level (granting write access to all pkeys) when writing to a slab with SLAB_SET_PKEY. The merging of slabs with SLAB_SET_PKEY is conservatively prevented, though it should be possible to merge slabs with the same configured pkey. 
Signed-off-by: Kevin Brodsky --- include/linux/slab.h | 21 ++++++++++++++++ mm/slab.h | 7 +++++- mm/slab_common.c | 2 +- mm/slub.c | 58 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 85 insertions(+), 3 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index d5a8ab98035c..8cf8f655e794 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -59,6 +59,9 @@ enum _slab_flag_bits { _SLAB_CMPXCHG_DOUBLE, #ifdef CONFIG_SLAB_OBJ_EXT _SLAB_NO_OBJ_EXT, +#endif +#ifdef CONFIG_ARCH_HAS_KPKEYS + _SLAB_SET_PKEY, #endif _SLAB_FLAGS_LAST_BIT }; @@ -244,6 +247,12 @@ enum _slab_flag_bits { #define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED #endif +#ifdef CONFIG_ARCH_HAS_KPKEYS +#define SLAB_SET_PKEY __SLAB_FLAG_BIT(_SLAB_SET_PKEY) +#else +#define SLAB_SET_PKEY __SLAB_FLAG_UNUSED +#endif + /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * @@ -335,6 +344,18 @@ struct kmem_cache_args { * %NULL means no constructor. */ void (*ctor)(void *); + /** + * @pkey: The pkey to map the allocated pages with. + * + * If the SLAB flags include SLAB_SET_PKEY, and if kernel pkeys are + * supported, objects are allocated in pages mapped with the protection + * key specified by @pkey. Otherwise, this field is ignored. + * + * Note that if @pkey is a non-default pkey, some overhead is incurred + * when internal slab functions switch the pkey register to write to the + * slab (e.g. setting a free pointer). 
+ */ + int pkey; }; struct kmem_cache *__kmem_cache_create_args(const char *name, diff --git a/mm/slab.h b/mm/slab.h index 248b34c839b7..01c404f2f1db 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -287,6 +287,10 @@ struct kmem_cache { unsigned int usersize; /* Usercopy region size */ #endif +#ifdef CONFIG_ARCH_HAS_KPKEYS + int pkey; +#endif + struct kmem_cache_node *node[MAX_NUMNODES]; }; @@ -438,7 +442,8 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s) SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \ SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ SLAB_TEMPORARY | SLAB_ACCOUNT | \ - SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE) + SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE | \ + SLAB_SET_PKEY) #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) diff --git a/mm/slab_common.c b/mm/slab_common.c index bfe7c40eeee1..7b26062629a1 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -47,7 +47,7 @@ struct kmem_cache *kmem_cache; */ #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ - SLAB_FAILSLAB | SLAB_NO_MERGE) + SLAB_FAILSLAB | SLAB_NO_MERGE | SLAB_SET_PKEY) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ SLAB_CACHE_DMA32 | SLAB_ACCOUNT) diff --git a/mm/slub.c b/mm/slub.c index 30003763d224..f6aec6ed7135 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -477,6 +478,15 @@ static nodemask_t slab_nodes; static struct workqueue_struct *flushwq; #endif +#ifdef CONFIG_ARCH_HAS_KPKEYS +KPKEYS_GUARD_COND(kpkeys_slab_write, + KPKEYS_LVL_UNRESTRICTED, + unlikely(s->flags & SLAB_SET_PKEY), + struct kmem_cache *s) +#else +KPKEYS_GUARD_NOOP(kpkeys_slab_write, struct kmem_cache *s) +#endif + /******************************************************************** * Core slab cache functions 
*******************************************************************/ @@ -563,6 +573,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) BUG_ON(object == fp); /* naive detection of double free or corruption */ #endif + guard(kpkeys_slab_write)(s); + freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); } @@ -802,6 +814,8 @@ static inline void set_orig_size(struct kmem_cache *s, p += get_info_end(s); p += sizeof(struct track) * 2; + guard(kpkeys_slab_write)(s); + *(unsigned int *)p = orig_size; } @@ -986,6 +1000,8 @@ static void set_track_update(struct kmem_cache *s, void *object, { struct track *p = get_track(s, object, alloc); + guard(kpkeys_slab_write)(s); + #ifdef CONFIG_STACKDEPOT p->handle = handle; #endif @@ -1010,6 +1026,8 @@ static void init_tracking(struct kmem_cache *s, void *object) if (!(s->flags & SLAB_STORE_USER)) return; + guard(kpkeys_slab_write)(s); + p = get_track(s, object, TRACK_ALLOC); memset(p, 0, 2*sizeof(struct track)); } @@ -1191,6 +1209,8 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) u8 *p = kasan_reset_tag(object); unsigned int poison_size = s->object_size; + guard(kpkeys_slab_write)(s); + if (s->flags & SLAB_RED_ZONE) { /* * Here and below, avoid overwriting the KMSAN shadow. 
Keeping @@ -2399,6 +2419,8 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init, int rsize; unsigned int inuse, orig_size; + guard(kpkeys_slab_write)(s); + inuse = get_info_end(s); orig_size = get_orig_size(s, x); if (!kasan_has_integrated_init()) @@ -2631,6 +2653,8 @@ static __always_inline void unaccount_slab(struct slab *slab, int order, -(PAGE_SIZE << order)); } +static void __free_slab(struct kmem_cache *s, struct slab *slab); + static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { struct slab *slab; @@ -2681,6 +2705,18 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) setup_slab_debug(s, slab, start); +#ifdef CONFIG_ARCH_HAS_KPKEYS + if (unlikely(s->flags & SLAB_SET_PKEY)) { + int ret = set_memory_pkey((unsigned long)start, + 1 << oo_order(oo), s->pkey); + + if (WARN_ON(ret)) { + __free_slab(s, slab); + return NULL; + } + } +#endif + shuffle = shuffle_freelist(s, slab); if (!shuffle) { @@ -2721,6 +2757,11 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab) __folio_clear_slab(folio); mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); +#ifdef CONFIG_ARCH_HAS_KPKEYS + if (unlikely(s->flags & SLAB_SET_PKEY)) + WARN_ON(set_memory_pkey((unsigned long)folio_address(folio), + pages, 0)); +#endif free_frozen_pages(&folio->page, order); } @@ -4118,9 +4159,11 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, void *obj) { if (unlikely(slab_want_init_on_free(s)) && obj && - !freeptr_outside_object(s)) + !freeptr_outside_object(s)) { + guard(kpkeys_slab_write)(s); memset((void *)((char *)kasan_reset_tag(obj) + s->offset), 0, sizeof(void *)); + } } static __fastpath_inline @@ -4920,6 +4963,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags) /* Zero out spare memory. 
*/ if (want_init_on_alloc(flags)) { kasan_disable_current(); + guard(kpkeys_slab_write)(s); if (orig_size && orig_size < new_size) memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size); else @@ -4929,6 +4973,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags) /* Setup kmalloc redzone when needed */ if (s && slub_debug_orig_size(s)) { + guard(kpkeys_slab_write)(s); set_orig_size(s, (void *)p, new_size); if (s->flags & SLAB_RED_ZONE && new_size < ks) memset_no_sanitize_memory(kasan_reset_tag(p) + new_size, @@ -6410,6 +6455,17 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name, s->useroffset = args->useroffset; s->usersize = args->usersize; #endif +#ifdef CONFIG_ARCH_HAS_KPKEYS + s->pkey = args->pkey; + + if (s->flags & SLAB_SET_PKEY) { + if (s->pkey >= arch_max_pkey()) + goto out; + + if (!arch_kpkeys_enabled() || s->pkey == KPKEYS_PKEY_DEFAULT) + s->flags &= ~SLAB_SET_PKEY; + } +#endif if (!calculate_sizes(args, s)) goto out; -- 2.47.0 Data assigned a non-default pkey is not writable at the default kpkeys level. If such data is managed via RCU, some mechanism is required to temporarily grant write access to the data's struct rcu_head, for instance when zeroing the callback pointer. There is unfortunately no straightforward way for RCU to know whether the managed data is mapped with a non-default pkey. This patch takes the easy route and switches to the unrestricted kpkeys level whenever struct rcu_head is written; this should work reliably but it is clearly suboptimal. That behaviour is enabled by selecting CONFIG_KPKEYS_UNRESTRICTED_RCU. This patch isn't comprehensive, in particular it does not take care of Tiny RCU. 
Signed-off-by: Kevin Brodsky --- kernel/rcu/rcu.h | 7 +++++++ kernel/rcu/rcu_segcblist.c | 13 +++++++++---- kernel/rcu/tree.c | 3 ++- mm/Kconfig | 2 ++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 9cf01832a6c3..71e9a695f4eb 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -10,6 +10,7 @@ #ifndef __LINUX_RCU_H #define __LINUX_RCU_H +#include #include #include @@ -691,4 +692,10 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v); static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; } #endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) +#ifdef CONFIG_KPKEYS_UNRESTRICTED_RCU +KPKEYS_GUARD(kpkeys_rcu, KPKEYS_LVL_UNRESTRICTED) +#else +KPKEYS_GUARD_NOOP(kpkeys_rcu) +#endif + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 298a2c573f02..e7d6c8370b70 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -12,6 +12,7 @@ #include #include +#include "rcu.h" #include "rcu_segcblist.h" /* Initialize simple callback list. 
*/ @@ -332,7 +333,8 @@ void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, rcu_segcblist_inc_len(rsclp); rcu_segcblist_inc_seglen(rsclp, RCU_NEXT_TAIL); rhp->next = NULL; - WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp); + scoped_guard(kpkeys_rcu) + WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp); WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next); } @@ -360,7 +362,8 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, if (!rcu_segcblist_segempty(rsclp, i)) break; rcu_segcblist_inc_seglen(rsclp, i); - WRITE_ONCE(*rsclp->tails[i], rhp); + scoped_guard(kpkeys_rcu) + WRITE_ONCE(*rsclp->tails[i], rhp); for (; i <= RCU_NEXT_TAIL; i++) WRITE_ONCE(rsclp->tails[i], &rhp->next); return true; @@ -381,7 +384,8 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, rclp->len = rcu_segcblist_get_seglen(rsclp, RCU_DONE_TAIL); *rclp->tail = rsclp->head; WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]); - WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); + scoped_guard(kpkeys_rcu) + WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL); rclp->tail = rsclp->tails[RCU_DONE_TAIL]; for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) @@ -436,7 +440,8 @@ void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, if (!rclp->head) return; /* No callbacks to move. 
*/ rcu_segcblist_add_seglen(rsclp, RCU_DONE_TAIL, rclp->len); - *rclp->tail = rsclp->head; + scoped_guard(kpkeys_rcu) + *rclp->tail = rsclp->head; WRITE_ONCE(rsclp->head, rclp->head); for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) if (&rsclp->head == rsclp->tails[i]) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 174ee243b349..2eada18c04d5 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2601,7 +2601,8 @@ static void rcu_do_batch(struct rcu_data *rdp) f = rhp->func; debug_rcu_head_callback(rhp); - WRITE_ONCE(rhp->func, (rcu_callback_t)0L); + scoped_guard(kpkeys_rcu) + WRITE_ONCE(rhp->func, (rcu_callback_t)0L); f(rhp); rcu_lock_release(&rcu_callback_map); diff --git a/mm/Kconfig b/mm/Kconfig index e34edf5c41e7..c023f74a2201 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1178,6 +1178,8 @@ config ARCH_HAS_KPKEYS # ARCH_HAS_KPKEYS must be selected when selecting this option config ARCH_HAS_KPKEYS_HARDENED_PGTABLES bool +config KPKEYS_UNRESTRICTED_RCU + bool config ARCH_USES_PG_ARCH_2 bool -- 2.47.0 We will need a separate pkey to protect struct cred. Allocate one as well as a new kpkeys level that grants write access to that pkey. Signed-off-by: Kevin Brodsky --- arch/arm64/include/asm/kpkeys.h | 7 ++++++- include/asm-generic/kpkeys.h | 4 ++++ include/linux/kpkeys.h | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kpkeys.h b/arch/arm64/include/asm/kpkeys.h index ded5d6e988dc..ffb7622391c3 100644 --- a/arch/arm64/include/asm/kpkeys.h +++ b/arch/arm64/include/asm/kpkeys.h @@ -13,7 +13,8 @@ * used in assembly. */ #define POR_EL1_INIT (POR_ELx_PERM_PREP(KPKEYS_PKEY_DEFAULT, POE_RWX) | \ - POR_ELx_PERM_PREP(KPKEYS_PKEY_PGTABLES, POE_R)) + POR_ELx_PERM_PREP(KPKEYS_PKEY_PGTABLES, POE_R) | \ + POR_ELx_PERM_PREP(KPKEYS_PKEY_CRED, POE_R)) #ifndef __ASSEMBLY__ @@ -31,6 +32,10 @@ static inline u64 por_set_kpkeys_level(u64 por, int level) level == KPKEYS_LVL_PGTABLES || level == KPKEYS_LVL_UNRESTRICTED ? 
POE_RW : POE_R); + por = por_elx_set_pkey_perms(por, KPKEYS_PKEY_CRED, + level == KPKEYS_LVL_CRED || + level == KPKEYS_LVL_UNRESTRICTED + ? POE_RW : POE_R); return por; } diff --git a/include/asm-generic/kpkeys.h b/include/asm-generic/kpkeys.h index cec92334a9f3..56a2fc9fe4a6 100644 --- a/include/asm-generic/kpkeys.h +++ b/include/asm-generic/kpkeys.h @@ -2,6 +2,10 @@ #ifndef __ASM_GENERIC_KPKEYS_H #define __ASM_GENERIC_KPKEYS_H +#ifndef KPKEYS_PKEY_CRED +#define KPKEYS_PKEY_CRED 2 +#endif + #ifndef KPKEYS_PKEY_PGTABLES #define KPKEYS_PKEY_PGTABLES 1 #endif diff --git a/include/linux/kpkeys.h b/include/linux/kpkeys.h index 48f240bea8e1..0e555b505b33 100644 --- a/include/linux/kpkeys.h +++ b/include/linux/kpkeys.h @@ -10,7 +10,8 @@ struct folio; #define KPKEYS_LVL_DEFAULT 0 #define KPKEYS_LVL_PGTABLES 1 -#define KPKEYS_LVL_UNRESTRICTED 2 +#define KPKEYS_LVL_CRED 2 +#define KPKEYS_LVL_UNRESTRICTED 3 #define KPKEYS_LVL_MIN KPKEYS_LVL_DEFAULT #define KPKEYS_LVL_MAX KPKEYS_LVL_UNRESTRICTED -- 2.47.0 This patch introduces a feature to prevent unintended modifications of live credentials, by moving them to protected memory when they are installed via commit_creds(). The protection mechanism is kernel pkeys (kpkeys): protected memory is mapped with a non-default pkey and write access is disabled by default. As a result, task->{cred,real_cred} can only be written to by switching to a higher kpkeys level. The kpkeys_hardened_cred feature is enabled by choosing CONFIG_KPKEYS_HARDENED_CRED=y and running on a system supporting kpkeys. Credentials are not directly allocated in protected memory, as that would force all code preparing new credentials to switch kpkeys level. To avoid such disruption, prepare_creds() and variants still allocate standard memory. When commit_creds() is called, the credentials are copied to protected memory, and the temporary object (in a standard kmalloc slab) is freed. 
This approach does not work so transparently when it comes to override_creds(), because it does not consume the reference: the object it gets passed cannot be moved. Callers of override_creds() will need to explicitly call a new protect_creds() helper to move the credentials to protected memory once they are done preparing them. Some of these callers use the unmodified output of prepare_creds(); prepare_protected_creds() is introduced to avoid an unnecessary copy in such cases. This patch does not handle these situations, but it does not break them either (credentials installed by override_creds() will simply be unprotected). Various helpers need to modify live credentials. To that end, guard(kpkeys_hardened_cred) is introduced to switch to the kpkeys level that enables write access to KPKEYS_PKEY_CRED. Signed-off-by: Kevin Brodsky --- include/linux/cred.h | 12 +++ kernel/cred.c | 179 +++++++++++++++++++++++++++++++------ security/Kconfig.hardening | 13 +++ 3 files changed, 177 insertions(+), 27 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index a102a10f833f..8eacc4f3de60 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -16,10 +16,17 @@ #include #include #include +#include struct cred; struct inode; +#ifdef CONFIG_KPKEYS_HARDENED_CRED +KPKEYS_GUARD(kpkeys_hardened_cred, KPKEYS_LVL_CRED) +#else +KPKEYS_GUARD_NOOP(kpkeys_hardened_cred) +#endif + /* * COW Supplementary groups list */ @@ -162,6 +169,8 @@ extern int set_create_files_as(struct cred *, struct inode *); extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); +extern struct cred *prepare_protected_creds(void); +extern struct cred *protect_creds(struct cred *); static inline bool cap_ambient_invariant_ok(const struct cred *cred) { @@ -199,6 +208,7 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) struct cred *nonconst_cred = (struct cred *) cred; if 
(!cred) return cred; + guard(kpkeys_hardened_cred)(); nonconst_cred->non_rcu = 0; atomic_long_add(nr, &nonconst_cred->usage); return cred; @@ -223,6 +233,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; + guard(kpkeys_hardened_cred)(); if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; nonconst_cred->non_rcu = 0; @@ -246,6 +257,7 @@ static inline void put_cred_many(const struct cred *_cred, int nr) struct cred *cred = (struct cred *) _cred; if (cred) { + guard(kpkeys_hardened_cred)(); if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } diff --git a/kernel/cred.c b/kernel/cred.c index 9676965c0981..95d316f73786 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -20,6 +20,8 @@ #include #include +#include "../mm/slab.h" + #if 0 #define kdebug(FMT, ...) \ printk("[%-5.5s%5u] " FMT "\n", \ @@ -62,6 +64,48 @@ struct cred init_cred = { .ucounts = &init_ucounts, }; +static bool hardened_cred_enabled(void) +{ + return IS_ENABLED(CONFIG_KPKEYS_HARDENED_CRED) && arch_kpkeys_enabled(); +} + +static bool cred_is_protected(const struct cred *cred) +{ + struct slab *slab; + + slab = virt_to_slab(cred); + if (!slab) + return false; + + return slab->slab_cache->flags & SLAB_SET_PKEY; +} + +static struct cred *alloc_unprotected_creds(gfp_t flags) +{ + if (hardened_cred_enabled()) + return kmalloc(sizeof(struct cred), flags); + else + return kmem_cache_alloc(cred_jar, flags); +} + +static struct cred *alloc_protected_creds(gfp_t flags) +{ + return kmem_cache_alloc(cred_jar, flags); +} + +static void free_creds(struct cred *cred) +{ + bool cred_in_jar = true; + + if (hardened_cred_enabled()) + cred_in_jar = cred_is_protected(cred); + + if (cred_in_jar) + kmem_cache_free(cred_jar, cred); + else + kfree(cred); +} + /* * The RCU callback to actually dispose of a set of credentials */ @@ -75,7 +119,8 @@ static void put_cred_rcu(struct rcu_head *rcu) 
panic("CRED: put_cred_rcu() sees %p with usage %ld\n", cred, atomic_long_read(&cred->usage)); - security_cred_free(cred); + scoped_guard(kpkeys_hardened_cred) + security_cred_free(cred); key_put(cred->session_keyring); key_put(cred->process_keyring); key_put(cred->thread_keyring); @@ -86,7 +131,7 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->ucounts) put_ucounts(cred->ucounts); put_user_ns(cred->user_ns); - kmem_cache_free(cred_jar, cred); + free_creds(cred); } /** @@ -174,7 +219,7 @@ struct cred *cred_alloc_blank(void) { struct cred *new; - new = kmem_cache_zalloc(cred_jar, GFP_KERNEL); + new = alloc_unprotected_creds(GFP_KERNEL | __GFP_ZERO); if (!new) return NULL; @@ -189,29 +234,10 @@ struct cred *cred_alloc_blank(void) return NULL; } -/** - * prepare_creds - Prepare a new set of credentials for modification - * - * Prepare a new set of task credentials for modification. A task's creds - * shouldn't generally be modified directly, therefore this function is used to - * prepare a new copy, which the caller then modifies and then commits by - * calling commit_creds(). - * - * Preparation involves making a copy of the objective creds for modification. - * - * Returns a pointer to the new creds-to-be if successful, NULL otherwise. - * - * Call commit_creds() or abort_creds() to clean up. - */ -struct cred *prepare_creds(void) +static struct cred *__prepare_creds(struct cred *new) { struct task_struct *task = current; const struct cred *old; - struct cred *new; - - new = kmem_cache_alloc(cred_jar, GFP_KERNEL); - if (!new) - return NULL; kdebug("prepare_creds() alloc %p", new); @@ -248,8 +274,57 @@ struct cred *prepare_creds(void) abort_creds(new); return NULL; } + +/** + * prepare_creds - Prepare a new set of credentials for modification + * + * Prepare a new set of task credentials for modification. 
A task's creds + * shouldn't generally be modified directly, therefore this function is used to + * prepare a new copy, which the caller then modifies and then commits by + * calling commit_creds(). + * + * Preparation involves making a copy of the objective creds for modification. + * + * Returns a pointer to the new creds-to-be if successful, NULL otherwise. + * + * Call commit_creds() or abort_creds() to clean up. + */ +struct cred *prepare_creds(void) +{ + struct cred *new; + + new = alloc_unprotected_creds(GFP_KERNEL); + if (!new) + return NULL; + + return __prepare_creds(new); +} EXPORT_SYMBOL(prepare_creds); + +/** + * prepare_protected_creds - Prepare a new set of credentials in protected + * memory + * + * This function is equivalent to protect_creds(prepare_creds()), but avoids + * the copy in prepare_creds() by directly allocating the credentials in + * protected memory. The returned object may only be modified by switching to + * a higher kpkeys level, if kpkeys_hardened_cred is enabled. 
+ */ +struct cred *prepare_protected_creds(void) +{ + struct cred *new; + + new = alloc_protected_creds(GFP_KERNEL); + if (!new) + return NULL; + + guard(kpkeys_hardened_cred)(); + + return __prepare_creds(new); +} +EXPORT_SYMBOL(prepare_protected_creds); + /* * Prepare credentials for current to perform an execve() * - The caller must hold ->cred_guard_mutex @@ -309,7 +384,9 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) return 0; } - new = prepare_creds(); + guard(kpkeys_hardened_cred)(); + + new = prepare_protected_creds(); if (!new) return -ENOMEM; @@ -400,6 +477,10 @@ int commit_creds(struct cred *new) BUG_ON(task->cred != old); BUG_ON(atomic_long_read(&new->usage) < 1); + guard(kpkeys_hardened_cred)(); + + new = protect_creds(new); + get_cred(new); /* we will require a ref for the subj creds too */ /* dumpability changes */ @@ -555,9 +636,16 @@ int set_cred_ucounts(struct cred *new) */ void __init cred_init(void) { + slab_flags_t flags = SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT; + struct kmem_cache_args args = {}; + + if (hardened_cred_enabled()) { + flags |= SLAB_SET_PKEY; + args.pkey = KPKEYS_PKEY_CRED; + } + /* allocate a slab in which we can store credentials */ - cred_jar = KMEM_CACHE(cred, - SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); + cred_jar = kmem_cache_create("cred", sizeof(struct cred), &args, flags); } /** @@ -584,7 +672,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (WARN_ON_ONCE(!daemon)) return NULL; - new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + new = alloc_unprotected_creds(GFP_KERNEL); if (!new) return NULL; @@ -627,6 +715,43 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) } EXPORT_SYMBOL(prepare_kernel_cred); +/** + * protect_creds - Move a set of credentials to protected memory + * @cred: The credentials to protect + * + * If kpkeys_hardened_cred is enabled, this function transfers @cred to + * protected memory. 
The returned object may only be modified by switching to a + * higher kpkeys level, for instance by using guard(kpkeys_hardened_cred). + * + * Because the credentials are copied to a new location and the old location is + * freed, any existing reference to @cred becomes invalid after this function is + * called. For this reason only the caller should have a reference to @cred. + * + * If any failure occurs, or if kpkeys_hardened_cred is disabled, @cred is + * returned unmodified. + */ +struct cred *protect_creds(struct cred *cred) +{ + struct cred *protected_cred; + + if (!hardened_cred_enabled()) + return cred; + + if (WARN_ON(atomic_long_read(&cred->usage) != 1)) + return cred; + + protected_cred = alloc_protected_creds(GFP_KERNEL); + if (WARN_ON(!protected_cred)) + return cred; + + guard(kpkeys_hardened_cred)(); + + *protected_cred = *cred; + kfree(cred); + return protected_cred; +} +EXPORT_SYMBOL(protect_creds); + /** * set_security_override - Set the security ID in a set of credentials * @new: The credentials to alter diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 653663008096..cb494448c7ae 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -289,6 +289,19 @@ config KPKEYS_HARDENED_PGTABLES_KUNIT_TEST If unsure, say N. +config KPKEYS_HARDENED_CRED + bool "Harden task credentials using kernel pkeys" + depends on ARCH_HAS_KPKEYS + select KPKEYS_UNRESTRICTED_RCU + help + This option enforces the immutability of task credentials + (struct cred) by allocating them with a non-default protection (pkey) + and only enabling write access to that pkey in a limited set of cred + helpers. + + This option has no effect if the system does not support + kernel pkeys. + endmenu config CC_HAS_RANDSTRUCT -- 2.47.0 The kpkeys_hardened_cred feature, when enabled, automatically protects credentials installed by commit_creds(). 
However, because override_creds() does not consume its argument, it is up to its callers to protect the credentials before calling override_creds(). This is done by calling protect_creds(), moving the credentials to a protected memory location. In some cases, the credentials returned by prepare_creds() are passed to override_creds() as-is. In such situation where write access to the credentials is not needed, prepare_protected_creds() is used to avoid the copy incurred by a separate call to protect_creds(). This patch covers the main users of override_creds(), but it is not comprehensive. This patch is a no-op if kpkeys_hardened_cred isn't enabled. Signed-off-by: Kevin Brodsky --- fs/aio.c | 2 +- fs/fuse/passthrough.c | 2 +- fs/nfs/nfs4idmap.c | 2 +- fs/nfsd/auth.c | 2 +- fs/nfsd/nfs4recover.c | 2 +- fs/nfsd/nfsfh.c | 2 +- fs/open.c | 2 +- fs/overlayfs/dir.c | 1 + fs/overlayfs/super.c | 2 +- 9 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 7fc7b6221312..7529399bb71d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1658,7 +1658,7 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, if (unlikely(!req->file->f_op->fsync)) return -EINVAL; - req->creds = prepare_creds(); + req->creds = prepare_protected_creds(); if (!req->creds) return -ENOMEM; diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 607ef735ad4a..4451651b1e51 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -248,7 +248,7 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) goto out_fput; fb->file = file; - fb->cred = prepare_creds(); + fb->cred = prepare_protected_creds(); refcount_set(&fb->count, 1); res = fuse_backing_id_alloc(fc, fb); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 00932500fce4..6eef34b02513 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -228,7 +228,7 @@ int nfs_idmap_init(void) set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; 
cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; - id_resolver_cache = cred; + id_resolver_cache = protect_creds(cred); return 0; failed_reg_legacy: diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 4dc327e02456..09b377a97147 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -79,7 +79,7 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - put_cred(override_creds(new)); + put_cred(override_creds(protect_creds(new))); return 0; oom: diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2231192ec33f..63ffa7936246 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -82,7 +82,7 @@ nfs4_save_creds(const struct cred **original_creds) new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; - *original_creds = override_creds(new); + *original_creds = override_creds(protect_creds(new)); return 0; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 74cf1f4de174..887ee5adb2dc 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -223,7 +223,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - put_cred(override_creds(new)); + put_cred(override_creds(protect_creds(new))); } else { error = nfsd_setuser_and_check_port(rqstp, cred, exp); if (error) diff --git a/fs/open.c b/fs/open.c index 9655158c3885..351ac9e86a15 100644 --- a/fs/open.c +++ b/fs/open.c @@ -461,7 +461,7 @@ static const struct cred *access_override_creds(void) * freeing. 
*/ override_cred->non_rcu = 1; - return override_creds(override_cred); + return override_creds(protect_creds(override_cred)); } static int do_faccessat(int dfd, const char __user *filename, int mode, int flags) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 70b8687dc45e..7e7d4f26198d 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -575,6 +575,7 @@ static const struct cred *ovl_setup_cred_for_create(struct dentry *dentry, * We must be called with creator creds already, otherwise we risk * leaking creds. */ + override_cred = protect_creds(override_cred); old_cred = override_creds(override_cred); WARN_ON_ONCE(old_cred != ovl_creds(dentry->d_sb)); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index df85a76597e9..0a45760ff7ae 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1326,7 +1326,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) err = -ENOMEM; if (!ofs->creator_cred) - ofs->creator_cred = cred = prepare_creds(); + ofs->creator_cred = cred = prepare_protected_creds(); else cred = (struct cred *)ofs->creator_cred; if (!cred) -- 2.47.0 Add basic tests for the kpkeys_hardened_cred feature: try to perform a direct write to current->{cred,real_cred} and ensure it fails. Also check that prepare_creds, protect_creds, prepare_protected_creds behave as expected. 
Signed-off-by: Kevin Brodsky --- mm/Makefile | 1 + mm/tests/kpkeys_hardened_cred_kunit.c | 79 +++++++++++++++++++++++++++ security/Kconfig.hardening | 11 ++++ 3 files changed, 91 insertions(+) create mode 100644 mm/tests/kpkeys_hardened_cred_kunit.c diff --git a/mm/Makefile b/mm/Makefile index b1e6cf7f753c..c79af57c0aa5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -149,3 +149,4 @@ obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o obj-$(CONFIG_KPKEYS_HARDENED_PGTABLES) += kpkeys_hardened_pgtables.o obj-$(CONFIG_KPKEYS_HARDENED_PGTABLES_KUNIT_TEST) += tests/kpkeys_hardened_pgtables_kunit.o +obj-$(CONFIG_KPKEYS_HARDENED_CRED_KUNIT_TEST) += tests/kpkeys_hardened_cred_kunit.o diff --git a/mm/tests/kpkeys_hardened_cred_kunit.c b/mm/tests/kpkeys_hardened_cred_kunit.c new file mode 100644 index 000000000000..ed07469b504c --- /dev/null +++ b/mm/tests/kpkeys_hardened_cred_kunit.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <kunit/test.h> +#include <linux/cred.h> +#include <linux/kpkeys.h> + +static int increment_cred_uid_nofault(struct cred *cred) +{ + uid_t val = __kuid_val(cred->uid) + 1; + + return copy_to_kernel_nofault(&cred->uid, &val, sizeof(cred->uid)); +} + +static void write_current_creds(struct kunit *test) +{ + int ret; + + if (!arch_kpkeys_enabled()) + kunit_skip(test, "kpkeys are not supported"); + + ret = increment_cred_uid_nofault((struct cred *)current->cred); + KUNIT_EXPECT_EQ_MSG(test, ret, -EFAULT, + "Write to current->cred wasn't prevented"); + + ret = increment_cred_uid_nofault((struct cred *)current->real_cred); + KUNIT_EXPECT_EQ_MSG(test, ret, -EFAULT, + "Write to current->real_cred wasn't prevented"); +} + +static void write_new_creds(struct kunit *test) +{ + struct cred *cred, *protected_cred; + int ret; + + if (!arch_kpkeys_enabled()) + kunit_skip(test, "kpkeys are not supported"); + + /* prepare_creds() + protect_creds() */ + cred = prepare_creds(); + KUNIT_ASSERT_NOT_NULL(test, cred); + + ret = increment_cred_uid_nofault(cred); + 
KUNIT_EXPECT_EQ_MSG(test, ret, 0, + "Failed to write to unprotected creds"); + + protected_cred = protect_creds(cred); + KUNIT_EXPECT_PTR_NE_MSG(test, cred, protected_cred, + "protect_creds() failed to move creds to protected memory"); + + ret = increment_cred_uid_nofault(protected_cred); + KUNIT_EXPECT_EQ_MSG(test, ret, -EFAULT, + "Write to protected_cred wasn't prevented"); + + put_cred(protected_cred); + + /* prepare_protected_creds() */ + protected_cred = prepare_protected_creds(); + + ret = increment_cred_uid_nofault(protected_cred); + KUNIT_EXPECT_EQ_MSG(test, ret, -EFAULT, + "Write to protected_cred wasn't prevented"); + + put_cred(protected_cred); + +} + +static struct kunit_case kpkeys_hardened_cred_test_cases[] = { + KUNIT_CASE(write_current_creds), + KUNIT_CASE(write_new_creds), + {} +}; + +static struct kunit_suite kpkeys_hardened_cred_test_suite = { + .name = "Hardened credentials using kpkeys", + .test_cases = kpkeys_hardened_cred_test_cases, +}; +kunit_test_suite(kpkeys_hardened_cred_test_suite); + +MODULE_DESCRIPTION("Tests for the kpkeys_hardened_cred feature"); +MODULE_LICENSE("GPL"); diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index cb494448c7ae..7ceb1e6846f2 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -302,6 +302,17 @@ config KPKEYS_HARDENED_CRED This option has no effect if the system does not support kernel pkeys. +config KPKEYS_HARDENED_CRED_KUNIT_TEST + tristate "KUnit tests for kpkeys_hardened_cred" if !KUNIT_ALL_TESTS + depends on KPKEYS_HARDENED_CRED + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to check that the kpkeys_hardened_cred feature + functions as intended, i.e. prevents arbitrary writes to live credentials. + + If unsure, say N. + endmenu config CC_HAS_RANDSTRUCT -- 2.47.0