k[v]free_rcu() repurposes two fields of struct rcu_head: 'func' to
store the start address of the object, and 'next' to link objects.
However, using 'func' to store the start address is unnecessary:

1. slab can get the start address from the address of the struct
   rcu_head field via nearest_obj(), and

2. vmalloc and large kmalloc can get the start address by aligning
   the address of the struct rcu_head field down to the page boundary.

Therefore, allow an 8-byte (on 64-bit) field of a new type, struct
rcu_ptr, to be used with the two-argument form of k[v]free_rcu().

Some users use both call_rcu() and k[v]free_rcu() to process callbacks
(e.g., maple tree), so it makes sense for a struct rcu_head field to
handle both cases. However, many users that simply free objects via
kvfree_rcu() can save one pointer by using struct rcu_ptr instead of
struct rcu_head.

Note that struct rcu_ptr is a single pointer only when
CONFIG_KVFREE_RCU_BATCHED=y. To keep the kvfree_rcu() implementation
minimal when CONFIG_KVFREE_RCU_BATCHED is disabled, struct rcu_ptr is
the same size as struct rcu_head, and the implementation of
kvfree_rcu() remains unchanged in that configuration.

Suggested-by: Alexei Starovoitov
Signed-off-by: Harry Yoo
---
 include/linux/rcupdate.h | 61 +++++++++++++++++++++++++++-------------
 include/linux/types.h    |  9 ++++++
 mm/slab_common.c         | 40 +++++++++++++++-----------
 3 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c5b30054cd01..8924edf7e8c1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1059,22 +1059,30 @@ static inline void rcu_read_unlock_migrate(void)
 /**
  * kfree_rcu() - kfree an object after a grace period.
  * @ptr: pointer to kfree for double-argument invocations.
- * @rhf: the name of the struct rcu_head within the type of @ptr.
+ * @rf: the name of the struct rcu_head or struct rcu_ptr within the type of @ptr.
  *
  * Many rcu callbacks functions just call kfree() on the base structure.
  * These functions are trivial, but their size adds up, and furthermore
  * when they are used in a kernel module, that module must invoke the
  * high-latency rcu_barrier() function at module-unload time.
+ * The kfree_rcu() function handles this issue by batching.
  *
- * The kfree_rcu() function handles this issue. In order to have a universal
- * callback function handling different offsets of rcu_head, the callback needs
- * to determine the starting address of the freed object, which can be a large
- * kmalloc or vmalloc allocation. To allow simply aligning the pointer down to
- * page boundary for those, only offsets up to 4095 bytes can be accommodated.
- * If the offset is larger than 4095 bytes, a compile-time error will
- * be generated in kvfree_rcu_arg_2(). If this error is triggered, you can
- * either fall back to use of call_rcu() or rearrange the structure to
- * position the rcu_head structure into the first 4096 bytes.
+ * Typically, struct rcu_head is used to process RCU callbacks, but it requires
+ * two pointers. However, since kfree_rcu() uses kfree() as the callback
+ * function, it can process callbacks with struct rcu_ptr, which is only
+ * one pointer in size when CONFIG_KVFREE_RCU_BATCHED=y.
+ *
+ * The type of @rf can be either struct rcu_head or struct rcu_ptr, and when
+ * possible, it is recommended to use struct rcu_ptr due to its smaller size.
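+ *
+ * For example, given a structure that embeds a struct rcu_ptr (the type
+ * "struct foo" here is only a hypothetical illustration):
+ *
+ *	struct foo {
+ *		int data;
+ *		struct rcu_ptr rp;
+ *	};
+ *
+ * a kmalloc()-ed instance pointed to by foo_ptr can be freed after a
+ * grace period with:
+ *
+ *	kfree_rcu(foo_ptr, rp);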
+ *
+ * In order to have a universal callback function handling different offsets
+ * of @rf, the callback needs to determine the starting address of the freed
+ * object, which can be a large kmalloc or vmalloc allocation. To allow simply
+ * aligning the pointer down to the page boundary for those, only offsets up
+ * to 4095 bytes can be accommodated. If the offset is larger than 4095 bytes,
+ * a compile-time error will be generated in kvfree_rcu_arg_2().
+ * If this error is triggered, you can either fall back to use of call_rcu()
+ * or rearrange the structure to position @rf into the first 4096 bytes.
  *
  * The object to be freed can be allocated either by kmalloc() or
  * kmem_cache_alloc().
@@ -1084,8 +1092,8 @@ static inline void rcu_read_unlock_migrate(void)
  * The BUILD_BUG_ON check must not involve any function calls, hence the
  * checks are done in macros here.
  */
-#define kfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf)
-#define kvfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf)
+#define kfree_rcu(ptr, rf) kvfree_rcu_arg_2(ptr, rf)
+#define kvfree_rcu(ptr, rf) kvfree_rcu_arg_2(ptr, rf)
 
 /**
  * kfree_rcu_mightsleep() - kfree an object after a grace period.
@@ -1107,22 +1115,37 @@ static inline void rcu_read_unlock_migrate(void)
 #define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
 #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
 
-/*
- * In mm/slab_common.c, no suitable header to include here.
- */
-void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+
+#ifdef CONFIG_KVFREE_RCU_BATCHED
+void kvfree_call_rcu_ptr(struct rcu_ptr *head, void *ptr);
+#define kvfree_call_rcu(head, ptr)				\
+	_Generic((head),					\
+		struct rcu_head *: kvfree_call_rcu_ptr,		\
+		struct rcu_ptr *: kvfree_call_rcu_ptr,		\
+		void *: kvfree_call_rcu_ptr			\
+	)((struct rcu_ptr *)(head), (ptr))
+#else
+void kvfree_call_rcu_head(struct rcu_head *head, void *ptr);
+static_assert(sizeof(struct rcu_head) == sizeof(struct rcu_ptr));
+#define kvfree_call_rcu(head, ptr)				\
+	_Generic((head),					\
+		struct rcu_head *: kvfree_call_rcu_head,	\
+		struct rcu_ptr *: kvfree_call_rcu_head,		\
+		void *: kvfree_call_rcu_head			\
+	)((struct rcu_head *)(head), (ptr))
+#endif
 
 /*
  * The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the
  * comment of kfree_rcu() for details.
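+ * The check applies whether @rf is a struct rcu_head or a struct rcu_ptr.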
  */
-#define kvfree_rcu_arg_2(ptr, rhf)					\
+#define kvfree_rcu_arg_2(ptr, rf)					\
 do {									\
 	typeof (ptr) ___p = (ptr);					\
 									\
 	if (___p) {							\
-		BUILD_BUG_ON(offsetof(typeof(*(ptr)), rhf) >= 4096);	\
-		kvfree_call_rcu(&((___p)->rhf), (void *) (___p));	\
+		BUILD_BUG_ON(offsetof(typeof(*(ptr)), rf) >= 4096);	\
+		kvfree_call_rcu(&((___p)->rf), (void *) (___p));	\
 	}								\
 } while (0)
diff --git a/include/linux/types.h b/include/linux/types.h
index d4437e9c452c..e5596ebab29c 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -245,6 +245,15 @@ struct callback_head {
 } __attribute__((aligned(sizeof(void *))));
 
 #define rcu_head callback_head
+
+struct rcu_ptr {
+#ifdef CONFIG_KVFREE_RCU_BATCHED
+	struct rcu_ptr *next;
+#else
+	struct callback_head head;
+#endif
+} __attribute__((aligned(sizeof(void *))));
+
 typedef void (*rcu_callback_t)(struct rcu_head *head);
 
 typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d5a70a831a2a..3ec99a5463d3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1265,7 +1265,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
 
 #ifndef CONFIG_KVFREE_RCU_BATCHED
 
-void kvfree_call_rcu(struct rcu_head *head, void *ptr)
+void kvfree_call_rcu_head(struct rcu_head *head, void *ptr)
 {
 	if (head) {
 		kasan_record_aux_stack(ptr);
@@ -1278,7 +1278,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 	synchronize_rcu();
 	kvfree(ptr);
 }
-EXPORT_SYMBOL_GPL(kvfree_call_rcu);
+EXPORT_SYMBOL_GPL(kvfree_call_rcu_head);
 
 void __init kvfree_rcu_init(void)
 {
@@ -1346,7 +1346,7 @@ struct kvfree_rcu_bulk_data {
 
 struct kfree_rcu_cpu_work {
 	struct rcu_work rcu_work;
-	struct rcu_head *head_free;
+	struct rcu_ptr *head_free;
 	struct rcu_gp_oldstate head_free_gp_snap;
 	struct list_head bulk_head_free[FREE_N_CHANNELS];
 	struct kfree_rcu_cpu *krcp;
@@ -1381,8 +1381,7 @@ struct kfree_rcu_cpu_work {
  */
 struct kfree_rcu_cpu {
 	// Objects queued on a linked list
-	// through their rcu_head structures.
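+	// through their rcu_ptr structures.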
-	struct rcu_head *head;
+	struct rcu_ptr *head;
 	unsigned long head_gp_snap;
 	atomic_t head_count;
 
@@ -1523,18 +1522,28 @@ kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
 }
 
 static void
-kvfree_rcu_list(struct rcu_head *head)
+kvfree_rcu_list(struct rcu_ptr *head)
 {
-	struct rcu_head *next;
+	struct rcu_ptr *next;
 
 	for (; head; head = next) {
-		void *ptr = (void *) head->func;
-		unsigned long offset = (void *) head - ptr;
+		void *ptr;
+		unsigned long offset;
+		struct slab *slab;
+
+		slab = virt_to_slab(head);
+		if (is_vmalloc_addr(head) || !slab)
+			ptr = (void *)PAGE_ALIGN_DOWN((unsigned long)head);
+		else
+			ptr = nearest_obj(slab->slab_cache, slab, head);
+		offset = (void *)head - ptr;
 
 		next = head->next;
 		debug_rcu_head_unqueue((struct rcu_head *)ptr);
 		rcu_lock_acquire(&rcu_callback_map);
-		trace_rcu_invoke_kvfree_callback("slab", head, offset);
+		trace_rcu_invoke_kvfree_callback("slab",
+						 (struct rcu_head *)head,
+						 offset);
 
 		kvfree(ptr);
 
@@ -1552,7 +1561,7 @@ static void kfree_rcu_work(struct work_struct *work)
 	unsigned long flags;
 	struct kvfree_rcu_bulk_data *bnode, *n;
 	struct list_head bulk_head[FREE_N_CHANNELS];
-	struct rcu_head *head;
+	struct rcu_ptr *head;
 	struct kfree_rcu_cpu *krcp;
 	struct kfree_rcu_cpu_work *krwp;
 	struct rcu_gp_oldstate head_gp_snap;
@@ -1675,7 +1684,7 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
 {
 	struct list_head bulk_ready[FREE_N_CHANNELS];
 	struct kvfree_rcu_bulk_data *bnode, *n;
-	struct rcu_head *head_ready = NULL;
+	struct rcu_ptr *head_ready = NULL;
 	unsigned long flags;
 	int i;
 
@@ -1938,7 +1947,7 @@ void __init kfree_rcu_scheduler_running(void)
  * be free'd in workqueue context. This allows us to: batch requests together to
  * reduce the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load.
  */
-void kvfree_call_rcu(struct rcu_head *head, void *ptr)
+void kvfree_call_rcu_ptr(struct rcu_ptr *head, void *ptr)
 {
 	unsigned long flags;
 	struct kfree_rcu_cpu *krcp;
@@ -1960,7 +1969,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 	// Queue the object but don't yet schedule the batch.
 	if (debug_rcu_head_queue(ptr)) {
 		// Probable double kfree_rcu(), just leak.
-		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
+		WARN_ONCE(1, "%s(): Double-freed call. rcu_ptr %p\n",
			  __func__, head);
 
 		// Mark as success and leave.
@@ -1976,7 +1985,6 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 		// Inline if kvfree_rcu(one_arg) call.
 		goto unlock_return;
 
-	head->func = ptr;
 	head->next = krcp->head;
 	WRITE_ONCE(krcp->head, head);
 	atomic_inc(&krcp->head_count);
@@ -2012,7 +2020,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 		kvfree(ptr);
 	}
 }
-EXPORT_SYMBOL_GPL(kvfree_call_rcu);
+EXPORT_SYMBOL_GPL(kvfree_call_rcu_ptr);
 
 static inline void __kvfree_rcu_barrier(void)
 {
-- 
2.43.0