From: Wenchao Hao With class_idx encoded in obj, zs_free() can locate the size_class without holding pool->lock on 64-bit systems. Page migration also takes class->lock and only rewrites the PFN field of obj, so: 1. read obj locklessly, 2. lock the size_class derived from obj's class_idx, 3. re-read obj under class->lock to get a stable PFN. This eliminates the rwlock read-side cacheline bouncing between zs_free() and migration/compaction on multi-core systems. Annotate handle_to_obj()/record_obj() with READ_ONCE()/WRITE_ONCE() to prevent load/store tearing on the lockless read path and silence KCSAN data race reports. When ZS_OBJ_CLASS_BITS == 0 (32-bit, or 64-bit with obj too narrow to hold class_idx), zs_free() keeps pool->lock. Signed-off-by: Wenchao Hao --- mm/zsmalloc.c | 75 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index f84258d63917..fe20ab297542 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -21,6 +21,10 @@ * pool->lock * class->lock * zspage->lock + * + * When ZS_OBJ_CLASS_BITS > 0, zs_free() skips pool->lock; it picks + * the size_class from obj's encoded class_idx and serializes against + * page migration via class->lock. */ #include @@ -457,10 +461,13 @@ static void cache_free_zspage(struct zspage *zspage) kmem_cache_free(zspage_cachep, zspage); } -/* class->lock(which owns the handle) synchronizes races */ +/* + * Pairs with READ_ONCE() in handle_to_obj(): zs_free() may read the + * handle locklessly, so prevent store tearing here. + */ static void record_obj(unsigned long handle, unsigned long obj) { - *(unsigned long *)handle = obj; + WRITE_ONCE(*(unsigned long *)handle, obj); } static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc) @@ -817,7 +824,7 @@ static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx static unsigned long handle_to_obj(unsigned long handle) { - return *(unsigned long *)handle; + return READ_ONCE(*(unsigned long *)handle); } static inline bool obj_allocated(struct zpdesc *zpdesc, void *obj, @@ -1451,10 +1458,58 @@ static void obj_free(int class_size, unsigned long obj) mod_zspage_inuse(zspage, -1); } +/* + * Resolve @handle to its zspage / size_class and acquire class->lock. + * + * When class_idx is encoded in obj (ZS_OBJ_CLASS_BITS > 0), it is + * invariant under page migration, so the handle can be read locklessly + * to pick the size_class. Once class->lock is held migration is + * blocked and the handle is re-read to obtain a stable PFN. + * + * Otherwise (32-bit, or 64-bit fallback paths like UML where the + * encoding is disabled), fall back to pool->lock for the lookup. + */ +#if ZS_OBJ_CLASS_BITS > 0 +static inline void obj_handle_class_lock(struct zs_pool *pool, unsigned long handle, + unsigned long *objp, struct zspage **zspagep, + struct size_class **classp) + __acquires(&(*classp)->lock) +{ + struct zpdesc *f_zpdesc; + unsigned long obj; + + obj = handle_to_obj(handle); + *classp = pool->size_class[obj_to_class_idx(obj)]; + spin_lock(&(*classp)->lock); + /* Re-read under class->lock: PFN is now stable vs migration. */ + obj = handle_to_obj(handle); + obj_to_zpdesc(obj, &f_zpdesc); + *zspagep = get_zspage(f_zpdesc); + *objp = obj; +} +#else +static inline void obj_handle_class_lock(struct zs_pool *pool, unsigned long handle, + unsigned long *objp, struct zspage **zspagep, + struct size_class **classp) + __acquires(&(*classp)->lock) +{ + struct zpdesc *f_zpdesc; + unsigned long obj; + + read_lock(&pool->lock); + obj = handle_to_obj(handle); + obj_to_zpdesc(obj, &f_zpdesc); + *zspagep = get_zspage(f_zpdesc); + *classp = zspage_class(pool, *zspagep); + spin_lock(&(*classp)->lock); + read_unlock(&pool->lock); + *objp = obj; +} +#endif + void zs_free(struct zs_pool *pool, unsigned long handle) { struct zspage *zspage; - struct zpdesc *f_zpdesc; unsigned long obj; struct size_class *class; int fullness; @@ -1462,17 +1517,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle) if (IS_ERR_OR_NULL((void *)handle)) return; - /* - * The pool->lock protects the race with zpage's migration - * so it's safe to get the page from handle. - */ - read_lock(&pool->lock); - obj = handle_to_obj(handle); - obj_to_zpdesc(obj, &f_zpdesc); - zspage = get_zspage(f_zpdesc); - class = zspage_class(pool, zspage); - spin_lock(&class->lock); - read_unlock(&pool->lock); + obj_handle_class_lock(pool, handle, &obj, &zspage, &class); class_stat_sub(class, ZS_OBJS_INUSE, 1); obj_free(class->size, obj); -- 2.34.1