The possible vma->vm_refcnt values are confusing and vague, explain in detail what these can be in a comment describing the vma->vm_refcnt field and reference this comment in various places that read/write this field. No functional change intended. Signed-off-by: Lorenzo Stoakes --- include/linux/mm_types.h | 42 +++++++++++++++++++++++++++++++++++++-- include/linux/mmap_lock.h | 7 +++++++ mm/mmap_lock.c | 6 ++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bdbf17c4f26b..12281a1128c9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -758,7 +758,8 @@ static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) * set the VM_REFCNT_EXCLUDE_READERS_FLAG in vma->vm_refcnt to indiciate to * vma_start_read() that the reference count should be left alone. * - * Once the operation is complete, this value is subtracted from vma->vm_refcnt. + * See the comment describing vm_refcnt in vm_area_struct for details as to + * which values the VMA reference count can be. */ #define VM_REFCNT_EXCLUDE_READERS_BIT (30) #define VM_REFCNT_EXCLUDE_READERS_FLAG (1U << VM_REFCNT_EXCLUDE_READERS_BIT) @@ -989,7 +990,44 @@ struct vm_area_struct { struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif #ifdef CONFIG_PER_VMA_LOCK - /* Unstable RCU readers are allowed to read this. */ + /* + * Used to keep track of firstly, whether the VMA is attached, secondly, + * if attached, how many read locks are taken, and thirdly, if the + * VM_REFCNT_EXCLUDE_READERS_FLAG is set, whether any read locks held + * are currently in the process of being excluded. + * + * This value can be equal to: + * + * 0 - Detached. IMPORTANT: when the refcnt is zero, readers cannot + * increment it. + * + * 1 - Attached and either unlocked or write-locked. Write locks are + * identified via __is_vma_write_locked() which checks for equality of + * vma->vm_lock_seq and mm->mm_lock_seq. + * + * >1, < VM_REFCNT_EXCLUDE_READERS_FLAG - Read-locked or (unlikely) + * write-locked with other threads having temporarily incremented the + * reference count prior to determining it is write-locked and + * decrementing it again. + * + * VM_REFCNT_EXCLUDE_READERS_FLAG - Detached, pending + * __vma_exit_locked() completion which will decrement the reference + * count to zero. IMPORTANT - at this stage no further readers can + * increment the reference count. It can only be reduced. + * + * VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either write-locking + * an attached VMA and has yet to invoke __vma_exit_locked(), OR a + * thread is detaching a VMA and is waiting on a single spurious reader + * in order to decrement the reference count. IMPORTANT - as above, no + * further readers can increment the reference count. + * + * > VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either + * write-locking or detaching a VMA is waiting on readers to + * exit. IMPORTANT - as above, no ruther readers can increment the + * reference count. + * + * NOTE: Unstable RCU readers are allowed to read this. + */ refcount_t vm_refcnt ____cacheline_aligned_in_smp; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map vmlock_dep_map; diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 5acbd4ba1b52..a764439d0276 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -130,6 +130,9 @@ static inline bool is_vma_writer_only(int refcnt) * attached. Waiting on a detached vma happens only in * vma_mark_detached() and is a rare case, therefore most of the time * there will be no unnecessary wakeup. + * + * See the comment describing the vm_area_struct->vm_refcnt field for + * details of possible refcnt values. */ return (refcnt & VM_REFCNT_EXCLUDE_READERS_FLAG) && refcnt <= VM_REFCNT_EXCLUDE_READERS_FLAG + 1; @@ -249,6 +252,10 @@ static inline void vma_assert_locked(struct vm_area_struct *vma) { unsigned int mm_lock_seq; + /* + * See the comment describing the vm_area_struct->vm_refcnt field for + * details of possible refcnt values. + */ VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 && !__is_vma_write_locked(vma, &mm_lock_seq), vma); } diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index 1d23b48552e9..75dc098aea14 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -65,6 +65,9 @@ static inline int __vma_enter_locked(struct vm_area_struct *vma, /* * If vma is detached then only vma_mark_attached() can raise the * vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached(). + * + * See the comment describing the vm_area_struct->vm_refcnt field for + * details of possible refcnt values. */ if (!refcount_add_not_zero(VM_REFCNT_EXCLUDE_READERS_FLAG, &vma->vm_refcnt)) return 0; @@ -137,6 +140,9 @@ void vma_mark_detached(struct vm_area_struct *vma) * before they check vm_lock_seq, realize the vma is locked and drop * back the vm_refcnt. That is a narrow window for observing a raised * vm_refcnt. + * + * See the comment describing the vm_area_struct->vm_refcnt field for + * details of possible refcnt values. */ if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { /* Wait until vma is detached with no readers. */ -- 2.52.0