Add the contended_release trace event. This tracepoint fires on the holder side when a contended lock is released, complementing the existing contention_begin/contention_end tracepoints which fire on the waiter side. This enables correlating lock hold time under contention with waiter events by lock address. Subsequent patches wire this tracepoint into the individual lock implementations. Signed-off-by: Dmitry Ilvokhin --- include/trace/events/lock.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h index 8e89baa3775f..4f28e41977ec 100644 --- a/include/trace/events/lock.h +++ b/include/trace/events/lock.h @@ -138,6 +138,23 @@ TRACE_EVENT(contention_end, TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret) ); +TRACE_EVENT(contended_release, + + TP_PROTO(void *lock), + + TP_ARGS(lock), + + TP_STRUCT__entry( + __field(void *, lock_addr) + ), + + TP_fast_assign( + __entry->lock_addr = lock; + ), + + TP_printk("%p", __entry->lock_addr) +); + #endif /* _TRACE_LOCK_H */ /* This part must be outside protection */ -- 2.47.3 Move the percpu_up_read() slowpath out of the inline function into a new __percpu_up_read_slowpath() to avoid binary size increase from adding a tracepoint to an inlined function. 
Signed-off-by: Dmitry Ilvokhin --- include/linux/percpu-rwsem.h | 15 +++------------ kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index c8cb010d655e..89506895365c 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) return ret; } +void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem); + static inline void percpu_up_read(struct percpu_rw_semaphore *sem) { rwsem_release(&sem->dep_map, _RET_IP_); @@ -118,18 +120,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) { this_cpu_dec(*sem->read_count); } else { - /* - * slowpath; reader will only ever wake a single blocked - * writer. - */ - smp_mb(); /* B matches C */ - /* - * In other words, if they see our decrement (presumably to - * aggregate zero, as that is the only time it matters) they - * will also see our critical section. - */ - this_cpu_dec(*sem->read_count); - rcuwait_wake_up(&sem->writer); + __percpu_up_read_slowpath(sem); } preempt_enable(); } diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index ef234469baac..4190635458da 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -288,3 +288,21 @@ void percpu_up_write(struct percpu_rw_semaphore *sem) rcu_sync_exit(&sem->rss); } EXPORT_SYMBOL_GPL(percpu_up_write); + +void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem) +{ + lockdep_assert_preemption_disabled(); + /* + * slowpath; reader will only ever wake a single blocked + * writer. + */ + smp_mb(); /* B matches C */ + /* + * In other words, if they see our decrement (presumably to + * aggregate zero, as that is the only time it matters) they + * will also see our critical section. 
+ */ + this_cpu_dec(*sem->read_count); + rcuwait_wake_up(&sem->writer); +} +EXPORT_SYMBOL_GPL(__percpu_up_read_slowpath); -- 2.47.3 Add trace_contended_release() calls to the slowpath unlock paths of sleepable locks: mutex, rtmutex, semaphore, rwsem, percpu-rwsem, and RT-specific rwbase locks. Each call site fires only when there are blocked waiters being woken, with two exceptions: percpu_up_write(), which always wakes via __wake_up(), and __percpu_up_read_slowpath(), which fires whenever the reader slowpath is taken without checking that a writer is actually waiting. Signed-off-by: Dmitry Ilvokhin --- kernel/locking/mutex.c | 1 + kernel/locking/percpu-rwsem.c | 3 +++ kernel/locking/rtmutex.c | 1 + kernel/locking/rwbase_rt.c | 8 +++++++- kernel/locking/rwsem.c | 9 +++++++-- kernel/locking/semaphore.c | 4 +++- 6 files changed, 22 insertions(+), 4 deletions(-) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index c867f6c15530..54ca045987a2 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -970,6 +970,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne next = waiter->task; + trace_contended_release(lock); debug_mutex_wake_waiter(lock, waiter); __clear_task_blocked_on(next, lock); wake_q_add(&wake_q, next); diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 4190635458da..0f2e8e63d252 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -263,6 +263,8 @@ void percpu_up_write(struct percpu_rw_semaphore *sem) { rwsem_release(&sem->dep_map, _RET_IP_); + trace_contended_release(sem); + /* * Signal the writer is done, no fast path yet. * @@ -297,6 +299,7 @@ void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem) * writer.
*/ smp_mb(); /* B matches C */ + trace_contended_release(sem); /* * In other words, if they see our decrement (presumably to * aggregate zero, as that is the only time it matters) they diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index c80902eacd79..e0873f0ed982 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1457,6 +1457,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) raw_spin_lock_irqsave(&lock->wait_lock, flags); } + trace_contended_release(lock); /* * The wakeup next waiter path does not suffer from the above * race. See the comments there. diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 9f4322c07486..42f3658c0059 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -162,8 +162,10 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, * worst case which can happen is a spurious wakeup. */ owner = rt_mutex_owner(rtm); - if (owner) + if (owner) { + trace_contended_release(rwb); rt_mutex_wake_q_add_task(&wqh, owner, state); + } /* Pairs with the preempt_enable in rt_mutex_wake_up_q() */ preempt_disable(); @@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb) unsigned long flags; raw_spin_lock_irqsave(&rtm->wait_lock, flags); + if (rt_mutex_has_waiters(rtm)) + trace_contended_release(rwb); __rwbase_write_unlock(rwb, WRITER_BIAS, flags); } @@ -213,6 +217,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb) unsigned long flags; raw_spin_lock_irqsave(&rtm->wait_lock, flags); + if (rt_mutex_has_waiters(rtm)) + trace_contended_release(rwb); /* Release it and account current as reader */ __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags); } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 24df4d98f7d2..4e61dc0bb045 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1360,6 +1360,7 @@ static inline void __up_read(struct rw_semaphore *sem) if (unlikely((tmp & 
(RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) == RWSEM_FLAG_WAITERS)) { clear_nonspinnable(sem); + trace_contended_release(sem); rwsem_wake(sem); } preempt_enable(); @@ -1383,8 +1384,10 @@ static inline void __up_write(struct rw_semaphore *sem) preempt_disable(); rwsem_clear_owner(sem); tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); - if (unlikely(tmp & RWSEM_FLAG_WAITERS)) + if (unlikely(tmp & RWSEM_FLAG_WAITERS)) { + trace_contended_release(sem); rwsem_wake(sem); + } preempt_enable(); } @@ -1407,8 +1410,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem) tmp = atomic_long_fetch_add_release( -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count); rwsem_set_reader_owned(sem); - if (tmp & RWSEM_FLAG_WAITERS) + if (tmp & RWSEM_FLAG_WAITERS) { + trace_contended_release(sem); rwsem_downgrade_wake(sem); + } preempt_enable(); } diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 3ef032e22f7e..3cef5ba88f7e 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -231,8 +231,10 @@ void __sched up(struct semaphore *sem) else __up(sem, &wake_q); raw_spin_unlock_irqrestore(&sem->lock, flags); - if (!wake_q_empty(&wake_q)) + if (!wake_q_empty(&wake_q)) { + trace_contended_release(sem); wake_up_q(&wake_q); + } } EXPORT_SYMBOL(up); -- 2.47.3