Like ppp_generic.c, convert the PPPoE socket hash table to use RCU for lookups and a spinlock for updates. This removes rwlock usage and allows lockless readers on the fast path. - Mark hash table and list pointers as __rcu. - Use spin_lock() to protect writers. - Readers use rcu_dereference() under rcu_read_lock(). All known callers of get_item() already hold the RCU read lock, so no additional locking is needed. - get_item() now uses refcount_inc_not_zero() instead of sock_hold() to safely take a reference. This prevents crashes if a socket is already in the process of being freed (sk_refcnt == 0). - Set SOCK_RCU_FREE to defer socket freeing until after an RCU grace period. - Move skb_queue_purge() into sk_destruct callback to ensure purge happens after an RCU grace period. Signed-off-by: Qingfang Deng --- v3: Move skb_queue_purge() into sk_destruct callback. Link to v2: https://lore.kernel.org/netdev/20250827023045.25002-1-dqfext@gmail.com/ v2: Use refcount_inc_not_zero() in get_item() to avoid taking a reference of a zero refcount socket. Link to v1: https://lore.kernel.org/netdev/20250826023346.26046-1-dqfext@gmail.com/ drivers/net/ppp/pppoe.c | 94 ++++++++++++++++++++++------------------ include/linux/if_pppox.h | 2 +- 2 files changed, 54 insertions(+), 42 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 410effa42ade..54522b26b728 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -100,8 +100,8 @@ struct pppoe_net { * as well, moreover in case of SMP less locking * controversy here */ - struct pppox_sock *hash_table[PPPOE_HASH_SIZE]; - rwlock_t hash_lock; + struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE]; + spinlock_t hash_lock; }; /* @@ -162,13 +162,13 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, int hash = hash_item(sid, addr); struct pppox_sock *ret; - ret = pn->hash_table[hash]; + ret = rcu_dereference(pn->hash_table[hash]); while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) return ret; - ret = ret->next; + ret = rcu_dereference(ret->next); } return NULL; @@ -177,19 +177,20 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); - struct pppox_sock *ret; + struct pppox_sock *ret, *first; - ret = pn->hash_table[hash]; + first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); + ret = first; while (ret) { if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && ret->pppoe_ifindex == po->pppoe_ifindex) return -EALREADY; - ret = ret->next; + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } - po->next = pn->hash_table[hash]; - pn->hash_table[hash] = po; + RCU_INIT_POINTER(po->next, first); + rcu_assign_pointer(pn->hash_table[hash], po); return 0; } @@ -198,20 +199,24 @@ static void __delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { int hash = hash_item(sid, addr); - struct pppox_sock *ret, **src; + struct pppox_sock *ret, __rcu **src; - ret = pn->hash_table[hash]; + ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); src = &pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) { - *src = ret->next; + struct pppox_sock *next; + + next = rcu_dereference_protected(ret->next, + lockdep_is_held(&pn->hash_lock)); + rcu_assign_pointer(*src, next); break; } src = &ret->next; - ret = ret->next; + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } } @@ -225,11 +230,9 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, { struct pppox_sock *po; - read_lock_bh(&pn->hash_lock); po = __get_item(pn, sid, addr, ifindex); - if (po) - sock_hold(sk_pppox(po)); - read_unlock_bh(&pn->hash_lock); + if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt)) + po = NULL; return po; } @@ -258,9 +261,9 @@ static inline struct pppox_sock *get_item_by_addr(struct net *net, static inline void delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { - write_lock_bh(&pn->hash_lock); + spin_lock(&pn->hash_lock); __delete_item(pn, sid, addr, ifindex); - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); } /*************************************************************************** @@ -276,14 +279,16 @@ static void pppoe_flush_dev(struct net_device *dev) int i; pn = pppoe_pernet(dev_net(dev)); - write_lock_bh(&pn->hash_lock); + spin_lock(&pn->hash_lock); for (i = 0; i < PPPOE_HASH_SIZE; i++) { - struct pppox_sock *po = pn->hash_table[i]; + struct pppox_sock *po = rcu_dereference_protected(pn->hash_table[i], + lockdep_is_held(&pn->hash_lock)); struct sock *sk; while (po) { while (po && po->pppoe_dev != dev) { - po = po->next; + po = rcu_dereference_protected(po->next, + lockdep_is_held(&pn->hash_lock)); } if (!po) @@ -300,7 +305,7 @@ static void pppoe_flush_dev(struct net_device *dev) */ sock_hold(sk); - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); lock_sock(sk); if (po->pppoe_dev == dev && @@ -320,11 +325,12 @@ static void pppoe_flush_dev(struct net_device *dev) */ BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); - write_lock_bh(&pn->hash_lock); - po = pn->hash_table[i]; + spin_lock(&pn->hash_lock); + po = rcu_dereference_protected(pn->hash_table[i], + lockdep_is_held(&pn->hash_lock)); } } - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); } static int pppoe_device_event(struct notifier_block *this, @@ -528,6 +534,11 @@ static struct proto pppoe_sk_proto __read_mostly = { .obj_size = sizeof(struct pppox_sock), }; +static void pppoe_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + /*********************************************************************** * * Initialize a new struct sock. @@ -542,11 +553,13 @@ static int pppoe_create(struct net *net, struct socket *sock, int kern) return -ENOMEM; sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); sock->state = SS_UNCONNECTED; sock->ops = &pppoe_ops; sk->sk_backlog_rcv = pppoe_rcv_core; + sk->sk_destruct = pppoe_destruct; sk->sk_state = PPPOX_NONE; sk->sk_type = SOCK_STREAM; sk->sk_family = PF_PPPOX; @@ -599,7 +612,6 @@ static int pppoe_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; - skb_queue_purge(&sk->sk_receive_queue); release_sock(sk); sock_put(sk); @@ -681,9 +693,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); - write_lock_bh(&pn->hash_lock); + spin_lock(&pn->hash_lock); error = __set_item(pn, po); - write_unlock_bh(&pn->hash_lock); + spin_unlock(&pn->hash_lock); if (error < 0) goto err_put; @@ -1052,11 +1064,11 @@ static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos) int i; for (i = 0; i < PPPOE_HASH_SIZE; i++) { - po = pn->hash_table[i]; + po = rcu_dereference(pn->hash_table[i]); while (po) { if (!pos--) goto out; - po = po->next; + po = rcu_dereference(po->next); } } @@ -1065,19 +1077,19 @@ static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos) } static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(pn->hash_lock) + __acquires(RCU) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); loff_t l = *pos; - read_lock_bh(&pn->hash_lock); + rcu_read_lock(); return l ? pppoe_get_idx(pn, --l) : SEQ_START_TOKEN; } static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); - struct pppox_sock *po; + struct pppox_sock *po, *next; ++*pos; if (v == SEQ_START_TOKEN) { @@ -1085,14 +1097,15 @@ static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) goto out; } po = v; - if (po->next) - po = po->next; + next = rcu_dereference(po->next); + if (next) + po = next; else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); po = NULL; while (++hash < PPPOE_HASH_SIZE) { - po = pn->hash_table[hash]; + po = rcu_dereference(pn->hash_table[hash]); if (po) break; } @@ -1103,10 +1116,9 @@ static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void pppoe_seq_stop(struct seq_file *seq, void *v) - __releases(pn->hash_lock) + __releases(RCU) { - struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); - read_unlock_bh(&pn->hash_lock); + rcu_read_unlock(); } static const struct seq_operations pppoe_seq_ops = { @@ -1149,7 +1161,7 @@ static __net_init int pppoe_init_net(struct net *net) struct pppoe_net *pn = pppoe_pernet(net); struct proc_dir_entry *pde; - rwlock_init(&pn->hash_lock); + spin_lock_init(&pn->hash_lock); pde = proc_create_net("pppoe", 0444, net->proc_net, &pppoe_seq_ops, sizeof(struct seq_net_private)); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index ff3beda1312c..db45d6f1c4f4 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -43,7 +43,7 @@ struct pppox_sock { /* struct sock must be the first member of pppox_sock */ struct sock sk; struct ppp_channel chan; - struct pppox_sock *next; /* for hash table */ + struct pppox_sock __rcu *next; /* for hash table */ union { struct pppoe_opt pppoe; struct pptp_opt pptp; -- 2.43.0