We will convert RTM_GETNEIGH to RCU. neigh_get() looks up pneigh_entry by pneigh_lookup() and passes it to pneigh_fill_info(). Then, we must ensure that the entry is alive till pneigh_fill_info() completes, but read_lock_bh(&tbl->lock) in pneigh_lookup() does not guarantee that. Also, we will convert all readers of tbl->phash_buckets[] to RCU. Let's use call_rcu() to free pneigh_entry and update phash_buckets[] and ->next by rcu_assign_pointer(). pneigh_ifdown_and_unlock() uses list_head to avoid overwriting ->next and moving RCU iterators to another list. pndisc_destructor() (only IPv6 ndisc uses this) uses a mutex, so it is not delayed to call_rcu(), where we cannot sleep. This is fine because the mcast code works with RCU and ipv6_dev_mc_dec() frees mcast objects after RCU grace period. While at it, we change the return type of pneigh_ifdown_and_unlock() to void. Signed-off-by: Kuniyuki Iwashima --- include/net/neighbour.h | 4 ++++ net/core/neighbour.c | 51 +++++++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7f3d57da5689a..a877e56210b22 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -180,6 +180,10 @@ struct pneigh_entry { possible_net_t net; struct net_device *dev; netdevice_tracker dev_tracker; + union { + struct list_head free_node; + struct rcu_head rcu; + }; u32 flags; u8 protocol; bool permanent; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 814a45fb1962e..6725a40b2db3a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -54,9 +54,9 @@ static void neigh_timer_handler(struct timer_list *t); static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid); static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); -static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, - struct net_device *dev, - bool skip_perm); +static void pneigh_ifdown_and_unlock(struct neigh_table *tbl, + struct net_device *dev, + bool skip_perm); #ifdef CONFIG_PROC_FS static const struct seq_operations neigh_stat_seq_ops; @@ -803,12 +803,20 @@ struct pneigh_entry *pneigh_create(struct neigh_table *tbl, write_lock_bh(&tbl->lock); n->next = tbl->phash_buckets[hash_val]; - tbl->phash_buckets[hash_val] = n; + rcu_assign_pointer(tbl->phash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); out: return n; } +static void pneigh_destroy(struct rcu_head *rcu) +{ + struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu); + + netdev_put(n->dev, &n->dev_tracker); + kfree(n); +} + int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { @@ -821,12 +829,13 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev && net_eq(pneigh_net(n), net)) { - *np = n->next; + rcu_assign_pointer(*np, n->next); write_unlock_bh(&tbl->lock); + if (tbl->pdestructor) tbl->pdestructor(n); - netdev_put(n->dev, &n->dev_tracker); - kfree(n); + + call_rcu(&n->rcu, pneigh_destroy); return 0; } } @@ -834,11 +843,12 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, return -ENOENT; } -static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, - struct net_device *dev, - bool skip_perm) +static void pneigh_ifdown_and_unlock(struct neigh_table *tbl, + struct net_device *dev, + bool skip_perm) { - struct pneigh_entry *n, **np, *freelist = NULL; + struct pneigh_entry *n, **np; + LIST_HEAD(head); u32 h; for (h = 0; h <= PNEIGH_HASHMASK; h++) { @@ -847,25 +857,26 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, if (skip_perm && n->permanent) goto skip; if (!dev || n->dev == dev) { - *np = n->next; - n->next = freelist; - freelist = n; + rcu_assign_pointer(*np, n->next); + list_add(&n->free_node, &head); continue; } skip: np = &n->next; } } + write_unlock_bh(&tbl->lock); - while ((n = freelist)) { - freelist = n->next; - n->next = NULL; + + while (!list_empty(&head)) { + n = list_first_entry(&head, typeof(*n), free_node); + list_del(&n->free_node); + if (tbl->pdestructor) tbl->pdestructor(n); - netdev_put(n->dev, &n->dev_tracker); - kfree(n); + + call_rcu(&n->rcu, pneigh_destroy); } - return -ENOENT; } static inline void neigh_parms_put(struct neigh_parms *parms) -- 2.50.0.727.gbf7dc18ff4-goog