ipmr_rtm_getroute() calls __ipmr_get_table(), ipmr_cache_find(), and ipmr_fill_mroute(). The table is not removed until netns dismantle, and net->ipv4.mr_tables is managed with the RCU list API, so __ipmr_get_table() is safe under RCU. struct mfc_cache is freed by mr_cache_put() after an RCU grace period, so we can use ipmr_cache_find() under RCU. The rcu_read_lock() around it was just to avoid a lockdep splat for rhl_for_each_entry_rcu(). ipmr_fill_mroute() calls mr_fill_mroute(), which properly uses RCU. Let's drop RTNL for ipmr_rtm_getroute() and use RCU instead. Signed-off-by: Kuniyuki Iwashima --- net/ipv4/ipmr.c | 34 ++++++++++++++++++---------------- net/ipv4/ipmr_base.c | 4 ++-- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7b5177b673d3..53dda438ba32 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2680,9 +2680,9 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX + 1]; - struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; + struct sk_buff *skb; __be32 src, grp; u32 tableid; int err; @@ -2695,39 +2695,40 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, grp = nla_get_in_addr_default(tb[RTA_DST], 0); tableid = nla_get_u32_default(tb[RTA_TABLE], 0); + skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto errout; + } + + rcu_read_lock(); + mrt = __ipmr_get_table(net, tableid ? 
tableid : RT_TABLE_DEFAULT); if (!mrt) { err = -ENOENT; - goto errout_free; + goto errout_unlock; } - /* entries are added/deleted only under RTNL */ - rcu_read_lock(); cache = ipmr_cache_find(mrt, src, grp); - rcu_read_unlock(); if (!cache) { err = -ENOENT; - goto errout_free; - } - - skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL); - if (!skb) { - err = -ENOBUFS; - goto errout_free; + goto errout_unlock; } err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); if (err < 0) - goto errout_free; + goto errout_unlock; - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + rcu_read_unlock(); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; -errout_free: +errout_unlock: + rcu_read_unlock(); kfree_skb(skb); goto errout; } @@ -3297,7 +3298,8 @@ static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, .doit = ipmr_rtm_route}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, - .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute}, + .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, }; int __init ip_mr_init(void) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 28d77d454d44..9a38419e0f26 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -223,7 +223,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rcu_read_lock(); vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev); - if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) { + if (vif_dev && nla_put_u32(skb, RTA_IIF, READ_ONCE(vif_dev->ifindex)) < 0) { rcu_read_unlock(); return -EMSGSIZE; } @@ -252,7 +252,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = vif_dev->ifindex; + nhp->rtnh_ifindex = READ_ONCE(vif_dev->ifindex); nhp->rtnh_len = sizeof(*nhp); 
} } -- 2.53.0.414.gf7e9f6c205-goog