Inline ip6_route_get_saddr()'s functionality in rt6_fill_node(), to prepare for replacing the former with a dst-based function. NB: the l3mdev handling introduced by 252442f2ae31 "ipv6: fix source address selection with route leak" is dropped here - the l3mdev ifindex was a constant 0 at this call site, so that code was in fact dead. Signed-off-by: David Lamparter --- net/ipv6/route.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aee6a10b112a..9508e46b9e56 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5822,9 +5822,19 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nla_put_u32(skb, RTA_IIF, iif)) goto nla_put_failure; } else if (dest) { - struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && - nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) + struct in6_addr saddr_buf, *saddr = NULL; + + if (rt->fib6_prefsrc.plen) { + saddr = &rt->fib6_prefsrc.addr; + } else { + struct net_device *dev = fib6_info_nh_dev(rt); + + if (ipv6_dev_get_saddr(net, dev, dest, 0, + &saddr_buf) == 0) + saddr = &saddr_buf; + } + + if (saddr && nla_put_in6_addr(skb, RTA_PREFSRC, saddr)) goto nla_put_failure; } -- 2.50.1 Pass the relevant flow information, as well as the selected nexthop, into the source address selection code, so that the RFC 6724 rule 5.5 code can look at these details. 
Signed-off-by: David Lamparter --- include/net/addrconf.h | 4 ++++ net/ipv6/addrconf.c | 45 +++++++++++++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78e8b877fb25..577debd34c11 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -141,6 +141,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); +int ipv6_fl_get_saddr(struct net *net, const struct dst_entry *dst, + const struct net_device *dst_dev, + const struct sock *sk, unsigned int srcprefs, + struct flowi6 *fl6); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 40e9c336f6c5..f20367156062 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1533,7 +1533,9 @@ struct ipv6_saddr_score { }; struct ipv6_saddr_dst { - const struct in6_addr *addr; + const struct flowi6 *fl6; + const struct dst_entry *dst; + const struct sock *sk; int ifindex; int scope; int label; @@ -1610,7 +1612,7 @@ static int ipv6_get_saddr_eval(struct net *net, break; case IPV6_SADDR_RULE_LOCAL: /* Rule 1: Prefer same address */ - ret = ipv6_addr_equal(&score->ifa->addr, dst->addr); + ret = ipv6_addr_equal(&score->ifa->addr, &dst->fl6->daddr); break; case IPV6_SADDR_RULE_SCOPE: /* Rule 2: Prefer appropriate scope @@ -1688,11 +1690,11 @@ static int ipv6_get_saddr_eval(struct net *net, * non-ORCHID vs non-ORCHID */ ret = !(ipv6_addr_orchid(&score->ifa->addr) ^ - ipv6_addr_orchid(dst->addr)); + ipv6_addr_orchid(&dst->fl6->daddr)); break; case IPV6_SADDR_RULE_PREFIX: /* Rule 8: Use longest matching prefix */ - ret = ipv6_addr_diff(&score->ifa->addr, dst->addr); + ret = ipv6_addr_diff(&score->ifa->addr, 
&dst->fl6->daddr); if (ret > score->ifa->prefix_len) ret = score->ifa->prefix_len; score->matchlen = ret; @@ -1810,9 +1812,10 @@ static int ipv6_get_saddr_master(struct net *net, return hiscore_idx; } -int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, - const struct in6_addr *daddr, unsigned int prefs, - struct in6_addr *saddr) +int ipv6_fl_get_saddr(struct net *net, const struct dst_entry *dst_entry, + const struct net_device *dst_dev, + const struct sock *sk, unsigned int prefs, + struct flowi6 *fl6) { struct ipv6_saddr_score scores[2], *hiscore; struct ipv6_saddr_dst dst; @@ -1823,11 +1826,13 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, int hiscore_idx = 0; int ret = 0; - dst_type = __ipv6_addr_type(daddr); - dst.addr = daddr; + dst_type = __ipv6_addr_type(&fl6->daddr); + dst.fl6 = fl6; + dst.sk = sk; + dst.dst = dst_entry; dst.ifindex = dst_dev ? dst_dev->ifindex : 0; dst.scope = __ipv6_addr_src_scope(dst_type); - dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex); + dst.label = ipv6_addr_label(net, &fl6->daddr, dst_type, dst.ifindex); dst.prefs = prefs; scores[hiscore_idx].rule = -1; @@ -1902,11 +1907,29 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, if (!hiscore->ifa) ret = -EADDRNOTAVAIL; else - *saddr = hiscore->ifa->addr; + fl6->saddr = hiscore->ifa->addr; rcu_read_unlock(); return ret; } +EXPORT_SYMBOL(ipv6_fl_get_saddr); + +int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, + const struct in6_addr *daddr, unsigned int prefs, + struct in6_addr *saddr) +{ + struct flowi6 fl6; + int ret; + + memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = *daddr; + + ret = ipv6_fl_get_saddr(net, NULL, dst_dev, NULL, prefs, &fl6); + if (!ret) + *saddr = fl6.saddr; + + return ret; +} EXPORT_SYMBOL(ipv6_dev_get_saddr); static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, -- 2.50.1 Flatten ip6_route_get_saddr() into ip6_dst_lookup_tail 
(which really just means handling fib6_prefsrc), and then replace ipv6_dev_get_saddr() with ipv6_fl_get_saddr() to pass down the flow information. Signed-off-by: David Lamparter --- net/ipv6/ip6_output.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f904739e99b9..28406ed5ddfb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1126,27 +1126,40 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int flags = 0; /* The correct way to handle this would be to do - * ip6_route_get_saddr, and then ip6_route_output; however, + * ipv6_fl_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the - * ip6_route_output call _before_ ip6_route_get_saddr. + * ip6_route_output call _before_ ipv6_fl_get_saddr. * * In source specific routing (no src=any default route), * ip6_route_output will fail given src=any saddr, though, so * that's why we try it again later. */ if (ipv6_addr_any(&fl6->saddr)) { + struct net_device *l3mdev; + struct net_device *dev; struct fib6_info *from; struct rt6_info *rt; + bool same_vrf; *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : dst_rt6_info(*dst); rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; - err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, - fl6->flowi6_l3mdev, - &fl6->saddr); + + l3mdev = dev_get_by_index_rcu(net, fl6->flowi6_l3mdev); + if (!from || !from->fib6_prefsrc.plen || l3mdev) + dev = from ? fib6_info_nh_dev(from) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (from && from->fib6_prefsrc.plen && same_vrf) { + fl6->saddr = from->fib6_prefsrc.addr; + err = 0; + } else + err = ipv6_fl_get_saddr(net, *dst, + same_vrf ? dev : l3mdev, sk, + sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6); + rcu_read_unlock(); if (err) -- 2.50.1 ip6_route_get_saddr() is no longer used anywhere, so remove it. 
Signed-off-by: David Lamparter --- include/net/ip6_route.h | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7c5512baa4b2..3b64c30fd882 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -125,32 +125,6 @@ void rt6_flush_exceptions(struct fib6_info *f6i); void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now); -static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, - const struct in6_addr *daddr, - unsigned int prefs, int l3mdev_index, - struct in6_addr *saddr) -{ - struct net_device *l3mdev; - struct net_device *dev; - bool same_vrf; - int err = 0; - - rcu_read_lock(); - - l3mdev = dev_get_by_index_rcu(net, l3mdev_index); - if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) - dev = f6i ? fib6_info_nh_dev(f6i) : NULL; - same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; - if (f6i && f6i->fib6_prefsrc.plen && same_vrf) - *saddr = f6i->fib6_prefsrc.addr; - else - err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); - - rcu_read_unlock(); - - return err; -} - struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int flags); -- 2.50.1