Commit 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.") introduced a per-table GC list and changed GC to iterate over that list instead of traversing the entire route table. However, it forgot to add permanent routes to tb6_gc_hlist when exception routes are added. Commit cfe82469a00f ("ipv6: add exception routes to GC list in rt6_insert_exception") fixed that issue but introduced another one. Even after all exception routes expire, the permanent routes remain in tb6_gc_hlist, potentially negating the performance benefits intended by the initial change. Let's count gc_args->more before and after rt6_age_exceptions() and remove the permanent route when the delta is 0. Note that the next patch will reuse fib6_age_exceptions(). Fixes: cfe82469a00f ("ipv6: add exception routes to GC list in rt6_insert_exception") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Xin Long --- net/ipv6/ip6_fib.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9058e71241dc..fadfca49d6b1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2348,6 +2348,17 @@ static void fib6_flush_trees(struct net *net) /* * Garbage collection */ +static void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now) +{ + bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires; + int old_more = gc_args->more; + + rt6_age_exceptions(rt, gc_args, now); + + if (!may_expire && old_more == gc_args->more) + fib6_remove_gc_list(rt); +} static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { @@ -2370,7 +2381,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Note, that clones are aged out * only if they are not in use now. */ - rt6_age_exceptions(rt, gc_args, now); + fib6_age_exceptions(rt, gc_args, now); return 0; } -- 2.53.0.959.g497ff81fa9-goog The cited commit mechanically put fib6_remove_gc_list() just after every fib6_clean_expires() call. When a temporary route is promoted to a permanent route, there may already be exception routes tied to it. If fib6_remove_gc_list() removes the route from tb6_gc_hlist, such exception routes will no longer be aged. Let's replace fib6_remove_gc_list() with a new helper fib6_may_remove_gc_list() and use fib6_age_exceptions() there. Note that net->ipv6 is only compiled when CONFIG_IPV6 is enabled, so fib6_{add,remove,may_remove}_gc_list() are guarded. Fixes: 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.") Signed-off-by: Kuniyuki Iwashima --- v6: Call fib6_age_exceptions() under RCU v4: s/,/;/ for fib6_gc_args initialisation v3: Use IS_ENABLED() v2: Fix build failure when CONFIG_IPV6=n (no net->ipv6 definition) squash --- include/net/ip6_fib.h | 21 ++++++++++++++++++++- net/ipv6/addrconf.c | 4 ++-- net/ipv6/ip6_fib.c | 6 +++--- net/ipv6/route.c | 2 +- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 88b0dd4d8e09..9f8b6814a96a 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -507,12 +507,14 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt, void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now); void fib6_run_gc(unsigned long expires, struct net *net, bool force); - void fib6_gc_cleanup(void); int fib6_init(void); +#if IS_ENABLED(CONFIG_IPV6) /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. @@ -545,6 +547,23 @@ static inline void fib6_remove_gc_list(struct fib6_info *f6i) hlist_del_init(&f6i->gc_link); } +static inline void fib6_may_remove_gc_list(struct net *net, + struct fib6_info *f6i) +{ + struct fib6_gc_args gc_args; + + if (hlist_unhashed(&f6i->gc_link)) + return; + + gc_args.timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_interval); + gc_args.more = 0; + + rcu_read_lock(); + fib6_age_exceptions(f6i, &gc_args, jiffies); + rcu_read_unlock(); +} +#endif + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e55f139e05d..f4e23b543585 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2862,7 +2862,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) fib6_add_gc_list(rt); } else { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } spin_unlock_bh(&table->tb6_lock); @@ -4840,7 +4840,7 @@ static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp, if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); - fib6_remove_gc_list(f6i); + fib6_may_remove_gc_list(net, f6i); } else { fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fadfca49d6b1..dd26657b6a4a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1133,7 +1133,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - fib6_remove_gc_list(iter); + fib6_may_remove_gc_list(info->nl_net, iter); } else { fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); @@ -2348,8 +2348,8 @@ static void fib6_flush_trees(struct net *net) /* * Garbage collection */ -static void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, - unsigned long now) +void fib6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, + unsigned long now) { bool may_expire = rt->fib6_flags & RTF_EXPIRES && rt->expires; int old_more = gc_args->more; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08cd86f49bf9..cb521700cee7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1033,7 +1033,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - fib6_remove_gc_list(rt); + fib6_may_remove_gc_list(net, rt); } else { fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_add_gc_list(rt); -- 2.53.0.959.g497ff81fa9-goog Without the prior commit, IPv6 GC cannot track exceptions tied to permanent routes if they were originally added as temporary routes. Let's add a test case for the issue. 1. Add temporary routes 2. Create exceptions for the temporary routes 3. Promote the routes to permanent routes 4. Check if GC can find and purge the exceptions A few notes: + At step 4, unlike other test cases, we cannot wait for $GC_WAIT_TIME. While the exceptions are always iterable via netlink (since it traverses the entire fib tree instead of tb6_gc_hlist), rt6_nh_dump_exceptions() skips expired entries. If we waited for the expiration time, we would be unable to distinguish whether the exceptions were truly purged by GC or just hidden due to being expired. + For the same reason, at step 2, we use ICMPv6 redirect message instead of Packet Too Big message. This is because MTU exceptions always have RTF_EXPIRES, and rt6_age_examine_exception() does not respect the period specified by net.ipv6.route.flush=1. + We add a neighbour entry for the redirect target with NTF_ROUTER. Without this, the exceptions would be removed at step 3 when the fib6_may_remove_gc_list() is called. Without the fix, the exceptions remain even after GC is triggered by sysctl -wq net.ipv6.route.flush=1. FAIL: Expected 0 routes, got 5 TEST: ipv6 route garbage collection (promote to permanent routes) [FAIL] With the fix, GC purges the exceptions properly. TEST: ipv6 route garbage collection (promote to permanent routes) [ OK ] Signed-off-by: Kuniyuki Iwashima --- tools/testing/selftests/net/fib_tests.sh | 61 ++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index c5694cc4ddd2..829f72c8ee07 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -868,6 +868,64 @@ fib6_gc_test() check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 + + # rd6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v rd6)" ]; then + echo "SKIP: rd6 not found." + set +e + cleanup &> /dev/null + return + fi + + setup_ns ns2 + $IP link add veth1 type veth peer veth2 netns $ns2 + $IP link set veth1 up + ip -netns $ns2 link set veth2 up + $IP addr add fe80:dead::1/64 dev veth1 + ip -netns $ns2 addr add fe80:dead::2/64 dev veth2 + + # Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception() + # from removing not-yet-expired exceptions. + ip -netns $ns2 link set veth2 address 00:11:22:33:44:55 + $IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:10::$i \ + via fe80:dead::2 dev veth1 expires $EXPIRE + + ip netns exec $ns2 rd6 -i veth2 \ + -s fe80:dead::2 -d fe80:dead::1 \ + -r 2001:10::$i -t fe80:dead::3 -p ICMP6 + done + + check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l) + + # Promote to permanent routes by "prepend" (w/o NLM_F_EXCL and NLM_F_REPLACE) + for i in $(seq 1 5); do + # -EEXIST, but the temporary route becomes the permanent route. + $IP -6 route append 2001:10::$i \ + via fe80:dead::2 dev veth1 2>/dev/null || true + done + + check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l) + check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l) + + # Trigger GC instead of waiting $GC_WAIT_TIME. + # rt6_nh_dump_exceptions() just skips expired exceptions. + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l) + log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)" + + $IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router + $IP link del veth1 + # ra6 is required for the next test. (ipv6toolkit) if [ ! -x "$(command -v ra6)" ]; then echo "SKIP: ra6 not found." @@ -876,9 +934,6 @@ fib6_gc_test() return fi - # Delete dummy_10 and remove all routes - $IP link del dev dummy_10 - # Create a pair of veth devices to send a RA message from one # device to another. $IP link add veth1 type veth peer name veth2 -- 2.53.0.959.g497ff81fa9-goog